Spaces:
Running
Running
| import numpy as np | |
| from scipy.sparse import csr_matrix | |
| from typing import Dict, List | |
| def convert_sparse_to_csr(sparse_dict: Dict[str, List]) -> csr_matrix: | |
| """ | |
| Convert sparse embedding to scipy CSR matrix | |
| API format: {"indices": [10, 25, 42], "values": [0.85, 0.62, 0.91]} | |
| Milvus format: scipy.sparse.csr_matrix with shape (1, max_dimension) | |
| Args: | |
| sparse_dict: Dictionary with 'indices' and 'values' | |
| Returns: | |
| scipy CSR matrix | |
| """ | |
| indices = sparse_dict["indices"] | |
| values = sparse_dict["values"] | |
| max_dim = max(indices) + 1 if indices else 1 | |
| # Create CSR matrix | |
| # Shape: (1, max_dim) karena ini single vector | |
| row_indices = [0] * len(indices) # Semua di row 0 | |
| col_indices = indices | |
| sparse_matrix = csr_matrix( | |
| (values, (row_indices, col_indices)), | |
| shape=(1, max_dim) | |
| ) | |
| return sparse_matrix | |
| def batch_convert_sparse_to_csr(sparse_list: List[Dict[str, List]]) -> csr_matrix: | |
| """ | |
| Convert batch of sparse embeddings to single CSR matrix | |
| Args: | |
| sparse_list: List of sparse dicts | |
| Returns: | |
| scipy CSR matrix with shape (batch_size, max_dim) | |
| """ | |
| if not sparse_list: | |
| return csr_matrix((0, 0)) | |
| max_dim = 0 | |
| for sparse_dict in sparse_list: | |
| if sparse_dict["indices"]: | |
| max_dim = max(max_dim, max(sparse_dict["indices"]) + 1) | |
| if max_dim == 0: | |
| max_dim = 30000 # Default vocab size for SPLADE | |
| # Build row indices, column indices, and values | |
| row_indices = [] | |
| col_indices = [] | |
| values = [] | |
| for row_idx, sparse_dict in enumerate(sparse_list): | |
| indices = sparse_dict["indices"] | |
| vals = sparse_dict["values"] | |
| row_indices.extend([row_idx] * len(indices)) | |
| col_indices.extend(indices) | |
| values.extend(vals) | |
| # Create CSR matrix | |
| sparse_matrix = csr_matrix( | |
| (values, (row_indices, col_indices)), | |
| shape=(len(sparse_list), max_dim) | |
| ) | |
| return sparse_matrix |