```python
import numpy as np
import torch
from typing import List
from sentence_transformers import SentenceTransformer

# Module-level caches: embeddings are keyed by model name and text,
# and loaded models are reused across calls.
_embedding_cache = {}
_model_cache = {}
def generate_embeddings(
    texts: List[str],
    model_name: str = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
    batch_size: int = 32,
    use_mps: bool = True,
) -> np.ndarray:
    """
    Generate embeddings for text chunks with caching.

    Args:
        texts: List of text chunks to embed.
        model_name: SentenceTransformer model identifier.
        batch_size: Encoding batch size.
        use_mps: Use Apple Silicon (MPS) acceleration when available.

    Returns:
        numpy array of shape (len(texts), embedding_dim).

    Performance targets:
        - 100 texts/second on an M4 Pro
        - 384-dimensional embeddings (for the default model)
        - Memory usage under 500 MB
    """
    # Partition texts into cache hits and texts that still need encoding,
    # remembering each text's position so order can be restored later.
    cache_keys = [f"{model_name}:{text}" for text in texts]
    cached_embeddings = []
    texts_to_compute = []
    compute_indices = []
    for i, key in enumerate(cache_keys):
        if key in _embedding_cache:
            cached_embeddings.append((i, _embedding_cache[key]))
        else:
            texts_to_compute.append(texts[i])
            compute_indices.append(i)
    # Load the model on first use and keep it cached for later calls
    if model_name not in _model_cache:
        model = SentenceTransformer(model_name)
        device = "mps" if use_mps and torch.backends.mps.is_available() else "cpu"
        model = model.to(device)
        model.eval()
        _model_cache[model_name] = model
    else:
        model = _model_cache[model_name]
    # Encode only the texts that missed the cache
    if texts_to_compute:
        with torch.no_grad():
            new_embeddings = model.encode(
                texts_to_compute,
                batch_size=batch_size,
                convert_to_numpy=True,
                normalize_embeddings=False,
            ).astype(np.float32)

        # Cache the newly computed embeddings
        for i, text in enumerate(texts_to_compute):
            key = f"{model_name}:{text}"
            _embedding_cache[key] = new_embeddings[i]
    # Reassemble the embeddings in the caller's original order. The
    # embedding dimension is taken from the model rather than hardcoded
    # to 384, so non-default models work too.
    dim = model.get_sentence_embedding_dimension()
    result = np.zeros((len(texts), dim), dtype=np.float32)
    # Fill cached embeddings
    for idx, embedding in cached_embeddings:
        result[idx] = embedding
    # Fill newly computed embeddings
    if texts_to_compute:
        for i, original_idx in enumerate(compute_indices):
            result[original_idx] = new_embeddings[i]
    return result
```
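
A minimal usage sketch (the sample strings below are illustrative, not part of the original): repeated chunks are served from `_embedding_cache`, so the duplicate costs no model call and yields an identical vector.

```python
if __name__ == "__main__":
    chunks = [
        "Transformers encode text into dense vectors.",
        "Apple Silicon accelerates inference via MPS.",
        "Transformers encode text into dense vectors.",  # duplicate: cache hit
    ]
    embeddings = generate_embeddings(chunks)
    print(embeddings.shape)  # (3, 384) for the default model

    # Identical chunks should yield identical vectors (cosine similarity 1.0)
    a, b = embeddings[0], embeddings[2]
    sim = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
    print(f"similarity: {sim:.3f}")
```

Note that `_embedding_cache` grows without bound; to actually hold the stated 500 MB target over long runs, an eviction policy (e.g. LRU) would be needed.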