first commit

2026-03-06 21:11:10 +08:00
commit 927b8a6cac
144 changed files with 26301 additions and 0 deletions
--- a/embeddings/vertexai.py
+++ b/embeddings/vertexai.py
@@ -0,0 +1,64 @@
+import os
+from typing import Literal, Optional
+
+from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
+
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+from mem0.embeddings.base import EmbeddingBase
+from mem0.utils.gcp_auth import GCPAuthenticator
+
+
+class VertexAIEmbedding(EmbeddingBase):
+    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
+        super().__init__(config)
+
+        self.config.model = self.config.model or "text-embedding-004"
+        self.config.embedding_dims = self.config.embedding_dims or 256
+
+        self.embedding_types = {
+            "add": self.config.memory_add_embedding_type or "RETRIEVAL_DOCUMENT",
+            "update": self.config.memory_update_embedding_type or "RETRIEVAL_DOCUMENT",
+            "search": self.config.memory_search_embedding_type or "RETRIEVAL_QUERY",
+        }
+
+        # Set up authentication using centralized GCP authenticator
+        # This supports multiple authentication methods while preserving environment variable support
+        try:
+            GCPAuthenticator.setup_vertex_ai(
+                service_account_json=getattr(self.config, 'google_service_account_json', None),
+                credentials_path=self.config.vertex_credentials_json,
+                project_id=getattr(self.config, 'google_project_id', None)
+            )
+        except Exception:
+            # Fall back to original behavior for backward compatibility
+            credentials_path = self.config.vertex_credentials_json
+            if credentials_path:
+                os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
+            elif not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
+                raise ValueError(
+                    "Google application credentials JSON is not provided. Please provide a valid JSON path or set the 'GOOGLE_APPLICATION_CREDENTIALS' environment variable."
+                )
+
+        self.model = TextEmbeddingModel.from_pretrained(self.config.model)
+
+    def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
+        """
+        Get the embedding for the given text using Vertex AI.
+
+        Args:
+            text (str): The text to embed.
+            memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
+        Returns:
+            list: The embedding vector.
+        """
+        embedding_type = "SEMANTIC_SIMILARITY"
+        if memory_action is not None:
+            if memory_action not in self.embedding_types:
+                raise ValueError(f"Invalid memory action: {memory_action}")
+
+            embedding_type = self.embedding_types[memory_action]
+
+        text_input = TextEmbeddingInput(text=text, task_type=embedding_type)
+        embeddings = self.model.get_embeddings(texts=[text_input], output_dimensionality=self.config.embedding_dims)
+
+        return embeddings[0].values