first commit

This commit is contained in:
2026-03-06 21:11:10 +08:00
commit 927b8a6cac
144 changed files with 26301 additions and 0 deletions

View File

17
configs/rerankers/base.py Normal file
View File

@@ -0,0 +1,17 @@
from typing import Optional
from pydantic import BaseModel, Field
class BaseRerankerConfig(BaseModel):
    """Configuration fields shared by every reranker provider.

    Only options common to all providers live here. Anything
    provider-specific (devices, batch sizes, prompts, ...) belongs in the
    corresponding provider subclass.
    """

    # Backend identifier, e.g. "cohere" or "sentence_transformer".
    provider: Optional[str] = Field(
        description="The reranker provider to use",
        default=None,
    )
    # Provider-specific model name; subclasses override the default.
    model: Optional[str] = Field(
        description="The reranker model to use",
        default=None,
    )
    # Credential for hosted reranking services; local models ignore it.
    api_key: Optional[str] = Field(
        description="The API key for the reranker service",
        default=None,
    )
    # Cap on how many documents survive reranking; None means no cap here.
    top_k: Optional[int] = Field(
        description="Maximum number of documents to return after reranking",
        default=None,
    )

View File

@@ -0,0 +1,15 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class CohereRerankerConfig(BaseRerankerConfig):
    """Cohere-specific reranker settings.

    Extends :class:`BaseRerankerConfig` with options that only the Cohere
    rerank API understands.
    """

    # Overrides the base default (None) with Cohere's rerank model name.
    model: Optional[str] = Field(
        description="The Cohere rerank model to use",
        default="rerank-english-v3.0",
    )
    # When True the API echoes document texts back in its response.
    return_documents: bool = Field(
        description="Whether to return the document texts in the response",
        default=False,
    )
    # Limits internal chunking of long documents; None leaves it to the API.
    max_chunks_per_doc: Optional[int] = Field(
        description="Maximum number of chunks per document",
        default=None,
    )

View File

@@ -0,0 +1,12 @@
from typing import Optional
from pydantic import BaseModel, Field
class RerankerConfig(BaseModel):
    """Top-level reranker selection: which provider to use plus its options.

    The ``config`` dict is passed through to the chosen provider's own
    config class, so its keys are provider-specific.
    """

    # Name of the reranker backend to instantiate.
    provider: str = Field(
        default="cohere",
        description="Reranker provider (e.g., 'cohere', 'sentence_transformer')",
    )
    # Raw provider options; validated later by the provider's config class.
    config: Optional[dict] = Field(
        default=None,
        description="Provider-specific reranker configuration",
    )

    # Reject unknown keys so typos surface as validation errors.
    model_config = {"extra": "forbid"}

View File

@@ -0,0 +1,17 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class HuggingFaceRerankerConfig(BaseRerankerConfig):
    """HuggingFace-specific reranker settings.

    Extends :class:`BaseRerankerConfig` with options for running a local
    HuggingFace cross-encoder style reranker.
    """

    # Overrides the base default (None) with a concrete HF model id.
    model: Optional[str] = Field(
        description="The HuggingFace model to use for reranking",
        default="BAAI/bge-reranker-base",
    )
    # None lets the runtime pick; otherwise e.g. "cpu" or "cuda".
    device: Optional[str] = Field(
        description="Device to run the model on ('cpu', 'cuda', etc.)",
        default=None,
    )
    # How many documents are scored per forward pass.
    batch_size: int = Field(
        description="Batch size for processing documents",
        default=32,
    )
    # Token-length cap applied during tokenization.
    max_length: int = Field(
        description="Maximum length for tokenization",
        default=512,
    )
    # Whether raw model scores are normalized before being returned.
    normalize: bool = Field(
        description="Whether to normalize scores",
        default=True,
    )

48
configs/rerankers/llm.py Normal file
View File

@@ -0,0 +1,48 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class LLMRerankerConfig(BaseRerankerConfig):
    """Configuration for a reranker that scores documents with an LLM.

    Attributes:
        model (str): LLM model used for scoring. Defaults to "gpt-4o-mini".
        api_key (Optional[str]): API key for the LLM provider, if required.
        provider (str): LLM provider name. Defaults to "openai".
        top_k (Optional[int]): How many top documents to keep after reranking.
        temperature (float): Sampling temperature; 0.0 keeps scoring deterministic.
        max_tokens (int): Token budget for the LLM's scoring response.
        scoring_prompt (Optional[str]): Custom prompt template for scoring documents.
    """

    # Overrides the base Optional[str] with a concrete required-with-default model.
    model: str = Field(default="gpt-4o-mini", description="LLM model to use for reranking")
    # Credential for the LLM backend; None when the provider needs none.
    api_key: Optional[str] = Field(default=None, description="API key for the LLM provider")
    # Which LLM backend issues the scoring calls.
    provider: str = Field(default="openai", description="LLM provider (openai, anthropic, etc.)")
    # Cap on documents returned after reranking; None means no cap here.
    top_k: Optional[int] = Field(default=None, description="Number of top documents to return after reranking")
    # 0.0 by default so repeated scoring of the same document is stable.
    temperature: float = Field(default=0.0, description="Temperature for LLM generation")
    # Scoring replies are short, so the budget is small by default.
    max_tokens: int = Field(default=100, description="Maximum tokens for LLM response")
    # None falls back to whatever default prompt the reranker implements.
    scoring_prompt: Optional[str] = Field(default=None, description="Custom prompt template for scoring documents")

View File

@@ -0,0 +1,16 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class SentenceTransformerRerankerConfig(BaseRerankerConfig):
    """Sentence-Transformers cross-encoder reranker settings.

    Extends :class:`BaseRerankerConfig` with options for running a local
    cross-encoder model via the sentence-transformers library.
    """

    # Overrides the base default (None) with a standard cross-encoder id.
    model: Optional[str] = Field(
        description="The cross-encoder model name to use",
        default="cross-encoder/ms-marco-MiniLM-L-6-v2",
    )
    # None lets the library choose; otherwise e.g. "cpu" or "cuda".
    device: Optional[str] = Field(
        description="Device to run the model on ('cpu', 'cuda', etc.)",
        default=None,
    )
    # Number of query/document pairs scored per batch.
    batch_size: int = Field(
        description="Batch size for processing documents",
        default=32,
    )
    # Progress-bar display is off by default to keep logs quiet.
    show_progress_bar: bool = Field(
        description="Whether to show progress bar during processing",
        default=False,
    )

View File

@@ -0,0 +1,28 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class ZeroEntropyRerankerConfig(BaseRerankerConfig):
    """Zero Entropy reranker settings.

    Attributes:
        model (str): Model used for reranking. Defaults to "zerank-1".
        api_key (Optional[str]): Zero Entropy API key. A fallback to the
            ZERO_ENTROPY_API_KEY environment variable is presumably handled
            by the reranker implementation, not by this config class.
        top_k (Optional[int]): How many top documents to keep after reranking.
    """

    # Overrides the base Optional[str] with a concrete default model.
    model: str = Field(default="zerank-1", description="Model to use for reranking. Available models: zerank-1, zerank-1-small")
    # None defers credential resolution to the reranker implementation.
    api_key: Optional[str] = Field(default=None, description="Zero Entropy API key")
    # Cap on documents returned after reranking; None means no cap here.
    top_k: Optional[int] = Field(default=None, description="Number of top documents to return after reranking")