first commit

This commit is contained in:
2026-03-06 21:11:10 +08:00
commit 927b8a6cac
144 changed files with 26301 additions and 0 deletions

View File

17
configs/rerankers/base.py Normal file
View File

@@ -0,0 +1,17 @@
from typing import Optional
from pydantic import BaseModel, Field
class BaseRerankerConfig(BaseModel):
    """Configuration fields shared by every reranker provider.

    Only options common to all providers live here. Anything
    provider-specific (devices, batch sizes, prompts, ...) belongs in the
    corresponding provider subclass.
    """

    # Backend identifier, e.g. "cohere" or "sentence_transformer".
    provider: Optional[str] = Field(
        description="The reranker provider to use",
        default=None,
    )
    # Provider-specific model name; subclasses override the default.
    model: Optional[str] = Field(
        description="The reranker model to use",
        default=None,
    )
    # Credential for hosted reranking services; local models ignore it.
    api_key: Optional[str] = Field(
        description="The API key for the reranker service",
        default=None,
    )
    # Cap on how many documents survive reranking; None means no cap here.
    top_k: Optional[int] = Field(
        description="Maximum number of documents to return after reranking",
        default=None,
    )

View File

@@ -0,0 +1,15 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class CohereRerankerConfig(BaseRerankerConfig):
    """Cohere-specific reranker settings.

    Extends :class:`BaseRerankerConfig` with options that only the Cohere
    rerank API understands.
    """

    # Overrides the base default (None) with Cohere's rerank model name.
    model: Optional[str] = Field(
        description="The Cohere rerank model to use",
        default="rerank-english-v3.0",
    )
    # When True the API echoes document texts back in its response.
    return_documents: bool = Field(
        description="Whether to return the document texts in the response",
        default=False,
    )
    # Limits internal chunking of long documents; None leaves it to the API.
    max_chunks_per_doc: Optional[int] = Field(
        description="Maximum number of chunks per document",
        default=None,
    )

View File

@@ -0,0 +1,12 @@
from typing import Optional
from pydantic import BaseModel, Field
class RerankerConfig(BaseModel):
    """Top-level reranker selection: which provider to use plus its options.

    The ``config`` dict is passed through to the chosen provider's own
    config class, so its keys are provider-specific.
    """

    # Name of the reranker backend to instantiate.
    provider: str = Field(
        default="cohere",
        description="Reranker provider (e.g., 'cohere', 'sentence_transformer')",
    )
    # Raw provider options; validated later by the provider's config class.
    config: Optional[dict] = Field(
        default=None,
        description="Provider-specific reranker configuration",
    )

    # Reject unknown keys so typos surface as validation errors.
    model_config = {"extra": "forbid"}

View File

@@ -0,0 +1,17 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class HuggingFaceRerankerConfig(BaseRerankerConfig):
    """HuggingFace-specific reranker settings.

    Extends :class:`BaseRerankerConfig` with options for running a local
    HuggingFace cross-encoder style reranker.
    """

    # Overrides the base default (None) with a concrete HF model id.
    model: Optional[str] = Field(
        description="The HuggingFace model to use for reranking",
        default="BAAI/bge-reranker-base",
    )
    # None lets the runtime pick; otherwise e.g. "cpu" or "cuda".
    device: Optional[str] = Field(
        description="Device to run the model on ('cpu', 'cuda', etc.)",
        default=None,
    )
    # How many documents are scored per forward pass.
    batch_size: int = Field(
        description="Batch size for processing documents",
        default=32,
    )
    # Token-length cap applied during tokenization.
    max_length: int = Field(
        description="Maximum length for tokenization",
        default=512,
    )
    # Whether raw model scores are normalized before being returned.
    normalize: bool = Field(
        description="Whether to normalize scores",
        default=True,
    )

48
configs/rerankers/llm.py Normal file
View File

@@ -0,0 +1,48 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class LLMRerankerConfig(BaseRerankerConfig):
    """Configuration for a reranker that scores documents with an LLM.

    Attributes:
        model (str): LLM model used for scoring. Defaults to "gpt-4o-mini".
        api_key (Optional[str]): API key for the LLM provider, if required.
        provider (str): LLM provider name. Defaults to "openai".
        top_k (Optional[int]): How many top documents to keep after reranking.
        temperature (float): Sampling temperature; 0.0 keeps scoring deterministic.
        max_tokens (int): Token budget for the LLM's scoring response.
        scoring_prompt (Optional[str]): Custom prompt template for scoring documents.
    """

    # Overrides the base Optional[str] with a concrete required-with-default model.
    model: str = Field(default="gpt-4o-mini", description="LLM model to use for reranking")
    # Credential for the LLM backend; None when the provider needs none.
    api_key: Optional[str] = Field(default=None, description="API key for the LLM provider")
    # Which LLM backend issues the scoring calls.
    provider: str = Field(default="openai", description="LLM provider (openai, anthropic, etc.)")
    # Cap on documents returned after reranking; None means no cap here.
    top_k: Optional[int] = Field(default=None, description="Number of top documents to return after reranking")
    # 0.0 by default so repeated scoring of the same document is stable.
    temperature: float = Field(default=0.0, description="Temperature for LLM generation")
    # Scoring replies are short, so the budget is small by default.
    max_tokens: int = Field(default=100, description="Maximum tokens for LLM response")
    # None falls back to whatever default prompt the reranker implements.
    scoring_prompt: Optional[str] = Field(default=None, description="Custom prompt template for scoring documents")

View File

@@ -0,0 +1,16 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class SentenceTransformerRerankerConfig(BaseRerankerConfig):
    """Sentence-Transformers cross-encoder reranker settings.

    Extends :class:`BaseRerankerConfig` with options for running a local
    cross-encoder model via the sentence-transformers library.
    """

    # Overrides the base default (None) with a standard cross-encoder id.
    model: Optional[str] = Field(
        description="The cross-encoder model name to use",
        default="cross-encoder/ms-marco-MiniLM-L-6-v2",
    )
    # None lets the library choose; otherwise e.g. "cpu" or "cuda".
    device: Optional[str] = Field(
        description="Device to run the model on ('cpu', 'cuda', etc.)",
        default=None,
    )
    # Number of query/document pairs scored per batch.
    batch_size: int = Field(
        description="Batch size for processing documents",
        default=32,
    )
    # Progress-bar display is off by default to keep logs quiet.
    show_progress_bar: bool = Field(
        description="Whether to show progress bar during processing",
        default=False,
    )

View File

@@ -0,0 +1,28 @@
from typing import Optional
from pydantic import Field
from mem0.configs.rerankers.base import BaseRerankerConfig
class ZeroEntropyRerankerConfig(BaseRerankerConfig):
    """Zero Entropy reranker settings.

    Attributes:
        model (str): Model used for reranking. Defaults to "zerank-1".
        api_key (Optional[str]): Zero Entropy API key. A fallback to the
            ZERO_ENTROPY_API_KEY environment variable is presumably handled
            by the reranker implementation, not by this config class.
        top_k (Optional[int]): How many top documents to keep after reranking.
    """

    # Overrides the base Optional[str] with a concrete default model.
    model: str = Field(default="zerank-1", description="Model to use for reranking. Available models: zerank-1, zerank-1-small")
    # None defers credential resolution to the reranker implementation.
    api_key: Optional[str] = Field(default=None, description="Zero Entropy API key")
    # Cap on documents returned after reranking; None means no cap here.
    top_k: Optional[int] = Field(default=None, description="Number of top documents to return after reranking")