File size: 2,927 Bytes
8f4d405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e3f60e
a4d66c0
 
 
 
 
8f4d405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# config.py
import os
from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    """Application settings for the HF Spaces deployment.

    NOTE(review): every default below is resolved with ``os.getenv`` at
    class-definition (import) time, so the value is baked into the field
    default before pydantic-settings performs its own env lookup. This
    works, but means ``env_file`` loading via pydantic only matters for
    variables not already present in the process environment at import.
    """

    # HF Spaces specific settings
    hf_token: str = os.getenv("HF_TOKEN", "")  # empty string when no token is provided
    hf_cache_dir: str = os.getenv("HF_HOME", "/tmp/huggingface")
    
    # Model settings — repo ids on the Hugging Face Hub
    default_model: str = "mistralai/Mistral-7B-Instruct-v0.2"
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    classification_model: str = "cardiffnlp/twitter-roberta-base-emotion"
    
    # Performance settings
    max_workers: int = int(os.getenv("MAX_WORKERS", "4"))
    cache_ttl: int = int(os.getenv("CACHE_TTL", "3600"))  # seconds
    
    # Database settings
    # Use /tmp for writable location in Docker containers
    # Check if we're in Docker (HF Spaces) - if so, use /tmp
    # NOTE(review): ``os.path.exists("/tmp")`` is true on virtually every
    # Unix host, so this condition is effectively always true outside
    # Windows and the ``/.dockerenv`` probe is redundant — confirm whether
    # local (non-Docker) runs were ever meant to use the relative paths.
    _default_db_path = "/tmp/sessions.db" if os.path.exists("/.dockerenv") or os.path.exists("/tmp") else "sessions.db"
    db_path: str = os.getenv("DB_PATH", _default_db_path)
    _default_faiss_path = "/tmp/embeddings.faiss" if os.path.exists("/.dockerenv") or os.path.exists("/tmp") else "embeddings.faiss"
    faiss_index_path: str = os.getenv("FAISS_INDEX_PATH", _default_faiss_path)
    
    # Session settings
    session_timeout: int = int(os.getenv("SESSION_TIMEOUT", "3600"))  # seconds
    max_session_size_mb: int = int(os.getenv("MAX_SESSION_SIZE_MB", "10"))
    
    # Mobile optimization settings
    mobile_max_tokens: int = int(os.getenv("MOBILE_MAX_TOKENS", "800"))
    mobile_timeout: int = int(os.getenv("MOBILE_TIMEOUT", "15000"))  # presumably milliseconds — TODO confirm against caller
    
    # Gradio settings
    gradio_port: int = int(os.getenv("GRADIO_PORT", "7860"))  # 7860 is the HF Spaces default port
    gradio_host: str = os.getenv("GRADIO_HOST", "0.0.0.0")
    
    # Logging settings
    log_level: str = os.getenv("LOG_LEVEL", "INFO")
    log_format: str = os.getenv("LOG_FORMAT", "json")
    
    # NOTE(review): the inner ``Config`` class is the pydantic-v1 style;
    # pydantic-settings v2 prefers ``model_config = SettingsConfigDict(env_file=".env")``.
    # It still works (deprecated), so left unchanged here.
    class Config:
        env_file = ".env"

# Module-level singleton imported by the rest of the application.
settings = Settings()

# Context configuration

def _env_int(name: str, default: str) -> int:
    """Read an integer env var *name*, falling back to *default* when unset."""
    return int(os.getenv(name, default))


def _env_float(name: str, default: str) -> float:
    """Read a float env var *name*, falling back to *default* when unset."""
    return float(os.getenv(name, default))


def _env_bool(name: str, default: str) -> bool:
    """Read a boolean flag from the environment.

    Accepts the common truthy spellings ("true", "1", "yes", "on"),
    case-insensitively and ignoring surrounding whitespace. The previous
    inline check only recognised "true", silently treating values such as
    "1" or "yes" as disabled. Unset vars use *default* (e.g. "True").
    """
    return os.getenv(name, default).strip().lower() in ("true", "1", "yes", "on")


# Tunables for the context pipeline; every entry can be overridden via env.
CONTEXT_CONFIG = {
    'max_context_tokens': _env_int("MAX_CONTEXT_TOKENS", "4000"),
    'cache_ttl_seconds': _env_int("CACHE_TTL_SECONDS", "300"),
    'max_cache_size': _env_int("MAX_CACHE_SIZE", "100"),
    'parallel_processing': _env_bool("PARALLEL_PROCESSING", "True"),
    'context_decay_factor': _env_float("CONTEXT_DECAY_FACTOR", "0.8"),
    'max_interactions_to_keep': _env_int("MAX_INTERACTIONS_TO_KEEP", "10"),
    'enable_metrics': _env_bool("ENABLE_METRICS", "True"),
    'compression_enabled': _env_bool("COMPRESSION_ENABLED", "True"),
    'summarization_threshold': _env_int("SUMMARIZATION_THRESHOLD", "2000")  # tokens
}

# Model selection for context operations
# Each context-pipeline role maps to a Hub repo id; every role can be
# pointed at a different model through its dedicated environment variable.
CONTEXT_MODELS = {
    role: os.getenv(env_var, fallback)
    for role, env_var, fallback in (
        ('summarization', "CONTEXT_SUMMARIZATION_MODEL", "Qwen/Qwen2.5-7B-Instruct"),
        ('intent', "CONTEXT_INTENT_MODEL", "Qwen/Qwen2.5-7B-Instruct"),
        ('synthesis', "CONTEXT_SYNTHESIS_MODEL", "Qwen/Qwen2.5-72B-Instruct"),
    )
}