# =============================================================================
# Research AI Assistant API - Environment Configuration
# =============================================================================
# Copy this content to a file named .env and fill in your actual values.
# Never commit .env to version control!

# =============================================================================
# ZeroGPU Chat API Configuration (REQUIRED)
# =============================================================================
# Base URL for your ZeroGPU Chat API endpoint (RunPod)
# Format: http://your-pod-ip:8000 or https://your-domain.com
# Example: http://bm9njt1ypzvuqw-8000.proxy.runpod.net
ZEROGPU_BASE_URL=http://your-pod-ip:8000

# Email for authentication (register first via the /register endpoint)
[email protected]

# Password for authentication
ZEROGPU_PASSWORD=your_secure_password_here

# =============================================================================
# Token Allocation Configuration
# =============================================================================
# Maximum tokens dedicated to user input (prioritized over context)
# Recommended: 32000 tokens for DeepSeek R1 (128K context window)
USER_INPUT_MAX_TOKENS=32000

# Maximum tokens for context preparation (user input + retrieved context)
# Recommended: 115000 tokens for DeepSeek R1 (leaves ~13K for output)
CONTEXT_PREPARATION_BUDGET=115000

# Context pruning threshold (should match CONTEXT_PREPARATION_BUDGET)
CONTEXT_PRUNING_THRESHOLD=115000

# Always prioritize user input over historical context
PRIORITIZE_USER_INPUT=True
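# -----------------------------------------------------------------------------
# Worked example (illustrative; assumes the budgets above are enforced as a
# simple sum against the model context window, which this file does not spell
# out):
#   output headroom = model context window - CONTEXT_PREPARATION_BUDGET
#   128000 - 115000 = 13000 tokens left for generation (the "~13K" above)
# If you keep the default ZEROGPU_MODEL_CONTEXT_WINDOW=8192 below, scale the
# budgets down accordingly, e.g. a 6500-token budget leaves ~1700 for output.
# -----------------------------------------------------------------------------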
# Model context window (actual limit for your deployed model)
# Default: 8192 tokens (adjust based on your model)
# This is the maximum total number of tokens (input + output) the model can handle
# Common values: 4096, 8192, 16384, 32768, etc.
ZEROGPU_MODEL_CONTEXT_WINDOW=8192

# =============================================================================
# Database Configuration
# =============================================================================
# SQLite database path (default: sessions.db)
# Use /tmp/ for Docker/containerized environments
DB_PATH=sessions.db

# FAISS index path for embeddings (default: embeddings.faiss)
FAISS_INDEX_PATH=embeddings.faiss

# =============================================================================
# Cache Configuration
# =============================================================================
# HuggingFace cache directory (for any remaining model downloads)
# Note: TRANSFORMERS_CACHE is deprecated in recent transformers releases;
# it is kept here alongside HF_HOME for compatibility
HF_HOME=~/.cache/huggingface
TRANSFORMERS_CACHE=~/.cache/huggingface

# HuggingFace token (optional - only needed for gated models)
HF_TOKEN=

# Cache TTL in seconds (default: 3600 = 1 hour)
CACHE_TTL=3600

# =============================================================================
# Session Configuration
# =============================================================================
# Session timeout in seconds (default: 3600 = 1 hour)
SESSION_TIMEOUT=3600

# Maximum session size in megabytes (default: 10)
MAX_SESSION_SIZE_MB=10

# =============================================================================
# Performance Configuration
# =============================================================================
# Maximum worker threads for parallel processing (default: 4)
MAX_WORKERS=4

# =============================================================================
# Mobile Optimization
# =============================================================================
# Maximum tokens for mobile responses (default: 1200)
# Raised from 800 to allow fuller responses on mobile devices
MOBILE_MAX_TOKENS=1200

# Mobile request timeout in milliseconds (default: 15000)
MOBILE_TIMEOUT=15000

# =============================================================================
# API Configuration
# =============================================================================
# Flask/Gradio server port (default: 7860)
GRADIO_PORT=7860

# Server host (default: 0.0.0.0 to listen on all interfaces)
GRADIO_HOST=0.0.0.0

# =============================================================================
# Logging Configuration
# =============================================================================
# Logging level: DEBUG, INFO, WARNING, ERROR, or CRITICAL (default: INFO)
LOG_LEVEL=INFO

# Log format: json or text (default: json)
LOG_FORMAT=json

# Log directory (default: /tmp/logs)
LOG_DIR=/tmp/logs

# =============================================================================
# Context Configuration
# =============================================================================
# Maximum context tokens (default: 4000)
# Note: overridden by CONTEXT_PREPARATION_BUDGET if that is set
MAX_CONTEXT_TOKENS=4000

# Cache TTL for context in seconds (default: 300 = 5 minutes)
CACHE_TTL_SECONDS=300

# Maximum number of cached entries (default: 100)
MAX_CACHE_SIZE=100

# Enable parallel processing (default: True)
PARALLEL_PROCESSING=True

# Context decay factor (default: 0.8)
CONTEXT_DECAY_FACTOR=0.8

# Maximum interactions to keep in context (default: 10)
MAX_INTERACTIONS_TO_KEEP=10

# Enable metrics collection (default: True)
ENABLE_METRICS=True

# Enable context compression (default: True)
COMPRESSION_ENABLED=True

# Summarization threshold in tokens (default: 2000)
SUMMARIZATION_THRESHOLD=2000
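# -----------------------------------------------------------------------------
# Worked example (illustrative; assumes the decay factor is applied
# multiplicatively once per interaction of age, which this file does not
# specify): with CONTEXT_DECAY_FACTOR=0.8, an interaction i steps old would
# be weighted 0.8^i, so the oldest of the MAX_INTERACTIONS_TO_KEEP=10
# interactions would carry a weight of roughly 0.8^9 ~= 0.13.
# -----------------------------------------------------------------------------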
# =============================================================================
# Model Selection (for context operations - if still using local models)
# =============================================================================
# These are optional and only used if local models are still needed
# for context summarization or other operations
CONTEXT_SUMMARIZATION_MODEL=Qwen/Qwen2.5-7B-Instruct
CONTEXT_INTENT_MODEL=Qwen/Qwen2.5-7B-Instruct
CONTEXT_SYNTHESIS_MODEL=Qwen/Qwen2.5-7B-Instruct

# =============================================================================
# Security Notes
# =============================================================================
# - Never commit the .env file to version control
# - Keep API keys secret and rotate them regularly
# - Use environment variables in production (not .env files)
# - Set proper file permissions: chmod 600 .env
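# -----------------------------------------------------------------------------
# Example (an illustrative sketch of the "environment variables in production"
# note above; the secrets file path is an assumption, not part of this
# project). Exporting in the shell makes the values available to the app
# without a .env file on disk:
#
#   export ZEROGPU_BASE_URL="http://your-pod-ip:8000"
#   export ZEROGPU_PASSWORD="$(cat /run/secrets/zerogpu_password)"
# -----------------------------------------------------------------------------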