# =============================================================================
# Research AI Assistant API - Environment Configuration
# =============================================================================
# Copy this content to a file named .env and fill in your actual values
# Never commit .env to version control!

# =============================================================================
# ZeroGPU Chat API Configuration (REQUIRED)
# =============================================================================
# Base URL for your ZeroGPU Chat API endpoint (RunPod)
# Format: http://your-pod-ip:8000 or https://your-domain.com
# Example: http://bm9njt1ypzvuqw-8000.proxy.runpod.net
ZEROGPU_BASE_URL=http://your-pod-ip:8000

# Email for authentication (register first via the /register endpoint)
ZEROGPU_EMAIL=your_email@example.com

# Password for authentication
ZEROGPU_PASSWORD=your_secure_password_here
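
# A hedged registration sketch: the /register endpoint is mentioned above, but
# the exact request body is an assumption; check your API docs for the real
# field names before using this.
#   curl -X POST "$ZEROGPU_BASE_URL/register" \
#     -H "Content-Type: application/json" \
#     -d '{"email": "your_email@example.com", "password": "your_secure_password_here"}'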

# =============================================================================
# Token Allocation Configuration
# =============================================================================
# Maximum tokens dedicated for user input (prioritized over context)
# Recommended: 32000 tokens for DeepSeek R1 (128K context window)
USER_INPUT_MAX_TOKENS=32000

# Maximum tokens for the prepared prompt (user input plus retrieved context)
# Recommended: 115000 tokens for DeepSeek R1 (leaves ~13K for output)
CONTEXT_PREPARATION_BUDGET=115000

# Context pruning threshold (should match CONTEXT_PREPARATION_BUDGET)
CONTEXT_PRUNING_THRESHOLD=115000

# Always prioritize user input over historical context
PRIORITIZE_USER_INPUT=True

# Model context window (actual limit for your deployed model)
# Default: 8192 tokens (adjust based on your model)
# This is the maximum total tokens (input + output) the model can handle
# Common values: 4096, 8192, 16384, 32768, etc.
ZEROGPU_MODEL_CONTEXT_WINDOW=8192
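
# Sanity check for the budgets above (arithmetic only; assumes the budgets are
# counted against this window): CONTEXT_PREPARATION_BUDGET plus the output
# reserve must fit in ZEROGPU_MODEL_CONTEXT_WINDOW. For a 128K model:
#   128000 - 115000 = ~13000 tokens left for the model's output
# With the default 8192 window, scale USER_INPUT_MAX_TOKENS and
# CONTEXT_PREPARATION_BUDGET down proportionally.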

# =============================================================================
# Database Configuration
# =============================================================================
# SQLite database path (default: sessions.db)
# Use /tmp/ for Docker/containerized environments
DB_PATH=sessions.db

# FAISS index path for embeddings (default: embeddings.faiss)
FAISS_INDEX_PATH=embeddings.faiss
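
# Containerized example (per the /tmp/ note above; paths are illustrative):
#   DB_PATH=/tmp/sessions.db
#   FAISS_INDEX_PATH=/tmp/embeddings.faiss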

# =============================================================================
# Cache Configuration
# =============================================================================
# HuggingFace cache directory (for any remaining model downloads)
HF_HOME=~/.cache/huggingface
TRANSFORMERS_CACHE=~/.cache/huggingface
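
# Note: many dotenv loaders (e.g. python-dotenv) do not expand ~, so prefer an
# absolute path (such as /root/.cache/huggingface inside a container) if the
# cache ends up in the wrong place. Recent transformers releases also deprecate
# TRANSFORMERS_CACHE in favor of HF_HOME; setting both here is a safe default.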

# HuggingFace token (optional - only needed if using gated models)
HF_TOKEN=

# Cache TTL in seconds (default: 3600 = 1 hour)
CACHE_TTL=3600

# =============================================================================
# Session Configuration
# =============================================================================
# Session timeout in seconds (default: 3600 = 1 hour)
SESSION_TIMEOUT=3600

# Maximum session size in megabytes (default: 10 MB)
MAX_SESSION_SIZE_MB=10

# =============================================================================
# Performance Configuration
# =============================================================================
# Maximum worker threads for parallel processing (default: 4)
MAX_WORKERS=4

# =============================================================================
# Mobile Optimization
# =============================================================================
# Maximum tokens for mobile responses (default: 1200)
# Increased from 800 to allow better responses on mobile
MOBILE_MAX_TOKENS=1200

# Mobile request timeout in milliseconds (default: 15000 = 15 seconds)
MOBILE_TIMEOUT=15000

# =============================================================================
# API Configuration
# =============================================================================
# Flask/Gradio server port (default: 7860)
GRADIO_PORT=7860

# Server host (default: 0.0.0.0 for all interfaces)
GRADIO_HOST=0.0.0.0

# =============================================================================
# Logging Configuration
# =============================================================================
# Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL (default: INFO)
LOG_LEVEL=INFO

# Log format: json or text (default: json)
LOG_FORMAT=json

# Log directory (default: /tmp/logs)
LOG_DIR=/tmp/logs

# =============================================================================
# Context Configuration
# =============================================================================
# Maximum context tokens (default: 4000)
# Note: This is overridden by CONTEXT_PREPARATION_BUDGET if set
MAX_CONTEXT_TOKENS=4000

# Cache TTL for context in seconds (default: 300 = 5 minutes)
CACHE_TTL_SECONDS=300

# Maximum cache size (default: 100)
MAX_CACHE_SIZE=100

# Enable parallel processing (default: True)
PARALLEL_PROCESSING=True

# Context decay factor (default: 0.8)
CONTEXT_DECAY_FACTOR=0.8
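
# Worked example (assumes the factor is applied multiplicatively per turn,
# which is an inference about this codebase, not a documented guarantee):
#   weight after n turns = 0.8^n, i.e. 0.8, 0.64, 0.51, ...
#   an interaction 10 turns back keeps roughly 0.8^10 ≈ 0.11 of its weight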

# Maximum interactions to keep in context (default: 10)
MAX_INTERACTIONS_TO_KEEP=10

# Enable metrics collection (default: True)
ENABLE_METRICS=True

# Enable context compression (default: True)
COMPRESSION_ENABLED=True

# Summarization threshold in tokens (default: 2000)
SUMMARIZATION_THRESHOLD=2000

# =============================================================================
# Model Selection (for context operations - if still using local models)
# =============================================================================
# These are optional and only used if local models are still needed
# for context summarization or other operations
CONTEXT_SUMMARIZATION_MODEL=Qwen/Qwen2.5-7B-Instruct
CONTEXT_INTENT_MODEL=Qwen/Qwen2.5-7B-Instruct
CONTEXT_SYNTHESIS_MODEL=Qwen/Qwen2.5-7B-Instruct

# =============================================================================
# Security Notes
# =============================================================================
# - Never commit .env file to version control
# - Keep credentials (passwords, API tokens) secret and rotate them regularly
# - Use environment variables in production (not .env files)
# - Set proper file permissions: chmod 600 .env
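
# Production example (standard Docker syntax; the image name and values are
# placeholders):
#   docker run \
#     -e ZEROGPU_BASE_URL=https://your-domain.com \
#     -e ZEROGPU_EMAIL=your_email@example.com \
#     -e ZEROGPU_PASSWORD="$ZEROGPU_PASSWORD" \
#     your-image:latest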