Mandark-droid committed
Commit a13e7fa · 1 Parent(s): 34039c9

Changed model to v5.1 fp16

enhanced_model_handler.py CHANGED
@@ -60,7 +60,7 @@ except Exception as e:
 # Global model and tokenizer variables
 model = None
 tokenizer = None
-MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1"
+MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool" #"kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v3"
 
 # Inference configurations
 INFERENCE_CONFIGS = {
@@ -208,9 +208,9 @@ def load_model() -> Tuple[Optional[Any], Optional[Any]]:
         #"cache_dir": "./model_cache"
     }
 
-    # Add quantization if available
-    if quantization_config:
-        model_kwargs["quantization_config"] = quantization_config
+    # # Add quantization if available
+    # if quantization_config:
+    #     model_kwargs["quantization_config"] = quantization_config
 
     # Try to use flash attention if available
     try:
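
With quantization commented out, load_model() now loads the checkpoint weights directly, which matches the "fp16" in the commit message. A minimal sketch of the resulting load path, assuming transformers and torch; everything here other than MODEL_ID and the flash-attention fallback visible in the diff is illustrative, not the repo's exact code:

# Sketch of the post-change load path: no quantization_config,
# fp16 weights, flash attention when available. Helper structure is
# an assumption; only MODEL_ID comes from the diff above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool"

def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model_kwargs = {
        "torch_dtype": torch.float16,  # fp16, per the commit message
        "device_map": "auto",
    }
    # Quantization is intentionally skipped (commented out in the diff).
    # Try flash attention first, fall back to the default implementation.
    try:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, attn_implementation="flash_attention_2", **model_kwargs
        )
    except Exception:
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_kwargs)
    return model, tokenizer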
entrypoint.sh CHANGED
@@ -16,7 +16,7 @@ done
 echo "🟢 Ollama is live!"
 
 # Pull your lightweight model
-MODEL_NAME="hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"
+MODEL_NAME="hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16"
 echo "🔽 Pulling model: $MODEL_NAME"
 /app/ollama pull "$MODEL_NAME" || {
   echo "❌ Failed to pull model. Check name and internet."
model_handler_ollama.py CHANGED
@@ -6,7 +6,7 @@ from typing import Dict, Any, Optional, List
 
 # Ollama configuration
 OLLAMA_BASE_URL = "http://localhost:11434" # Default Ollama URL
-MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0" # Replace with your actual model name in Ollama
+MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16" # Replace with your actual model name in Ollama
 
 # Inference configurations
 INFERENCE_CONFIGS = {
@@ -459,6 +459,6 @@ Available tools:
 
 if __name__ == "__main__":
     # Update MODEL_NAME to match your model in Ollama
-    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0" # Change this!
+    MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16" # Change this!
 
     example_usage()
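
For a quick smoke test of the new tag, one non-streaming call to Ollama's standard /api/generate endpoint is enough. A sketch; the prompt and the "options" values are purely illustrative stand-ins for whatever INFERENCE_CONFIGS holds:

# Sketch: single non-streaming generation against the new MODEL_NAME
# via Ollama's /api/generate endpoint.
import requests

OLLAMA_BASE_URL = "http://localhost:11434"
MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16"

payload = {
    "model": MODEL_NAME,
    "prompt": "Summarize the last 10 error log lines.",
    "stream": False,
    "options": {"temperature": 0.7, "num_predict": 256},  # illustrative values
}
resp = requests.post(f"{OLLAMA_BASE_URL}/api/generate", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["response"])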
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ