Mandark-droid committed · Commit a13e7fa · Parent(s): 34039c9

Changed model to v5.1 fp16

Browse files:
- enhanced_model_handler.py +4 -4
- entrypoint.sh +1 -1
- model_handler_ollama.py +2 -2
- requirements.txt +0 -0
enhanced_model_handler.py
CHANGED

@@ -60,7 +60,7 @@ except Exception as e:
 # Global model and tokenizer variables
 model = None
 tokenizer = None
-MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v3"
+MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool" #"kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v3"

 # Inference configurations
 INFERENCE_CONFIGS = {

@@ -208,9 +208,9 @@ def load_model() -> Tuple[Optional[Any], Optional[Any]]:
         #"cache_dir": "./model_cache"
     }

-    # Add quantization if available
-    if quantization_config:
-        model_kwargs["quantization_config"] = quantization_config
+    # # Add quantization if available
+    # if quantization_config:
+    #     model_kwargs["quantization_config"] = quantization_config

     # Try to use flash attention if available
     try:
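In effect, this hunk swaps the checkpoint and turns load_model() from an optionally quantized load into a plain fp16 load. A minimal sketch of the resulting load path, assuming the usual transformers API — only MODEL_ID is taken from the diff; every other name and argument is an illustrative assumption, not code from this repo:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool"

    # With the quantization block commented out, model_kwargs carries no
    # quantization_config, so the weights load at the dtype requested here.
    model_kwargs = {
        "torch_dtype": torch.float16,  # fp16, matching the commit message
        "device_map": "auto",          # assumption: let accelerate place layers
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)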
entrypoint.sh
CHANGED

@@ -16,7 +16,7 @@ done
 echo "🟢 Ollama is live!"

 # Pull your lightweight model
-MODEL_NAME="hf.co/
+MODEL_NAME="hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16"
 echo "🔽 Pulling model: $MODEL_NAME"
 /app/ollama pull "$MODEL_NAME" || {
   echo "❌ Failed to pull model. Check name and internet."
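The new MODEL_NAME uses Ollama's hf.co/<user>/<repo>:<tag> scheme, which pulls a GGUF file straight from a Hugging Face repo; the :f16 tag selects the fp16 build named in the commit message. A small sketch for checking that the pull in entrypoint.sh succeeded, assuming only Ollama's standard GET /api/tags endpoint — the requests usage here is illustrative, not part of the repo:

    import requests

    # MODEL_NAME is taken verbatim from entrypoint.sh above.
    MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16"

    # /api/tags lists every model the local Ollama server has pulled.
    tags = requests.get("http://localhost:11434/api/tags", timeout=10).json()
    names = [m["name"] for m in tags.get("models", [])]
    print("model present" if any(MODEL_NAME in n for n in names) else "model missing")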
model_handler_ollama.py
CHANGED

@@ -6,7 +6,7 @@ from typing import Dict, Any, Optional, List

 # Ollama configuration
 OLLAMA_BASE_URL = "http://localhost:11434" # Default Ollama URL
-MODEL_NAME = "hf.co/
+MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16" # Replace with your actual model name in Ollama

 # Inference configurations
 INFERENCE_CONFIGS = {

@@ -459,6 +459,6 @@ Available tools:

 if __name__ == "__main__":
     # Update MODEL_NAME to match your model in Ollama
-    MODEL_NAME = "hf.co/
+    MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16" # Change this!

     example_usage()
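For reference, a hedged sketch of the kind of call a handler built on these constants makes. OLLAMA_BASE_URL and MODEL_NAME come from the diff, and the request shape follows Ollama's documented /api/generate endpoint; the prompt and surrounding code are placeholders, not the repo's actual implementation:

    import requests

    OLLAMA_BASE_URL = "http://localhost:11434"
    MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16"

    # Non-streaming generation request; Ollama returns a single JSON object
    # whose "response" field holds the completion.
    resp = requests.post(
        f"{OLLAMA_BASE_URL}/api/generate",
        json={
            "model": MODEL_NAME,
            "prompt": "Summarize: service restarted after OOM.",  # placeholder prompt
            "stream": False,
        },
        timeout=120,
    )
    resp.raise_for_status()
    print(resp.json()["response"])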
requirements.txt
CHANGED

Binary files a/requirements.txt and b/requirements.txt differ