Mandark-droid committed · Commit a13e7fa · Parent(s): 34039c9

Changed model to v5.1 fp16

Browse files:
- enhanced_model_handler.py +4 -4
- entrypoint.sh +1 -1
- model_handler_ollama.py +2 -2
- requirements.txt +0 -0
enhanced_model_handler.py
CHANGED

@@ -60,7 +60,7 @@ except Exception as e:
 # Global model and tokenizer variables
 model = None
 tokenizer = None
-MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v3"
+MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool" #"kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v3"

 # Inference configurations
 INFERENCE_CONFIGS = {

@@ -208,9 +208,9 @@ def load_model() -> Tuple[Optional[Any], Optional[Any]]:
         #"cache_dir": "./model_cache"
     }

-    # Add quantization if available
-    if quantization_config:
-        model_kwargs["quantization_config"] = quantization_config
+    # # Add quantization if available
+    # if quantization_config:
+    #     model_kwargs["quantization_config"] = quantization_config

     # Try to use flash attention if available
     try:
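In effect, this hunk swaps the checkpoint and turns load_model() from an optionally quantized load into a plain fp16 load. A minimal sketch of the resulting load path, assuming the usual transformers API — only MODEL_ID is taken from the diff; every other name and argument is an illustrative assumption, not code from this repo:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool"

    # With the quantization block commented out, model_kwargs carries no
    # quantization_config, so the weights load at the dtype requested here.
    model_kwargs = {
        "torch_dtype": torch.float16,  # fp16, matching the commit message
        "device_map": "auto",          # assumption: let accelerate place layers
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)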
entrypoint.sh
CHANGED

@@ -16,7 +16,7 @@ done
 echo "🟢 Ollama is live!"

 # Pull your lightweight model
-MODEL_NAME="hf.co/
+MODEL_NAME="hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16"
 echo "🔽 Pulling model: $MODEL_NAME"
 /app/ollama pull "$MODEL_NAME" || {
   echo "❌ Failed to pull model. Check name and internet."
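The new MODEL_NAME uses Ollama's hf.co/<user>/<repo>:<tag> scheme, which pulls a GGUF file straight from a Hugging Face repo; the :f16 tag selects the fp16 build named in the commit message. A small sketch for checking that the pull in entrypoint.sh succeeded, assuming only Ollama's standard GET /api/tags endpoint — the requests usage here is illustrative, not part of the repo:

    import requests

    # MODEL_NAME is taken verbatim from entrypoint.sh above.
    MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16"

    # /api/tags lists every model the local Ollama server has pulled.
    tags = requests.get("http://localhost:11434/api/tags", timeout=10).json()
    names = [m["name"] for m in tags.get("models", [])]
    print("model present" if any(MODEL_NAME in n for n in names) else "model missing")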
model_handler_ollama.py
CHANGED

@@ -6,7 +6,7 @@ from typing import Dict, Any, Optional, List

 # Ollama configuration
 OLLAMA_BASE_URL = "http://localhost:11434" # Default Ollama URL
-MODEL_NAME = "hf.co/
+MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16" # Replace with your actual model name in Ollama

 # Inference configurations
 INFERENCE_CONFIGS = {

@@ -459,6 +459,6 @@ Available tools:

 if __name__ == "__main__":
     # Update MODEL_NAME to match your model in Ollama
-    MODEL_NAME = "hf.co/
+    MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16" # Change this!

     example_usage()
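For reference, a hedged sketch of the kind of call a handler built on these constants makes. OLLAMA_BASE_URL and MODEL_NAME come from the diff, and the request shape follows Ollama's documented /api/generate endpoint; the prompt and surrounding code are placeholders, not the repo's actual implementation:

    import requests

    OLLAMA_BASE_URL = "http://localhost:11434"
    MODEL_NAME = "hf.co/mradermacher/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v5.1-finetuned-GGUF:f16"

    # Non-streaming generation request; Ollama returns a single JSON object
    # whose "response" field holds the completion.
    resp = requests.post(
        f"{OLLAMA_BASE_URL}/api/generate",
        json={
            "model": MODEL_NAME,
            "prompt": "Summarize: service restarted after OOM.",  # placeholder prompt
            "stream": False,
        },
        timeout=120,
    )
    resp.raise_for_status()
    print(resp.json()["response"])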
requirements.txt
CHANGED

Binary files a/requirements.txt and b/requirements.txt differ