Spaces:

helperai
/

ai

Sleeping

App Files Files Community

helperai committed 17 days ago

Commit

0d6bb64

verified ·

1 Parent(s): 5f83552

Create main.py

Browse files

Files changed (1) hide show

main.py +103 -0

main.py ADDED Viewed

	@@ -0,0 +1,103 @@

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import time
+# --- MODEL CONSTANTS ---
+MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-instruct"  # Model id passed to from_pretrained() for both the tokenizer and the model
+# CRITICAL: Force model to use CPU for the free tier
+DEVICE = "cpu"  # Used as device_map when loading the model and as the target for tokenized inputs
+MAX_NEW_TOKENS = 512  # Cap on generated tokens per request (limits output size for speed and cost control)
+TORCH_DTYPE = torch.float32  # Use standard float for maximum CPU compatibility
+# Global variables for model and tokenizer: None until startup_event() assigns the loaded objects
+model = None
+tokenizer = None
+# --- API Data Structure ---
+class CodeRequest(BaseModel):
+    """Defines the expected input structure from the front-end website."""
+    user_prompt: str  # The user's task description (e.g., "Fix the bug in this function"); inserted after "TASK:" in the prompt
+    code_context: str  # The block of code the user provided; inserted after "CODE_CONTEXT:" in the prompt
+# --- FastAPI App Setup ---
+# The app will run on port 7860 as defined in the Dockerfile
+app = FastAPI(title="CodeFlow AI Agent Backend - DeepSeek SLM")  # Application object; the startup hook and /fix_code route below register on it
+@app.on_event("startup")
+async def startup_event():
+    """Load the DeepSeek SLM Model and Tokenizer ONLY ONCE when the server starts."""
+    global model, tokenizer
+    print(f"--- Starting CodeFlow AI Agent (DeepSeek 1.3B) on {DEVICE} ---")
+    start_time = time.time()
+    try:
+        # Load the Tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+        # Load the Model
+        # Using device_map="cpu" is essential for the free tier.
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=TORCH_DTYPE,
+            device_map=DEVICE,
+            trust_remote_code=True
+        )
+        model.eval() # Set model to evaluation mode
+        print(f"DeepSeek Model loaded successfully in {time.time() - start_time:.2f} seconds.")
+    except Exception as e:
+        # If the model fails to load, log the error and prevent the API from functioning
+        print(f"ERROR: Failed to load DeepSeek model on CPU: {e}")
+        # Raising an exception here will cause the Docker container to fail, which is correct
+        # as a non-working model should not be deployed.
+        raise RuntimeError(f"Model failed to load on startup: {e}")
+# --- The API Endpoint ---
+@app.post("/fix_code")
+async def fix_code_endpoint(request: CodeRequest):
+    """
+    Accepts code context and task, processes it with DeepSeek-Coder, and returns the fix.
+    """
+    if model is None or tokenizer is None:
+        raise HTTPException(status_code=503, detail="AI Agent is still loading or failed to start.")
+    # --- CONSTRUCT AGENT PROMPT (DeepSeek Instruction Format) ---
+    # DeepSeek uses a specific format: ### Instruction: ... ### Response:
+    instruction = (
+        f"You are Arya's CodeBuddy, an elite Full-Stack Software Engineer. Your only job is to analyze "
+        f"the user's request and provide the complete, fixed, or generated code. You must ONLY output "
+        f"a single, complete, and corrected Markdown code block. Use a friendly and encouraging tone.\n\n"
+        f"TASK: {request.user_prompt}\n\n"
+        f"CODE_CONTEXT:\n{request.code_context}"
+    )
+    # Format the prompt correctly for the model
+    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
+    # Tokenize and send tensors to CPU
+    model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)
+    try:
+        # --- GENERATE CODE (CPU Inference) ---
+        generated_ids = model.generate(
+            **model_inputs,
+            max_new_tokens=MAX_NEW_TOKENS,
+            do_sample=False, # Deterministic output
+            temperature=0.1, # Low temperature for reliable coding
+        )
+        # Decode the output
+        response_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+        # Post-processing: Extract ONLY the text after the '### Response:' tag.
+        final_code_only = response_text.split("### Response:")[1].strip()
+        return {"fixed_code": final_code_only}
+    except Exception as e:
+        print(f"Generation error: {e}")
+        # Return a generic error to the user
+        raise HTTPException(status_code=500, detail="The DeepSeek CodeBuddy encountered a processing error.")