Spaces: Build error

madi7a committed · d90a0a5 · 1 Parent(s): 56bb51e

feat: Add core application files and correct gitignore
Files changed:
- .gitattributes +1 -0
- .gitignore +5 -0
- app.py +368 -0
- rag.py +307 -0
- requirements.txt +50 -0
.gitattributes  CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+
.gitignore  ADDED
@@ -0,0 +1,5 @@
# Ignore environment variables file
.env

# Python cache
__pycache__/
app.py  ADDED
@@ -0,0 +1,368 @@
import os
import json
import time
import shutil
import tempfile

import numpy as np
import scipy.io.wavfile as wavfile
import cv2
import torch
import gradio as gr
import whisper
import librosa
from moviepy.editor import VideoFileClip
from transformers import (
    AutoProcessor, BarkModel,
    Wav2Vec2Processor, Wav2Vec2ForSequenceClassification,
)
from deepface import DeepFace

# LLM, prompt-building, evaluation, and CV-parsing helpers defined in rag.py
from rag import (
    groq_llm, build_interview_prompt, eval_question_quality,
    generate_reference_answer, evaluate_answer,
    extract_candidate_details, extract_job_details,
)

# Bark TTS
model_bark = BarkModel.from_pretrained("suno/bark")
processor_bark = AutoProcessor.from_pretrained("suno/bark")
model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
bark_voice_preset = "v2/en_speaker_6"

def bark_tts(text):
    inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
    inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
    speech_values = model_bark.generate(**inputs)
    speech = speech_values.cpu().numpy().squeeze()
    speech = (speech * 32767).astype(np.int16)
    temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    wavfile.write(temp_wav.name, 22050, speech)
    return temp_wav.name

# Whisper STT
whisper_model = whisper.load_model("base")

def whisper_stt(audio_path):
    if not audio_path or not os.path.exists(audio_path):
        return ""
    result = whisper_model.transcribe(audio_path)
    return result["text"]

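# Quick round-trip check of the two speech helpers above (illustrative sketch, not part of
# the committed file; assumes the Bark and Whisper models loaded successfully and that
# ffmpeg is available for Whisper's audio decoding):
#
#   wav_path = bark_tts("Welcome to the interview. Please introduce yourself.")
#   text = whisper_stt(wav_path)   # should roughly echo the sentence above
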
# DeepFace (Video Face Emotion)
def ensure_mp4(video_input):
    # video_input could be a file-like object, a path, or a Gradio temp path
    if isinstance(video_input, str):
        input_path = video_input
    else:
        # It's a file-like object (rare for Gradio video, but handle it)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
            temp_in.write(video_input.read())
            input_path = temp_in.name

    # If already mp4, return as is
    if input_path.endswith(".mp4"):
        return input_path

    # Convert to mp4 using moviepy
    mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    try:
        clip = VideoFileClip(input_path)
        clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
        clip.close()
    except Exception as e:
        print("Video conversion failed:", e)
        # As a fallback, just copy the original
        shutil.copy(input_path, mp4_path)
    return mp4_path

def analyze_video_emotions(video_input, sample_rate=15):
    # Convert input to an mp4 file OpenCV can process
    mp4_path = ensure_mp4(video_input)
    if not mp4_path or not os.path.exists(mp4_path):
        return "neutral"
    cap = cv2.VideoCapture(mp4_path)
    frame_count = 0
    emotion_counts = {}
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Run DeepFace only on every `sample_rate`-th frame to keep analysis tractable
        if frame_count % sample_rate == 0:
            try:
                result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
                dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
                emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
            except Exception:
                pass
        frame_count += 1
    cap.release()
    if not emotion_counts:
        return "neutral"
    # Return the most frequently detected emotion across the sampled frames
    return max(emotion_counts.items(), key=lambda x: x[1])[0]

# Wav2Vec2 (voice emotion)
wav2vec_model_name = "HaniaRuby/speech-emotion-recognition-wav2vec2"
wav2vec_processor = Wav2Vec2Processor.from_pretrained(wav2vec_model_name)
wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(wav2vec_model_name)
wav2vec_model.eval()
voice_label_map = {
    0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
    4: 'neutral', 5: 'sad', 6: 'surprise'
}

def analyze_audio_emotion(audio_path):
    if not audio_path or not os.path.exists(audio_path):
        return "neutral"
    speech, sr = librosa.load(audio_path, sr=16000)
    inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        logits = wav2vec_model(**inputs).logits
    probs = torch.nn.functional.softmax(logits, dim=-1)
    predicted_id = torch.argmax(probs, dim=-1).item()
    return voice_label_map.get(predicted_id, "neutral")

# --- Effective confidence calculation ---
def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
    """Blend voice emotion, face emotion, and answer quality into one confidence score."""
    emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3,
                   "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
    answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
    voice_score = emotion_map.get(voice_label, 0.5)
    face_score = emotion_map.get(face_label, 0.5)
    answer_score = answer_score_map.get(answer_score_label, 0.5)
    avg_emotion = (voice_score + face_score) / 2
    control_bonus = max(0, answer_score - avg_emotion) * k
    eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
    return {
        "effective_confidence": round(eff_conf, 3),
        "answer_score": round(answer_score, 2),
        "voice_score": round(voice_score, 2),
        "face_score": round(face_score, 2),
        "control_bonus": round(control_bonus, 3),
    }

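# Worked example of the weighting above (illustrative, not part of the committed file),
# for a "good" answer delivered with a happy voice and a neutral face:
#   voice_score = 0.9, face_score = 0.6, answer_score = 0.8
#   avg_emotion   = (0.9 + 0.6) / 2          = 0.75
#   control_bonus = max(0, 0.8 - 0.75) * 0.2 = 0.01
#   eff_conf = 0.5*0.8 + 0.22*0.9 + 0.18*0.6 + 0.1*0.01 = 0.707
# So interpret_confidence("happy", "neutral", "good") returns
#   {'effective_confidence': 0.707, 'answer_score': 0.8, 'voice_score': 0.9,
#    'face_score': 0.6, 'control_bonus': 0.01}
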
seniority_mapping = {
    "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
}


# --- 2. Gradio App ---

+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 145 |
+
user_data = gr.State({})
|
| 146 |
+
interview_state = gr.State({})
|
| 147 |
+
missing_fields_state = gr.State([])
|
| 148 |
+
|
| 149 |
+
# --- UI Layout ---
|
| 150 |
+
with gr.Column(visible=True) as user_info_section:
|
| 151 |
+
gr.Markdown("## Candidate Information")
|
| 152 |
+
cv_file = gr.File(label="Upload CV")
|
| 153 |
+
job_desc = gr.Textbox(label="Job Description")
|
| 154 |
+
start_btn = gr.Button("Continue", interactive=False)
|
| 155 |
+
|
| 156 |
+
with gr.Column(visible=False) as missing_section:
|
| 157 |
+
gr.Markdown("## Missing Information")
|
| 158 |
+
name_in = gr.Textbox(label="Name", visible=False)
|
| 159 |
+
role_in = gr.Textbox(label="Job Role", visible=False)
|
| 160 |
+
seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
|
| 161 |
+
skills_in = gr.Textbox(label="Skills", visible=False)
|
| 162 |
+
submit_btn = gr.Button("Submit", interactive=False)
|
| 163 |
+
|
| 164 |
+
with gr.Column(visible=False) as interview_pre_section:
|
| 165 |
+
pre_interview_greeting_md = gr.Markdown()
|
| 166 |
+
start_interview_final_btn = gr.Button("Start Interview")
|
| 167 |
+
|
| 168 |
+
with gr.Column(visible=False) as interview_section:
|
| 169 |
+
gr.Markdown("## Interview in Progress")
|
| 170 |
+
question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
|
| 171 |
+
question_text = gr.Markdown()
|
| 172 |
+
user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
|
| 173 |
+
user_video_input = gr.Video(sources=["webcam"], label="2. Record Video Answer")
|
| 174 |
+
stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
|
| 175 |
+
confirm_btn = gr.Button("Confirm Answer")
|
| 176 |
+
evaluation_display = gr.Markdown()
|
| 177 |
+
emotion_display = gr.Markdown()
|
| 178 |
+
interview_summary = gr.Markdown(visible=False)
|
| 179 |
+
|
| 180 |
+
# --- UI Logic ---
|
| 181 |
+
|
| 182 |
+
def validate_start_btn(cv_file, job_desc):
|
| 183 |
+
return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
|
| 184 |
+
cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
|
| 185 |
+
job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
|
| 186 |
+
|
| 187 |
+
def process_and_route_initial(cv_file, job_desc):
|
| 188 |
+
details = extract_candidate_details(cv_file.name)
|
| 189 |
+
job_info = extract_job_details(job_desc)
|
| 190 |
+
data = {
|
| 191 |
+
"name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
|
| 192 |
+
"seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
|
| 193 |
+
}
|
| 194 |
+
missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
|
| 195 |
+
if missing:
|
| 196 |
+
return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
|
| 197 |
+
else:
|
| 198 |
+
greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
|
| 199 |
+
return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
|
| 200 |
+
start_btn.click(
|
| 201 |
+
process_and_route_initial,
|
| 202 |
+
[cv_file, job_desc],
|
| 203 |
+
[user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
|
| 204 |
+
)
|
| 205 |
+
|
| 206 |
+
def show_missing(missing):
|
| 207 |
+
if missing is None: missing = []
|
| 208 |
+
return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
|
| 209 |
+
missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
|
| 210 |
+
|
| 211 |
+
def validate_fields(name, role, seniority, skills, missing):
|
| 212 |
+
if not missing: return gr.update(interactive=False)
|
| 213 |
+
all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
|
| 214 |
+
return gr.update(interactive=all_filled)
|
| 215 |
+
for inp in [name_in, role_in, seniority_in, skills_in]:
|
| 216 |
+
inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
|
| 217 |
+
|
| 218 |
+
def complete_manual(data, name, role, seniority, skills):
|
| 219 |
+
if data["name"].lower() == "unknown": data["name"] = name
|
| 220 |
+
if data["job_role"].lower() == "unknown": data["job_role"] = role
|
| 221 |
+
if data["seniority"].lower() == "unknown": data["seniority"] = seniority
|
| 222 |
+
if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
|
| 223 |
+
greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
|
| 224 |
+
return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
|
| 225 |
+
submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
|
| 226 |
+
|
| 227 |
+
def start_interview(data):
|
| 228 |
+
# --- Advanced state with full logging ---
|
| 229 |
+
state = {
|
| 230 |
+
"questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
|
| 231 |
+
"question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
|
| 232 |
+
"conversation_history": [],
|
| 233 |
+
"difficulty_adjustment": None,
|
| 234 |
+
"question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
|
| 235 |
+
"log": []
|
| 236 |
+
}
|
| 237 |
+
# --- Optionally: context retrieval here (currently just blank) ---
|
| 238 |
+
context = ""
|
| 239 |
+
prompt = build_interview_prompt(
|
| 240 |
+
conversation_history=[], user_response="", context=context, job_role=data["job_role"],
|
| 241 |
+
skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
|
| 242 |
+
voice_label="neutral", face_label="neutral"
|
| 243 |
+
)
|
| 244 |
+
first_q = groq_llm.predict(prompt)
|
| 245 |
+
# Evaluate Q for quality
|
| 246 |
+
q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
|
| 247 |
+
state["questions"].append(first_q)
|
| 248 |
+
state["question_evaluations"].append(q_eval)
|
| 249 |
+
state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
|
| 250 |
+
audio_path = bark_tts(first_q)
|
| 251 |
+
# LOG
|
| 252 |
+
state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
|
| 253 |
+
return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
|
| 254 |
+
start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
|
| 255 |
+
|
| 256 |
+
def transcribe(audio_path):
|
| 257 |
+
return whisper_stt(audio_path)
|
| 258 |
+
user_audio_input.change(transcribe, user_audio_input, stt_transcript)
|
| 259 |
+
|
| 260 |
+
def process_answer(transcript, audio_path, video_path, state, data):
|
| 261 |
+
if not transcript and not video_path:
|
| 262 |
+
return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
|
| 263 |
+
elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
|
| 264 |
+
state["timings"].append(elapsed)
|
| 265 |
+
state["answers"].append(transcript)
|
| 266 |
+
state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
|
| 267 |
+
|
| 268 |
+
# --- 1. Emotion analysis ---
|
| 269 |
+
voice_label = analyze_audio_emotion(audio_path)
|
| 270 |
+
face_label = analyze_video_emotions(video_path)
|
| 271 |
+
state["voice_labels"].append(voice_label)
|
| 272 |
+
state["face_labels"].append(face_label)
|
| 273 |
+
|
| 274 |
+
# --- 2. Evaluate previous Q and Answer ---
|
| 275 |
+
last_q = state["questions"][-1]
|
| 276 |
+
q_eval = state["question_evaluations"][-1] # Already in state
|
| 277 |
+
ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
|
| 278 |
+
answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
|
| 279 |
+
state["answer_evaluations"].append(answer_eval)
|
| 280 |
+
answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
|
| 281 |
+
|
| 282 |
+
# --- 3. Adaptive difficulty ---
|
| 283 |
+
if answer_score == "excellent":
|
| 284 |
+
state["difficulty_adjustment"] = "harder"
|
| 285 |
+
elif answer_score in ("medium", "poor"):
|
| 286 |
+
state["difficulty_adjustment"] = "easier"
|
| 287 |
+
else:
|
| 288 |
+
state["difficulty_adjustment"] = None
|
| 289 |
+
|
| 290 |
+
# --- 4. Effective confidence ---
|
| 291 |
+
eff_conf = interpret_confidence(voice_label, face_label, answer_score)
|
| 292 |
+
state["effective_confidences"].append(eff_conf)
|
| 293 |
+
|
| 294 |
+
# --- LOG ---
|
| 295 |
+
state["log"].append({
|
| 296 |
+
"type": "answer",
|
| 297 |
+
"question": last_q,
|
| 298 |
+
"answer": transcript,
|
| 299 |
+
"answer_eval": answer_eval,
|
| 300 |
+
"ref_answer": ref_answer,
|
| 301 |
+
"face_label": face_label,
|
| 302 |
+
"voice_label": voice_label,
|
| 303 |
+
"effective_confidence": eff_conf,
|
| 304 |
+
"timing": elapsed,
|
| 305 |
+
"timestamp": time.time()
|
| 306 |
+
})
|
| 307 |
+
|
| 308 |
+
# --- Next or End ---
|
| 309 |
+
qidx = state["question_idx"] + 1
|
| 310 |
+
if qidx >= state["max_questions"]:
|
| 311 |
+
# Save as JSON (optionally)
|
| 312 |
+
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
| 313 |
+
log_file = f"interview_log_{timestamp}.json"
|
| 314 |
+
with open(log_file, "w", encoding="utf-8") as f:
|
| 315 |
+
json.dump(state["log"], f, indent=2, ensure_ascii=False)
|
| 316 |
+
# Report
|
| 317 |
+
summary = "# Interview Summary\n"
|
| 318 |
+
for i, q in enumerate(state["questions"]):
|
| 319 |
+
summary += (f"\n### Q{i + 1}: {q}\n"
|
| 320 |
+
f"- *Answer*: {state['answers'][i]}\n"
|
| 321 |
+
f"- *Q Eval*: {state['question_evaluations'][i]}\n"
|
| 322 |
+
f"- *A Eval*: {state['answer_evaluations'][i]}\n"
|
| 323 |
+
f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
|
| 324 |
+
f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
|
| 325 |
+
f"- *Time*: {state['timings'][i]}s\n")
|
| 326 |
+
summary += f"\n\n⏺ Full log saved as {log_file}."
|
| 327 |
+
return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"))
|
| 328 |
+
else:
|
| 329 |
+
# --- Build next prompt using adaptive difficulty ---
|
| 330 |
+
state["question_idx"] = qidx
|
| 331 |
+
state["q_start_time"] = time.time()
|
| 332 |
+
context = "" # You can add your context logic here
|
| 333 |
+
prompt = build_interview_prompt(
|
| 334 |
+
conversation_history=state["conversation_history"],
|
| 335 |
+
user_response=transcript,
|
| 336 |
+
context=context,
|
| 337 |
+
job_role=data["job_role"],
|
| 338 |
+
skills=data["skills"],
|
| 339 |
+
seniority=data["seniority"],
|
| 340 |
+
difficulty_adjustment=state["difficulty_adjustment"],
|
| 341 |
+
face_label=face_label,
|
| 342 |
+
voice_label=voice_label,
|
| 343 |
+
effective_confidence=eff_conf
|
| 344 |
+
)
|
| 345 |
+
next_q = groq_llm.predict(prompt)
|
| 346 |
+
# Evaluate Q quality
|
| 347 |
+
q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
|
| 348 |
+
state["questions"].append(next_q)
|
| 349 |
+
state["question_evaluations"].append(q_eval)
|
| 350 |
+
state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
|
| 351 |
+
state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
|
| 352 |
+
audio_path = bark_tts(next_q)
|
| 353 |
+
# Display evaluations
|
| 354 |
+
eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
|
| 355 |
+
return (
|
| 356 |
+
state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
|
| 357 |
+
gr.update(value=None), gr.update(value=None),
|
| 358 |
+
gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"),
|
| 359 |
+
)
|
| 360 |
+
confirm_btn.click(
|
| 361 |
+
process_answer,
|
| 362 |
+
[stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
|
| 363 |
+
[interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
|
| 364 |
+
).then(
|
| 365 |
+
lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
|
| 366 |
+
)
|
| 367 |
+
|
| 368 |
+
demo.launch()
|
rag.py  ADDED
@@ -0,0 +1,307 @@
import os
import re
import json
import time
import random
import logging
import traceback
from collections import defaultdict
from enum import Enum
from typing import Dict

# --- .env for secrets ---
from dotenv import load_dotenv

# --- LangChain & Hugging Face ---
# Note: Some of these imports might be from older versions of LangChain.
# Ensure your dependencies match.
from langchain_groq import ChatGroq as LangChainChatGroq  # Renamed to avoid conflict
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from huggingface_hub import login

# --- Qdrant Vector DB ---
from qdrant_client import QdrantClient
from qdrant_client.http.models import (
    VectorParams, Distance, Filter, FieldCondition, MatchValue,
    PointStruct
)

# --- Models, Embeddings, and Utilities ---
import cohere
from sentence_transformers import SentenceTransformer
import torch
from transformers import (
    pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
)

# --- Utility ---
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from textwrap import dedent
import requests
from docx import Document
import textract
from PyPDF2 import PdfReader

+
# ==============================================================================
|
| 53 |
+
# 1. SCRIPT CONFIGURATION
|
| 54 |
+
# ==============================================================================
|
| 55 |
+
# Configure logging
|
| 56 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 57 |
+
|
| 58 |
+
# --- Hugging Face Model for Local Evaluation ---
|
| 59 |
+
JUDGE_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 60 |
+
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
|
| 61 |
+
QDRANT_COLLECTION_NAME = "interview_questions"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# ==============================================================================
|
| 65 |
+
# 2. API AND ENVIRONMENT HANDLING
|
| 66 |
+
# ==============================================================================
|
| 67 |
+
def handle_apis():
|
| 68 |
+
"""
|
| 69 |
+
Loads API keys from a .env file, validates them, and logs into Hugging Face.
|
| 70 |
+
|
| 71 |
+
This function is the single entry point for handling all external secrets.
|
| 72 |
+
It will raise a ValueError if any required key is not found, stopping the
|
| 73 |
+
script from running with a misconfiguration.
|
| 74 |
+
"""
|
| 75 |
+
load_dotenv()
|
| 76 |
+
logging.info("Attempting to load API keys from .env file...")
|
| 77 |
+
|
| 78 |
+
required_vars = [
|
| 79 |
+
"GROQ_API_KEY",
|
| 80 |
+
"QDRANT_API_KEY",
|
| 81 |
+
"QDRANT_API_URL",
|
| 82 |
+
"COHERE_API_KEY",
|
| 83 |
+
"HF_API_KEY"
|
| 84 |
+
]
|
| 85 |
+
missing_vars = [var for var in required_vars if not os.getenv(var)]
|
| 86 |
+
|
| 87 |
+
if missing_vars:
|
| 88 |
+
error_message = (
|
| 89 |
+
f"Error: Missing required environment variables: {', '.join(missing_vars)}. "
|
| 90 |
+
"Please create a .env file in the root directory with all necessary keys."
|
| 91 |
+
)
|
| 92 |
+
logging.critical(error_message)
|
| 93 |
+
raise ValueError(error_message)
|
| 94 |
+
|
| 95 |
+
logging.info("✅ Successfully loaded and validated all required API keys.")
|
| 96 |
+
|
| 97 |
+
try:
|
| 98 |
+
hf_api_key = os.getenv("HF_API_KEY")
|
| 99 |
+
login(token=hf_api_key)
|
| 100 |
+
logging.info("✅ Successfully logged into Hugging Face Hub.")
|
| 101 |
+
except Exception as e:
|
| 102 |
+
error_message = f"Failed to log in to Hugging Face Hub. Please check your HF_API_KEY. Error: {e}"
|
| 103 |
+
logging.critical(error_message)
|
| 104 |
+
raise RuntimeError(error_message)
|
| 105 |
+
|
| 106 |
+
# --- Run the API handler at the start of the script ---
|
| 107 |
+
handle_apis()
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# ==============================================================================
|
| 111 |
+
# 3. INITIALIZE API CLIENTS AND MODELS
|
| 112 |
+
# ==============================================================================
|
| 113 |
+
# --- Load API keys from environment (now that they are validated) ---
|
| 114 |
+
chat_groq_api = os.getenv("GROQ_API_KEY")
|
| 115 |
+
qdrant_api = os.getenv("QDRANT_API_KEY")
|
| 116 |
+
qdrant_url = os.getenv("QDRANT_API_URL")
|
| 117 |
+
cohere_api_key = os.getenv("COHERE_API_KEY")
|
| 118 |
+
|
| 119 |
+
# --- Initialize API Clients ---
|
| 120 |
+
logging.info("Initializing API clients...")
|
| 121 |
+
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api)
|
| 122 |
+
cohere_client = cohere.Client(api_key=cohere_api_key)
|
| 123 |
+
logging.info("✅ API clients initialized.")
|
| 124 |
+
|
| 125 |
+
|
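# Illustrative sketch (not part of the committed file): handle_apis() expects a .env file
# at the repository root, which the new .gitignore keeps out of version control. Its
# layout, with placeholders standing in for real secrets:
#
#   GROQ_API_KEY=<your-groq-key>
#   QDRANT_API_KEY=<your-qdrant-key>
#   QDRANT_API_URL=<your-qdrant-instance-url>
#   COHERE_API_KEY=<your-cohere-key>
#   HF_API_KEY=<your-hugging-face-token>
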
# --- Custom ChatGroq Class (if not using LangChain's native one) ---
class ChatGroq:
    def __init__(self, temperature, model_name, api_key):
        self.temperature = temperature
        self.model_name = model_name
        self.api_key = api_key
        self.api_url = "https://api.groq.com/openai/v1/chat/completions"

    def predict(self, prompt):
        try:
            headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
            payload = {
                "model": self.model_name,
                "messages": [{"role": "system", "content": "You are an AI interviewer."},
                             {"role": "user", "content": prompt}],
                "temperature": self.temperature,
                "max_tokens": 1024  # Increased for longer reports
            }
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=20)
            response.raise_for_status()
            data = response.json()
            if "choices" in data and len(data["choices"]) > 0:
                return data["choices"][0]["message"]["content"].strip()
            logging.warning("Unexpected response structure from Groq API")
            return "Interviewer: Could you tell me more about your relevant experience?"
        except requests.exceptions.RequestException as e:
            logging.error(f"ChatGroq API error: {e}")
            return "Interviewer: Due to a system issue, let's move on to another question."

groq_llm = ChatGroq(temperature=0.7, model_name="llama3-70b-8192", api_key=chat_groq_api)

# --- Initialize Local Models (Embeddings and Judge LLM) ---
logging.info("Loading local models. This may take a while...")

# Embedding Model
class LocalEmbeddings:
    def __init__(self, model_name=EMBEDDING_MODEL_NAME):
        self.model = SentenceTransformer(model_name)

    def embed_query(self, text):
        return self.model.encode(text).tolist()

    def embed_documents(self, documents):
        return self.model.encode(documents).tolist()

embeddings = LocalEmbeddings()

# Judge LLM (with quantization for lower memory usage)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# use_auth_token is deprecated; the token is now passed via login()
mistral_tokenizer = AutoTokenizer.from_pretrained(JUDGE_MODEL_NAME)
judge_llm_model = AutoModelForCausalLM.from_pretrained(
    JUDGE_MODEL_NAME,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto"
)

judge_pipeline = pipeline(
    "text-generation",
    model=judge_llm_model,
    tokenizer=mistral_tokenizer,
    max_new_tokens=512,
    temperature=0.2,
    top_p=0.95,
    do_sample=True,
    repetition_penalty=1.15,
)
logging.info("✅ All models and clients are ready.")

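# Illustrative usage of the two local components above (not part of the committed file;
# assumes the models loaded successfully). all-MiniLM-L6-v2 produces 384-dimensional
# embeddings, and judge_pipeline follows the standard transformers text-generation
# interface (a list of dicts carrying "generated_text"):
#
#   vec = embeddings.embed_query("Explain the bias-variance trade-off.")
#   len(vec)                      # -> 384
#   out = judge_pipeline("Rate this interview question from 1 to 5: 'What is overfitting?'")
#   out[0]["generated_text"]      # -> the judge model's free-text rating
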
# ==============================================================================
# 4. CORE APPLICATION LOGIC AND FUNCTIONS
# ==============================================================================

# --- The rest of your functions go here, unchanged. ---
# e.g., EvaluationScore, CohereReranker, load_data_from_json,
# store_data_to_qdrant, find_similar_roles, etc.
# ... (All your other functions from the original script) ...
# I will include them for completeness.

class EvaluationScore(str, Enum):
    POOR = "Poor"
    MEDIUM = "Medium"
    GOOD = "Good"
    EXCELLENT = "Excellent"

class CohereReranker:
    def __init__(self, client):
        self.client = client

    def compress_documents(self, documents, query):
        # ... function code ...
        pass

reranker = CohereReranker(cohere_client)

def load_data_from_json(file_path):
    # ... function code ...
    pass

def verify_qdrant_collection(collection_name=QDRANT_COLLECTION_NAME):
    # ... function code ...
    pass

def store_data_to_qdrant(data, collection_name=QDRANT_COLLECTION_NAME, batch_size=100):
    # ... function code ...
    pass

def find_similar_roles(user_role, all_roles, top_k=3):
    # ... function code ...
    pass

def get_role_questions(job_role):
    # ... function code ...
    pass

def retrieve_interview_data(job_role, all_roles):
    # ... function code ...
    pass

def random_context_chunks(retrieved_data, k=3):
    # ... function code ...
    pass

def eval_question_quality(question: str, job_role: str, seniority: str, judge_pipeline=judge_pipeline):
    # ... function code ...
    pass

def generate_reference_answer(question, job_role, seniority):
    # ... function code ...
    pass

def evaluate_answer(question: str, answer: str, ref_answer: str, job_role: str, seniority: str, judge_pipeline=judge_pipeline):
    # ... function code ...
    pass

def build_interview_prompt(conversation_history, user_response, context, job_role, skills, seniority, difficulty_adjustment=None):
    # ... function code ...
    pass

def generate_llm_interview_report(interview_state, job_role, seniority):
    # ... function code ...
    pass

def extract_candidate_details(file_path):
    # ... function code ...
    pass

def extract_job_details(job_description):
    # ... function code ...
    pass

def extract_all_roles_from_qdrant(collection_name=QDRANT_COLLECTION_NAME):
    # ... function code ...
    pass


# Example of how to run (for testing purposes)
if __name__ == '__main__':
    logging.info("Starting a test run...")
    try:
        all_roles = extract_all_roles_from_qdrant()
        if not all_roles:
            logging.warning("No roles found in Qdrant. Using a default list for testing.")
            all_roles = ['data scientist', 'machine learning engineer', 'software engineer']

        job_role = "ml engineer"  # intentionally misspelled
        qa_pairs = retrieve_interview_data(job_role, all_roles)

        if qa_pairs:
            logging.info(f"Successfully retrieved {len(qa_pairs)} QA pairs for role '{job_role}'.")
            # print("First QA pair:", qa_pairs[0])
        else:
            logging.error(f"Could not retrieve any QA pairs for role '{job_role}'.")

    except Exception as e:
        logging.critical(f"A critical error occurred during the test run: {e}", exc_info=True)
requirements.txt  ADDED
@@ -0,0 +1,50 @@
# Core ML/AI
transformers
sentence-transformers
bitsandbytes
accelerate

# Bark TTS (latest from GitHub)
git+https://github.com/suno-ai/bark.git

# OpenAI Whisper (latest from GitHub)
git+https://github.com/openai/whisper.git

# Audio
soundfile
sounddevice
pyaudio
ffmpeg-python

# TTS
TTS
gtts

# STT
whisper

# NLP & LLM Tools
langchain
langchain_community
langchain_groq
langchain_huggingface
llama-index
cohere

# Vector DB
qdrant_client

# UI
gradio

# File Parsing & Input
textract
PyPDF2
python-docx

# Utility
inputimeout
fuzzywuzzy
numpy==1.24
opencv-python==4.7.0.72
pip==23.3.1