Spaces: Build error

madi7a committed · d90a0a5 · 1 Parent(s): 56bb51e

feat: Add core application files and correct gitignore
Files changed:
- .gitattributes +1 -0
- .gitignore +5 -0
- app.py +368 -0
- rag.py +307 -0
- requirements.txt +50 -0
.gitattributes  CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+
.gitignore  ADDED
@@ -0,0 +1,5 @@
# Ignore environment variables file
.env

# Python cache
__pycache__/
app.py  ADDED
@@ -0,0 +1,368 @@
import os
import json
import time
import shutil
import tempfile

import numpy as np
import scipy.io.wavfile as wavfile
import cv2
import torch
import gradio as gr
import whisper
import librosa
from moviepy.editor import VideoFileClip
from transformers import (
    AutoProcessor, BarkModel,
    Wav2Vec2Processor, Wav2Vec2ForSequenceClassification,
)
from deepface import DeepFace

# LLM, prompt-building, evaluation, and CV-parsing helpers defined in rag.py
from rag import (
    groq_llm, build_interview_prompt, eval_question_quality,
    generate_reference_answer, evaluate_answer,
    extract_candidate_details, extract_job_details,
)

# Bark TTS
model_bark = BarkModel.from_pretrained("suno/bark")
processor_bark = AutoProcessor.from_pretrained("suno/bark")
model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
bark_voice_preset = "v2/en_speaker_6"

def bark_tts(text):
    inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
    inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
    speech_values = model_bark.generate(**inputs)
    speech = speech_values.cpu().numpy().squeeze()
    speech = (speech * 32767).astype(np.int16)
    temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    wavfile.write(temp_wav.name, 22050, speech)
    return temp_wav.name

# Whisper STT
whisper_model = whisper.load_model("base")

def whisper_stt(audio_path):
    if not audio_path or not os.path.exists(audio_path):
        return ""
    result = whisper_model.transcribe(audio_path)
    return result["text"]

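# Quick round-trip check of the two speech helpers above (illustrative sketch, not part of
# the committed file; assumes the Bark and Whisper models loaded successfully and that
# ffmpeg is available for Whisper's audio decoding):
#
#   wav_path = bark_tts("Welcome to the interview. Please introduce yourself.")
#   text = whisper_stt(wav_path)   # should roughly echo the sentence above
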
# DeepFace (Video Face Emotion)
def ensure_mp4(video_input):
    # video_input could be a file-like object, a path, or a Gradio temp path
    if isinstance(video_input, str):
        input_path = video_input
    else:
        # It's a file-like object (rare for Gradio video, but handle it)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
            temp_in.write(video_input.read())
            input_path = temp_in.name

    # If already mp4, return as is
    if input_path.endswith(".mp4"):
        return input_path

    # Convert to mp4 using moviepy
    mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    try:
        clip = VideoFileClip(input_path)
        clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
        clip.close()
    except Exception as e:
        print("Video conversion failed:", e)
        # As a fallback, just copy the original
        shutil.copy(input_path, mp4_path)
    return mp4_path

def analyze_video_emotions(video_input, sample_rate=15):
    # Convert input to an mp4 file OpenCV can process
    mp4_path = ensure_mp4(video_input)
    if not mp4_path or not os.path.exists(mp4_path):
        return "neutral"
    cap = cv2.VideoCapture(mp4_path)
    frame_count = 0
    emotion_counts = {}
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Run DeepFace only on every `sample_rate`-th frame to keep analysis tractable
        if frame_count % sample_rate == 0:
            try:
                result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
                dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
                emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
            except Exception:
                pass
        frame_count += 1
    cap.release()
    if not emotion_counts:
        return "neutral"
    # Return the most frequently detected emotion across the sampled frames
    return max(emotion_counts.items(), key=lambda x: x[1])[0]

# Wav2Vec2 (voice emotion)
wav2vec_model_name = "HaniaRuby/speech-emotion-recognition-wav2vec2"
wav2vec_processor = Wav2Vec2Processor.from_pretrained(wav2vec_model_name)
wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(wav2vec_model_name)
wav2vec_model.eval()
voice_label_map = {
    0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
    4: 'neutral', 5: 'sad', 6: 'surprise'
}

def analyze_audio_emotion(audio_path):
    if not audio_path or not os.path.exists(audio_path):
        return "neutral"
    speech, sr = librosa.load(audio_path, sr=16000)
    inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        logits = wav2vec_model(**inputs).logits
    probs = torch.nn.functional.softmax(logits, dim=-1)
    predicted_id = torch.argmax(probs, dim=-1).item()
    return voice_label_map.get(predicted_id, "neutral")

# --- Effective confidence calculation ---
def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
    """Blend voice emotion, face emotion, and answer quality into one confidence score."""
    emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3,
                   "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
    answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
    voice_score = emotion_map.get(voice_label, 0.5)
    face_score = emotion_map.get(face_label, 0.5)
    answer_score = answer_score_map.get(answer_score_label, 0.5)
    avg_emotion = (voice_score + face_score) / 2
    control_bonus = max(0, answer_score - avg_emotion) * k
    eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
    return {
        "effective_confidence": round(eff_conf, 3),
        "answer_score": round(answer_score, 2),
        "voice_score": round(voice_score, 2),
        "face_score": round(face_score, 2),
        "control_bonus": round(control_bonus, 3),
    }

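# Worked example of the weighting above (illustrative, not part of the committed file),
# for a "good" answer delivered with a happy voice and a neutral face:
#   voice_score = 0.9, face_score = 0.6, answer_score = 0.8
#   avg_emotion   = (0.9 + 0.6) / 2          = 0.75
#   control_bonus = max(0, 0.8 - 0.75) * 0.2 = 0.01
#   eff_conf = 0.5*0.8 + 0.22*0.9 + 0.18*0.6 + 0.1*0.01 = 0.707
# So interpret_confidence("happy", "neutral", "good") returns
#   {'effective_confidence': 0.707, 'answer_score': 0.8, 'voice_score': 0.9,
#    'face_score': 0.6, 'control_bonus': 0.01}
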
seniority_mapping = {
    "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
}


# --- 2. Gradio App ---

+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 145 |
+
user_data = gr.State({})
|
| 146 |
+
interview_state = gr.State({})
|
| 147 |
+
missing_fields_state = gr.State([])
|
| 148 |
+
|
| 149 |
+
# --- UI Layout ---
|
| 150 |
+
with gr.Column(visible=True) as user_info_section:
|
| 151 |
+
gr.Markdown("## Candidate Information")
|
| 152 |
+
cv_file = gr.File(label="Upload CV")
|
| 153 |
+
job_desc = gr.Textbox(label="Job Description")
|
| 154 |
+
start_btn = gr.Button("Continue", interactive=False)
|
| 155 |
+
|
| 156 |
+
with gr.Column(visible=False) as missing_section:
|
| 157 |
+
gr.Markdown("## Missing Information")
|
| 158 |
+
name_in = gr.Textbox(label="Name", visible=False)
|
| 159 |
+
role_in = gr.Textbox(label="Job Role", visible=False)
|
| 160 |
+
seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
|
| 161 |
+
skills_in = gr.Textbox(label="Skills", visible=False)
|
| 162 |
+
submit_btn = gr.Button("Submit", interactive=False)
|
| 163 |
+
|
| 164 |
+
with gr.Column(visible=False) as interview_pre_section:
|
| 165 |
+
pre_interview_greeting_md = gr.Markdown()
|
| 166 |
+
start_interview_final_btn = gr.Button("Start Interview")
|
| 167 |
+
|
| 168 |
+
with gr.Column(visible=False) as interview_section:
|
| 169 |
+
gr.Markdown("## Interview in Progress")
|
| 170 |
+
question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
|
| 171 |
+
question_text = gr.Markdown()
|
| 172 |
+
user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
|
| 173 |
+
user_video_input = gr.Video(sources=["webcam"], label="2. Record Video Answer")
|
| 174 |
+
stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
|
| 175 |
+
confirm_btn = gr.Button("Confirm Answer")
|
| 176 |
+
evaluation_display = gr.Markdown()
|
| 177 |
+
emotion_display = gr.Markdown()
|
| 178 |
+
interview_summary = gr.Markdown(visible=False)
|
| 179 |
+
|
| 180 |
+
# --- UI Logic ---
|
| 181 |
+
|
| 182 |
+
def validate_start_btn(cv_file, job_desc):
|
| 183 |
+
return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
|
| 184 |
+
cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
|
| 185 |
+
job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
|
| 186 |
+
|
| 187 |
+
def process_and_route_initial(cv_file, job_desc):
|
| 188 |
+
details = extract_candidate_details(cv_file.name)
|
| 189 |
+
job_info = extract_job_details(job_desc)
|
| 190 |
+
data = {
|
| 191 |
+
"name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
|
| 192 |
+
"seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
|
| 193 |
+
}
|
| 194 |
+
missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
|
| 195 |
+
if missing:
|
| 196 |
+
return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
|
| 197 |
+
else:
|
| 198 |
+
greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
|
| 199 |
+
return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
|
| 200 |
+
start_btn.click(
|
| 201 |
+
process_and_route_initial,
|
| 202 |
+
[cv_file, job_desc],
|
| 203 |
+
[user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
|
| 204 |
+
)
|
| 205 |
+
|
| 206 |
+
def show_missing(missing):
|
| 207 |
+
if missing is None: missing = []
|
| 208 |
+
return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
|
| 209 |
+
missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
|
| 210 |
+
|
| 211 |
+
def validate_fields(name, role, seniority, skills, missing):
|
| 212 |
+
if not missing: return gr.update(interactive=False)
|
| 213 |
+
all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
|
| 214 |
+
return gr.update(interactive=all_filled)
|
| 215 |
+
for inp in [name_in, role_in, seniority_in, skills_in]:
|
| 216 |
+
inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
|
| 217 |
+
|
| 218 |
+
def complete_manual(data, name, role, seniority, skills):
|
| 219 |
+
if data["name"].lower() == "unknown": data["name"] = name
|
| 220 |
+
if data["job_role"].lower() == "unknown": data["job_role"] = role
|
| 221 |
+
if data["seniority"].lower() == "unknown": data["seniority"] = seniority
|
| 222 |
+
if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
|
| 223 |
+
greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
|
| 224 |
+
return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
|
| 225 |
+
submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
|
| 226 |
+
|
| 227 |
+
def start_interview(data):
|
| 228 |
+
# --- Advanced state with full logging ---
|
| 229 |
+
state = {
|
| 230 |
+
"questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
|
| 231 |
+
"question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
|
| 232 |
+
"conversation_history": [],
|
| 233 |
+
"difficulty_adjustment": None,
|
| 234 |
+
"question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
|
| 235 |
+
"log": []
|
| 236 |
+
}
|
| 237 |
+
# --- Optionally: context retrieval here (currently just blank) ---
|
| 238 |
+
context = ""
|
| 239 |
+
prompt = build_interview_prompt(
|
| 240 |
+
conversation_history=[], user_response="", context=context, job_role=data["job_role"],
|
| 241 |
+
skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
|
| 242 |
+
voice_label="neutral", face_label="neutral"
|
| 243 |
+
)
|
| 244 |
+
first_q = groq_llm.predict(prompt)
|
| 245 |
+
# Evaluate Q for quality
|
| 246 |
+
q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
|
| 247 |
+
state["questions"].append(first_q)
|
| 248 |
+
state["question_evaluations"].append(q_eval)
|
| 249 |
+
state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
|
| 250 |
+
audio_path = bark_tts(first_q)
|
| 251 |
+
# LOG
|
| 252 |
+
state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
|
| 253 |
+
return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
|
| 254 |
+
start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
|
| 255 |
+
|
| 256 |
+
def transcribe(audio_path):
|
| 257 |
+
return whisper_stt(audio_path)
|
| 258 |
+
user_audio_input.change(transcribe, user_audio_input, stt_transcript)
|
| 259 |
+
|
| 260 |
+
def process_answer(transcript, audio_path, video_path, state, data):
|
| 261 |
+
if not transcript and not video_path:
|
| 262 |
+
return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
|
| 263 |
+
elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
|
| 264 |
+
state["timings"].append(elapsed)
|
| 265 |
+
state["answers"].append(transcript)
|
| 266 |
+
state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
|
| 267 |
+
|
| 268 |
+
# --- 1. Emotion analysis ---
|
| 269 |
+
voice_label = analyze_audio_emotion(audio_path)
|
| 270 |
+
face_label = analyze_video_emotions(video_path)
|
| 271 |
+
state["voice_labels"].append(voice_label)
|
| 272 |
+
state["face_labels"].append(face_label)
|
| 273 |
+
|
| 274 |
+
# --- 2. Evaluate previous Q and Answer ---
|
| 275 |
+
last_q = state["questions"][-1]
|
| 276 |
+
q_eval = state["question_evaluations"][-1] # Already in state
|
| 277 |
+
ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
|
| 278 |
+
answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
|
| 279 |
+
state["answer_evaluations"].append(answer_eval)
|
| 280 |
+
answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
|
| 281 |
+
|
| 282 |
+
# --- 3. Adaptive difficulty ---
|
| 283 |
+
if answer_score == "excellent":
|
| 284 |
+
state["difficulty_adjustment"] = "harder"
|
| 285 |
+
elif answer_score in ("medium", "poor"):
|
| 286 |
+
state["difficulty_adjustment"] = "easier"
|
| 287 |
+
else:
|
| 288 |
+
state["difficulty_adjustment"] = None
|
| 289 |
+
|
| 290 |
+
# --- 4. Effective confidence ---
|
| 291 |
+
eff_conf = interpret_confidence(voice_label, face_label, answer_score)
|
| 292 |
+
state["effective_confidences"].append(eff_conf)
|
| 293 |
+
|
| 294 |
+
# --- LOG ---
|
| 295 |
+
state["log"].append({
|
| 296 |
+
"type": "answer",
|
| 297 |
+
"question": last_q,
|
| 298 |
+
"answer": transcript,
|
| 299 |
+
"answer_eval": answer_eval,
|
| 300 |
+
"ref_answer": ref_answer,
|
| 301 |
+
"face_label": face_label,
|
| 302 |
+
"voice_label": voice_label,
|
| 303 |
+
"effective_confidence": eff_conf,
|
| 304 |
+
"timing": elapsed,
|
| 305 |
+
"timestamp": time.time()
|
| 306 |
+
})
|
| 307 |
+
|
| 308 |
+
# --- Next or End ---
|
| 309 |
+
qidx = state["question_idx"] + 1
|
| 310 |
+
if qidx >= state["max_questions"]:
|
| 311 |
+
# Save as JSON (optionally)
|
| 312 |
+
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
| 313 |
+
log_file = f"interview_log_{timestamp}.json"
|
| 314 |
+
with open(log_file, "w", encoding="utf-8") as f:
|
| 315 |
+
json.dump(state["log"], f, indent=2, ensure_ascii=False)
|
| 316 |
+
# Report
|
| 317 |
+
summary = "# Interview Summary\n"
|
| 318 |
+
for i, q in enumerate(state["questions"]):
|
| 319 |
+
summary += (f"\n### Q{i + 1}: {q}\n"
|
| 320 |
+
f"- *Answer*: {state['answers'][i]}\n"
|
| 321 |
+
f"- *Q Eval*: {state['question_evaluations'][i]}\n"
|
| 322 |
+
f"- *A Eval*: {state['answer_evaluations'][i]}\n"
|
| 323 |
+
f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
|
| 324 |
+
f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
|
| 325 |
+
f"- *Time*: {state['timings'][i]}s\n")
|
| 326 |
+
summary += f"\n\n⏺ Full log saved as {log_file}."
|
| 327 |
+
return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"))
|
| 328 |
+
else:
|
| 329 |
+
# --- Build next prompt using adaptive difficulty ---
|
| 330 |
+
state["question_idx"] = qidx
|
| 331 |
+
state["q_start_time"] = time.time()
|
| 332 |
+
context = "" # You can add your context logic here
|
| 333 |
+
prompt = build_interview_prompt(
|
| 334 |
+
conversation_history=state["conversation_history"],
|
| 335 |
+
user_response=transcript,
|
| 336 |
+
context=context,
|
| 337 |
+
job_role=data["job_role"],
|
| 338 |
+
skills=data["skills"],
|
| 339 |
+
seniority=data["seniority"],
|
| 340 |
+
difficulty_adjustment=state["difficulty_adjustment"],
|
| 341 |
+
face_label=face_label,
|
| 342 |
+
voice_label=voice_label,
|
| 343 |
+
effective_confidence=eff_conf
|
| 344 |
+
)
|
| 345 |
+
next_q = groq_llm.predict(prompt)
|
| 346 |
+
# Evaluate Q quality
|
| 347 |
+
q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
|
| 348 |
+
state["questions"].append(next_q)
|
| 349 |
+
state["question_evaluations"].append(q_eval)
|
| 350 |
+
state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
|
| 351 |
+
state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
|
| 352 |
+
audio_path = bark_tts(next_q)
|
| 353 |
+
# Display evaluations
|
| 354 |
+
eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
|
| 355 |
+
return (
|
| 356 |
+
state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
|
| 357 |
+
gr.update(value=None), gr.update(value=None),
|
| 358 |
+
gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"),
|
| 359 |
+
)
|
| 360 |
+
confirm_btn.click(
|
| 361 |
+
process_answer,
|
| 362 |
+
[stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
|
| 363 |
+
[interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
|
| 364 |
+
).then(
|
| 365 |
+
lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
|
| 366 |
+
)
|
| 367 |
+
|
| 368 |
+
demo.launch()
|
rag.py  ADDED
@@ -0,0 +1,307 @@
import os
import re
import json
import time
import random
import logging
import traceback
from collections import defaultdict
from enum import Enum
from typing import Dict

# --- .env for secrets ---
from dotenv import load_dotenv

# --- LangChain & Hugging Face ---
# Note: Some of these imports might be from older versions of LangChain.
# Ensure your dependencies match.
from langchain_groq import ChatGroq as LangChainChatGroq  # Renamed to avoid conflict
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from huggingface_hub import login

# --- Qdrant Vector DB ---
from qdrant_client import QdrantClient
from qdrant_client.http.models import (
    VectorParams, Distance, Filter, FieldCondition, MatchValue,
    PointStruct
)

# --- Models, Embeddings, and Utilities ---
import cohere
from sentence_transformers import SentenceTransformer
import torch
from transformers import (
    pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
)

# --- Utility ---
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from textwrap import dedent
import requests
from docx import Document
import textract
from PyPDF2 import PdfReader

+
# ==============================================================================
|
| 53 |
+
# 1. SCRIPT CONFIGURATION
|
| 54 |
+
# ==============================================================================
|
| 55 |
+
# Configure logging
|
| 56 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 57 |
+
|
| 58 |
+
# --- Hugging Face Model for Local Evaluation ---
|
| 59 |
+
JUDGE_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 60 |
+
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
|
| 61 |
+
QDRANT_COLLECTION_NAME = "interview_questions"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# ==============================================================================
|
| 65 |
+
# 2. API AND ENVIRONMENT HANDLING
|
| 66 |
+
# ==============================================================================
|
| 67 |
+
def handle_apis():
|
| 68 |
+
"""
|
| 69 |
+
Loads API keys from a .env file, validates them, and logs into Hugging Face.
|
| 70 |
+
|
| 71 |
+
This function is the single entry point for handling all external secrets.
|
| 72 |
+
It will raise a ValueError if any required key is not found, stopping the
|
| 73 |
+
script from running with a misconfiguration.
|
| 74 |
+
"""
|
| 75 |
+
load_dotenv()
|
| 76 |
+
logging.info("Attempting to load API keys from .env file...")
|
| 77 |
+
|
| 78 |
+
required_vars = [
|
| 79 |
+
"GROQ_API_KEY",
|
| 80 |
+
"QDRANT_API_KEY",
|
| 81 |
+
"QDRANT_API_URL",
|
| 82 |
+
"COHERE_API_KEY",
|
| 83 |
+
"HF_API_KEY"
|
| 84 |
+
]
|
| 85 |
+
missing_vars = [var for var in required_vars if not os.getenv(var)]
|
| 86 |
+
|
| 87 |
+
if missing_vars:
|
| 88 |
+
error_message = (
|
| 89 |
+
f"Error: Missing required environment variables: {', '.join(missing_vars)}. "
|
| 90 |
+
"Please create a .env file in the root directory with all necessary keys."
|
| 91 |
+
)
|
| 92 |
+
logging.critical(error_message)
|
| 93 |
+
raise ValueError(error_message)
|
| 94 |
+
|
| 95 |
+
logging.info("✅ Successfully loaded and validated all required API keys.")
|
| 96 |
+
|
| 97 |
+
try:
|
| 98 |
+
hf_api_key = os.getenv("HF_API_KEY")
|
| 99 |
+
login(token=hf_api_key)
|
| 100 |
+
logging.info("✅ Successfully logged into Hugging Face Hub.")
|
| 101 |
+
except Exception as e:
|
| 102 |
+
error_message = f"Failed to log in to Hugging Face Hub. Please check your HF_API_KEY. Error: {e}"
|
| 103 |
+
logging.critical(error_message)
|
| 104 |
+
raise RuntimeError(error_message)
|
| 105 |
+
|
| 106 |
+
# --- Run the API handler at the start of the script ---
|
| 107 |
+
handle_apis()
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# ==============================================================================
|
| 111 |
+
# 3. INITIALIZE API CLIENTS AND MODELS
|
| 112 |
+
# ==============================================================================
|
| 113 |
+
# --- Load API keys from environment (now that they are validated) ---
|
| 114 |
+
chat_groq_api = os.getenv("GROQ_API_KEY")
|
| 115 |
+
qdrant_api = os.getenv("QDRANT_API_KEY")
|
| 116 |
+
qdrant_url = os.getenv("QDRANT_API_URL")
|
| 117 |
+
cohere_api_key = os.getenv("COHERE_API_KEY")
|
| 118 |
+
|
| 119 |
+
# --- Initialize API Clients ---
|
| 120 |
+
logging.info("Initializing API clients...")
|
| 121 |
+
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api)
|
| 122 |
+
cohere_client = cohere.Client(api_key=cohere_api_key)
|
| 123 |
+
logging.info("✅ API clients initialized.")
|
| 124 |
+
|
| 125 |
+
|
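# Illustrative sketch (not part of the committed file): handle_apis() expects a .env file
# at the repository root, which the new .gitignore keeps out of version control. Its
# layout, with placeholders standing in for real secrets:
#
#   GROQ_API_KEY=<your-groq-key>
#   QDRANT_API_KEY=<your-qdrant-key>
#   QDRANT_API_URL=<your-qdrant-instance-url>
#   COHERE_API_KEY=<your-cohere-key>
#   HF_API_KEY=<your-hugging-face-token>
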
# --- Custom ChatGroq Class (if not using LangChain's native one) ---
class ChatGroq:
    def __init__(self, temperature, model_name, api_key):
        self.temperature = temperature
        self.model_name = model_name
        self.api_key = api_key
        self.api_url = "https://api.groq.com/openai/v1/chat/completions"

    def predict(self, prompt):
        try:
            headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
            payload = {
                "model": self.model_name,
                "messages": [{"role": "system", "content": "You are an AI interviewer."},
                             {"role": "user", "content": prompt}],
                "temperature": self.temperature,
                "max_tokens": 1024  # Increased for longer reports
            }
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=20)
            response.raise_for_status()
            data = response.json()
            if "choices" in data and len(data["choices"]) > 0:
                return data["choices"][0]["message"]["content"].strip()
            logging.warning("Unexpected response structure from Groq API")
            return "Interviewer: Could you tell me more about your relevant experience?"
        except requests.exceptions.RequestException as e:
            logging.error(f"ChatGroq API error: {e}")
            return "Interviewer: Due to a system issue, let's move on to another question."

groq_llm = ChatGroq(temperature=0.7, model_name="llama3-70b-8192", api_key=chat_groq_api)

# --- Initialize Local Models (Embeddings and Judge LLM) ---
logging.info("Loading local models. This may take a while...")

# Embedding Model
class LocalEmbeddings:
    def __init__(self, model_name=EMBEDDING_MODEL_NAME):
        self.model = SentenceTransformer(model_name)

    def embed_query(self, text):
        return self.model.encode(text).tolist()

    def embed_documents(self, documents):
        return self.model.encode(documents).tolist()

embeddings = LocalEmbeddings()

# Judge LLM (with quantization for lower memory usage)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# use_auth_token is deprecated; the token is now passed via login()
mistral_tokenizer = AutoTokenizer.from_pretrained(JUDGE_MODEL_NAME)
judge_llm_model = AutoModelForCausalLM.from_pretrained(
    JUDGE_MODEL_NAME,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto"
)

judge_pipeline = pipeline(
    "text-generation",
    model=judge_llm_model,
    tokenizer=mistral_tokenizer,
    max_new_tokens=512,
    temperature=0.2,
    top_p=0.95,
    do_sample=True,
    repetition_penalty=1.15,
)
logging.info("✅ All models and clients are ready.")

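# Illustrative usage of the two local components above (not part of the committed file;
# assumes the models loaded successfully). all-MiniLM-L6-v2 produces 384-dimensional
# embeddings, and judge_pipeline follows the standard transformers text-generation
# interface (a list of dicts carrying "generated_text"):
#
#   vec = embeddings.embed_query("Explain the bias-variance trade-off.")
#   len(vec)                      # -> 384
#   out = judge_pipeline("Rate this interview question from 1 to 5: 'What is overfitting?'")
#   out[0]["generated_text"]      # -> the judge model's free-text rating
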
# ==============================================================================
# 4. CORE APPLICATION LOGIC AND FUNCTIONS
# ==============================================================================

# --- The rest of your functions go here, unchanged. ---
# e.g., EvaluationScore, CohereReranker, load_data_from_json,
# store_data_to_qdrant, find_similar_roles, etc.
# ... (All your other functions from the original script) ...
# I will include them for completeness.

class EvaluationScore(str, Enum):
    POOR = "Poor"
    MEDIUM = "Medium"
    GOOD = "Good"
    EXCELLENT = "Excellent"

class CohereReranker:
    def __init__(self, client):
        self.client = client

    def compress_documents(self, documents, query):
        # ... function code ...
        pass

reranker = CohereReranker(cohere_client)

def load_data_from_json(file_path):
    # ... function code ...
    pass

def verify_qdrant_collection(collection_name=QDRANT_COLLECTION_NAME):
    # ... function code ...
    pass

def store_data_to_qdrant(data, collection_name=QDRANT_COLLECTION_NAME, batch_size=100):
    # ... function code ...
    pass

def find_similar_roles(user_role, all_roles, top_k=3):
    # ... function code ...
    pass

def get_role_questions(job_role):
    # ... function code ...
    pass

def retrieve_interview_data(job_role, all_roles):
    # ... function code ...
    pass

def random_context_chunks(retrieved_data, k=3):
    # ... function code ...
    pass

def eval_question_quality(question: str, job_role: str, seniority: str, judge_pipeline=judge_pipeline):
    # ... function code ...
    pass

def generate_reference_answer(question, job_role, seniority):
    # ... function code ...
    pass

def evaluate_answer(question: str, answer: str, ref_answer: str, job_role: str, seniority: str, judge_pipeline=judge_pipeline):
    # ... function code ...
    pass

def build_interview_prompt(conversation_history, user_response, context, job_role, skills, seniority, difficulty_adjustment=None):
    # ... function code ...
    pass

def generate_llm_interview_report(interview_state, job_role, seniority):
    # ... function code ...
    pass

def extract_candidate_details(file_path):
    # ... function code ...
    pass

def extract_job_details(job_description):
    # ... function code ...
    pass

def extract_all_roles_from_qdrant(collection_name=QDRANT_COLLECTION_NAME):
    # ... function code ...
    pass


# Example of how to run (for testing purposes)
if __name__ == '__main__':
    logging.info("Starting a test run...")
    try:
        all_roles = extract_all_roles_from_qdrant()
        if not all_roles:
            logging.warning("No roles found in Qdrant. Using a default list for testing.")
            all_roles = ['data scientist', 'machine learning engineer', 'software engineer']

        job_role = "ml engineer"  # intentionally misspelled
        qa_pairs = retrieve_interview_data(job_role, all_roles)

        if qa_pairs:
            logging.info(f"Successfully retrieved {len(qa_pairs)} QA pairs for role '{job_role}'.")
            # print("First QA pair:", qa_pairs[0])
        else:
            logging.error(f"Could not retrieve any QA pairs for role '{job_role}'.")

    except Exception as e:
        logging.critical(f"A critical error occurred during the test run: {e}", exc_info=True)
requirements.txt  ADDED
@@ -0,0 +1,50 @@
# Core ML/AI
transformers
sentence-transformers
bitsandbytes
accelerate

# Bark TTS (latest from GitHub)
git+https://github.com/suno-ai/bark.git

# OpenAI Whisper (latest from GitHub)
git+https://github.com/openai/whisper.git

# Audio
soundfile
sounddevice
pyaudio
ffmpeg-python

# TTS
TTS
gtts

# STT
whisper

# NLP & LLM Tools
langchain
langchain_community
langchain_groq
langchain_huggingface
llama-index
cohere

# Vector DB
qdrant_client

# UI
gradio

# File Parsing & Input
textract
PyPDF2
python-docx

# Utility
inputimeout
fuzzywuzzy
numpy==1.24
opencv-python==4.7.0.72
pip==23.3.1