"""
Enhanced AI-Powered Fraud Detection Chatbot
Clean version with improved context awareness and error handling
"""

import json
import logging
import math
import os
import re
import time

import requests

class AIFraudChatbot:
    """Enhanced AI-powered chatbot for fraud detection system"""
    
    def __init__(self, api_key=None):
        # Clean API key
        if api_key:
            self.api_key = api_key.strip().strip('"').strip("'")
        else:
            env_key = os.environ.get("OPENROUTER_API_KEY", "")
            self.api_key = env_key.strip().strip('"').strip("'")
        
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"
        self.model = "meta-llama/llama-3.2-3b-instruct:free"
        
        # Track rate limiting
        self.last_request_time = 0
        self.min_request_interval = 5  # seconds between API calls
        
        # Built-in responses for common questions (fallback when API unavailable)
        self.builtin_responses = {
            "fraud_detection": """**How Our Fraud Detection Works** 🔍

Our system uses a **Hybrid Quantum-Classical approach**:

**1. Classical Component (80% weight)**
- XGBoost Gradient Boosting model
- Analyzes 10 key features: transaction amount, time, location patterns, etc.
- Fast inference (~1ms per transaction)

**2. Quantum Ensemble (20% weight)**
- **VQC (40%)**: Variational Quantum Classifier for pattern recognition
- **QAOA (30%)**: Quantum optimization for feature selection
- **QNN (30%)**: Quantum Neural Network for non-linear patterns

**Why Hybrid?** The quantum component excels at detecting subtle patterns that classical models might miss, while the classical model provides reliable baseline accuracy.""",

            "quantum_advantage": """**Quantum vs Classical: The Advantage** ⚛️

**Classical Models (XGBoost)**:
✅ High accuracy on structured data
✅ Fast training and inference
✅ Well-understood behavior
❌ May miss subtle correlations

**Quantum Components**:
✅ Detects complex entangled patterns
✅ Superior in high-dimensional feature spaces
✅ Finds correlations invisible to classical ML
❌ Currently limited by qubit count

**Our Hybrid Approach**:
- Uses classical model as the primary detector (80%)
- Quantum ensemble refines predictions (20%)
- Best of both worlds: reliability + pattern detection

**Real Impact**: The quantum component improves edge-case detection by catching fraud patterns that classical models alone would miss.""",

            "precision_recall": """**Understanding Precision & Recall** 📊

**Precision** = True Positives / (True Positives + False Positives)
- "When we flag fraud, how often are we right?"
- High precision = fewer false alarms

**Recall** = True Positives / (True Positives + False Negatives)
- "Of all actual fraud, how much do we catch?"
- High recall = fewer missed fraud cases

**The Trade-off**:
- 🔼 Increase threshold → Higher precision, lower recall
- 🔽 Decrease threshold → Higher recall, lower precision

**Business Impact**:
- **Low Precision**: Customer frustration from blocked legitimate transactions
- **Low Recall**: Financial losses from undetected fraud

**Recommendation**: Balance based on your risk tolerance. Most businesses target ~85% recall with >70% precision.""",

            "threshold": """**Optimizing the Fraud Threshold** 🎯

The threshold (default: 0.5) determines when a transaction is flagged as fraud.

**When to Lower Threshold (e.g., 0.4)**:
- Recall is too low (missing fraud)
- High-value transactions at risk
- Can handle more manual reviews

**When to Raise Threshold (e.g., 0.6)**:
- Too many false positives
- Customer complaints about blocks
- Need higher precision

**Testing Approach**:
1. Start with historical data
2. Test thresholds: 0.4, 0.45, 0.5, 0.55, 0.6
3. Calculate precision/recall for each
4. Choose based on your cost model:
   - Cost of fraud vs cost of false positive

**Current System**: Using 0.5 threshold with 80/20 classical/quantum weighting.""",

            "vqc_qaoa_qnn": """**Quantum Model Components Explained** ⚛️

**VQC - Variational Quantum Classifier (40% weight)**
- Uses parameterized quantum circuits
- Learns optimal qubit rotations during training
- Best for: Binary classification tasks
- 4 qubits, multiple rotation layers

**QAOA - Quantum Approximate Optimization (30% weight)**
- Solves optimization problems
- Finds optimal feature combinations
- Best for: Feature selection, pattern optimization
- 2 layers of mixing and cost operators

**QNN - Quantum Neural Network (30% weight)**
- Deep quantum circuits with entanglement
- Captures non-linear relationships
- Best for: Complex pattern recognition
- 3-layer architecture with strong entanglement

**Why These Three?**
Each captures different aspects of fraud patterns:
- VQC: Direct classification
- QAOA: Optimal feature weighting
- QNN: Hidden correlations""",

            "improve_accuracy": """**Improving Model Accuracy** 📈

**1. Data Quality**
- Ensure balanced dataset (fraud vs non-fraud)
- Handle missing values properly
- Feature normalization is applied

**2. Threshold Tuning**
- Current: 0.5 (default)
- Adjust based on precision/recall needs
- Use ROC curve analysis

**3. Feature Engineering**
- Transaction velocity (txns per hour)
- Geographic anomalies
- Merchant category patterns
- Time-based features (hour, day of week)

**4. Model Ensemble**
- Classical handles bulk cases well
- Quantum catches edge cases
- Current 80/20 split is optimized

**5. Regular Retraining**
- Fraud patterns evolve
- Retrain monthly with new data
- Monitor drift metrics

**Quick Wins**:
- Check for data imbalance
- Verify feature scaling
- Test different threshold values""",

            "false_positives": """**Reducing False Positives** 🚫

False positives occur when legitimate transactions are flagged as fraud.

**Common Causes**:
1. Threshold too low
2. Unusual but legitimate behavior
3. New customer patterns
4. Geographic false flags

**Solutions**:

**1. Raise Threshold**
- Current: 0.5 → Try 0.55 or 0.6
- Trade-off: May miss some fraud

**2. Customer Profiling**
- Build normal behavior baselines
- Flag only significant deviations

**3. Velocity Checks**
- Multiple small txns vs one large
- Time-based patterns

**4. Merchant Categories**
- Trust established merchants
- Scrutinize high-risk categories

**5. Two-Stage Review**
- Score 0.5-0.7: Soft flag (monitor)
- Score >0.7: Hard flag (block)

**Business Impact**: Each false positive costs customer trust. Balance carefully.""",

            "how_it_works": """**System Architecture Overview** 🏗️

**Data Flow**:
1. Transaction arrives → Feature extraction
2. Features scaled using StandardScaler
3. Parallel processing:
   - Classical model (XGBoost) → 80% weight
   - Quantum ensemble (VQC+QAOA+QNN) → 20% weight
4. Scores combined → Final prediction
5. Threshold comparison → Fraud/Safe label

**Tech Stack**:
- **Backend**: FastAPI (Python)
- **ML**: XGBoost, PennyLane (Quantum)
- **Frontend**: Next.js, React
- **Database**: 1.2M+ transactions

**Real-time Processing**:
- Inference time: ~50ms per transaction
- Quantum simulation on CPU (production would use QPU)
- Handles batch and streaming modes

**Models Location**: `/models/` directory
- `classical_model.joblib`: XGBoost
- `vqc_weights.npy`: VQC parameters
- `qaoa_weights.npy`: QAOA parameters
- `qnn_weights.npy`: QNN parameters"""
        }
        
        # System prompt with comprehensive instructions
        self.system_prompt = """You are an expert AI assistant for a Hybrid Quantum-Classical Fraud Detection System.

RESPONSE GUIDELINES:
- Be professional, concise, and actionable
- Use specific data from the current system when provided
- Explain technical concepts clearly without excessive jargon
- Focus on practical recommendations and insights
- Keep responses under 300 words
- Never fabricate statistics or provide broken formulas

YOUR EXPERTISE:
- Fraud detection performance analysis
- Quantum-classical hybrid architecture explanation
- Threshold optimization and tuning
- Model performance troubleshooting
- Pattern recognition in financial transactions

SYSTEM ARCHITECTURE:
- Hybrid Model: 80% Classical XGBoost + 20% Quantum Ensemble
- Quantum Components: VQC (40%) + QAOA (30%) + QNN (30%)
- Features: 10 classical features, 4 quantum-optimized features
- Real-time processing with configurable thresholds

KEY METRICS INTERPRETATION:
- Precision = TP/(TP+FP) - Accuracy of fraud predictions
- Recall = TP/(TP+FN) - Coverage of actual fraud cases
- High Accuracy + Low Precision = Too many false positives
- Low Recall = Missing real fraud cases

COMMON ISSUES & SOLUTIONS:
- 0% Precision: All flagged transactions are false positives → Increase threshold
- 0% Recall: Missing actual fraud → Decrease threshold or improve features
- Imbalanced data: Use weighted metrics and proper sampling

Always provide specific, actionable recommendations based on the current system state."""
    
    def _get_system_context(self, history):
        """Summarise the transaction history as a text report for the AI prompt.

        Args:
            history: Sequence of transaction dicts. Each entry is read with
                ``.get`` for ``is_fraud`` (ground-truth label, 1 = fraud),
                ``Prediction`` (the string ``'Fraud'`` means flagged) and the
                numeric ``Final_Score``, ``Quantum_Score`` and
                ``Classical_Score``. A missing or ``None`` value counts as 0.

        Returns:
            str: A one-line "ready" status when ``history`` is empty or None;
            otherwise a multi-line report with counts, accuracy / precision /
            recall / F1, the confusion matrix, average scores, the score range
            and, when both precision and recall are below 0.7, an extra
            threshold recommendation.
        """
        if not history:
            return "System Status: Initialized and ready. No transactions processed yet."

        total = len(history)

        # `or 0` maps an explicit None to the same 0 used for a missing key, so
        # a partially filled record cannot break sum()/min()/formatting below.
        true_labels = [t.get('is_fraud') or 0 for t in history]
        predictions = [1 if t.get('Prediction') == 'Fraud' else 0 for t in history]

        # Confusion matrix in one pass. Labels other than 0/1 are counted in
        # no cell, exactly as with separate equality tests per cell.
        tp = fp = fn = tn = 0
        for truth, flagged in zip(true_labels, predictions):
            if truth == 1:
                if flagged:
                    tp += 1
                else:
                    fn += 1
            elif truth == 0:
                if flagged:
                    fp += 1
                else:
                    tn += 1

        # Performance metrics (each ratio falls back to 0 on a zero denominator)
        accuracy = (tp + tn) / total
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        # Score analysis
        final_scores = [t.get('Final_Score') or 0 for t in history]
        quantum_scores = [t.get('Quantum_Score') or 0 for t in history]
        classical_scores = [t.get('Classical_Score') or 0 for t in history]

        avg_final = sum(final_scores) / total
        avg_quantum = sum(quantum_scores) / total
        avg_classical = sum(classical_scores) / total

        # Fraud analysis
        flagged_count = sum(predictions)
        actual_fraud_count = sum(true_labels)

        # Conditional fragments, computed up front to keep the template readable
        fp_flag = "⚠️ HIGH!" if fp > tp * 2 else ""
        fn_flag = "🚨 CRITICAL!" if fn > 0 else ""
        if precision < 0.5 or recall < 0.5:
            recommendation_line = '• RECOMMENDATION: Consider adjusting threshold based on precision/recall balance'
        else:
            recommendation_line = ''

        # Generate context summary
        context = f"""
CURRENT SYSTEM ANALYSIS:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

📊 TRANSACTION OVERVIEW:
• Total Processed: {total:,} transactions
• Flagged as Fraud: {flagged_count} ({flagged_count/total*100:.1f}% rate)
• Actual Fraud Cases: {actual_fraud_count} ({actual_fraud_count/total*100:.1f}% rate)

⚡ PERFORMANCE METRICS:
• Accuracy: {accuracy*100:.1f}% (Overall correctness)
• Precision: {precision*100:.1f}% (Fraud prediction accuracy)
• Recall: {recall*100:.1f}% (Fraud detection coverage)
• F1-Score: {f1:.3f} (Balanced performance measure)

🔍 DETECTION BREAKDOWN:
• True Positives (Correct Fraud): {tp}
• False Positives (Safe → Fraud): {fp} {fp_flag}
• False Negatives (Fraud → Safe): {fn} {fn_flag}
• True Negatives (Correct Safe): {tn}

⚛️ HYBRID MODEL SCORES:
• Average Final Score: {avg_final:.4f}
• Classical Component: {avg_classical:.4f} (80% weight)
• Quantum Ensemble: {avg_quantum:.4f} (20% weight)

🎯 THRESHOLD ANALYSIS:
• Current Threshold: 0.5 (configurable)
• Scores Range: {min(final_scores):.3f} - {max(final_scores):.3f}
{recommendation_line}

🔬 QUANTUM COMPONENT STATUS:
• VQC (Pattern Recognition): Active
• QAOA (Optimization): Active  
• QNN (Neural Processing): Active
"""

        # Both metrics weak: append concrete threshold suggestions
        if precision < 0.7 and recall < 0.7:
            context += """
🎯 THRESHOLD RECOMMENDATION:
Current threshold may be suboptimal. Consider:
• Lower threshold (0.45) to improve recall
• Higher threshold (0.65) to improve precision
• Current performance suggests class imbalance
"""

        return context
    
    def get_response(self, user_message, history=None):
        """Generate AI response with enhanced context and error handling"""
        
        # Try built-in response first (always available, no rate limits)
        builtin_response = self._get_builtin_response(user_message)
        
        # Rate limiting check
        current_time = time.time()
        if current_time - self.last_request_time < self.min_request_interval:
            if builtin_response:
                return builtin_response + "\n\n💡 *Response from built-in knowledge base*"
            else:
                wait_time = int(self.min_request_interval - (current_time - self.last_request_time))
                return f"⏳ Please wait {wait_time} seconds between questions to avoid rate limits."
        
        # Validate API key
        if not self.api_key or self.api_key == "" or not self.api_key.startswith("sk-or-v1-"):
            if builtin_response:
                return builtin_response + "\n\n💡 *Response from built-in knowledge base (AI service not configured)*"
            return """🤖 **AI Assistant Configuration Required**

To enable intelligent fraud analysis:

1. **Get Free API Key**: Visit [OpenRouter.ai](https://openrouter.ai)
2. **Configure**: Create a `.env` file with:
   ```
   OPENROUTER_API_KEY=sk-or-v1-your-key-here
   ```
3. **Restart**: The backend server

**Current Status**: Core fraud detection system is fully operational. Built-in responses available for common questions."""
        
        # Build context from system state
        system_context = self._get_system_context(history) if history else "System ready for analysis."
        
        # Construct conversation
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": f"""
CURRENT SYSTEM DATA:
{system_context}

USER QUESTION: {user_message}

Please provide a clear, specific response using the current system metrics. Focus on actionable insights and practical recommendations.
"""}
        ]
        
        try:
            self.last_request_time = current_time
            
            # Make API request
            response = requests.post(
                self.base_url,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                    "HTTP-Referer": "https://quantum-fraud-detector.local",
                    "X-Title": "Quantum Fraud Detection System"
                },
                json={
                    "model": self.model,
                    "messages": messages,
                    "max_tokens": 400,
                    "temperature": 0.3,
                    "top_p": 0.9,
                    "frequency_penalty": 0.2,
                    "presence_penalty": 0.1
                },
                timeout=30
            )
            
            # Handle successful response
            if response.status_code == 200:
                result = response.json()
                ai_response = result['choices'][0]['message']['content'].strip()
                ai_response = ai_response.replace('```', '').replace('**', '**')
                
                if any(keyword in user_message.lower() for keyword in ['quantum', 'vqc', 'qaoa', 'qnn']):
                    ai_response += "\n\n💡 **Quick Reference**: VQC = Variational Quantum Circuit, QAOA = Quantum Optimization, QNN = Quantum Neural Network"
                
                if any(keyword in user_message.lower() for keyword in ['precision', 'recall', 'f1']):
                    ai_response += "\n\n📊 **Metric Tip**: Balance precision (fraud accuracy) vs recall (fraud coverage) based on business priorities."
                
                return ai_response
                
            # Handle API errors - use builtin fallback
            elif response.status_code == 429:
                if builtin_response:
                    return builtin_response + "\n\n💡 *Response from built-in knowledge base (AI rate limited)*"
                return """⏰ **Rate Limit Exceeded**

Too many requests to the AI service. Try asking about:
• How fraud detection works
• Quantum vs classical advantage
• Precision and recall
• Threshold optimization

These topics have built-in responses available!"""
                
            elif response.status_code in [401, 402]:
                if builtin_response:
                    return builtin_response + "\n\n💡 *Response from built-in knowledge base*"
                return """🔐 **AI Service Issue**

API authentication failed. Built-in responses available for common questions about:
• Fraud detection methodology
• Quantum model components
• Performance optimization"""
                
            else:
                if builtin_response:
                    return builtin_response + "\n\n💡 *Response from built-in knowledge base*"
                return f"⚠️ AI service returned status {response.status_code}. Try asking about fraud detection basics."
        
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.exceptions.RequestException):
            if builtin_response:
                return builtin_response + "\n\n💡 *Response from built-in knowledge base (network issue)*"
            return """🌐 **Connection Issue**

Unable to reach AI service. The fraud detection system is working normally.

**Available Topics** (no network needed):
• "How does fraud detection work?"
• "Explain quantum advantage"
• "What is precision vs recall?"
• "How to reduce false positives?" """
        
        except Exception as e:
            if builtin_response:
                return builtin_response
            return "🛠️ An error occurred. Core fraud detection remains operational."
    
    def _get_builtin_response(self, user_message):
        """Return the built-in answer whose keyword pattern matches the question.

        Patterns are tried in order against the lower-cased message and the
        first match wins.

        Args:
            user_message: The user's question. ``None`` or an empty string
                yields ``None``.

        Returns:
            str | None: The matching entry of ``self.builtin_responses``, or
            ``None`` when no pattern matches.
        """
        if not user_message:
            return None

        message_lower = user_message.lower()

        # Topic-specific keywords come first. The broad question-shape pattern
        # for 'fraud_detection' and the generic-word patterns follow, so that
        # e.g. "What's the optimal fraud threshold?" reaches 'threshold' and
        # "How does VQC work?" reaches 'vqc_qaoa_qnn' instead of both being
        # swallowed by the generic fraud-detection overview.
        patterns = (
            (r'(vqc|qaoa|qnn|variational|quantum.*(model|circuit|neural))', 'vqc_qaoa_qnn'),
            (r'(precision|recall|f1|metric|score|accuracy)', 'precision_recall'),
            (r'(threshold|cutoff|adjust|tune|optimal)', 'threshold'),
            (r'(false.?positive|wrong|mistake|legitimate|block)', 'false_positives'),
            (r'(how|does|what).*(fraud|detection|detect|work|identify)', 'fraud_detection'),
            (r'(quantum|classical|advantage|better|compare|difference|useful)', 'quantum_advantage'),
            (r'(improve|increase|better|accuracy|performance|optimize)', 'improve_accuracy'),
            (r'(architecture|system|how.*work|overview|explain.*system)', 'how_it_works'),
        )

        for pattern, key in patterns:
            if re.search(pattern, message_lower):
                return self.builtin_responses.get(key)

        return None

# --- Test Function ---
def test_chatbot():
    """Manually smoke-test the chatbot against the live AI service.

    Reads ``OPENROUTER_API_KEY`` from the environment and returns early with a
    message when it is unset. Otherwise builds a synthetic 100-transaction
    history, asks a fixed list of questions and prints the first 200
    characters of each answer.
    """
    print("🧪 Testing Enhanced AI Fraud Detection Assistant")
    print("=" * 60)

    api_key = os.getenv("OPENROUTER_API_KEY", "")
    if not api_key:
        print("❌ No API key found. Set OPENROUTER_API_KEY environment variable.")
        return

    chatbot = AIFraudChatbot(api_key=api_key)

    # Simulate realistic transaction history
    test_history = [
        {
            'Prediction': 'Fraud' if i % 20 == 0 else 'Safe',  # 5% flagged
            'is_fraud': 1 if i % 25 == 0 else 0,  # 4% actual fraud
            'Final_Score': 0.6 + (i % 5) * 0.1,  # Varied scores
            'Quantum_Score': 0.3 + (i % 3) * 0.1,
            'Classical_Score': 0.7 + (i % 4) * 0.05,
        }
        for i in range(100)
    ]

    test_questions = [
        "Why is my precision so low?",
        "How can I improve recall performance?",
        "Explain how the quantum ensemble works",
        "What's the optimal fraud threshold?",
        "How do false positives impact my business?"
    ]

    for i, question in enumerate(test_questions, 1):
        if i > 1:
            # Wait out the chatbot's own rate limiter. A shorter pause makes
            # get_response() answer from the built-in fallback, so only the
            # first question would ever reach the API.
            time.sleep(chatbot.min_request_interval)
        print(f"\n🔸 Test {i}: {question}")
        print("-" * 50)
        response = chatbot.get_response(question, test_history)
        print((response[:200] + "...") if len(response) > 200 else response)

# Run the manual smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_chatbot()