import pandas as pd
from fpdf import FPDF
from datetime import datetime


class ReportGenerator:
    """Generate CSV and PDF reports for fraud detection analysis."""

    def __init__(self):
        self.report_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    def generate_csv_report(self, history, filename=None):
        """Generate CSV report from transaction history."""
        if not history:
            return None

        if filename is None:
            filename = f"fraud_report_{self.report_timestamp}.csv"

        # Convert history to DataFrame
        df = pd.DataFrame(history)

        # Select relevant columns, keeping only those actually present
        report_cols = ['trans_num', 'Amount', 'merchant', 'category',
                       'Final_Score', 'Prediction', 'is_fraud']
        available_cols = [col for col in report_cols if col in df.columns]
        report_df = df[available_cols]

        # Save to CSV
        report_df.to_csv(filename, index=False)
        return filename

    def generate_pdf_report(self, history, filename=None):
        """Generate PDF report with summary statistics."""
        if not history:
            return None

        if filename is None:
            filename = f"fraud_report_{self.report_timestamp}.pdf"

        # Calculate metrics
        metrics = self._calculate_metrics(history)

        # Create PDF
        pdf = FPDF()
        pdf.add_page()

        # Title
        pdf.set_font('Arial', 'B', 20)
        pdf.cell(0, 10, 'Fraud Detection Report', 0, 1, 'C')
        pdf.set_font('Arial', '', 10)
        pdf.cell(0, 10, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1, 'C')
        pdf.ln(10)

        # Executive Summary
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, 'Executive Summary', 0, 1, 'L')
        pdf.set_font('Arial', '', 11)
        summary_text = [
            f"Total Transactions Processed: {metrics['total_transactions']}",
            f"Transactions Flagged as Fraud: {metrics['flagged_fraud']}",
            f"Actual Fraud Cases: {metrics['actual_fraud']}",
            f"Overall Fraud Rate: {metrics['fraud_rate']:.2f}%",
            "",
            f"System Accuracy: {metrics['accuracy']:.2f}%",
            f"Precision: {metrics['precision']:.2f}%",
            f"Recall (Fraud Detection Rate): {metrics['recall']:.2f}%",
            f"F1 Score: {metrics['f1_score']:.3f}"
        ]
        for line in summary_text:
            pdf.cell(0, 7, line, 0, 1, 'L')
        pdf.ln(5)

        # Performance Metrics
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, 'Performance Breakdown', 0, 1, 'L')
        pdf.set_font('Arial', '', 11)
        performance_text = [
            f"True Positives (Correctly Identified Fraud): {metrics['tp']}",
            f"True Negatives (Correctly Identified Safe): {metrics['tn']}",
            f"False Positives (Safe Flagged as Fraud): {metrics['fp']}",
            f"False Negatives (Missed Fraud): {metrics['fn']}"
        ]
        for line in performance_text:
            pdf.cell(0, 7, line, 0, 1, 'L')
        pdf.ln(5)

        # Top Flagged Transactions
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, 'Top 10 Highest Risk Transactions', 0, 1, 'L')
        pdf.set_font('Arial', '', 9)

        # Get top transactions by score
        sorted_history = sorted(history, key=lambda x: x.get('Final_Score', 0), reverse=True)[:10]
        for i, txn in enumerate(sorted_history, 1):
            line = f"{i}. Amount: ${txn.get('Amount', 0):.2f} | Score: {txn.get('Final_Score', 0):.3f} | "
            line += f"Prediction: {txn.get('Prediction', 'N/A')} | Actual: {'Fraud' if txn.get('is_fraud', 0) == 1 else 'Safe'}"
            pdf.cell(0, 5, line, 0, 1, 'L')
        pdf.ln(5)

        # Model Architecture
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, 'Hybrid Model Architecture', 0, 1, 'L')
        pdf.set_font('Arial', '', 10)
        arch_text = [
            "This system uses a hybrid quantum-classical approach:",
            "",
            "Classical Model (XGBoost) - 80% weight:",
            " - Processes 10 engineered features",
            " - High throughput, low latency",
            " - Handles majority of fraud detection",
            "",
            "Quantum Model (VQC) - 20% weight:",
            " - Focuses on 4 critical features",
            " - Specialized anomaly detection",
            " - Captures subtle non-linear patterns",
            "",
            "Final Decision = 0.8 * Classical + 0.2 * Quantum"
        ]
        for line in arch_text:
            pdf.cell(0, 5, line, 0, 1, 'L')

        # Save PDF
        pdf.output(filename)
        return filename

    def _calculate_metrics(self, history):
        """Calculate performance metrics from history."""
        true_labels = [t.get('is_fraud', 0) for t in history]
        predictions = [1 if t.get('Prediction') == 'Fraud' else 0 for t in history]

        total = len(true_labels)
        flagged = sum(predictions)
        actual_fraud = sum(true_labels)

        # Confusion matrix
        tp = sum(1 for t, p in zip(true_labels, predictions) if t == 1 and p == 1)
        fp = sum(1 for t, p in zip(true_labels, predictions) if t == 0 and p == 1)
        fn = sum(1 for t, p in zip(true_labels, predictions) if t == 1 and p == 0)
        tn = sum(1 for t, p in zip(true_labels, predictions) if t == 0 and p == 0)

        # Calculate metrics, guarding against division by zero
        accuracy = (tp + tn) / total if total > 0 else 0
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        fraud_rate = (actual_fraud / total * 100) if total > 0 else 0

        return {
            'total_transactions': total,
            'flagged_fraud': flagged,
            'actual_fraud': actual_fraud,
            'fraud_rate': fraud_rate,
            'accuracy': accuracy * 100,
            'precision': precision * 100,
            'recall': recall * 100,
            'f1_score': f1,
            'tp': tp,
            'fp': fp,
            'fn': fn,
            'tn': tn
        }


if __name__ == "__main__":
    # Test with sample data
    sample_history = [
        {'trans_num': '001', 'Amount': 150.0, 'merchant': 'Test Store',
         'category': 'retail', 'Final_Score': 0.75, 'Prediction': 'Fraud', 'is_fraud': 1},
        {'trans_num': '002', 'Amount': 25.0, 'merchant': 'Coffee Shop',
         'category': 'food', 'Final_Score': 0.15, 'Prediction': 'Safe', 'is_fraud': 0},
    ]

    generator = ReportGenerator()
    csv_file = generator.generate_csv_report(sample_history)
    pdf_file = generator.generate_pdf_report(sample_history)
    print(f"Test reports generated: {csv_file}, {pdf_file}")
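
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of ReportGenerator: the "Hybrid Model
# Architecture" section above describes Final_Score as a weighted blend of
# the two model outputs. A minimal version of that blend is sketched below.
# The function name `blend_scores`, the per-model probability inputs, and
# the 0.5 decision threshold mentioned in the usage comment are assumptions
# for illustration; only the 0.8/0.2 weights come from arch_text.
def blend_scores(classical_prob, quantum_prob,
                 classical_weight=0.8, quantum_weight=0.2):
    """Combine model outputs: Final Decision = 0.8 * Classical + 0.2 * Quantum."""
    return classical_weight * classical_prob + quantum_weight * quantum_prob

# Usage: blend_scores(0.9, 0.4) returns 0.80, which an assumed 0.5
# threshold would label 'Fraud'.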