# QuantumShield/backend/utils/report_generator.py
# Report generation utilities (CSV + PDF) for fraud detection analysis.
import pandas as pd
import numpy as np
from fpdf import FPDF
from datetime import datetime
class ReportGenerator:
    """Generate CSV and PDF reports for fraud detection analysis.

    A single timestamp is captured at construction and reused in every
    default filename produced by this instance, so reports generated in
    the same session sort together on disk.
    """

    def __init__(self):
        # Shared timestamp for default filenames (e.g. fraud_report_20240101_120000.csv).
        self.report_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    def generate_csv_report(self, history, filename=None):
        """Write the transaction history to a CSV file.

        Args:
            history: list of per-transaction dicts (keys such as
                'trans_num', 'Amount', 'Final_Score', ...).
            filename: output path; defaults to a timestamped name.

        Returns:
            The filename written, or None if ``history`` is empty/None.
        """
        if not history:  # covers None and empty list
            return None
        if filename is None:
            filename = f"fraud_report_{self.report_timestamp}.csv"

        df = pd.DataFrame(history)
        # Keep only the reporting columns that are actually present,
        # preserving this preferred order.
        report_cols = ['trans_num', 'Amount', 'merchant', 'category',
                       'Final_Score', 'Prediction', 'is_fraud']
        available_cols = [col for col in report_cols if col in df.columns]
        df[available_cols].to_csv(filename, index=False)
        return filename

    def generate_pdf_report(self, history, filename=None):
        """Write a PDF report with summary statistics for ``history``.

        Args:
            history: list of per-transaction dicts (see generate_csv_report).
            filename: output path; defaults to a timestamped name.

        Returns:
            The filename written, or None if ``history`` is empty/None.
        """
        if not history:
            return None
        if filename is None:
            filename = f"fraud_report_{self.report_timestamp}.pdf"

        metrics = self._calculate_metrics(history)

        pdf = FPDF()
        pdf.add_page()

        # Title and generation time
        pdf.set_font('Arial', 'B', 20)
        pdf.cell(0, 10, 'Fraud Detection Report', 0, 1, 'C')
        pdf.set_font('Arial', '', 10)
        pdf.cell(0, 10, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1, 'C')
        pdf.ln(10)

        # Executive Summary
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, 'Executive Summary', 0, 1, 'L')
        pdf.set_font('Arial', '', 11)
        summary_text = [
            f"Total Transactions Processed: {metrics['total_transactions']}",
            f"Transactions Flagged as Fraud: {metrics['flagged_fraud']}",
            f"Actual Fraud Cases: {metrics['actual_fraud']}",
            f"Overall Fraud Rate: {metrics['fraud_rate']:.2f}%",
            "",
            f"System Accuracy: {metrics['accuracy']:.2f}%",
            f"Precision: {metrics['precision']:.2f}%",
            f"Recall (Fraud Detection Rate): {metrics['recall']:.2f}%",
            f"F1 Score: {metrics['f1_score']:.3f}"
        ]
        for line in summary_text:
            pdf.cell(0, 7, line, 0, 1, 'L')
        pdf.ln(5)

        # Confusion-matrix breakdown
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, 'Performance Breakdown', 0, 1, 'L')
        pdf.set_font('Arial', '', 11)
        performance_text = [
            f"True Positives (Correctly Identified Fraud): {metrics['tp']}",
            f"True Negatives (Correctly Identified Safe): {metrics['tn']}",
            f"False Positives (Safe Flagged as Fraud): {metrics['fp']}",
            f"False Negatives (Missed Fraud): {metrics['fn']}"
        ]
        for line in performance_text:
            pdf.cell(0, 7, line, 0, 1, 'L')
        pdf.ln(5)

        # Top 10 transactions by hybrid risk score (descending)
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, 'Top 10 Highest Risk Transactions', 0, 1, 'L')
        pdf.set_font('Arial', '', 9)
        sorted_history = sorted(history, key=lambda x: x.get('Final_Score', 0), reverse=True)[:10]
        for i, txn in enumerate(sorted_history, 1):
            line = f"{i}. Amount: ${txn.get('Amount', 0):.2f} | Score: {txn.get('Final_Score', 0):.3f} | "
            line += f"Prediction: {txn.get('Prediction', 'N/A')} | Actual: {'Fraud' if txn.get('is_fraud', 0) == 1 else 'Safe'}"
            pdf.cell(0, 5, line, 0, 1, 'L')
        pdf.ln(5)

        # Static description of the hybrid scoring architecture
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, 'Hybrid Model Architecture', 0, 1, 'L')
        pdf.set_font('Arial', '', 10)
        arch_text = [
            "This system uses a hybrid quantum-classical approach:",
            "",
            "Classical Model (XGBoost) - 80% weight:",
            " - Processes 10 engineered features",
            " - High throughput, low latency",
            " - Handles majority of fraud detection",
            "",
            "Quantum Model (VQC) - 20% weight:",
            " - Focuses on 4 critical features",
            " - Specialized anomaly detection",
            " - Captures subtle non-linear patterns",
            "",
            "Final Decision = 0.8 * Classical + 0.2 * Quantum"
        ]
        for line in arch_text:
            pdf.cell(0, 5, line, 0, 1, 'L')

        pdf.output(filename)
        return filename

    def _calculate_metrics(self, history):
        """Compute classification metrics from the transaction history.

        A transaction counts as predicted-fraud when its 'Prediction'
        value equals the string 'Fraud'; ground truth comes from the
        'is_fraud' key (0/1, default 0).

        Returns:
            dict with counts ('tp', 'fp', 'fn', 'tn', 'total_transactions',
            'flagged_fraud', 'actual_fraud'), percentage metrics
            ('accuracy', 'precision', 'recall', 'fraud_rate'), and the
            raw 'f1_score' in [0, 1]. All ratios are 0 when undefined.
        """
        true_labels = [t.get('is_fraud', 0) for t in history]
        predictions = [1 if t.get('Prediction') == 'Fraud' else 0 for t in history]

        total = len(true_labels)
        flagged = sum(predictions)
        actual_fraud = sum(true_labels)

        # Confusion matrix
        tp = sum(1 for t, p in zip(true_labels, predictions) if t == 1 and p == 1)
        fp = sum(1 for t, p in zip(true_labels, predictions) if t == 0 and p == 1)
        fn = sum(1 for t, p in zip(true_labels, predictions) if t == 1 and p == 0)
        tn = sum(1 for t, p in zip(true_labels, predictions) if t == 0 and p == 0)

        # Guard every ratio against division by zero.
        accuracy = (tp + tn) / total if total > 0 else 0
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        fraud_rate = (actual_fraud / total * 100) if total > 0 else 0

        return {
            'total_transactions': total,
            'flagged_fraud': flagged,
            'actual_fraud': actual_fraud,
            'fraud_rate': fraud_rate,
            'accuracy': accuracy * 100,
            'precision': precision * 100,
            'recall': recall * 100,
            'f1_score': f1,
            'tp': tp,
            'fp': fp,
            'fn': fn,
            'tn': tn
        }
if __name__ == "__main__":
    # Smoke-test the generator with two hand-built transactions:
    # one fraudulent, one legitimate.
    demo_transactions = [
        {'trans_num': '001', 'Amount': 150.0, 'merchant': 'Test Store',
         'category': 'retail', 'Final_Score': 0.75, 'Prediction': 'Fraud', 'is_fraud': 1},
        {'trans_num': '002', 'Amount': 25.0, 'merchant': 'Coffee Shop',
         'category': 'food', 'Final_Score': 0.15, 'Prediction': 'Safe', 'is_fraud': 0},
    ]

    report_gen = ReportGenerator()
    csv_path = report_gen.generate_csv_report(demo_transactions)
    pdf_path = report_gen.generate_pdf_report(demo_transactions)
    print(f"Test reports generated: {csv_path}, {pdf_path}")