Spaces:
Build error
Build error
"""
Hugging Face Integration for Quantum Fraud Detection
Offloads heavy ML computations to the Hugging Face Inference API
"""
| import os | |
| import requests | |
| import numpy as np | |
| from typing import Dict, List, Optional | |
| import time | |
class HuggingFaceIntegration:
    """
    Integrates Hugging Face Inference API for cloud-based ML inference.

    Reduces local system burden by offloading computations. Every public
    method reports failures through a result dict with ``"success": False``
    rather than raising.
    """

    # Hub model identifiers keyed by the task they are used for.
    # NOTE(review): judging by their names these are general-purpose
    # sentiment / BERT / sentence-embedding models, not fraud-specific ones.
    MODELS = {
        "text_classification": "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "fraud_analysis": "bert-base-uncased",
        "embeddings": "sentence-transformers/all-MiniLM-L6-v2"
    }

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize HuggingFace integration.

        Args:
            api_key: HuggingFace API token. Get free at:
                https://huggingface.co/settings/tokens
                Falls back to the HUGGINGFACE_API_KEY environment variable.
        """
        self.api_key = api_key or os.getenv("HUGGINGFACE_API_KEY", "")
        self.api_url = "https://router.huggingface.co/hf-inference/models"
        # Without a token the requests are sent with no Authorization header.
        self.headers = {"Authorization": f"Bearer {self.api_key}"} if self.api_key else {}
        # Not read or written by any method of this class.
        self.cache = {}
        self.last_request_time = 0
        self.min_request_interval = 0.5  # Minimum seconds between requests

    def _rate_limit(self):
        """
        Sleep as needed so consecutive requests are at least
        ``min_request_interval`` seconds apart.

        A monotonic clock is used so wall-clock adjustments cannot produce a
        negative elapsed time, and the wait is clamped to one interval so a
        stale or foreign ``last_request_time`` can never cause a long stall.
        """
        elapsed = time.monotonic() - self.last_request_time
        wait = min(self.min_request_interval - elapsed, self.min_request_interval)
        if wait > 0:
            time.sleep(wait)
        self.last_request_time = time.monotonic()

    def query_model(self, model_id: str, payload: Dict) -> Dict:
        """
        Query a Hugging Face model via Inference API.

        Args:
            model_id: The model identifier on HuggingFace Hub
            payload: The input data for the model (sent as the JSON body)

        Returns:
            ``{"success": True, "data": <decoded JSON>}`` on HTTP 200.
            Otherwise ``{"success": False, "error": <message>}``; an HTTP 503
            additionally carries ``"retry": True``.
        """
        self._rate_limit()
        url = f"{self.api_url}/{model_id}"
        try:
            response = requests.post(url, headers=self.headers, json=payload, timeout=30)
            if response.status_code == 200:
                return {"success": True, "data": response.json()}
            if response.status_code == 503:
                # Model is still loading. No retry happens here; the flag
                # tells the caller that trying again later may succeed.
                return {"success": False, "error": "Model loading, please retry", "retry": True}
            return {"success": False, "error": f"API error: {response.status_code}"}
        except requests.exceptions.Timeout:
            return {"success": False, "error": "Request timeout"}
        except Exception as e:
            # Deliberate best-effort boundary: any other failure (connection
            # error, undecodable body, ...) becomes an error result.
            return {"success": False, "error": str(e)}

    @staticmethod
    def _sentiment_to_fraud_score(scores: List) -> float:
        """
        Map sentiment label scores to a fraud indicator.

        Only the highest-scoring label decides the result, so the outcome
        does not depend on the order in which labels are listed:

        * negative -> 0.3 + score * 0.4  (0.3-0.7 range, higher risk)
        * positive -> 0.5 - score * 0.3  (0.2-0.5 range, lower risk)
        * anything else, or no usable entry -> 0.5 (neutral)

        Args:
            scores: Iterable of ``{"label": ..., "score": ...}`` dicts;
                entries of any other shape are ignored.

        Returns:
            The fraud indicator as a float.
        """
        usable = [
            item for item in scores
            if isinstance(item, dict) and isinstance(item.get("score"), (int, float))
        ]
        if not usable:
            return 0.5
        top = max(usable, key=lambda item: item["score"])
        label = str(top.get("label", "")).lower()
        confidence = top["score"]
        if "negative" in label:
            return 0.3 + (confidence * 0.4)
        if "positive" in label:
            return 0.5 - (confidence * 0.3)
        return 0.5

    def analyze_transaction_text(self, transaction_description: str) -> Dict:
        """
        Use HuggingFace to analyze transaction text for fraud indicators.

        The text is sent to the ``text_classification`` model and the
        resulting sentiment is mapped to a fraud indicator (negative
        sentiment is treated as a sign of a suspicious description).

        Args:
            transaction_description: Text description of the transaction

        Returns:
            ``{"success": True, "fraud_indicator": <float>, "raw": <response>}``
            when a non-empty list came back, otherwise
            ``{"success": False, "fraud_indicator": 0.5, "error": <message>}``.
        """
        payload = {"inputs": transaction_description, "options": {"wait_for_model": True}}
        result = self.query_model(self.MODELS["text_classification"], payload)
        if result["success"]:
            data = result["data"]
            if isinstance(data, list) and len(data) > 0:
                # The scores may be wrapped in an outer list (one per input).
                scores = data[0] if isinstance(data[0], list) else data
                fraud_score = self._sentiment_to_fraud_score(scores)
                return {"success": True, "fraud_indicator": fraud_score, "raw": data}
        return {"success": False, "fraud_indicator": 0.5, "error": result.get("error", "Unknown error")}

    def get_embedding(self, text: str, model_id: str = "sentence-transformers/all-MiniLM-L6-v2") -> Dict:
        """
        Get text embeddings from HuggingFace for feature extraction.

        Args:
            text: Input text to embed
            model_id: Embedding model to use

        Returns:
            ``{"success": True, "embedding": <response data>}`` on success,
            otherwise ``{"success": False, "embedding": None, "error": <message>}``.
        """
        payload = {"inputs": text, "options": {"wait_for_model": True}}
        result = self.query_model(model_id, payload)
        if result["success"]:
            return {"success": True, "embedding": result["data"]}
        # Surface the reason so callers can tell a timeout from an API error.
        return {"success": False, "embedding": None, "error": result.get("error", "Unknown error")}

    def batch_classify(self, texts: List[str]) -> List[Dict]:
        """
        Batch classify multiple transaction descriptions.

        Each text is analyzed with its own request, in order.

        Args:
            texts: List of transaction descriptions

        Returns:
            List of classification results, one per input text
        """
        return [self.analyze_transaction_text(text) for text in texts]
class HuggingFaceQuantumHybrid:
    """
    Hybrid system combining local quantum features with HuggingFace cloud inference.

    The final score is a weighted sum of a locally computed score and the
    cloud fraud indicator; when the cloud is unavailable the local score is
    used on its own.
    """

    def __init__(self, hf_api_key: Optional[str] = None):
        """
        Args:
            hf_api_key: HuggingFace API token, forwarded to HuggingFaceIntegration.
        """
        self.hf = HuggingFaceIntegration(api_key=hf_api_key)
        self.local_quantum_weight = 0.3  # Weight for local quantum predictions
        self.cloud_ml_weight = 0.7  # Weight for cloud ML predictions

    def hybrid_predict(
        self,
        transaction_features: Dict,
        local_quantum_score: float,
        use_cloud: bool = True
    ) -> Dict:
        """
        Combine local quantum predictions with HuggingFace cloud inference.

        Args:
            transaction_features: Transaction data dict
            local_quantum_score: Score from local quantum models (0-1)
            use_cloud: Whether to use HuggingFace API

        Returns:
            Dict with ``local_quantum_score``, ``cloud_ml_score``,
            ``hybrid_score``, ``prediction`` ("Fraud" when the score is
            >= 0.5, else "Legit") and ``cloud_used``. When the cloud call was
            attempted but failed, ``cloud_error`` holds the reason and the
            local score is kept as the hybrid score.
        """
        result = {
            "local_quantum_score": local_quantum_score,
            "cloud_ml_score": None,
            "hybrid_score": local_quantum_score,
            "prediction": "Legit" if local_quantum_score < 0.5 else "Fraud",
            "cloud_used": False
        }
        # The cloud is only consulted when requested and a token is configured.
        if not (use_cloud and self.hf.api_key):
            return result

        # Create transaction description for text-based analysis
        description = self._features_to_text(transaction_features)
        cloud_result = self.hf.analyze_transaction_text(description)
        if not cloud_result["success"]:
            # Keep the local-only prediction but record why the cloud was skipped.
            result["cloud_error"] = cloud_result.get("error", "Unknown error")
            return result

        cloud_score = cloud_result["fraud_indicator"]
        result["cloud_ml_score"] = cloud_score
        result["cloud_used"] = True
        # Weighted combination
        result["hybrid_score"] = (
            self.local_quantum_weight * local_quantum_score +
            self.cloud_ml_weight * cloud_score
        )
        result["prediction"] = "Fraud" if result["hybrid_score"] >= 0.5 else "Legit"
        return result

    @staticmethod
    def _numeric_feature(features: Dict, keys: List[str], default: float) -> float:
        """
        Return the first of ``keys`` whose value converts to a number.

        Missing keys, ``None``, non-numeric strings and NaN are skipped, so a
        sparse or string-typed feature dict falls back to ``default`` instead
        of raising.
        """
        for key in keys:
            raw = features.get(key)
            if raw is None:
                continue
            try:
                number = float(raw)
            except (TypeError, ValueError):
                continue
            if number == number:  # False only for NaN
                return number
        return float(default)

    def _features_to_text(self, features: Dict) -> str:
        """
        Convert transaction features to text description for NLP models.

        Reads the amount from ``amt`` (or ``amount``), the category from
        ``category`` and the hour from ``Hour_of_Day`` (or ``hour``);
        defaults are 0, "unknown" and 12.

        Args:
            features: Transaction data dict

        Returns:
            A one-line natural language description including risk factors.
        """
        amount = self._numeric_feature(features, ["amt", "amount"], 0)
        category = features.get("category", "unknown")
        hour = self._numeric_feature(features, ["Hour_of_Day", "hour"], 12)
        # Create natural language description
        time_of_day = "morning" if hour < 12 else "afternoon" if hour < 18 else "evening"
        risk_indicators = []
        if amount > 500:
            risk_indicators.append("high value")
        if hour < 6 or hour > 22:
            risk_indicators.append("unusual time")
        risk_text = ", ".join(risk_indicators) if risk_indicators else "normal"
        return f"Transaction of ${amount:.2f} in {category} category during {time_of_day}. Risk factors: {risk_text}."
# ============== Usage Example ==============
def example_usage():
    """
    Demonstrate the text analysis and the hybrid prediction, printing both results.

    To use:
    1. Get free API key from: https://huggingface.co/settings/tokens
    2. Set environment variable: HUGGINGFACE_API_KEY=your_key_here
    3. Or pass directly: HuggingFaceIntegration(api_key="your_key")
    """
    # No explicit key: the client falls back to the HUGGINGFACE_API_KEY env var.
    client = HuggingFaceIntegration()

    # Text-only analysis of a single transaction description.
    sample_text = "Large purchase at 3am from unknown merchant overseas"
    print(f"Fraud Analysis: {client.analyze_transaction_text(sample_text)}")

    # Hybrid approach: blend a local score with the cloud indicator.
    sample_features = {"amt": 1500, "category": "shopping", "Hour_of_Day": 3}
    combined = HuggingFaceQuantumHybrid().hybrid_predict(
        transaction_features=sample_features,
        local_quantum_score=0.7,
        use_cloud=True,
    )
    print(f"Hybrid Prediction: {combined}")


# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    example_usage()