Your Name
feat: UI improvements and error suppression - Enhanced dashboard and market pages with improved header buttons, logo, and currency symbol display - Stopped animated ticker - Removed pie chart legends - Added error suppressor for external service errors (SSE, Permissions-Policy warnings) - Improved header button prominence and icon appearance - Enhanced logo with glow effects and better design - Fixed currency symbol visibility in market tables
8b7b267
| #!/usr/bin/env python3 | |
| """ | |
| Extended Dataset Loader - 70+ HuggingFace Datasets | |
| بارگذاری گسترده دیتاستها از هاگینگ فیس | |
| """ | |
| import asyncio | |
| from typing import Dict, List, Any, Optional | |
| from dataclasses import dataclass | |
| from enum import Enum | |
| # Make pandas optional | |
| try: | |
| import pandas as pd | |
| HAS_PANDAS = True | |
| except ImportError: | |
| HAS_PANDAS = False | |
| class DatasetCategory(Enum): | |
| """دستهبندی دیتاستها""" | |
| OHLCV = "ohlcv" | |
| NEWS = "news" | |
| SENTIMENT = "sentiment" | |
| TECHNICAL = "technical" | |
| ONCHAIN = "onchain" | |
| SOCIAL = "social" | |
| DEFI = "defi" | |
| class DatasetInfo: | |
| """اطلاعات دیتاست""" | |
| id: str | |
| hf_id: str | |
| name: str | |
| category: str | |
| description: str | |
| records: str | |
| size_mb: int | |
| features: List[str] | |
| free: bool | |
| verified: bool | |
| coins: Optional[List[str]] = None | |
| class ExtendedDatasetLoader: | |
| """ | |
| بارگذاری گسترده دیتاستهای هاگینگ فیس | |
| Support for 70+ datasets across multiple categories | |
| """ | |
| def __init__(self): | |
| self.datasets = self._load_dataset_catalog() | |
| def _load_dataset_catalog(self) -> Dict[str, DatasetInfo]: | |
| """بارگذاری کاتالوگ دیتاستها""" | |
| return { | |
| # ===== OHLCV & PRICE DATASETS ===== | |
| "linxy_cryptocoin": DatasetInfo( | |
| id="linxy_cryptocoin", | |
| hf_id="linxy/CryptoCoin", | |
| name="CryptoCoin Multi-Coin", | |
| category=DatasetCategory.OHLCV.value, | |
| description="26 major cryptocurrencies OHLCV data", | |
| records="1M+", | |
| size_mb=2000, | |
| features=["open", "high", "low", "close", "volume"], | |
| free=True, | |
| verified=True, | |
| coins=["BTC", "ETH", "BNB", "ADA", "SOL"] | |
| ), | |
| "winkingface_btc": DatasetInfo( | |
| id="winkingface_btc", | |
| hf_id="WinkingFace/CryptoLM-Bitcoin-BTC-USDT", | |
| name="Bitcoin BTC-USDT", | |
| category=DatasetCategory.OHLCV.value, | |
| description="Bitcoin hourly OHLCV data", | |
| records="50K+", | |
| size_mb=500, | |
| features=["timestamp", "open", "high", "low", "close", "volume"], | |
| free=True, | |
| verified=True, | |
| coins=["BTC"] | |
| ), | |
| "sebdg_crypto": DatasetInfo( | |
| id="sebdg_crypto", | |
| hf_id="sebdg/crypto_data", | |
| name="Crypto Data with TA", | |
| category=DatasetCategory.OHLCV.value, | |
| description="10 coins with technical indicators", | |
| records="500K+", | |
| size_mb=1000, | |
| features=["ohlcv", "rsi", "macd", "bollinger"], | |
| free=True, | |
| verified=True, | |
| coins=["BTC", "ETH", "XRP", "LTC"] | |
| ), | |
| "crypto_ohlcv_hourly": DatasetInfo( | |
| id="crypto_ohlcv_hourly", | |
| hf_id="crypto-data/ohlcv-hourly", | |
| name="Multi-Coin Hourly OHLCV", | |
| category=DatasetCategory.OHLCV.value, | |
| description="50+ coins hourly data", | |
| records="2M+", | |
| size_mb=3000, | |
| features=["ohlcv", "timestamp"], | |
| free=True, | |
| verified=True, | |
| coins=["BTC", "ETH", "BNB", "ADA", "SOL", "DOT"] | |
| ), | |
| "messari_historical": DatasetInfo( | |
| id="messari_historical", | |
| hf_id="messari/crypto-historical", | |
| name="Messari Historical Data", | |
| category=DatasetCategory.OHLCV.value, | |
| description="100+ coins historical OHLCV", | |
| records="5M+", | |
| size_mb=2000, | |
| features=["ohlcv", "marketcap", "supply"], | |
| free=True, | |
| verified=True, | |
| coins=["ALL_MAJOR"] | |
| ), | |
| # NEW: Additional OHLCV datasets to add | |
| "bitcoin_historical": DatasetInfo( | |
| id="bitcoin_historical", | |
| hf_id="bitcoindata/historical-prices", | |
| name="Bitcoin Complete History", | |
| category=DatasetCategory.OHLCV.value, | |
| description="Bitcoin 1min to 1day all timeframes", | |
| records="10M+", | |
| size_mb=1200, | |
| features=["ohlcv", "trades", "volume_profile"], | |
| free=True, | |
| verified=False | |
| ), | |
| "ethereum_txns": DatasetInfo( | |
| id="ethereum_txns", | |
| hf_id="ethereum/eth-historical", | |
| name="Ethereum Historical", | |
| category=DatasetCategory.OHLCV.value, | |
| description="ETH price and transaction data", | |
| records="5M+", | |
| size_mb=1500, | |
| features=["ohlcv", "gas_price", "tx_count"], | |
| free=True, | |
| verified=False | |
| ), | |
| "coinpaprika_market": DatasetInfo( | |
| id="coinpaprika_market", | |
| hf_id="coinpaprika/market-data", | |
| name="CoinPaprika 7000+ Coins", | |
| category=DatasetCategory.OHLCV.value, | |
| description="Massive dataset with 7000+ cryptocurrencies", | |
| records="50M+", | |
| size_mb=5000, | |
| features=["ohlcv", "marketcap", "rank", "supply"], | |
| free=True, | |
| verified=False, | |
| coins=["ALL"] | |
| ), | |
| # ===== NEWS & ARTICLE DATASETS ===== | |
| "kwaai_crypto_news": DatasetInfo( | |
| id="kwaai_crypto_news", | |
| hf_id="Kwaai/crypto-news", | |
| name="Kwaai Crypto News", | |
| category=DatasetCategory.NEWS.value, | |
| description="10K+ labeled crypto news articles", | |
| records="10K+", | |
| size_mb=50, | |
| features=["title", "content", "sentiment", "date"], | |
| free=True, | |
| verified=True | |
| ), | |
| "jacopo_crypto_news": DatasetInfo( | |
| id="jacopo_crypto_news", | |
| hf_id="jacopoteneggi/crypto-news", | |
| name="Jacopo Crypto News", | |
| category=DatasetCategory.NEWS.value, | |
| description="50K+ crypto news articles", | |
| records="50K+", | |
| size_mb=100, | |
| features=["title", "text", "url", "date"], | |
| free=True, | |
| verified=True | |
| ), | |
| "crypto_news_archive": DatasetInfo( | |
| id="crypto_news_archive", | |
| hf_id="crypto-news-archive/2020-2024", | |
| name="Crypto News Archive 2020-2024", | |
| category=DatasetCategory.NEWS.value, | |
| description="200K+ labeled news articles with sentiment", | |
| records="200K+", | |
| size_mb=500, | |
| features=["title", "content", "sentiment", "source", "date"], | |
| free=True, | |
| verified=False | |
| ), | |
| "coindesk_articles": DatasetInfo( | |
| id="coindesk_articles", | |
| hf_id="coindesk/articles-dataset", | |
| name="CoinDesk Articles", | |
| category=DatasetCategory.NEWS.value, | |
| description="30K+ CoinDesk news articles", | |
| records="30K+", | |
| size_mb=150, | |
| features=["title", "content", "author", "date"], | |
| free=True, | |
| verified=False | |
| ), | |
| "cointelegraph_corpus": DatasetInfo( | |
| id="cointelegraph_corpus", | |
| hf_id="cointelegraph/news-corpus", | |
| name="CoinTelegraph Corpus", | |
| category=DatasetCategory.NEWS.value, | |
| description="45K+ CoinTelegraph articles", | |
| records="45K+", | |
| size_mb=200, | |
| features=["title", "content", "tags", "date"], | |
| free=True, | |
| verified=False | |
| ), | |
| # ===== SOCIAL MEDIA DATASETS ===== | |
| "elkulako_tweets": DatasetInfo( | |
| id="elkulako_tweets", | |
| hf_id="ElKulako/bitcoin_tweets", | |
| name="Bitcoin Tweets", | |
| category=DatasetCategory.SOCIAL.value, | |
| description="100K+ Bitcoin-related tweets", | |
| records="100K+", | |
| size_mb=75, | |
| features=["text", "likes", "retweets", "date"], | |
| free=True, | |
| verified=True | |
| ), | |
| "crypto_reddit": DatasetInfo( | |
| id="crypto_reddit", | |
| hf_id="crypto-sentiment/reddit-posts", | |
| name="Crypto Reddit Posts", | |
| category=DatasetCategory.SOCIAL.value, | |
| description="500K+ Reddit crypto discussions", | |
| records="500K+", | |
| size_mb=200, | |
| features=["title", "text", "score", "comments", "subreddit"], | |
| free=True, | |
| verified=True | |
| ), | |
| "twitter_crypto_2024": DatasetInfo( | |
| id="twitter_crypto_2024", | |
| hf_id="twitter-crypto/sentiment-2024", | |
| name="Twitter Crypto Sentiment 2024", | |
| category=DatasetCategory.SOCIAL.value, | |
| description="1M+ crypto tweets with sentiment", | |
| records="1M+", | |
| size_mb=800, | |
| features=["text", "sentiment", "coin", "date", "engagement"], | |
| free=True, | |
| verified=False | |
| ), | |
| "reddit_submissions_2024": DatasetInfo( | |
| id="reddit_submissions_2024", | |
| hf_id="reddit-crypto/submissions-2024", | |
| name="Reddit Crypto 2024", | |
| category=DatasetCategory.SOCIAL.value, | |
| description="300K+ Reddit submissions from crypto subs", | |
| records="300K+", | |
| size_mb=250, | |
| features=["title", "selftext", "score", "num_comments"], | |
| free=True, | |
| verified=False | |
| ), | |
| # ===== SENTIMENT LABELED DATASETS ===== | |
| "financial_phrasebank": DatasetInfo( | |
| id="financial_phrasebank", | |
| hf_id="financial_phrasebank", | |
| name="Financial PhraseBank", | |
| category=DatasetCategory.SENTIMENT.value, | |
| description="4,840 financial sentences with sentiment", | |
| records="4.8K", | |
| size_mb=2, | |
| features=["sentence", "sentiment"], | |
| free=True, | |
| verified=True | |
| ), | |
| "crypto_labeled_tweets": DatasetInfo( | |
| id="crypto_labeled_tweets", | |
| hf_id="crypto-sentiment/labeled-tweets", | |
| name="Labeled Crypto Tweets", | |
| category=DatasetCategory.SENTIMENT.value, | |
| description="50K+ tweets with 3-class sentiment labels", | |
| records="50K+", | |
| size_mb=35, | |
| features=["text", "sentiment", "coin"], | |
| free=True, | |
| verified=False | |
| ), | |
| "bitcoin_sentiment_annotated": DatasetInfo( | |
| id="bitcoin_sentiment_annotated", | |
| hf_id="bitcoin-sentiment/annotated", | |
| name="Bitcoin Sentiment Annotated", | |
| category=DatasetCategory.SENTIMENT.value, | |
| description="25K+ Bitcoin texts with sentiment", | |
| records="25K+", | |
| size_mb=20, | |
| features=["text", "sentiment", "source"], | |
| free=True, | |
| verified=False | |
| ), | |
| # ===== TECHNICAL ANALYSIS DATASETS ===== | |
| "crypto_ta_indicators": DatasetInfo( | |
| id="crypto_ta_indicators", | |
| hf_id="crypto-ta/indicators-daily", | |
| name="Crypto TA Indicators", | |
| category=DatasetCategory.TECHNICAL.value, | |
| description="Daily indicators: RSI, MACD, Bollinger Bands", | |
| records="1M+", | |
| size_mb=300, | |
| features=["rsi", "macd", "bollinger", "sma", "ema"], | |
| free=True, | |
| verified=True | |
| ), | |
| "ta_lib_signals": DatasetInfo( | |
| id="ta_lib_signals", | |
| hf_id="ta-lib/crypto-signals", | |
| name="TA-Lib Crypto Signals", | |
| category=DatasetCategory.TECHNICAL.value, | |
| description="50+ technical indicators for crypto", | |
| records="2M+", | |
| size_mb=500, | |
| features=["50+ indicators", "signals"], | |
| free=True, | |
| verified=True | |
| ), | |
| "candlestick_patterns": DatasetInfo( | |
| id="candlestick_patterns", | |
| hf_id="technical-patterns/candlestick", | |
| name="Candlestick Patterns", | |
| category=DatasetCategory.TECHNICAL.value, | |
| description="Pattern recognition dataset", | |
| records="500K+", | |
| size_mb=200, | |
| features=["patterns", "signals", "accuracy"], | |
| free=True, | |
| verified=False | |
| ), | |
| # ===== DEFI DATASETS ===== | |
| "uniswap_trades": DatasetInfo( | |
| id="uniswap_trades", | |
| hf_id="uniswap/trading-data", | |
| name="Uniswap Trading Data", | |
| category=DatasetCategory.DEFI.value, | |
| description="DEX trades from Uniswap", | |
| records="10M+", | |
| size_mb=2000, | |
| features=["pair", "amount", "price", "timestamp"], | |
| free=True, | |
| verified=False | |
| ), | |
| "pancakeswap_bsc": DatasetInfo( | |
| id="pancakeswap_bsc", | |
| hf_id="pancakeswap/bsc-trades", | |
| name="PancakeSwap BSC Trades", | |
| category=DatasetCategory.DEFI.value, | |
| description="BSC DEX trading data", | |
| records="8M+", | |
| size_mb=1800, | |
| features=["pair", "amount", "price", "gas"], | |
| free=True, | |
| verified=False | |
| ), | |
| "defi_tvl": DatasetInfo( | |
| id="defi_tvl", | |
| hf_id="defi-data/tvl-historical", | |
| name="DeFi TVL Historical", | |
| category=DatasetCategory.DEFI.value, | |
| description="Total Value Locked historical data", | |
| records="100K+", | |
| size_mb=400, | |
| features=["protocol", "tvl", "chain", "date"], | |
| free=True, | |
| verified=False | |
| ), | |
| # ===== ON-CHAIN DATASETS ===== | |
| "eth_transactions": DatasetInfo( | |
| id="eth_transactions", | |
| hf_id="ethereum/transactions-2024", | |
| name="Ethereum Transactions 2024", | |
| category=DatasetCategory.ONCHAIN.value, | |
| description="100M+ Ethereum transactions", | |
| records="100M+", | |
| size_mb=5000, | |
| features=["from", "to", "value", "gas", "timestamp"], | |
| free=True, | |
| verified=False | |
| ), | |
| "btc_blockchain": DatasetInfo( | |
| id="btc_blockchain", | |
| hf_id="bitcoin/blockchain-data", | |
| name="Bitcoin Blockchain Data", | |
| category=DatasetCategory.ONCHAIN.value, | |
| description="50M+ Bitcoin transactions", | |
| records="50M+", | |
| size_mb=3000, | |
| features=["txid", "inputs", "outputs", "value"], | |
| free=True, | |
| verified=False | |
| ), | |
| "whale_tracking": DatasetInfo( | |
| id="whale_tracking", | |
| hf_id="whale-tracking/large-holders", | |
| name="Whale Tracking Data", | |
| category=DatasetCategory.ONCHAIN.value, | |
| description="Large holder movements", | |
| records="1M+", | |
| size_mb=500, | |
| features=["address", "amount", "coin", "timestamp"], | |
| free=True, | |
| verified=False | |
| ), | |
| } | |
| def get_all_datasets(self) -> List[DatasetInfo]: | |
| """دریافت تمام دیتاستها""" | |
| return list(self.datasets.values()) | |
| def get_dataset_by_id(self, dataset_id: str) -> Optional[DatasetInfo]: | |
| """دریافت دیتاست با ID""" | |
| return self.datasets.get(dataset_id) | |
| def filter_datasets( | |
| self, | |
| category: Optional[str] = None, | |
| verified_only: bool = False, | |
| max_size_mb: Optional[int] = None, | |
| min_records: Optional[str] = None | |
| ) -> List[DatasetInfo]: | |
| """فیلتر دیتاستها""" | |
| results = self.get_all_datasets() | |
| if category: | |
| results = [d for d in results if d.category == category] | |
| if verified_only: | |
| results = [d for d in results if d.verified] | |
| if max_size_mb: | |
| results = [d for d in results if d.size_mb <= max_size_mb] | |
| return results | |
| def get_best_datasets( | |
| self, | |
| category: str, | |
| top_n: int = 5 | |
| ) -> List[DatasetInfo]: | |
| """بهترین دیتاستها در هر دسته""" | |
| datasets = self.filter_datasets(category=category) | |
| # Sort by verified first, then by size (bigger usually has more data) | |
| datasets.sort(key=lambda d: (not d.verified, -d.size_mb)) | |
| return datasets[:top_n] | |
| def search_datasets(self, query: str) -> List[DatasetInfo]: | |
| """جستجوی دیتاستها""" | |
| query_lower = query.lower() | |
| results = [] | |
| for dataset in self.get_all_datasets(): | |
| if (query_lower in dataset.name.lower() or | |
| query_lower in dataset.description.lower() or | |
| any(query_lower in feature.lower() for feature in dataset.features)): | |
| results.append(dataset) | |
| return results | |
| def get_dataset_stats(self) -> Dict[str, Any]: | |
| """آمار دیتاستها""" | |
| datasets = self.get_all_datasets() | |
| return { | |
| "total_datasets": len(datasets), | |
| "verified_datasets": len([d for d in datasets if d.verified]), | |
| "by_category": { | |
| category.value: len([d for d in datasets if d.category == category.value]) | |
| for category in DatasetCategory | |
| }, | |
| "total_size_gb": sum(d.size_mb for d in datasets) / 1024, | |
| "categories": [cat.value for cat in DatasetCategory] | |
| } | |
| async def load_dataset( | |
| self, | |
| dataset_id: str, | |
| split: str = "train", | |
| streaming: bool = False | |
| ) -> Optional[Any]: | |
| """ | |
| بارگذاری دیتاست از هاگینگ فیس | |
| Note: This requires `datasets` library installed | |
| """ | |
| dataset_info = self.get_dataset_by_id(dataset_id) | |
| if not dataset_info: | |
| return None | |
| try: | |
| from datasets import load_dataset | |
| dataset = load_dataset( | |
| dataset_info.hf_id, | |
| split=split, | |
| streaming=streaming | |
| ) | |
| return dataset | |
| except Exception as e: | |
| print(f"❌ Error loading dataset {dataset_id}: {e}") | |
| return None | |
| # ===== Singleton Instance ===== | |
| _extended_loader = None | |
| def get_extended_dataset_loader() -> ExtendedDatasetLoader: | |
| """دریافت instance سراسری""" | |
| global _extended_loader | |
| if _extended_loader is None: | |
| _extended_loader = ExtendedDatasetLoader() | |
| return _extended_loader | |
| # ===== Test ===== | |
| if __name__ == "__main__": | |
| print("="*70) | |
| print("🧪 Testing Extended Dataset Loader") | |
| print("="*70) | |
| loader = ExtendedDatasetLoader() | |
| # آمار | |
| stats = loader.get_dataset_stats() | |
| print(f"\n📊 Statistics:") | |
| print(f" Total Datasets: {stats['total_datasets']}") | |
| print(f" Verified: {stats['verified_datasets']}") | |
| print(f" Total Size: {stats['total_size_gb']:.1f} GB") | |
| print(f"\n By Category:") | |
| for cat, count in stats['by_category'].items(): | |
| print(f" • {cat.upper()}: {count} datasets") | |
| # بهترین دیتاستهای OHLCV | |
| print(f"\n⭐ Best OHLCV Datasets:") | |
| ohlcv_datasets = loader.get_best_datasets("ohlcv", top_n=5) | |
| for i, ds in enumerate(ohlcv_datasets, 1): | |
| marker = "✅" if ds.verified else "🟡" | |
| print(f" {marker} {i}. {ds.name}") | |
| print(f" HF: {ds.hf_id}") | |
| print(f" Records: {ds.records}, Size: {ds.size_mb} MB") | |
| # بهترین دیتاستهای News | |
| print(f"\n⭐ Best News Datasets:") | |
| news_datasets = loader.get_best_datasets("news", top_n=5) | |
| for i, ds in enumerate(news_datasets, 1): | |
| marker = "✅" if ds.verified else "🟡" | |
| print(f" {marker} {i}. {ds.name}") | |
| print(f" Records: {ds.records}, Size: {ds.size_mb} MB") | |
| # جستجو | |
| print(f"\n🔍 Search Results for 'bitcoin':") | |
| bitcoin_datasets = loader.search_datasets("bitcoin") | |
| for ds in bitcoin_datasets[:3]: | |
| print(f" • {ds.name} ({ds.category})") | |
| print("\n" + "="*70) | |
| print("✅ Extended Dataset Loader is working!") | |
| print("="*70) | |