---
model:
  name: "DeepXR/Helion-2.5-Rnd"
  version: "2.5.0-research"
  type: "transformer"
  architecture: "llama"
  # NOTE(review): description says "multimodal" but the capabilities list is text-only — confirm.
  description: "Helion-2.5 Research & Development - Advanced multimodal language model"

capabilities:
  - text_generation
  - code_generation
  - mathematical_reasoning
  - multilingual_understanding
  - instruction_following
  - context_understanding
  - creative_writing
  - analytical_reasoning
  - scientific_computation
  - conversational_ai

model_parameters:
  hidden_size: 4096
  num_hidden_layers: 32
  num_attention_heads: 32
  num_key_value_heads: 8
  intermediate_size: 14336
  vocab_size: 128256
  max_position_embeddings: 131072
  rope_theta: 500000.0
  rope_scaling:
    type: "yarn"
    factor: 8.0
    original_max_position_embeddings: 16384
  attention_bias: false
  attention_dropout: 0.0
  mlp_bias: false

tokenizer:
  # NOTE(review): Llama-3-family tokenizers (vocab 128256) are tiktoken-style BPE, not
  # sentencepiece — confirm this value against the actual tokenizer files.
  type: "sentencepiece"
  model_max_length: 131072
  padding_side: "right"
  truncation_side: "right"
  chat_template: "{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}{% endfor %}{{ '<|im_start|>assistant\n' }}"

training:
  # NOTE(review): a 70B base conflicts with model_parameters (4096 hidden / 32 layers is
  # 8B-class) — confirm which is correct.
  base_model: "meta-llama/Meta-Llama-3.1-70B"
  training_data:
    - "scientific_papers"
    - "code_repositories"
    - "mathematical_proofs"
    - "conversational_data"
    - "multilingual_corpus"
    - "technical_documentation"
  total_tokens: "2.5T"
  training_steps: 150000
  warmup_steps: 2000
  learning_rate: 2.0e-5
  weight_decay: 0.01
  gradient_accumulation_steps: 8
  per_device_batch_size: 4
  fp16: false
  bf16: true

optimization:
  optimizer: "adamw_torch_fused"
  scheduler: "cosine_with_restarts"
  gradient_checkpointing: true
  flash_attention: true
  tensor_parallel_size: 4
  pipeline_parallel_size: 2

quantization:
  bits: 16
  supported_formats:
    - "fp16"
    - "bf16"
    - "int8"
    - "int4"
    - "awq"
    - "gptq"
    - "gguf"

inference:
  default_parameters:
    temperature: 0.7
    top_p: 0.9
    top_k: 50
    repetition_penalty: 1.1
    max_new_tokens: 4096
    do_sample: true
    num_beams: 1

  generation_config:
    pad_token_id: 128001
    bos_token_id: 128000
    # NOTE(review): 128009 is <|eot_id|> in the Llama-3 vocab, while special_tokens.eos_token
    # is "<|end_of_text|>" (id 128001) — confirm which end-of-turn token is intended.
    eos_token_id: 128009
    use_cache: true
    output_attentions: false
    output_hidden_states: false
    return_dict_in_generate: true

  performance:
    batch_size: 1
    max_batch_size: 32
    streaming: true
    gpu_memory_utilization: 0.95
    tensor_parallel: true

special_tokens:
  bos_token: "<|begin_of_text|>"
  eos_token: "<|end_of_text|>"
  pad_token: "<|pad|>"
  unk_token: "<|unk|>"
  system_token: "<|im_start|>system"
  user_token: "<|im_start|>user"
  assistant_token: "<|im_start|>assistant"
  end_token: "<|im_end|>"

deployment:
  framework: "transformers"
  recommended_hardware:
    gpu: "A100 80GB (minimum 2x)"
    vram: "160GB+"
    ram: "256GB+"
    storage: "500GB+ NVMe SSD"

  serving:
    engine: "vllm"
    max_concurrent_requests: 128
    max_model_len: 131072
    gpu_memory_utilization: 0.9
    swap_space: 16

  endpoints:
    - name: "completions"
      path: "/v1/completions"
      methods: ["POST"]
    - name: "chat_completions"
      path: "/v1/chat/completions"
      methods: ["POST"]
    - name: "embeddings"
      path: "/v1/embeddings"
      methods: ["POST"]

research:
  status: "experimental"
  stage: "development"
  evaluation_metrics:
    perplexity: 2.34
    accuracy_mmlu: 0.847
    accuracy_gsm8k: 0.892
    accuracy_humaneval: 0.756
    accuracy_mbpp: 0.723

  benchmarks:
    reasoning:
      arc_challenge: 0.834
      hellaswag: 0.889
      winogrande: 0.823
    code:
      humaneval: 0.756
      mbpp: 0.723
      ds1000: 0.645
    mathematics:
      gsm8k: 0.892
      math: 0.567
      minerva: 0.534
    knowledge:
      mmlu: 0.847
      truthfulqa: 0.612

limitations:
  - "Model is in research phase - outputs should be verified"
  - "May exhibit biases present in training data"
  - "Performance on specialized domains may vary"
  - "Long context performance degrades beyond 64K tokens"

license: "Apache-2.0"
citation: |
  @misc{helion-2.5-rnd,
    title={Helion-2.5-Rnd: Advanced Research Language Model},
    author={DeepXR Team},
    year={2025},
    publisher={DeepXR},
    url={https://huggingface.co/DeepXR/Helion-2.5-Rnd}
  }

safety:
  content_filtering: true
  toxicity_threshold: 0.5
  pii_detection: true
  prompt_injection_protection: true

metadata:
  created_at: "2025-01-15"
  updated_at: "2025-01-30"
  status: "research"
  visibility: "public"
  tags:
    - "language-model"
    - "research"
    - "multimodal"
    - "instruction-tuned"
    - "long-context"