import json
import logging
from typing import Dict, List, Optional, Union

import requests
from flask import Flask, jsonify, request
|
|
# Flask application object; the route decorators in this module register on it.
app = Flask(import_name=__name__)
# Model identifiers returned verbatim by the /models endpoint.
models = [
    'cognitivecomputations/dolphin-2.6-mixtral-8x7b',
    'databricks/dbrx-instruct',
    'google/gemma-1.1-7b-it',
    'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1',
    'lizpreciatior/lzlv_70b_fp16_hf',
    'meta-llama/Meta-Llama-3-70B-Instruct',
    'meta-llama/Meta-Llama-3-8B-Instruct',
    'microsoft/WizardLM-2-7B',
    'microsoft/WizardLM-2-8x22B',
    'mistralai/Mixtral-8x7B-Instruct-v0.1',
    'mistralai/Mixtral-8x22B-Instruct-v0.1',
    'mistralai/Mistral-7B-Instruct-v0.2',
    'openchat/openchat-3.6-8b',
]
class LLM:
    """Small client for DeepInfra's OpenAI-compatible chat-completions endpoint.

    Each instance is bound to one model name and keeps a list of messages
    (``conversation_history``) that is prepended to every request.
    """

    # Endpoint every chat request is POSTed to.
    _API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"

    # Seconds to wait for the upstream server before giving up. Without a
    # timeout ``requests`` waits forever and a stalled upstream would hang
    # the caller indefinitely.
    _DEFAULT_TIMEOUT = 300.0

    # Headers sent with every request. They are constant, so they are built
    # once here instead of on every call.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Pragma': 'no-cache',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'X-Deepinfra-Source': 'web-embed',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
    }

    def __init__(self, model: str):
        """Create a client for ``model`` with a default system message."""
        # Model identifier sent in the ``model`` field of every request.
        self.model = model
        # Messages prepended to every request; starts with a default system
        # prompt. ``chat`` only ever adds system messages to this list.
        self.conversation_history = [{"role": "system", "content": "You are a Helpful AI."}]

    def chat(
        self,
        messages: List[Dict[str, str]],
        system_message: Optional[str] = None,
        timeout: float = _DEFAULT_TIMEOUT,
    ) -> Optional[str]:
        """Send ``messages`` to the model and return the reply text.

        Args:
            messages: Chat messages (``{"role": ..., "content": ...}``) to
                append after the stored conversation history.
            system_message: Optional extra system prompt. It is inserted at
                the front of ``conversation_history`` and therefore also
                applies to later calls on this instance.
            timeout: Seconds to wait for the upstream server.

        Returns:
            The content of the first choice's message, or ``None`` when the
            request fails or the response does not have the expected shape.
        """
        all_messages = self._compose_messages(messages, system_message)
        body = json.dumps(
            {
                'model': self.model,
                'messages': all_messages,
                'temperature': 0.7,
                'max_tokens': 8028,
                'stop': [],
                'stream': False,
            },
            separators=(',', ':'),
        )

        try:
            result = requests.post(
                url=self._API_URL, data=body, headers=self._HEADERS, timeout=timeout
            )
            # Treat HTTP error statuses as failures instead of trying to
            # parse an error body as a completion.
            result.raise_for_status()
            payload = result.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers invalid JSON on requests versions whose
            # JSON decode error is not a RequestException subclass.
            logging.getLogger(__name__).warning("DeepInfra request failed: %s", exc)
            return None

        content = self._extract_content(payload)
        if content is None:
            logging.getLogger(__name__).warning(
                "DeepInfra response had no message content"
            )
        return content

    def _compose_messages(
        self,
        messages: List[Dict[str, str]],
        system_message: Optional[str],
    ) -> List[Dict[str, str]]:
        """Return stored history followed by ``messages``; record ``system_message``."""
        if system_message is not None:
            entry = {"role": "system", "content": system_message}
            # Skip the insert when the same prompt is already at the front,
            # so repeated calls do not pile up duplicate system messages.
            if not self.conversation_history or self.conversation_history[0] != entry:
                self.conversation_history.insert(0, entry)
        return self.conversation_history + list(messages)

    @staticmethod
    def _extract_content(payload) -> Optional[str]:
        """Return ``payload['choices'][0]['message']['content']`` or ``None`` if absent."""
        try:
            return payload['choices'][0]['message']['content']
        except (KeyError, IndexError, TypeError):
            return None
|
|
def GenerativeIO(text, Model, System_Prompt):
    """Run a single-turn chat against ``Model`` and return the reply.

    Args:
        text: The user's input; sent with the ``user`` role.
        Model: Model identifier handed to ``LLM``.
        System_Prompt: Instruction text sent with the ``system`` role.
            When ``None``, no extra system message is added.

    Returns:
        Whatever ``LLM.chat`` returns: the reply text, or ``None`` on failure.
    """
    # The roles were previously swapped (``text`` went out as the system
    # message and ``System_Prompt`` as the user message).
    messages = []
    if System_Prompt is not None:
        messages.append({"role": "system", "content": System_Prompt})
    messages.append({"role": "user", "content": text})
    return LLM(model=Model).chat(messages)
|
|
@app.route('/generate', methods=['POST'])
def generate():
    """Handle ``POST /generate``: forward a prompt to a model and return its reply.

    Expects a JSON object with ``text`` and ``Model`` (both required) and an
    optional ``System_Prompt``.

    Returns:
        ``{"response": <reply or null>}`` on a well-formed request, or
        ``{"error": ...}`` with status 400 when the body is not a JSON
        object or a required field is missing.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so every malformed request gets the same JSON 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    text = data.get('text')
    Model = data.get('Model')
    System_Prompt = data.get('System_Prompt')

    # Reject requests that could never succeed upstream before calling out.
    missing = [
        field for field, value in (('text', text), ('Model', Model)) if not value
    ]
    if missing:
        return jsonify(
            {'error': 'Missing required field(s): ' + ', '.join(missing)}
        ), 400

    response = GenerativeIO(text, Model, System_Prompt)
    return jsonify({'response': response})
| |
@app.route('/models', methods=['GET'])
def get_models():
    """Handle ``GET /models``: return the module-level ``models`` list as JSON."""
    payload = jsonify(models)
    return payload
|
|
# Start the Flask server with its default settings when this file is run
# as a script (not when it is imported).
if __name__ == '__main__':
    app.run()
|
|