coyotte508 commited on
Commit
fc69895
·
0 Parent(s):

A new start

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .devcontainer/Dockerfile +9 -0
  2. .devcontainer/devcontainer.json +36 -0
  3. .dockerignore +13 -0
  4. .env +170 -0
  5. .env.ci +1 -0
  6. .eslintignore +13 -0
  7. .eslintrc.cjs +45 -0
  8. .gitattributes +4 -0
  9. .github/ISSUE_TEMPLATE/bug-report--chat-ui-.md +43 -0
  10. .github/ISSUE_TEMPLATE/config-support.md +9 -0
  11. .github/ISSUE_TEMPLATE/feature-request--chat-ui-.md +17 -0
  12. .github/ISSUE_TEMPLATE/huggingchat.md +11 -0
  13. .github/release.yml +16 -0
  14. .github/workflows/build-docs.yml +18 -0
  15. .github/workflows/build-image.yml +142 -0
  16. .github/workflows/build-pr-docs.yml +20 -0
  17. .github/workflows/deploy-dev.yml +62 -0
  18. .github/workflows/deploy-prod.yml +78 -0
  19. .github/workflows/lint-and-test.yml +84 -0
  20. .github/workflows/slugify.yaml +72 -0
  21. .github/workflows/trufflehog.yml +17 -0
  22. .github/workflows/upload-pr-documentation.yml +16 -0
  23. .gitignore +17 -0
  24. .husky/lint-stage-config.js +4 -0
  25. .husky/pre-commit +2 -0
  26. .npmrc +1 -0
  27. .prettierignore +14 -0
  28. .prettierrc +7 -0
  29. .vscode/launch.json +11 -0
  30. .vscode/settings.json +14 -0
  31. Dockerfile +93 -0
  32. LICENSE +203 -0
  33. PRIVACY.md +41 -0
  34. README.md +165 -0
  35. chart/Chart.yaml +5 -0
  36. chart/env/dev.yaml +205 -0
  37. chart/env/prod.yaml +218 -0
  38. chart/templates/_helpers.tpl +22 -0
  39. chart/templates/config.yaml +10 -0
  40. chart/templates/deployment.yaml +81 -0
  41. chart/templates/hpa.yaml +45 -0
  42. chart/templates/infisical.yaml +24 -0
  43. chart/templates/ingress-internal.yaml +32 -0
  44. chart/templates/ingress.yaml +32 -0
  45. chart/templates/network-policy.yaml +36 -0
  46. chart/templates/service-account.yaml +13 -0
  47. chart/templates/service-monitor.yaml +17 -0
  48. chart/templates/service.yaml +21 -0
  49. chart/values.yaml +73 -0
  50. docker-compose.yml +21 -0
.devcontainer/Dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ FROM mcr.microsoft.com/devcontainers/typescript-node:1-22-bookworm
2
+
3
+ # Install MongoDB tools (mongosh, mongorestore, mongodump) directly from MongoDB repository
4
+ RUN curl -fsSL https://www.mongodb.org/static/pgp/server-8.0.asc | gpg --dearmor -o /usr/share/keyrings/mongodb-server-8.0.gpg && \
5
+ echo "deb [ signed-by=/usr/share/keyrings/mongodb-server-8.0.gpg ] http://repo.mongodb.org/apt/debian bookworm/mongodb-org/8.0 main" | tee /etc/apt/sources.list.d/mongodb-org-8.0.list && \
6
+ apt-get update && \
7
+ apt-get install -y mongodb-mongosh mongodb-database-tools vim && \
8
+ apt-get autoremove -y && \
9
+ rm -rf /var/lib/apt/lists/*
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/typescript-node
3
+ {
4
+ "name": "Node.js & TypeScript",
5
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
6
+ "build": {
7
+ "dockerfile": "Dockerfile"
8
+ },
9
+
10
+ "customizations": {
11
+ "vscode": {
12
+ "extensions": ["esbenp.prettier-vscode", "dbaeumer.vscode-eslint", "svelte.svelte-vscode"]
13
+ }
14
+ },
15
+
16
+ "features": {
17
+ // Install docker in container
18
+ "ghcr.io/devcontainers/features/docker-in-docker:2": {
19
+ // Use proprietary docker engine. I get a timeout error when using the default moby engine and loading
20
+ // microsoft's PGP keys
21
+ "moby": false
22
+ }
23
+ }
24
+
25
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
26
+ // "forwardPorts": [],
27
+
28
+ // Use 'postCreateCommand' to run commands after the container is created.
29
+ // "postCreateCommand": "yarn install",
30
+
31
+ // Configure tool-specific properties.
32
+ // "customizations": {},
33
+
34
+ // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
35
+ // "remoteUser": "root"
36
+ }
.dockerignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Dockerfile
2
+ .vscode/
3
+ .idea
4
+ .gitignore
5
+ LICENSE
6
+ README.md
7
+ node_modules/
8
+ .svelte-kit/
9
+ .env*
10
+ !.env
11
+ .env.local
12
+ db
13
+ models/**
.env ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use .env.local to change these variables
2
+ # DO NOT EDIT THIS FILE WITH SENSITIVE DATA
3
+
4
+ ### Models ###
5
+ # Models are sourced exclusively from an OpenAI-compatible base URL.
6
+ # Example: https://router.huggingface.co/v1
7
+ OPENAI_BASE_URL=https://router.huggingface.co/v1
8
+
9
+ # Canonical auth token for any OpenAI-compatible provider
10
+ OPENAI_API_KEY=#your provider API key (works for HF router, OpenAI, LM Studio, etc.).
11
+ # When set to true, user token will be used for inference calls
12
+ USE_USER_TOKEN=false
13
+ # Automatically redirect to oauth login page if user is not logged in, when set to "true"
14
+ AUTOMATIC_LOGIN=false
15
+
16
+ ### MongoDB ###
17
+ MONGODB_URL=#your mongodb URL here, use chat-ui-db image if you don't want to set this
18
+ MONGODB_DB_NAME=chat-ui
19
+ MONGODB_DIRECT_CONNECTION=false
20
+
21
+
22
+ ## Public app configuration ##
23
+ PUBLIC_APP_NAME=ChatUI # name used as title throughout the app
24
+ PUBLIC_APP_ASSETS=chatui # used to find logos & favicons in static/$PUBLIC_APP_ASSETS
25
+ PUBLIC_APP_DESCRIPTION="Making the community's best AI chat models available to everyone."# description used throughout the app
26
+ PUBLIC_SMOOTH_UPDATES=false # set to true to enable smoothing of messages client-side, can be CPU intensive
27
+ PUBLIC_ORIGIN=
28
+ PUBLIC_SHARE_PREFIX=
29
+ PUBLIC_GOOGLE_ANALYTICS_ID=
30
+ PUBLIC_PLAUSIBLE_SCRIPT_URL=
31
+ PUBLIC_APPLE_APP_ID=
32
+
33
+ COUPLE_SESSION_WITH_COOKIE_NAME=
34
+ # when OPEN_ID is configured, users are required to login after the welcome modal
35
+ OPENID_CLIENT_ID="" # You can set to "__CIMD__" for automatic oauth app creation when deployed
36
+ OPENID_CLIENT_SECRET=
37
+ OPENID_SCOPES="openid profile inference-api"
38
+ USE_USER_TOKEN=
39
+ AUTOMATIC_LOGIN=# if true authentication is required on all routes
40
+
41
+ ### Local Storage ###
42
+ MONGO_STORAGE_PATH= # where is the db folder stored
43
+
44
+ ## Models overrides
45
+ MODELS=
46
+
47
+ ## Task model
48
+ # Optional: set to the model id/name from the `${OPENAI_BASE_URL}/models` list
49
+ # to use for internal tasks (title summarization, etc). If not set, the current model will be used
50
+ TASK_MODEL=
51
+
52
+ # Arch router (OpenAI-compatible) endpoint base URL used for route selection
53
+ # Example: https://api.openai.com/v1 or your hosted Arch endpoint
54
+ LLM_ROUTER_ARCH_BASE_URL=
55
+
56
+ ## LLM Router Configuration
57
+ # Path to routes policy (JSON array). Defaults to llm-router/routes.chat.json
58
+ LLM_ROUTER_ROUTES_PATH=
59
+
60
+ # Model used at the Arch router endpoint for selection
61
+ LLM_ROUTER_ARCH_MODEL=
62
+
63
+ # Fallback behavior
64
+ # Route to map "other" to (must exist in routes file)
65
+ LLM_ROUTER_OTHER_ROUTE=casual_conversation
66
+ # Model to call if the Arch selection fails entirely
67
+ LLM_ROUTER_FALLBACK_MODEL=
68
+ # Arch selection timeout in milliseconds (default 10000)
69
+ LLM_ROUTER_ARCH_TIMEOUT_MS=10000
70
+ # Maximum length (in characters) for assistant messages sent to router for route selection (default 500)
71
+ LLM_ROUTER_MAX_ASSISTANT_LENGTH=500
72
+ # Maximum length (in characters) for previous user messages sent to router (latest user message not trimmed, default 400)
73
+ LLM_ROUTER_MAX_PREV_USER_LENGTH=400
74
+
75
+ # Enable router multimodal fallback (set to true to allow image inputs via router)
76
+ LLM_ROUTER_ENABLE_MULTIMODAL=false
77
+ # Optional: specific model to use for multimodal requests. If not set, uses first multimodal model
78
+ LLM_ROUTER_MULTIMODAL_MODEL=
79
+
80
+ # Router UI overrides (client-visible)
81
+ # Public display name for the router entry in the model list. Defaults to "Omni".
82
+ PUBLIC_LLM_ROUTER_DISPLAY_NAME=Omni
83
+ # Optional: public logo URL for the router entry. If unset, the UI shows a Carbon icon.
84
+ PUBLIC_LLM_ROUTER_LOGO_URL=
85
+ # Public alias id used for the virtual router model (Omni). Defaults to "omni".
86
+ PUBLIC_LLM_ROUTER_ALIAS_ID=omni
87
+
88
+ ### Authentication ###
89
+ # Parameters to enable open id login
90
+ OPENID_CONFIG=
91
+ # if it's defined, only these emails will be allowed to use login
92
+ ALLOWED_USER_EMAILS=[]
93
+ # If it's defined, users with emails matching these domains will also be allowed to use login
94
+ ALLOWED_USER_DOMAINS=[]
95
+ # valid alternative redirect URLs for OAuth, used for HuggingChat apps
96
+ ALTERNATIVE_REDIRECT_URLS=[]
97
+ ### Cookies
98
+ # name of the cookie used to store the session
99
+ COOKIE_NAME=hf-chat
100
+ # If the value of this cookie changes, the session is destroyed. Useful if chat-ui is deployed on a subpath
101
+ # of your domain, and you want chat ui sessions to reset if the user's auth changes
102
+ COUPLE_SESSION_WITH_COOKIE_NAME=
103
+ # specify secure behaviour for cookies
104
+ COOKIE_SAMESITE=# can be "lax", "strict", "none" or left empty
105
+ COOKIE_SECURE=# set to true to only allow cookies over https
106
+ TRUSTED_EMAIL_HEADER=# header to use to get the user email, only use if you know what you are doing
107
+
108
+ ### Admin stuff ###
109
+ ADMIN_CLI_LOGIN=true # set to false to disable the CLI login
110
+ ADMIN_TOKEN=#We recommend leaving this empty, you can get the token from the terminal.
111
+
112
+ ### Feature Flags ###
113
+ LLM_SUMMARIZATION=true # generate conversation titles with LLMs
114
+
115
+ ALLOW_IFRAME=true # Allow the app to be embedded in an iframe
116
+ ENABLE_DATA_EXPORT=true
117
+
118
+ ### Rate limits ###
119
+ # See `src/lib/server/usageLimits.ts`
120
+ # {
121
+ # conversations: number, # how many conversations
122
+ # messages: number, # how many messages in a conversation
123
+ # assistants: number, # how many assistants
124
+ # messageLength: number, # how long can a message be before we cut it off
125
+ # messagesPerMinute: number, # how many messages per minute
126
+ # tools: number # how many tools
127
+ # }
128
+ USAGE_LIMITS={}
129
+
130
+ ### HuggingFace specific ###
131
+ ## Feature flag & admin settings
132
+ # Used for setting early access & admin flags to users
133
+ HF_ORG_ADMIN=
134
+ HF_ORG_EARLY_ACCESS=
135
+ WEBHOOK_URL_REPORT_ASSISTANT=#provide slack webhook url to get notified for reports/feature requests
136
+
137
+
138
+ ### Metrics ###
139
+ METRICS_ENABLED=false
140
+ METRICS_PORT=5565
141
+ LOG_LEVEL=info
142
+
143
+
144
+ ### Parquet export ###
145
+ # Not in use anymore but useful to export conversations to a parquet file as a HuggingFace dataset
146
+ PARQUET_EXPORT_DATASET=
147
+ PARQUET_EXPORT_HF_TOKEN=
148
+ ADMIN_API_SECRET=# secret to admin API calls, like computing usage stats or exporting parquet data
149
+
150
+ ### Config ###
151
+ ENABLE_CONFIG_MANAGER=true
152
+
153
+ ### Docker build variables ###
154
+ # These values cannot be updated at runtime
155
+ # They need to be passed when building the docker image
156
+ # See https://github.com/huggingface/chat-ui/main/.github/workflows/deploy-prod.yml#L44-L47
157
+ APP_BASE="" # base path of the app, e.g. /chat, left blank as default
158
+ ### Body size limit for SvelteKit https://svelte.dev/docs/kit/adapter-node#Environment-variables-BODY_SIZE_LIMIT
159
+ BODY_SIZE_LIMIT=15728640
160
+ PUBLIC_COMMIT_SHA=
161
+
162
+ ### LEGACY parameters
163
+ ALLOW_INSECURE_COOKIES=false # LEGACY! Use COOKIE_SECURE and COOKIE_SAMESITE instead
164
+ PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
165
+ RATE_LIMIT= # /!\ DEPRECATED definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
166
+ OPENID_NAME_CLAIM="name" # Change to "username" for some providers that do not provide name
167
+ OPENID_PROVIDER_URL=https://huggingface.co # for Google, use https://accounts.google.com
168
+ OPENID_TOLERANCE=
169
+ OPENID_RESOURCE=
170
+ EXPOSE_API=# deprecated, API is now always exposed
.env.ci ADDED
@@ -0,0 +1 @@
 
 
1
+ MONGODB_URL=mongodb://localhost:27017/
.eslintignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ node_modules
3
+ /build
4
+ /.svelte-kit
5
+ /package
6
+ .env
7
+ .env.*
8
+ !.env.example
9
+
10
+ # Ignore files for PNPM, NPM and YARN
11
+ pnpm-lock.yaml
12
+ package-lock.json
13
+ yarn.lock
.eslintrc.cjs ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module.exports = {
2
+ root: true,
3
+ parser: "@typescript-eslint/parser",
4
+ extends: [
5
+ "eslint:recommended",
6
+ "plugin:@typescript-eslint/recommended",
7
+ "plugin:svelte/recommended",
8
+ "prettier",
9
+ ],
10
+ plugins: ["@typescript-eslint"],
11
+ ignorePatterns: ["*.cjs"],
12
+ overrides: [
13
+ {
14
+ files: ["*.svelte"],
15
+ parser: "svelte-eslint-parser",
16
+ parserOptions: {
17
+ parser: "@typescript-eslint/parser",
18
+ },
19
+ },
20
+ ],
21
+ parserOptions: {
22
+ sourceType: "module",
23
+ ecmaVersion: 2020,
24
+ extraFileExtensions: [".svelte"],
25
+ },
26
+ rules: {
27
+ "no-empty": "off",
28
+ "require-yield": "off",
29
+ "@typescript-eslint/no-explicit-any": "error",
30
+ "@typescript-eslint/no-non-null-assertion": "error",
31
+ "@typescript-eslint/no-unused-vars": [
32
+ // prevent variables with a _ prefix from being marked as unused
33
+ "error",
34
+ {
35
+ argsIgnorePattern: "^_",
36
+ },
37
+ ],
38
+ "object-shorthand": ["error", "always"],
39
+ },
40
+ env: {
41
+ browser: true,
42
+ es2017: true,
43
+ node: true,
44
+ },
45
+ };
.gitattributes ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ */.ttf filter=lfs diff=lfs merge=lfs -text
2
+ static/huggingchat/tools-thumbnail.png filter=lfs diff=lfs merge=lfs -text
3
+ static/huggingchat/assistants-thumbnail.png filter=lfs diff=lfs merge=lfs -text
4
+ *.ttf filter=lfs diff=lfs merge=lfs -text
.github/ISSUE_TEMPLATE/bug-report--chat-ui-.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Bug Report (chat-ui)
3
+ about: Use this for confirmed issues with chat-ui
4
+ title: ""
5
+ labels: bug
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Bug description
10
+
11
+ <!-- A clear and concise description of what the bug is. -->
12
+
13
+ ## Steps to reproduce
14
+
15
+ <!-- Steps to reproduce the issue -->
16
+
17
+ ## Screenshots
18
+
19
+ <!-- If applicable, add screenshots to help explain your problem. -->
20
+
21
+ ## Context
22
+
23
+ ### Logs
24
+
25
+ <!-- Add any logs that are relevant to your issue. Could be browser or server logs. Wrap in code blocks. -->
26
+
27
+ ```
28
+ // logs here if relevant
29
+ ```
30
+
31
+ ### Specs
32
+
33
+ - **OS**:
34
+ - **Browser**:
35
+ - **chat-ui commit**:
36
+
37
+ ### Config
38
+
39
+ <!-- Add the environment variables you've used to setup chat-ui, making sure to redact any secrets. -->
40
+
41
+ ## Notes
42
+
43
+ <!-- Anything else relevant to help the issue get solved -->
.github/ISSUE_TEMPLATE/config-support.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Config Support
3
+ about: Help with setting up chat-ui locally
4
+ title: ""
5
+ labels: support
6
+ assignees: ""
7
+ ---
8
+
9
+ **Please use the discussions on GitHub** for getting help with setting things up instead of opening an issue: https://github.com/huggingface/chat-ui/discussions
.github/ISSUE_TEMPLATE/feature-request--chat-ui-.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Feature Request (chat-ui)
3
+ about: Suggest new features to be added to chat-ui
4
+ title: ""
5
+ labels: enhancement
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Describe your feature request
10
+
11
+ <!-- Short description of what this is about -->
12
+
13
+ ## Screenshots (if relevant)
14
+
15
+ ## Implementation idea
16
+
17
+ <!-- If you know how this should be implemented in the codebase, share your thoughts. Let us know if you feel like implementing it yourself as well! -->
.github/ISSUE_TEMPLATE/huggingchat.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: HuggingChat
3
+ about: Requests & reporting outages on HuggingChat, the hosted version of chat-ui.
4
+ title: ""
5
+ labels: huggingchat
6
+ assignees: ""
7
+ ---
8
+
9
+ **Do not use GitHub issues** for requesting models on HuggingChat or reporting issues with HuggingChat being down/overloaded.
10
+
11
+ **Use the discussions page on the hub instead:** https://huggingface.co/spaces/huggingchat/chat-ui/discussions
.github/release.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ changelog:
2
+ exclude:
3
+ labels:
4
+ - huggingchat
5
+ - CI/CD
6
+ - documentation
7
+ categories:
8
+ - title: Features
9
+ labels:
10
+ - enhancement
11
+ - title: Bugfixes
12
+ labels:
13
+ - bug
14
+ - title: Other changes
15
+ labels:
16
+ - "*"
.github/workflows/build-docs.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build documentation
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ - v*-release
8
+
9
+ jobs:
10
+ build:
11
+ uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
12
+ with:
13
+ commit_sha: ${{ github.sha }}
14
+ package: chat-ui
15
+ additional_args: --not_python_module
16
+ secrets:
17
+ token: ${{ secrets.HUGGINGFACE_PUSH }}
18
+ hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
.github/workflows/build-image.yml ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build and Publish Image
2
+
3
+ permissions:
4
+ packages: write
5
+
6
+ on:
7
+ push:
8
+ branches:
9
+ - "main"
10
+ pull_request:
11
+ branches:
12
+ - "*"
13
+ paths:
14
+ - "Dockerfile"
15
+ - "entrypoint.sh"
16
+ workflow_dispatch:
17
+ release:
18
+ types: [published, edited]
19
+
20
+ jobs:
21
+ build-and-publish-image-with-db:
22
+ runs-on:
23
+ group: aws-general-8-plus
24
+ steps:
25
+ - name: Checkout
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Extract package version
29
+ id: package-version
30
+ run: |
31
+ VERSION=$(jq -r .version package.json)
32
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
33
+ MAJOR=$(echo $VERSION | cut -d '.' -f1)
34
+ echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
35
+ MINOR=$(echo $VERSION | cut -d '.' -f1).$(echo $VERSION | cut -d '.' -f2)
36
+ echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
37
+
38
+ - name: Docker metadata
39
+ id: meta
40
+ uses: docker/metadata-action@v5
41
+ with:
42
+ images: |
43
+ ghcr.io/huggingface/chat-ui-db
44
+ tags: |
45
+ type=raw,value=${{ steps.package-version.outputs.VERSION }},enable=${{github.event_name == 'release'}}
46
+ type=raw,value=${{ steps.package-version.outputs.MAJOR }},enable=${{github.event_name == 'release'}}
47
+ type=raw,value=${{ steps.package-version.outputs.MINOR }},enable=${{github.event_name == 'release'}}
48
+ type=raw,value=latest,enable={{is_default_branch}}
49
+ type=sha,enable={{is_default_branch}}
50
+
51
+ - name: Set up QEMU
52
+ uses: docker/setup-qemu-action@v3
53
+
54
+ - name: Set up Docker Buildx
55
+ uses: docker/setup-buildx-action@v3
56
+
57
+ - name: Login to GitHub Container Registry
58
+ if: github.event_name != 'pull_request'
59
+ uses: docker/login-action@v3
60
+ with:
61
+ registry: ghcr.io
62
+ username: ${{ github.repository_owner }}
63
+ password: ${{ secrets.GITHUB_TOKEN }}
64
+
65
+ - name: Inject slug/short variables
66
+ uses: rlespinasse/[email protected]
67
+
68
+ - name: Build and Publish Docker Image with DB
69
+ uses: docker/build-push-action@v5
70
+ with:
71
+ context: .
72
+ file: Dockerfile
73
+ push: ${{ github.event_name != 'pull_request' }}
74
+ tags: ${{ steps.meta.outputs.tags }}
75
+ labels: ${{ steps.meta.outputs.labels }}
76
+ platforms: linux/amd64,linux/arm64
77
+ cache-from: type=gha
78
+ cache-to: type=gha,mode=max
79
+ build-args: |
80
+ INCLUDE_DB=true
81
+ PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
82
+ build-and-publish-image-nodb:
83
+ runs-on:
84
+ group: aws-general-8-plus
85
+ steps:
86
+ - name: Checkout
87
+ uses: actions/checkout@v4
88
+
89
+ - name: Extract package version
90
+ id: package-version
91
+ run: |
92
+ VERSION=$(jq -r .version package.json)
93
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
94
+ MAJOR=$(echo $VERSION | cut -d '.' -f1)
95
+ echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
96
+ MINOR=$(echo $VERSION | cut -d '.' -f1).$(echo $VERSION | cut -d '.' -f2)
97
+ echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
98
+
99
+ - name: Docker metadata
100
+ id: meta
101
+ uses: docker/metadata-action@v5
102
+ with:
103
+ images: |
104
+ ghcr.io/huggingface/chat-ui
105
+ tags: |
106
+ type=raw,value=${{ steps.package-version.outputs.VERSION }},enable=${{github.event_name == 'release'}}
107
+ type=raw,value=${{ steps.package-version.outputs.MAJOR }},enable=${{github.event_name == 'release'}}
108
+ type=raw,value=${{ steps.package-version.outputs.MINOR }},enable=${{github.event_name == 'release'}}
109
+ type=raw,value=latest,enable={{is_default_branch}}
110
+ type=sha,enable={{is_default_branch}}
111
+
112
+ - name: Set up QEMU
113
+ uses: docker/setup-qemu-action@v3
114
+
115
+ - name: Set up Docker Buildx
116
+ uses: docker/setup-buildx-action@v3
117
+
118
+ - name: Login to GitHub Container Registry
119
+ if: github.event_name != 'pull_request'
120
+ uses: docker/login-action@v3
121
+ with:
122
+ registry: ghcr.io
123
+ username: ${{ github.repository_owner }}
124
+ password: ${{ secrets.GITHUB_TOKEN }}
125
+
126
+ - name: Inject slug/short variables
127
+ uses: rlespinasse/[email protected]
128
+
129
+ - name: Build and Publish Docker Image without DB
130
+ uses: docker/build-push-action@v5
131
+ with:
132
+ context: .
133
+ file: Dockerfile
134
+ push: ${{ github.event_name != 'pull_request' }}
135
+ tags: ${{ steps.meta.outputs.tags }}
136
+ labels: ${{ steps.meta.outputs.labels }}
137
+ platforms: linux/amd64,linux/arm64
138
+ cache-from: type=gha
139
+ cache-to: type=gha,mode=max
140
+ build-args: |
141
+ INCLUDE_DB=false
142
+ PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
.github/workflows/build-pr-docs.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build PR Documentation
2
+
3
+ on:
4
+ pull_request:
5
+ paths:
6
+ - "docs/source/**"
7
+ - ".github/workflows/build-pr-docs.yml"
8
+
9
+ concurrency:
10
+ group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ build:
15
+ uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
16
+ with:
17
+ commit_sha: ${{ github.event.pull_request.head.sha }}
18
+ pr_number: ${{ github.event.number }}
19
+ package: chat-ui
20
+ additional_args: --not_python_module
.github/workflows/deploy-dev.yml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy to ephemeral
2
+ on:
3
+ pull_request:
4
+
5
+ jobs:
6
+ branch-slug:
7
+ uses: ./.github/workflows/slugify.yaml
8
+ with:
9
+ value: ${{ github.head_ref }}
10
+
11
+ deploy-dev:
12
+ if: contains(github.event.pull_request.labels.*.name, 'preview')
13
+ runs-on: ubuntu-latest
14
+ needs: branch-slug
15
+ environment:
16
+ name: dev
17
+ url: https://${{ needs.branch-slug.outputs.slug }}.chat-dev.huggingface.tech/chat/
18
+ steps:
19
+ - name: Checkout
20
+ uses: actions/checkout@v4
21
+
22
+ - name: Login to Registry
23
+ uses: docker/login-action@v3
24
+ with:
25
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
26
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
27
+
28
+ - name: Inject slug/short variables
29
+ uses: rlespinasse/[email protected]
30
+
31
+ - name: Set GITHUB_SHA_SHORT from PR
32
+ if: env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT != null
33
+ run: echo "GITHUB_SHA_SHORT=${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT }}" >> $GITHUB_ENV
34
+
35
+ - name: Docker metadata
36
+ id: meta
37
+ uses: docker/metadata-action@v5
38
+ with:
39
+ images: |
40
+ huggingface/chat-ui
41
+ tags: |
42
+ type=raw,value=dev-${{ env.GITHUB_SHA_SHORT }}
43
+
44
+ - name: Set up Docker Buildx
45
+ uses: docker/setup-buildx-action@v3
46
+
47
+ - name: Build and Publish HuggingChat image
48
+ uses: docker/build-push-action@v5
49
+ with:
50
+ context: .
51
+ file: Dockerfile
52
+ push: true
53
+ tags: ${{ steps.meta.outputs.tags }}
54
+ labels: ${{ steps.meta.outputs.labels }}
55
+ platforms: linux/amd64
56
+ cache-to: type=gha,mode=max,scope=amd64
57
+ cache-from: type=gha,scope=amd64
58
+ provenance: false
59
+ build-args: |
60
+ INCLUDE_DB=false
61
+ APP_BASE=/chat
62
+ PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
.github/workflows/deploy-prod.yml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy to k8s
2
+ on:
3
+ # run this workflow manually from the Actions tab
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ build-and-publish-huggingchat-image:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - name: Checkout
11
+ uses: actions/checkout@v4
12
+
13
+ - name: Login to Registry
14
+ uses: docker/login-action@v3
15
+ with:
16
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
17
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
18
+
19
+ - name: Docker metadata
20
+ id: meta
21
+ uses: docker/metadata-action@v5
22
+ with:
23
+ images: |
24
+ huggingface/chat-ui
25
+ tags: |
26
+ type=raw,value=latest,enable={{is_default_branch}}
27
+ type=sha,enable=true,prefix=sha-,format=short,sha-len=8
28
+
29
+ - name: Set up Docker Buildx
30
+ uses: docker/setup-buildx-action@v3
31
+
32
+ - name: Inject slug/short variables
33
+ uses: rlespinasse/[email protected]
34
+
35
+ - name: Build and Publish HuggingChat image
36
+ uses: docker/build-push-action@v5
37
+ with:
38
+ context: .
39
+ file: Dockerfile
40
+ push: ${{ github.event_name != 'pull_request' }}
41
+ tags: ${{ steps.meta.outputs.tags }}
42
+ labels: ${{ steps.meta.outputs.labels }}
43
+ platforms: linux/amd64
44
+ cache-to: type=gha,mode=max,scope=amd64
45
+ cache-from: type=gha,scope=amd64
46
+ provenance: false
47
+ build-args: |
48
+ INCLUDE_DB=false
49
+ APP_BASE=/chat
50
+ PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
51
+ deploy:
52
+ name: Deploy on prod
53
+ runs-on: ubuntu-latest
54
+ needs: ["build-and-publish-huggingchat-image"]
55
+ steps:
56
+ - name: Inject slug/short variables
57
+ uses: rlespinasse/[email protected]
58
+
59
+ - name: Gen values
60
+ run: |
61
+ VALUES=$(cat <<-END
62
+ image:
63
+ tag: "sha-${{ env.GITHUB_SHA_SHORT }}"
64
+ END
65
+ )
66
+ echo "VALUES=$(echo "$VALUES" | yq -o=json | jq tostring)" >> $GITHUB_ENV
67
+
68
+ - name: Deploy on infra-deployments
69
+ uses: aurelien-baudet/workflow-dispatch@v2
70
+ with:
71
+ workflow: Update application single value
72
+ repo: huggingface/infra-deployments
73
+ wait-for-completion: true
74
+ wait-for-completion-interval: 10s
75
+ display-workflow-run-url-interval: 10s
76
+ ref: refs/heads/main
77
+ token: ${{ secrets.GIT_TOKEN_INFRA_DEPLOYMENT }}
78
+ inputs: '{"path": "hub/chat-ui/chat-ui.yaml", "value": ${{ env.VALUES }}, "url": "${{ github.event.head_commit.url }}"}'
.github/workflows/lint-and-test.yml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Lint and test
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ branches:
7
+ - main
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ timeout-minutes: 10
13
+
14
+ steps:
15
+ - uses: actions/checkout@v3
16
+
17
+ - uses: actions/setup-node@v3
18
+ with:
19
+ node-version: "20"
20
+ cache: "npm"
21
+ - run: |
22
+ npm install ci
23
+ - name: "Checking lint/format errors"
24
+ run: |
25
+ npm run lint
26
+ - name: "Checking type errors"
27
+ run: |
28
+ npm run check
29
+
30
+ test:
31
+ runs-on: ubuntu-latest
32
+ timeout-minutes: 10
33
+
34
+ steps:
35
+ - uses: actions/checkout@v3
36
+ - uses: actions/setup-node@v3
37
+ with:
38
+ node-version: "20"
39
+ cache: "npm"
40
+ - run: |
41
+ npm ci
42
+ npx playwright install
43
+ - name: "Tests"
44
+ run: |
45
+ npm run test
46
+
47
+ build-check:
48
+ runs-on:
49
+ group: aws-general-8-plus
50
+ timeout-minutes: 10
51
+ steps:
52
+ - uses: actions/checkout@v3
53
+ - name: Build Docker image
54
+ run: |
55
+ docker build \
56
+ --build-arg INCLUDE_DB=true \
57
+ -t chat-ui-test:latest .
58
+
59
+ - name: Run Docker container
60
+ run: |
61
+ export DOTENV_LOCAL=$(<.env.ci)
62
+ docker run -d --rm --network=host \
63
+ --name chat-ui-test \
64
+ -e DOTENV_LOCAL="$DOTENV_LOCAL" \
65
+ chat-ui-test:latest
66
+
67
+ - name: Wait for server to start
68
+ run: |
69
+ for i in {1..10}; do
70
+ if curl -s -o /dev/null -w "%{http_code}" http://localhost:3000/ | grep -q "200"; then
71
+ echo "Server is up"
72
+ exit 0
73
+ fi
74
+ echo "Waiting for server..."
75
+ sleep 2
76
+ done
77
+ echo "Server did not start in time"
78
+ docker logs chat-ui-test
79
+ exit 1
80
+
81
+ - name: Stop Docker container
82
+ if: always()
83
+ run: |
84
+ docker stop chat-ui-test || true
.github/workflows/slugify.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Generate Branch Slug
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ value:
7
+ description: 'Value to slugify'
8
+ required: true
9
+ type: string
10
+ outputs:
11
+ slug:
12
+ description: 'Slugified value'
13
+ value: ${{ jobs.generate-slug.outputs.slug }}
14
+
15
+ jobs:
16
+ generate-slug:
17
+ runs-on: ubuntu-latest
18
+ outputs:
19
+ slug: ${{ steps.slugify.outputs.slug }}
20
+
21
+ steps:
22
+ - name: Setup Go
23
+ uses: actions/setup-go@v5
24
+ with:
25
+ go-version: '1.21'
26
+
27
+ - name: Generate slug
28
+ id: slugify
29
+ run: |
30
+ # Create working directory
31
+ mkdir -p $HOME/slugify
32
+ cd $HOME/slugify
33
+
34
+ # Create Go script
35
+ cat > main.go << 'EOF'
36
+ package main
37
+
38
+ import (
39
+ "fmt"
40
+ "os"
41
+ "github.com/gosimple/slug"
42
+ )
43
+
44
+ func main() {
45
+ if len(os.Args) < 2 {
46
+ fmt.Println("Usage: slugify <text>")
47
+ os.Exit(1)
48
+ }
49
+
50
+ text := os.Args[1]
51
+ slugged := slug.Make(text)
52
+ fmt.Println(slugged)
53
+ }
54
+ EOF
55
+
56
+ # Initialize module and install dependency
57
+ go mod init slugify
58
+ go mod tidy
59
+ go get github.com/gosimple/slug
60
+
61
+ # Build
62
+ go build -o slugify main.go
63
+
64
+ # Generate slug
65
+ VALUE="${{ inputs.value }}"
66
+ echo "Input value: $VALUE"
67
+
68
+ SLUG=$(./slugify "$VALUE")
69
+ echo "Generated slug: $SLUG"
70
+
71
+ # Export
72
+ echo "slug=$SLUG" >> $GITHUB_OUTPUT
.github/workflows/trufflehog.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ on:
2
+ push:
3
+
4
+ name: Secret Leaks
5
+
6
+ jobs:
7
+ trufflehog:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - name: Checkout code
11
+ uses: actions/checkout@v4
12
+ with:
13
+ fetch-depth: 0
14
+ - name: Secret Scanning
15
+ uses: trufflesecurity/trufflehog@main
16
+ with:
17
+ extra_args: --results=verified,unknown
.github/workflows/upload-pr-documentation.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Upload PR Documentation
2
+
3
+ on:
4
+ workflow_run:
5
+ workflows: ["Build PR Documentation"]
6
+ types:
7
+ - completed
8
+
9
+ jobs:
10
+ build:
11
+ uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
12
+ with:
13
+ package_name: chat-ui
14
+ secrets:
15
+ hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
16
+ comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
.gitignore ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ node_modules
3
+ /build
4
+ /.svelte-kit
5
+ /package
6
+ .env
7
+ .env.*
8
+ vite.config.js.timestamp-*
9
+ vite.config.ts.timestamp-*
10
+ SECRET_CONFIG
11
+ .idea
12
+ !.env.ci
13
+ !.env
14
+ gcp-*.json
15
+ db
16
+ models/*
17
+ !models/add-your-models-here.txt
.husky/lint-stage-config.js ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ export default {
2
+ "*.{js,jsx,ts,tsx}": ["prettier --write", "eslint --fix", "eslint"],
3
+ "*.json": ["prettier --write"],
4
+ };
.husky/pre-commit ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ set -e
2
+ npx lint-staged --config ./.husky/lint-stage-config.js
.npmrc ADDED
@@ -0,0 +1 @@
 
 
1
+ engine-strict=true
.prettierignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ node_modules
3
+ /build
4
+ /.svelte-kit
5
+ /package
6
+ /chart
7
+ .env
8
+ .env.*
9
+ !.env.example
10
+
11
+ # Ignore files for PNPM, NPM and YARN
12
+ pnpm-lock.yaml
13
+ package-lock.json
14
+ yarn.lock
.prettierrc ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "useTabs": true,
3
+ "trailingComma": "es5",
4
+ "printWidth": 100,
5
+ "plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"],
6
+ "overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
7
+ }
.vscode/launch.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.2.0",
3
+ "configurations": [
4
+ {
5
+ "command": "npm run dev",
6
+ "name": "Run development server",
7
+ "request": "launch",
8
+ "type": "node-terminal"
9
+ }
10
+ ]
11
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "editor.formatOnSave": true,
3
+ "editor.defaultFormatter": "esbenp.prettier-vscode",
4
+ "editor.codeActionsOnSave": {
5
+ "source.fixAll": "explicit"
6
+ },
7
+ "eslint.validate": ["javascript", "svelte"],
8
+ "[svelte]": {
9
+ "editor.defaultFormatter": "esbenp.prettier-vscode"
10
+ },
11
+ "[typescript]": {
12
+ "editor.defaultFormatter": "esbenp.prettier-vscode"
13
+ }
14
+ }
Dockerfile ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# syntax=docker/dockerfile:1
ARG INCLUDE_DB=false

FROM node:24-slim AS base

# install dotenv-cli globally so the entrypoint can load .env files
RUN npm install -g dotenv-cli

# switch to a user that works for spaces (HF Spaces expects uid 1000)
RUN userdel -r node
RUN useradd -m -u 1000 user
USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR /app

# add a .env.local if the user doesn't bind a volume to it
RUN touch /app/.env.local

USER root
# update + install + cleanup in a SINGLE layer: a standalone `apt-get update`
# layer gets cached and goes stale relative to later installs, and leaving
# /var/lib/apt/lists in the image bloats it for no benefit.
RUN apt-get update && \
    apt-get install -y --no-install-recommends libgomp1 libcurl4 curl dnsutils nano && \
    rm -rf /var/lib/apt/lists/*

# ensure npm cache dir exists before adjusting ownership
RUN mkdir -p /home/user/.npm && chown -R 1000:1000 /home/user/.npm

USER user

COPY --chown=1000 .env /app/.env
COPY --chown=1000 entrypoint.sh /app/entrypoint.sh
COPY --chown=1000 package.json /app/package.json
COPY --chown=1000 package-lock.json /app/package-lock.json

RUN chmod +x /app/entrypoint.sh

FROM node:24 AS builder

WORKDIR /app

COPY --link --chown=1000 package-lock.json package.json ./

# svelte requires APP_BASE at build time so it must be passed as a build arg
ARG APP_BASE=
ARG PUBLIC_APP_COLOR=
ENV BODY_SIZE_LIMIT=15728640

RUN --mount=type=cache,target=/app/.npm \
    npm set cache /app/.npm && \
    npm ci

COPY --link --chown=1000 . .

RUN git config --global --add safe.directory /app && \
    npm run build

# mongo image
FROM mongo:7 AS mongo

# image to be used if INCLUDE_DB is false
FROM base AS local_db_false

# image to be used if INCLUDE_DB is true
FROM base AS local_db_true

# copy mongo binaries from the official image stage
COPY --from=mongo /usr/bin/mongo* /usr/bin/

ENV MONGODB_URL=mongodb://localhost:27017
USER root
# single layer: create the data dir and hand it to uid 1000 together
RUN mkdir -p /data/db && chown -R 1000:1000 /data/db
USER user

# final image
FROM local_db_${INCLUDE_DB} AS final

# build arg to determine if the database should be included
ARG INCLUDE_DB=false
ENV INCLUDE_DB=${INCLUDE_DB}

# svelte requires APP_BASE at build time so it must be passed as a build arg
ARG APP_BASE=
ARG PUBLIC_APP_COLOR=
ARG PUBLIC_COMMIT_SHA=
ENV PUBLIC_COMMIT_SHA=${PUBLIC_COMMIT_SHA}
ENV BODY_SIZE_LIMIT=15728640

# import the build & dependencies
COPY --from=builder --chown=1000 /app/build /app/build
COPY --from=builder --chown=1000 /app/node_modules /app/node_modules

CMD ["/bin/bash", "-c", "/app/entrypoint.sh"]
LICENSE ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2018- The Hugging Face team. All rights reserved.
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright [yyyy] [name of copyright owner]
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
PRIVACY.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Privacy
2
+
3
+ > Last updated: Sep 15, 2025
4
+
5
+ Basics:
6
+
7
+ - Sign-in: You authenticate with your Hugging Face account.
8
+ - Conversation history: Stored so you can access past chats; you can delete any conversation at any time from the UI.
9
+
10
+ 🗓 Please also consult huggingface.co's main privacy policy at <https://huggingface.co/privacy>. To exercise any of your legal privacy rights, please send an email to <[email protected]>.
11
+
12
+ ## Data handling and processing
13
+
14
+ HuggingChat uses Hugging Face’s Inference Providers to access models from multiple partners via a single API. Depending on the model and availability, inference runs with the corresponding provider.
15
+
16
+ - Inference Providers documentation: <https://huggingface.co/docs/inference-providers>
17
+ - Security & Compliance: <https://huggingface.co/docs/inference-providers/security>
18
+
19
+ Security and routing facts
20
+
21
+ - Hugging Face does not store any user data for training purposes.
22
+ - Hugging Face does not store the request body or the response when routing requests through Hugging Face.
23
+ - Logs are kept for debugging purposes for up to 30 days, but no user data or tokens are stored in those logs.
24
+ - Inference Provider routing uses TLS/SSL to encrypt data in transit.
25
+ - The Hugging Face Hub (which Inference Providers is a feature of) is SOC 2 Type 2 certified. See <https://huggingface.co/docs/hub/security>.
26
+
27
+ External providers are responsible for their own security and data handling. Please consult each provider’s respective security and privacy policies via the Inference Providers documentation linked above.
28
+
29
+ ## Technical details
30
+
31
+ [![chat-ui](https://img.shields.io/github/stars/huggingface/chat-ui)](https://github.com/huggingface/chat-ui)
32
+
33
+ The app is completely open source, and further development takes place on the [huggingface/chat-ui](https://github.com/huggingface/chat-ui) GitHub repo. We're always open to contributions!
34
+
35
+ You can find the production configuration for HuggingChat [here](https://github.com/huggingface/chat-ui/blob/main/chart/env/prod.yaml).
36
+
37
+ HuggingChat connects to the OpenAI‑compatible Inference Providers router at `https://router.huggingface.co/v1` to access models across multiple providers. Provider selection may be automatic or fixed depending on the model configuration.
38
+
39
+ We welcome any feedback on this app: please participate in the public discussion at <https://huggingface.co/spaces/huggingchat/chat-ui/discussions>
40
+
41
+ <a target="_blank" href="https://huggingface.co/spaces/huggingchat/chat-ui/discussions"><img src="https://huggingface.co/datasets/huggingface/badges/raw/main/open-a-discussion-xl.svg" title="open a discussion"></a>
README.md ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Chat Ui
3
+ emoji: 🐠
4
+ colorFrom: pink
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 3000
9
+ ---
10
+
11
+ # Chat UI
12
+
13
+ ![Chat UI repository thumbnail](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/chat-ui/chat-ui-2026.png)
14
+
15
+ A chat interface for LLMs. It is a SvelteKit app and it powers the [HuggingChat app on hf.co/chat](https://huggingface.co/chat).
16
+
17
+ 0. [Quickstart](#quickstart)
18
+ 1. [Database Options](#database-options)
19
+ 2. [Launch](#launch)
20
+ 3. [Optional Docker Image](#optional-docker-image)
21
+ 4. [Extra parameters](#extra-parameters)
22
+ 5. [Building](#building)
23
+
24
+ > [!NOTE]
25
+ > Chat UI only supports OpenAI-compatible APIs via `OPENAI_BASE_URL` and the `/models` endpoint. Provider-specific integrations (legacy `MODELS` env var, GGUF discovery, embeddings, web-search helpers, etc.) are removed, but any service that speaks the OpenAI protocol (llama.cpp server, Ollama, OpenRouter, etc.) will work by default.
26
+
27
+ > [!NOTE]
28
+ > The old version is still available on the [legacy branch](https://github.com/huggingface/chat-ui/tree/legacy)
29
+
30
+ ## Quickstart
31
+
32
+ Chat UI speaks to OpenAI-compatible APIs only. The fastest way to get running is with the Hugging Face Inference Providers router plus your personal Hugging Face access token.
33
+
34
+ **Step 1 – Create `.env.local`:**
35
+
36
+ ```env
37
+ OPENAI_BASE_URL=https://router.huggingface.co/v1
38
+ OPENAI_API_KEY=hf_************************
39
+ # Fill in once you pick a database option below
40
+ MONGODB_URL=
41
+ ```
42
+
43
+ `OPENAI_API_KEY` can come from any OpenAI-compatible endpoint you plan to call. Pick the combo that matches your setup and drop the values into `.env.local`:
44
+
45
+ | Provider | Example `OPENAI_BASE_URL` | Example key env |
46
+ | --------------------------------------------- | ---------------------------------- | ----------------------------------------------------------------------- |
47
+ | Hugging Face Inference Providers router | `https://router.huggingface.co/v1` | `OPENAI_API_KEY=hf_xxx` (or `HF_TOKEN` legacy alias) |
48
+ | llama.cpp server (`llama.cpp --server --api`) | `http://127.0.0.1:8080/v1` | `OPENAI_API_KEY=sk-local-demo` (any string works; llama.cpp ignores it) |
49
+ | Ollama (with OpenAI-compatible bridge) | `http://127.0.0.1:11434/v1` | `OPENAI_API_KEY=ollama` |
50
+ | OpenRouter | `https://openrouter.ai/api/v1` | `OPENAI_API_KEY=sk-or-v1-...` |
51
+ | Poe | `https://api.poe.com/v1` | `OPENAI_API_KEY=pk_...` |
52
+
53
+ Check the root [`.env` template](./.env) for the full list of optional variables you can override.
54
+
55
+ **Step 2 – Choose where MongoDB lives:** Either provision a managed cluster (for example MongoDB Atlas) or run a local container. Both approaches are described in [Database Options](#database-options). After you have the URI, drop it into `MONGODB_URL` (and, if desired, set `MONGODB_DB_NAME`).
56
+
57
+ **Step 3 – Install and launch the dev server:**
58
+
59
+ ```bash
60
+ git clone https://github.com/huggingface/chat-ui
61
+ cd chat-ui
62
+ npm install
63
+ npm run dev -- --open
64
+ ```
65
+
66
+ You now have Chat UI running against the Hugging Face router without needing to host MongoDB yourself.
67
+
68
+ ## Database Options
69
+
70
+ Chat history, users, settings, files, and stats all live in MongoDB. You can point Chat UI at any MongoDB 6/7 deployment.
71
+
72
+ ### MongoDB Atlas (managed)
73
+
74
+ 1. Create a free cluster at [mongodb.com](https://www.mongodb.com/pricing).
75
+ 2. Add your IP (or `0.0.0.0/0` for development) to the network access list.
76
+ 3. Create a database user and copy the connection string.
77
+ 4. Paste that string into `MONGODB_URL` in `.env.local`. Keep the default `MONGODB_DB_NAME=chat-ui` or change it per environment.
78
+
79
+ Atlas keeps MongoDB off your laptop, which is ideal for teams or cloud deployments.
80
+
81
+ ### Local MongoDB (container)
82
+
83
+ If you prefer to run MongoDB locally:
84
+
85
+ ```bash
86
+ docker run -d -p 27017:27017 --name mongo-chatui mongo:latest
87
+ ```
88
+
89
+ Then set `MONGODB_URL=mongodb://localhost:27017` in `.env.local`. You can also supply `MONGO_STORAGE_PATH` if you want Chat UI’s fallback in-memory server to persist under a specific folder.
90
+
91
+ ## Launch
92
+
93
+ After configuring your environment variables, start Chat UI with:
94
+
95
+ ```bash
96
+ npm install
97
+ npm run dev
98
+ ```
99
+
100
+ The dev server listens on `http://localhost:5173` by default. Use `npm run build` / `npm run preview` for production builds.
101
+
102
+ ## Optional Docker Image
103
+
104
+ Prefer containerized setup? You can run everything in one container as long as you supply a MongoDB URI (local or hosted):
105
+
106
+ ```bash
107
+ docker run \
108
+ -p 3000 \
109
+ -e MONGODB_URL=mongodb://host.docker.internal:27017 \
110
+ -e OPENAI_BASE_URL=https://router.huggingface.co/v1 \
111
+ -e OPENAI_API_KEY=hf_*** \
112
+ -v db:/data \
113
+ ghcr.io/huggingface/chat-ui-db:latest
114
+ ```
115
+
116
+ `host.docker.internal` lets the container reach a MongoDB instance on your host machine; swap it for your Atlas URI if you use the hosted option. All environment variables accepted in `.env.local` can be provided as `-e` flags.
117
+
118
+ ## Extra parameters
119
+
120
+ ### Theming
121
+
122
+ You can use a few environment variables to customize the look and feel of chat-ui. These are by default:
123
+
124
+ ```env
125
+ PUBLIC_APP_NAME=ChatUI
126
+ PUBLIC_APP_ASSETS=chatui
127
+ PUBLIC_APP_DESCRIPTION="Making the community's best AI chat models available to everyone."
128
+ PUBLIC_APP_DATA_SHARING=
129
+ ```
130
+
131
+ - `PUBLIC_APP_NAME` The name used as a title throughout the app.
132
+ - `PUBLIC_APP_ASSETS` Is used to find logos & favicons in `static/$PUBLIC_APP_ASSETS`, current options are `chatui` and `huggingchat`.
133
+ - `PUBLIC_APP_DATA_SHARING` Can be set to 1 to add a toggle in the user settings that lets your users opt-in to data sharing with models creator.
134
+
135
+ ### Models
136
+
137
+ This build does not use the `MODELS` env var or GGUF discovery. Configure models via `OPENAI_BASE_URL` only; Chat UI will fetch `${OPENAI_BASE_URL}/models` and populate the list automatically. Authorization uses `OPENAI_API_KEY` (preferred). `HF_TOKEN` remains a legacy alias.
138
+
139
+ ### LLM Router (Optional)
140
+
141
+ Chat UI can perform client-side routing using [katanemo/Arch-Router-1.5B](https://huggingface.co/katanemo/Arch-Router-1.5B) as the routing model without running a separate router service. The UI exposes a virtual model alias called "Omni" (configurable) that, when selected, chooses the best route/model for each message.
142
+
143
+ - Provide a routes policy JSON via `LLM_ROUTER_ROUTES_PATH`. No sample file ships with this branch, so you must point the variable to a JSON array you create yourself (for example, commit one in your project like `config/routes.chat.json`). Each route entry needs `name`, `description`, `primary_model`, and optional `fallback_models`.
144
+ - Configure the Arch router selection endpoint with `LLM_ROUTER_ARCH_BASE_URL` (OpenAI-compatible `/chat/completions`) and `LLM_ROUTER_ARCH_MODEL` (e.g. `router/omni`). The Arch call reuses `OPENAI_API_KEY` for auth.
145
+ - Map `other` to a concrete route via `LLM_ROUTER_OTHER_ROUTE` (default: `casual_conversation`). If Arch selection fails, calls fall back to `LLM_ROUTER_FALLBACK_MODEL`.
146
+ - Selection timeout can be tuned via `LLM_ROUTER_ARCH_TIMEOUT_MS` (default 10000).
147
+ - Omni alias configuration: `PUBLIC_LLM_ROUTER_ALIAS_ID` (default `omni`), `PUBLIC_LLM_ROUTER_DISPLAY_NAME` (default `Omni`), and optional `PUBLIC_LLM_ROUTER_LOGO_URL`.
148
+
149
+ When you select Omni in the UI, Chat UI will:
150
+
151
+ - Call the Arch endpoint once (non-streaming) to pick the best route for the last turns.
152
+ - Emit RouterMetadata immediately (route and actual model used) so the UI can display it.
153
+ - Stream from the selected model via your configured `OPENAI_BASE_URL`. On errors, it tries route fallbacks.
154
+
155
+ ## Building
156
+
157
+ To create a production version of your app:
158
+
159
+ ```bash
160
+ npm run build
161
+ ```
162
+
163
+ You can preview the production build with `npm run preview`.
164
+
165
+ > To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment.
chart/Chart.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ apiVersion: v2
2
+ name: chat-ui
3
+ version: 0.0.1-latest
4
+ type: application
5
+ icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
chart/env/dev.yaml ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image:
2
+ repository: huggingface
3
+ name: chat-ui
4
+
5
+ #nodeSelector:
6
+ # role-huggingchat: "true"
7
+ #
8
+ #tolerations:
9
+ # - key: "huggingface.co/huggingchat"
10
+ # operator: "Equal"
11
+ # value: "true"
12
+ # effect: "NoSchedule"
13
+
14
+ serviceAccount:
15
+ enabled: true
16
+ create: true
17
+ name: huggingchat-ephemeral
18
+
19
+ ingress:
20
+ enabled: false
21
+
22
+ ingressInternal:
23
+ enabled: true
24
+ path: "/chat"
25
+ annotations:
26
+ external-dns.alpha.kubernetes.io/hostname: "*.chat-dev.huggingface.tech"
27
+ alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
28
+ alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
29
+ alb.ingress.kubernetes.io/group.name: "chat-dev-internal-public"
30
+ alb.ingress.kubernetes.io/load-balancer-name: "chat-dev-internal-public"
31
+ alb.ingress.kubernetes.io/ssl-redirect: "443"
32
+ alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
33
+ alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30
34
+ alb.ingress.kubernetes.io/target-type: "ip"
35
+ alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/bc3eb446-1c04-432c-ac6b-946a88d725da"
36
+ kubernetes.io/ingress.class: "alb"
37
+
38
+ envVars:
39
+ TEST: "test"
40
+ COUPLE_SESSION_WITH_COOKIE_NAME: "token"
41
+ OPENID_SCOPES: "openid profile inference-api"
42
+ USE_USER_TOKEN: "true"
43
+ AUTOMATIC_LOGIN: "false"
44
+
45
+ ADDRESS_HEADER: "X-Forwarded-For"
46
+ APP_BASE: "/chat"
47
+ ALLOW_IFRAME: "false"
48
+ COOKIE_SAMESITE: "lax"
49
+ COOKIE_SECURE: "true"
50
+ EXPOSE_API: "true"
51
+ METRICS_ENABLED: "true"
52
+ LOG_LEVEL: "debug"
53
+ NODE_LOG_STRUCTURED_DATA: "true"
54
+
55
+ OPENAI_BASE_URL: "https://router.huggingface.co/v1"
56
+ PUBLIC_APP_ASSETS: "huggingchat"
57
+ PUBLIC_APP_NAME: "HuggingChat"
58
+ PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone"
59
+ PUBLIC_ORIGIN: "https://huggingface.co"
60
+ PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js"
61
+
62
+ TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507"
63
+ LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1"
64
+ LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json"
65
+ LLM_ROUTER_ARCH_MODEL: "katanemo/Arch-Router-1.5B"
66
+ LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
67
+ LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
68
+ LLM_ROUTER_ENABLE_MULTIMODAL: "true"
69
+ LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-235B-A22B-Thinking"
70
+ PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni"
71
+ PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png"
72
+ PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
73
+ MODELS: >
74
+ [
75
+ { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." },
76
+ { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." },
77
+ { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." },
78
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." },
79
+ { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." },
80
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." },
81
+ { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." },
82
+ { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." },
83
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." },
84
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." },
85
+ { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." },
86
+ { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." },
87
+ { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." },
88
+ { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." },
89
+ { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." },
90
+ { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." },
91
+ { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." },
92
+ { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." },
93
+ { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." },
94
+ { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." },
95
+ { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." },
96
+ { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." },
97
+ { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." },
98
+ { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." },
99
+ { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." },
100
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." },
101
+ { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." },
102
+ { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." },
103
+ { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." },
104
+ { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." },
105
+ { "id": "zai-org/GLM-4.5", "description": "Hybrid reasoning model top choice for intelligent agent applications." },
106
+ { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." },
107
+ { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." },
108
+ { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." },
109
+ { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." },
110
+ { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." },
111
+ { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." },
112
+ { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." },
113
+ { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." },
114
+ { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." },
115
+ { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." },
116
+ { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." },
117
+ { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." },
118
+ { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." },
119
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." },
120
+ { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." },
121
+ { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." },
122
+ { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." },
123
+ { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." },
124
+ { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." },
125
+ { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." },
126
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." },
127
+ { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." },
128
+ { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." },
129
+ { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." },
130
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." },
131
+ { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." },
132
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." },
133
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." },
134
+ { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." },
135
+ { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } },
136
+ { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." },
137
+ { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." },
138
+ { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." },
139
+ { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." },
140
+ { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." },
141
+ { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." },
142
+ { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." },
143
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." },
144
+ { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." },
145
+ { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." },
146
+ { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." },
147
+ { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." },
148
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." },
149
+ { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." },
150
+ { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." },
151
+ { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." },
152
+ { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." },
153
+ { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." },
154
+ { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." },
155
+ { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." },
156
+ { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." },
157
+ { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." },
158
+ { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." },
159
+ { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." },
160
+ { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." },
161
+ { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." },
162
+ { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." },
163
+ { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." },
164
+ { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." },
165
+ { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." },
166
+ { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." },
167
+ { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." },
168
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." },
169
+ { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." },
170
+ { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." },
171
+ { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." },
172
+ { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." },
173
+ { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." },
174
+ { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." },
175
+ { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." },
176
+ { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." },
177
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." },
178
+ { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." },
179
+ { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." },
180
+ { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." },
181
+ { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." },
182
+ { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." },
183
+ { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." },
184
+ { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." },
185
+ { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." },
186
+ { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." },
187
+ { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." },
188
+ { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." }
189
+ ]
190
+
191
+ infisical:
192
+ enabled: true
193
+ env: "ephemeral-us-east-1"
194
+
195
+ replicas: 1
196
+ autoscaling:
197
+ enabled: false
198
+
199
+ resources:
200
+ requests:
201
+ cpu: 2
202
+ memory: 4Gi
203
+ limits:
204
+ cpu: 4
205
+ memory: 8Gi
chart/env/prod.yaml ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image:
2
+ repository: huggingface
3
+ name: chat-ui
4
+
5
+ nodeSelector:
6
+ role-huggingchat: "true"
7
+
8
+ tolerations:
9
+ - key: "huggingface.co/huggingchat"
10
+ operator: "Equal"
11
+ value: "true"
12
+ effect: "NoSchedule"
13
+
14
+ serviceAccount:
15
+ enabled: true
16
+ create: true
17
+ name: huggingchat-prod
18
+
19
+ ingress:
20
+ path: "/chat"
21
+ annotations:
22
+ alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
23
+ alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
24
+ alb.ingress.kubernetes.io/load-balancer-name: "hub-utils-prod-cloudfront"
25
+ alb.ingress.kubernetes.io/group.name: "hub-utils-prod-cloudfront"
26
+ alb.ingress.kubernetes.io/scheme: "internal"
27
+ alb.ingress.kubernetes.io/ssl-redirect: "443"
28
+ alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
29
+ alb.ingress.kubernetes.io/target-group-attributes: "deregistration_delay.timeout_seconds=30"
30
+ alb.ingress.kubernetes.io/target-type: "ip"
31
+ alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/5b25b145-75db-4837-b9f3-7f238ba8a9c7,arn:aws:acm:us-east-1:707930574880:certificate/bfdf509c-f44b-400f-b9e1-6f7a861abe91"
32
+ kubernetes.io/ingress.class: "alb"
33
+
34
+ ingressInternal:
35
+ enabled: true
36
+ path: "/chat"
37
+ annotations:
38
+ alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
39
+ alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
40
+ alb.ingress.kubernetes.io/group.name: "hub-prod-internal-public"
41
+ alb.ingress.kubernetes.io/load-balancer-name: "hub-prod-internal-public"
42
+ alb.ingress.kubernetes.io/ssl-redirect: "443"
43
+ alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
44
+ alb.ingress.kubernetes.io/target-group-attributes: "deregistration_delay.timeout_seconds=30"
45
+ alb.ingress.kubernetes.io/target-type: "ip"
46
+ alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/5b25b145-75db-4837-b9f3-7f238ba8a9c7,arn:aws:acm:us-east-1:707930574880:certificate/bfdf509c-f44b-400f-b9e1-6f7a861abe91"
47
+ kubernetes.io/ingress.class: "alb"
48
+
49
+ envVars:
50
+ COUPLE_SESSION_WITH_COOKIE_NAME: "token"
51
+ OPENID_SCOPES: "openid profile inference-api"
52
+ USE_USER_TOKEN: "true"
53
+ AUTOMATIC_LOGIN: "false"
54
+
55
+ ADDRESS_HEADER: "X-Forwarded-For"
56
+ APP_BASE: "/chat"
57
+ ALLOW_IFRAME: "false"
58
+ COOKIE_SAMESITE: "lax"
59
+ COOKIE_SECURE: "true"
60
+ EXPOSE_API: "true"
61
+ METRICS_ENABLED: "true"
62
+ LOG_LEVEL: "debug"
63
+ NODE_LOG_STRUCTURED_DATA: "true"
64
+
65
+ OPENAI_BASE_URL: "https://router.huggingface.co/v1"
66
+ PUBLIC_APP_ASSETS: "huggingchat"
67
+ PUBLIC_APP_NAME: "HuggingChat"
68
+ PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone"
69
+ PUBLIC_ORIGIN: "https://huggingface.co"
70
+ PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js"
71
+
72
+ TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507"
73
+ LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1"
74
+ LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json"
75
+ LLM_ROUTER_ARCH_MODEL: "katanemo/Arch-Router-1.5B"
76
+ LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
77
+ LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
78
+ LLM_ROUTER_ENABLE_MULTIMODAL: "true"
79
+ LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-235B-A22B-Thinking"
80
+ PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni"
81
+ PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png"
82
+ PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
83
+ MODELS: >
84
+ [
85
+ { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." },
86
+ { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." },
87
+ { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." },
88
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." },
89
+ { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." },
90
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." },
91
+ { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." },
92
+ { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." },
93
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." },
94
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." },
95
+ { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." },
96
+ { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." },
97
+ { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." },
98
+ { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." },
99
+ { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." },
100
+ { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." },
101
+ { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." },
102
+ { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." },
103
+ { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." },
104
+ { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." },
105
+ { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." },
106
+ { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." },
107
+ { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." },
108
+ { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." },
109
+ { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." },
110
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." },
111
+ { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." },
112
+ { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." },
113
+ { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." },
114
+ { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." },
115
+ { "id": "zai-org/GLM-4.5", "description": "Hybrid reasoning model top choice for intelligent agent applications." },
116
+ { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." },
117
+ { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." },
118
+ { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." },
119
+ { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." },
120
+ { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." },
121
+ { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." },
122
+ { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." },
123
+ { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." },
124
+ { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." },
125
+ { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." },
126
+ { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." },
127
+ { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." },
128
+ { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." },
129
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." },
130
+ { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." },
131
+ { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." },
132
+ { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." },
133
+ { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." },
134
+ { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." },
135
+ { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." },
136
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." },
137
+ { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." },
138
+ { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." },
139
+ { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." },
140
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." },
141
+ { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." },
142
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." },
143
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." },
144
+ { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." },
145
+ { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } },
146
+ { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." },
147
+ { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." },
148
+ { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." },
149
+ { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." },
150
+ { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." },
151
+ { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." },
152
+ { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." },
153
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." },
154
+ { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." },
155
+ { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." },
156
+ { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." },
157
+ { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." },
158
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." },
159
+ { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." },
160
+ { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." },
161
+ { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." },
162
+ { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." },
163
+ { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." },
164
+ { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." },
165
+ { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." },
166
+ { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." },
167
+ { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." },
168
+ { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." },
169
+ { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." },
170
+ { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." },
171
+ { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." },
172
+ { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." },
173
+ { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." },
174
+ { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." },
175
+ { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." },
176
+ { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." },
177
+ { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." },
178
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." },
179
+ { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." },
180
+ { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." },
181
+ { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." },
182
+ { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." },
183
+ { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." },
184
+ { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." },
185
+ { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." },
186
+ { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." },
187
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." },
188
+ { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." },
189
+ { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." },
190
+ { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." },
191
+ { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." },
192
+ { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." },
193
+ { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." },
194
+ { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." },
195
+ { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." },
196
+ { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." },
197
+ { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." },
198
+ { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." }
199
+ ]
200
+
201
# Infisical-managed secrets for the production environment.
infisical:
  enabled: true
  env: "prod-us-east-1"

# HPA bounds. The percentage targets are quoted strings on purpose: the
# chart's hpa template treats an empty string as "metric disabled".
autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 30
  targetMemoryUtilizationPercentage: "50"
  targetCPUUtilizationPercentage: "50"

# Per-pod compute; CPU may burst to twice the request, memory to twice as well.
resources:
  requests:
    cpu: 2
    memory: 4Gi
  limits:
    cpu: 4
    memory: 8Gi
chart/templates/_helpers.tpl ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Release-scoped resource name, truncated to the 63-character DNS label limit.
(The original piped through a single-argument `default`, which is a no-op in
sprig — it simply returned its argument — so it is dropped here.)
*/}}
{{- define "name" -}}
{{- $.Release.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/* Static application name used for the `app` label. */}}
{{- define "app.name" -}}
chat-ui
{{- end -}}

{{/* Standard label set shared by every resource in this chart. */}}
{{- define "labels.standard" -}}
release: {{ $.Release.Name | quote }}
heritage: {{ $.Release.Service | quote }}
chart: "{{ include "name" . }}"
app: "{{ include "app.name" . }}"
{{- end -}}

{{/* Label set for the resolver variant of the app (kept for external use;
not referenced by the templates visible in this chart directory). */}}
{{- define "labels.resolver" -}}
release: {{ $.Release.Name | quote }}
heritage: {{ $.Release.Service | quote }}
chart: "{{ include "name" . }}"
app: "{{ include "app.name" . }}-resolver"
{{- end -}}
chart/templates/config.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
ConfigMap exposing every key/value pair from .Values.envVars as plain
environment data; consumed by the Deployment via envFrom.
*/}}
apiVersion: v1
kind: ConfigMap
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
data:
  {{- range $key, $value := .Values.envVars }}
  {{ $key }}: {{ $value | quote }}
  {{- end }}
chart/templates/deployment.yaml ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Main chat-ui Deployment. Environment comes from the chart ConfigMap and,
when Infisical is enabled, from the operator-managed secret "<name>-secs".
*/}}
apiVersion: apps/v1
kind: Deployment
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
  {{- if .Values.infisical.enabled }}
  annotations:
    secrets.infisical.com/auto-reload: "true"
  {{- end }}
spec:
  progressDeadlineSeconds: 600
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicas }}
  {{- end }}
  revisionHistoryLimit: 10
  selector:
    matchLabels: {{ include "labels.standard" . | nindent 6 }}
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels: {{ include "labels.standard" . | nindent 8 }}
      annotations:
        # Roll pods whenever the rendered ConfigMap changes.
        checksum/config: {{ include (print $.Template.BasePath "/config.yaml") . | sha256sum }}
        {{- if .Values.envVars.NODE_LOG_STRUCTURED_DATA }}
        co.elastic.logs/json.expand_keys: "true"
        {{- end }}
    spec:
      {{- if .Values.serviceAccount.enabled }}
      serviceAccountName: "{{ .Values.serviceAccount.name | default (include "name" .) }}"
      {{- end }}
      containers:
        - name: chat-ui
          image: "{{ .Values.image.repository }}/{{ .Values.image.name }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          readinessProbe:
            failureThreshold: 30
            periodSeconds: 10
            httpGet:
              path: {{ .Values.envVars.APP_BASE | default "" }}/healthcheck
              port: {{ .Values.envVars.APP_PORT | default 3000 | int }}
          livenessProbe:
            failureThreshold: 30
            periodSeconds: 10
            httpGet:
              path: {{ .Values.envVars.APP_BASE | default "" }}/healthcheck
              port: {{ .Values.envVars.APP_PORT | default 3000 | int }}
          ports:
            - containerPort: {{ .Values.envVars.APP_PORT | default 3000 | int }}
              name: http
              protocol: TCP
            {{- /* `| default ""` guards `eq` against a nil METRICS_ENABLED
                 (the chart default, envVars: {}), which would otherwise fail
                 with "incompatible types for comparison". */}}
            {{- if eq "true" (.Values.envVars.METRICS_ENABLED | default "") }}
            - containerPort: {{ .Values.envVars.METRICS_PORT | default 5565 | int }}
              name: metrics
              protocol: TCP
            {{- end }}
          resources: {{ toYaml .Values.resources | nindent 12 }}
          {{- with .Values.extraEnv }}
          env:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          envFrom:
            - configMapRef:
                name: {{ include "name" . }}
            {{- if .Values.infisical.enabled }}
            - secretRef:
                name: {{ include "name" . }}-secs
            {{- end }}
            {{- /* nindent matches the explicit list items above so appended
                 entries land in the same YAML sequence. */}}
            {{- with .Values.extraEnvFrom }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
      nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }}
      tolerations: {{ toYaml .Values.tolerations | nindent 8 }}
      # NOTE(review): this volume is declared but never mounted by the
      # container — confirm whether it can be dropped.
      volumes:
        - name: config
          configMap:
            name: {{ include "name" . }}
chart/templates/hpa.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.autoscaling.enabled }}
{{/*
HPA for the chat-ui Deployment. Each metric is emitted only when its target
is set. A plain truthiness test replaces the original `ne "" ...`, which
raised "incompatible types for comparison" whenever the value was unset
(nil) or given as a number instead of a string; for the documented string
values ("" disabled, "50" enabled) the behavior is identical.
*/}}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "name" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage | int }}
    {{- end }}
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage | int }}
    {{- end }}
  behavior:
    # Scale down cautiously: at most 10% of pods per minute, after a
    # 10-minute stabilization window.
    scaleDown:
      stabilizationWindowSeconds: 600
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    # Scale up promptly: one pod every 30 seconds, no stabilization delay.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Pods
          value: 1
          periodSeconds: 30
{{- end }}
chart/templates/infisical.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.infisical.enabled }}
{{/*
InfisicalSecret CR: the Infisical operator syncs secrets from the configured
project/environment into the Kubernetes secret "<name>-secs", which the
Deployment consumes via envFrom. Nesting follows the v1alpha1 CRD
(secretsScope under universalAuth) — exact indentation was lost in the
source dump, so verify against the operator's schema.
*/}}
apiVersion: secrets.infisical.com/v1alpha1
kind: InfisicalSecret
metadata:
  name: {{ include "name" . }}-infisical-secret
  namespace: {{ .Release.Namespace }}
spec:
  authentication:
    universalAuth:
      credentialsRef:
        secretName: {{ .Values.infisical.operatorSecretName | quote }}
        secretNamespace: {{ .Values.infisical.operatorSecretNamespace | quote }}
      secretsScope:
        envSlug: {{ .Values.infisical.env | quote }}
        projectSlug: {{ .Values.infisical.project | quote }}
        secretsPath: /
  hostAPI: {{ .Values.infisical.url | quote }}
  managedSecretReference:
    creationPolicy: Owner
    secretName: {{ include "name" . }}-secs
    secretNamespace: {{ .Release.Namespace | quote }}
    secretType: Opaque
  resyncInterval: {{ .Values.infisical.resyncInterval }}
{{- end }}
chart/templates/ingress-internal.yaml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.ingressInternal.enabled }}
{{/*
Internal-facing Ingress (suffix "-internal"). The className guard uses a
chomped `with` block: the original unchomped `{{ if }}` / `{{ end }}` emitted
stray whitespace-only lines into the rendered manifest.
*/}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations: {{ toYaml .Values.ingressInternal.annotations | nindent 4 }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}-internal
  namespace: {{ .Release.Namespace }}
spec:
  {{- with .Values.ingressInternal.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- with .Values.ingressInternal.tls }}
  tls:
    - hosts:
        - {{ $.Values.domain | quote }}
      {{- with .secretName }}
      secretName: {{ . }}
      {{- end }}
  {{- end }}
  rules:
    - host: {{ .Values.domain }}
      http:
        paths:
          - backend:
              service:
                name: {{ include "name" . }}
                port:
                  name: http
            path: {{ .Values.ingressInternal.path | default "/" }}
            pathType: Prefix
{{- end }}
chart/templates/ingress.yaml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.ingress.enabled }}
{{/*
Public Ingress for chat-ui. The className guard uses a chomped `with` block:
the original unchomped `{{ if }}` / `{{ end }}` emitted stray
whitespace-only lines into the rendered manifest.
*/}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations: {{ toYaml .Values.ingress.annotations | nindent 4 }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- with .Values.ingress.tls }}
  tls:
    - hosts:
        - {{ $.Values.domain | quote }}
      {{- with .secretName }}
      secretName: {{ . }}
      {{- end }}
  {{- end }}
  rules:
    - host: {{ .Values.domain }}
      http:
        paths:
          - backend:
              service:
                name: {{ include "name" . }}
                port:
                  name: http
            path: {{ .Values.ingress.path | default "/" }}
            pathType: Prefix
{{- end }}
chart/templates/network-policy.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.networkPolicy.enabled }}
{{/*
Egress-only NetworkPolicy for the chat-ui pods: DNS to kube-dns, optional
allow-listed CIDR blocks, and the public internet minus private ranges and
the cloud metadata endpoint.
*/}}
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  egress:
    # DNS (UDP/53) to kube-dns only.
    - ports:
        - port: 53
          protocol: UDP
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
    {{- /* Guarded with `with`: when allowedBlocks is empty the original
         rendered a rule with `to: null`, which the NetworkPolicy API treats
         as "allow traffic to ALL destinations" — silently defeating the
         policy. */}}
    {{- with .Values.networkPolicy.allowedBlocks }}
    - to:
        {{- range $ip := . }}
        - ipBlock:
            cidr: {{ $ip | quote }}
        {{- end }}
    {{- end }}
    # Public internet, excluding RFC1918 ranges and the metadata endpoint.
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.0.0.0/8
              - 172.16.0.0/12
              - 192.168.0.0/16
              - 169.254.169.254/32
  podSelector:
    matchLabels: {{ include "labels.standard" . | nindent 6 }}
  policyTypes:
    - Egress
{{- end }}
chart/templates/service-account.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if and .Values.serviceAccount.enabled .Values.serviceAccount.create }}
{{/*
Optional ServiceAccount; created only when both serviceAccount.enabled and
serviceAccount.create are set. The name falls back to the release name.
*/}}
apiVersion: v1
kind: ServiceAccount
automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }}
metadata:
  name: "{{ .Values.serviceAccount.name | default (include "name" .) }}"
  namespace: {{ .Release.Namespace }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}
chart/templates/service-monitor.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- /* `| default ""` guards `eq` against a nil METRICS_ENABLED (the chart
     default, envVars: {}), which would otherwise fail rendering with
     "incompatible types for comparison". */}}
{{- if eq "true" (.Values.envVars.METRICS_ENABLED | default "") }}
{{/* Prometheus-operator ServiceMonitor scraping the Service's `metrics` port. */}}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  selector:
    matchLabels: {{ include "labels.standard" . | nindent 6 }}
  endpoints:
    - port: metrics
      path: /metrics
      interval: 10s
      scheme: http
      scrapeTimeout: 10s
{{- end }}
chart/templates/service.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Service fronting the chat-ui pods: HTTP on port 80, plus a metrics port when
METRICS_ENABLED is "true".
*/}}
apiVersion: v1
kind: Service
metadata:
  name: "{{ include "name" . }}"
  namespace: {{ .Release.Namespace }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  annotations: {{ toYaml .Values.service.annotations | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  selector: {{ include "labels.standard" . | nindent 4 }}
  ports:
    - name: http
      port: 80
      protocol: TCP
      targetPort: http
    {{- /* `| default ""` guards `eq` against a nil METRICS_ENABLED (the
         chart default, envVars: {}), which would otherwise fail rendering
         with "incompatible types for comparison". */}}
    {{- if eq "true" (.Values.envVars.METRICS_ENABLED | default "") }}
    - name: metrics
      port: {{ .Values.envVars.METRICS_PORT | default 5565 | int }}
      protocol: TCP
      targetPort: metrics
    {{- end }}
chart/values.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
image:
  repository: ghcr.io/huggingface
  name: chat-ui
  tag: 0.0.0-latest
  pullPolicy: IfNotPresent

# Fixed replica count; ignored when autoscaling.enabled is true.
replicas: 3

domain: huggingface.co

networkPolicy:
  enabled: false
  allowedBlocks: []

service:
  type: NodePort
  annotations: {}

serviceAccount:
  enabled: false
  create: false
  name: ""
  automountServiceAccountToken: true
  annotations: {}

ingress:
  enabled: true
  path: "/"
  annotations: {}
  # className: "nginx"
  tls: {}
  # secretName: XXX

ingressInternal:
  enabled: false
  path: "/"
  annotations: {}
  # className: "nginx"
  tls: {}

resources:
  requests:
    cpu: 2
    memory: 4Gi
  limits:
    cpu: 2
    memory: 4Gi
nodeSelector: {}
tolerations: []

# Plain environment variables rendered into the chart ConfigMap.
envVars: {}

infisical:
  enabled: false
  env: ""
  project: "huggingchat-v2-a1"
  url: ""
  resyncInterval: 60
  operatorSecretName: "huggingchat-operator-secrets"
  operatorSecretNamespace: "hub-utils"

# Allow environment injections on top of, or instead of, Infisical.
extraEnvFrom: []
extraEnv: []

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 2
  # Quoted strings on purpose; an empty string disables the metric.
  targetMemoryUtilizationPercentage: ""
  targetCPUUtilizationPercentage: ""

# Metrics are toggled via envVars.METRICS_ENABLED / METRICS_PORT; there is
# no separate monitoring section in these values.
docker-compose.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# For development only.
# Set MONGODB_URL=mongodb://localhost:27017 in .env.local to use this container.
services:
  mongo:
    image: mongo:8
    hostname: mongodb
    ports:
      # Quoted: unquoted colon-separated numbers in Compose short port syntax
      # can hit YAML's base-60 (sexagesimal) integer parsing; quoting keeps
      # the mapping a string regardless of the variable's expansion.
      - "${LOCAL_MONGO_PORT:-27017}:27017"
    command: --replSet rs0 --bind_ip_all #--setParameter notablescan=1
    mem_limit: "5g"
    mem_reservation: "3g"
    healthcheck:
      # Initializes the single-node replica set on first probe. The hostname
      # is pinned to 127.0.0.1 because the default would be the container
      # name, and the app runs outside of Docker.
      test: test $$(mongosh --quiet --eval 'try {rs.status().ok} catch(e) {rs.initiate({_id:"rs0",members:[{_id:0,host:"127.0.0.1:${LOCAL_MONGO_PORT:-27017}"}]}).ok}') -eq 1
      interval: 5s
    volumes:
      - mongodb-data:/data/db
    restart: always

volumes:
  mongodb-data: