coyotte508 commited on
Commit
fc69895
·
0 Parent(s):

A new start

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .devcontainer/Dockerfile +9 -0
  2. .devcontainer/devcontainer.json +36 -0
  3. .dockerignore +13 -0
  4. .env +170 -0
  5. .env.ci +1 -0
  6. .eslintignore +13 -0
  7. .eslintrc.cjs +45 -0
  8. .gitattributes +4 -0
  9. .github/ISSUE_TEMPLATE/bug-report--chat-ui-.md +43 -0
  10. .github/ISSUE_TEMPLATE/config-support.md +9 -0
  11. .github/ISSUE_TEMPLATE/feature-request--chat-ui-.md +17 -0
  12. .github/ISSUE_TEMPLATE/huggingchat.md +11 -0
  13. .github/release.yml +16 -0
  14. .github/workflows/build-docs.yml +18 -0
  15. .github/workflows/build-image.yml +142 -0
  16. .github/workflows/build-pr-docs.yml +20 -0
  17. .github/workflows/deploy-dev.yml +62 -0
  18. .github/workflows/deploy-prod.yml +78 -0
  19. .github/workflows/lint-and-test.yml +84 -0
  20. .github/workflows/slugify.yaml +72 -0
  21. .github/workflows/trufflehog.yml +17 -0
  22. .github/workflows/upload-pr-documentation.yml +16 -0
  23. .gitignore +17 -0
  24. .husky/lint-stage-config.js +4 -0
  25. .husky/pre-commit +2 -0
  26. .npmrc +1 -0
  27. .prettierignore +14 -0
  28. .prettierrc +7 -0
  29. .vscode/launch.json +11 -0
  30. .vscode/settings.json +14 -0
  31. Dockerfile +93 -0
  32. LICENSE +203 -0
  33. PRIVACY.md +41 -0
  34. README.md +165 -0
  35. chart/Chart.yaml +5 -0
  36. chart/env/dev.yaml +205 -0
  37. chart/env/prod.yaml +218 -0
  38. chart/templates/_helpers.tpl +22 -0
  39. chart/templates/config.yaml +10 -0
  40. chart/templates/deployment.yaml +81 -0
  41. chart/templates/hpa.yaml +45 -0
  42. chart/templates/infisical.yaml +24 -0
  43. chart/templates/ingress-internal.yaml +32 -0
  44. chart/templates/ingress.yaml +32 -0
  45. chart/templates/network-policy.yaml +36 -0
  46. chart/templates/service-account.yaml +13 -0
  47. chart/templates/service-monitor.yaml +17 -0
  48. chart/templates/service.yaml +21 -0
  49. chart/values.yaml +73 -0
  50. docker-compose.yml +21 -0
.devcontainer/Dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ FROM mcr.microsoft.com/devcontainers/typescript-node:1-22-bookworm
2
+
3
+ # Install MongoDB tools (mongosh, mongorestore, mongodump) directly from MongoDB repository
4
+ RUN curl -fsSL https://www.mongodb.org/static/pgp/server-8.0.asc | gpg --dearmor -o /usr/share/keyrings/mongodb-server-8.0.gpg && \
5
+ echo "deb [ signed-by=/usr/share/keyrings/mongodb-server-8.0.gpg ] http://repo.mongodb.org/apt/debian bookworm/mongodb-org/8.0 main" | tee /etc/apt/sources.list.d/mongodb-org-8.0.list && \
6
+ apt-get update && \
7
+ apt-get install -y mongodb-mongosh mongodb-database-tools vim && \
8
+ apt-get autoremove -y && \
9
+ rm -rf /var/lib/apt/lists/*
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/typescript-node
3
+ {
4
+ "name": "Node.js & TypeScript",
5
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
6
+ "build": {
7
+ "dockerfile": "Dockerfile"
8
+ },
9
+
10
+ "customizations": {
11
+ "vscode": {
12
+ "extensions": ["esbenp.prettier-vscode", "dbaeumer.vscode-eslint", "svelte.svelte-vscode"]
13
+ }
14
+ },
15
+
16
+ "features": {
17
+ // Install docker in container
18
+ "ghcr.io/devcontainers/features/docker-in-docker:2": {
19
+ // Use proprietary docker engine. I get a timeout error when using the default moby engine and loading
20
+ // microsoft's PGP keys
21
+ "moby": false
22
+ }
23
+ }
24
+
25
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
26
+ // "forwardPorts": [],
27
+
28
+ // Use 'postCreateCommand' to run commands after the container is created.
29
+ // "postCreateCommand": "yarn install",
30
+
31
+ // Configure tool-specific properties.
32
+ // "customizations": {},
33
+
34
+ // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
35
+ // "remoteUser": "root"
36
+ }
.dockerignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Dockerfile
2
+ .vscode/
3
+ .idea
4
+ .gitignore
5
+ LICENSE
6
+ README.md
7
+ node_modules/
8
+ .svelte-kit/
9
+ .env*
10
+ !.env
11
+ .env.local
12
+ db
13
+ models/**
.env ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use .env.local to change these variables
2
+ # DO NOT EDIT THIS FILE WITH SENSITIVE DATA
3
+
4
+ ### Models ###
5
+ # Models are sourced exclusively from an OpenAI-compatible base URL.
6
+ # Example: https://router.huggingface.co/v1
7
+ OPENAI_BASE_URL=https://router.huggingface.co/v1
8
+
9
+ # Canonical auth token for any OpenAI-compatible provider
10
+ OPENAI_API_KEY=#your provider API key (works for HF router, OpenAI, LM Studio, etc.).
11
+ # When set to true, user token will be used for inference calls
12
+ USE_USER_TOKEN=false
13
+ # Automatically redirect to oauth login page if user is not logged in, when set to "true"
14
+ AUTOMATIC_LOGIN=false
15
+
16
+ ### MongoDB ###
17
+ MONGODB_URL=#your mongodb URL here, use chat-ui-db image if you don't want to set this
18
+ MONGODB_DB_NAME=chat-ui
19
+ MONGODB_DIRECT_CONNECTION=false
20
+
21
+
22
+ ## Public app configuration ##
23
+ PUBLIC_APP_NAME=ChatUI # name used as title throughout the app
24
+ PUBLIC_APP_ASSETS=chatui # used to find logos & favicons in static/$PUBLIC_APP_ASSETS
25
+ PUBLIC_APP_DESCRIPTION="Making the community's best AI chat models available to everyone."# description used throughout the app
26
+ PUBLIC_SMOOTH_UPDATES=false # set to true to enable smoothing of messages client-side, can be CPU intensive
27
+ PUBLIC_ORIGIN=
28
+ PUBLIC_SHARE_PREFIX=
29
+ PUBLIC_GOOGLE_ANALYTICS_ID=
30
+ PUBLIC_PLAUSIBLE_SCRIPT_URL=
31
+ PUBLIC_APPLE_APP_ID=
32
+
33
+ COUPLE_SESSION_WITH_COOKIE_NAME=
34
+ # when OPEN_ID is configured, users are required to login after the welcome modal
35
+ OPENID_CLIENT_ID="" # You can set to "__CIMD__" for automatic oauth app creation when deployed
36
+ OPENID_CLIENT_SECRET=
37
+ OPENID_SCOPES="openid profile inference-api"
38
+ USE_USER_TOKEN=
39
+ AUTOMATIC_LOGIN=# if true authentication is required on all routes
40
+
41
+ ### Local Storage ###
42
+ MONGO_STORAGE_PATH= # where is the db folder stored
43
+
44
+ ## Models overrides
45
+ MODELS=
46
+
47
+ ## Task model
48
+ # Optional: set to the model id/name from the `${OPENAI_BASE_URL}/models` list
49
+ # to use for internal tasks (title summarization, etc). If not set, the current model will be used
50
+ TASK_MODEL=
51
+
52
+ # Arch router (OpenAI-compatible) endpoint base URL used for route selection
53
+ # Example: https://api.openai.com/v1 or your hosted Arch endpoint
54
+ LLM_ROUTER_ARCH_BASE_URL=
55
+
56
+ ## LLM Router Configuration
57
+ # Path to routes policy (JSON array). Defaults to llm-router/routes.chat.json
58
+ LLM_ROUTER_ROUTES_PATH=
59
+
60
+ # Model used at the Arch router endpoint for selection
61
+ LLM_ROUTER_ARCH_MODEL=
62
+
63
+ # Fallback behavior
64
+ # Route to map "other" to (must exist in routes file)
65
+ LLM_ROUTER_OTHER_ROUTE=casual_conversation
66
+ # Model to call if the Arch selection fails entirely
67
+ LLM_ROUTER_FALLBACK_MODEL=
68
+ # Arch selection timeout in milliseconds (default 10000)
69
+ LLM_ROUTER_ARCH_TIMEOUT_MS=10000
70
+ # Maximum length (in characters) for assistant messages sent to router for route selection (default 500)
71
+ LLM_ROUTER_MAX_ASSISTANT_LENGTH=500
72
+ # Maximum length (in characters) for previous user messages sent to router (latest user message not trimmed, default 400)
73
+ LLM_ROUTER_MAX_PREV_USER_LENGTH=400
74
+
75
+ # Enable router multimodal fallback (set to true to allow image inputs via router)
76
+ LLM_ROUTER_ENABLE_MULTIMODAL=false
77
+ # Optional: specific model to use for multimodal requests. If not set, uses first multimodal model
78
+ LLM_ROUTER_MULTIMODAL_MODEL=
79
+
80
+ # Router UI overrides (client-visible)
81
+ # Public display name for the router entry in the model list. Defaults to "Omni".
82
+ PUBLIC_LLM_ROUTER_DISPLAY_NAME=Omni
83
+ # Optional: public logo URL for the router entry. If unset, the UI shows a Carbon icon.
84
+ PUBLIC_LLM_ROUTER_LOGO_URL=
85
+ # Public alias id used for the virtual router model (Omni). Defaults to "omni".
86
+ PUBLIC_LLM_ROUTER_ALIAS_ID=omni
87
+
88
+ ### Authentication ###
89
+ # Parameters to enable open id login
90
+ OPENID_CONFIG=
91
+ # if it's defined, only these emails will be allowed to use login
92
+ ALLOWED_USER_EMAILS=[]
93
+ # If it's defined, users with emails matching these domains will also be allowed to use login
94
+ ALLOWED_USER_DOMAINS=[]
95
+ # valid alternative redirect URLs for OAuth, used for HuggingChat apps
96
+ ALTERNATIVE_REDIRECT_URLS=[]
97
+ ### Cookies
98
+ # name of the cookie used to store the session
99
+ COOKIE_NAME=hf-chat
100
+ # If the value of this cookie changes, the session is destroyed. Useful if chat-ui is deployed on a subpath
101
+ # of your domain, and you want chat ui sessions to reset if the user's auth changes
102
+ COUPLE_SESSION_WITH_COOKIE_NAME=
103
+ # specify secure behaviour for cookies
104
+ COOKIE_SAMESITE=# can be "lax", "strict", "none" or left empty
105
+ COOKIE_SECURE=# set to true to only allow cookies over https
106
+ TRUSTED_EMAIL_HEADER=# header to use to get the user email, only use if you know what you are doing
107
+
108
+ ### Admin stuff ###
109
+ ADMIN_CLI_LOGIN=true # set to false to disable the CLI login
110
+ ADMIN_TOKEN=#We recommend leaving this empty, you can get the token from the terminal.
111
+
112
+ ### Feature Flags ###
113
+ LLM_SUMMARIZATION=true # generate conversation titles with LLMs
114
+
115
+ ALLOW_IFRAME=true # Allow the app to be embedded in an iframe
116
+ ENABLE_DATA_EXPORT=true
117
+
118
+ ### Rate limits ###
119
+ # See `src/lib/server/usageLimits.ts`
120
+ # {
121
+ # conversations: number, # how many conversations
122
+ # messages: number, # how many messages in a conversation
123
+ # assistants: number, # how many assistants
124
+ # messageLength: number, # how long can a message be before we cut it off
125
+ # messagesPerMinute: number, # how many messages per minute
126
+ # tools: number # how many tools
127
+ # }
128
+ USAGE_LIMITS={}
129
+
130
+ ### HuggingFace specific ###
131
+ ## Feature flag & admin settings
132
+ # Used for setting early access & admin flags to users
133
+ HF_ORG_ADMIN=
134
+ HF_ORG_EARLY_ACCESS=
135
+ WEBHOOK_URL_REPORT_ASSISTANT=#provide slack webhook url to get notified for reports/feature requests
136
+
137
+
138
+ ### Metrics ###
139
+ METRICS_ENABLED=false
140
+ METRICS_PORT=5565
141
+ LOG_LEVEL=info
142
+
143
+
144
+ ### Parquet export ###
145
+ # Not in use anymore but useful to export conversations to a parquet file as a HuggingFace dataset
146
+ PARQUET_EXPORT_DATASET=
147
+ PARQUET_EXPORT_HF_TOKEN=
148
+ ADMIN_API_SECRET=# secret to admin API calls, like computing usage stats or exporting parquet data
149
+
150
+ ### Config ###
151
+ ENABLE_CONFIG_MANAGER=true
152
+
153
+ ### Docker build variables ###
154
+ # These values cannot be updated at runtime
155
+ # They need to be passed when building the docker image
156
+ # See https://github.com/huggingface/chat-ui/main/.github/workflows/deploy-prod.yml#L44-L47
157
+ APP_BASE="" # base path of the app, e.g. /chat, left blank as default
158
+ ### Body size limit for SvelteKit https://svelte.dev/docs/kit/adapter-node#Environment-variables-BODY_SIZE_LIMIT
159
+ BODY_SIZE_LIMIT=15728640
160
+ PUBLIC_COMMIT_SHA=
161
+
162
+ ### LEGACY parameters
163
+ ALLOW_INSECURE_COOKIES=false # LEGACY! Use COOKIE_SECURE and COOKIE_SAMESITE instead
164
+ PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
165
+ RATE_LIMIT= # /!\ DEPRECATED definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
166
+ OPENID_NAME_CLAIM="name" # Change to "username" for some providers that do not provide name
167
+ OPENID_PROVIDER_URL=https://huggingface.co # for Google, use https://accounts.google.com
168
+ OPENID_TOLERANCE=
169
+ OPENID_RESOURCE=
170
+ EXPOSE_API=# deprecated, API is now always exposed
.env.ci ADDED
@@ -0,0 +1 @@
 
 
1
+ MONGODB_URL=mongodb://localhost:27017/
.eslintignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ node_modules
3
+ /build
4
+ /.svelte-kit
5
+ /package
6
+ .env
7
+ .env.*
8
+ !.env.example
9
+
10
+ # Ignore files for PNPM, NPM and YARN
11
+ pnpm-lock.yaml
12
+ package-lock.json
13
+ yarn.lock
.eslintrc.cjs ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module.exports = {
2
+ root: true,
3
+ parser: "@typescript-eslint/parser",
4
+ extends: [
5
+ "eslint:recommended",
6
+ "plugin:@typescript-eslint/recommended",
7
+ "plugin:svelte/recommended",
8
+ "prettier",
9
+ ],
10
+ plugins: ["@typescript-eslint"],
11
+ ignorePatterns: ["*.cjs"],
12
+ overrides: [
13
+ {
14
+ files: ["*.svelte"],
15
+ parser: "svelte-eslint-parser",
16
+ parserOptions: {
17
+ parser: "@typescript-eslint/parser",
18
+ },
19
+ },
20
+ ],
21
+ parserOptions: {
22
+ sourceType: "module",
23
+ ecmaVersion: 2020,
24
+ extraFileExtensions: [".svelte"],
25
+ },
26
+ rules: {
27
+ "no-empty": "off",
28
+ "require-yield": "off",
29
+ "@typescript-eslint/no-explicit-any": "error",
30
+ "@typescript-eslint/no-non-null-assertion": "error",
31
+ "@typescript-eslint/no-unused-vars": [
32
+ // prevent variables with a _ prefix from being marked as unused
33
+ "error",
34
+ {
35
+ argsIgnorePattern: "^_",
36
+ },
37
+ ],
38
+ "object-shorthand": ["error", "always"],
39
+ },
40
+ env: {
41
+ browser: true,
42
+ es2017: true,
43
+ node: true,
44
+ },
45
+ };
.gitattributes ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ */.ttf filter=lfs diff=lfs merge=lfs -text
2
+ static/huggingchat/tools-thumbnail.png filter=lfs diff=lfs merge=lfs -text
3
+ static/huggingchat/assistants-thumbnail.png filter=lfs diff=lfs merge=lfs -text
4
+ *.ttf filter=lfs diff=lfs merge=lfs -text
.github/ISSUE_TEMPLATE/bug-report--chat-ui-.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Bug Report (chat-ui)
3
+ about: Use this for confirmed issues with chat-ui
4
+ title: ""
5
+ labels: bug
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Bug description
10
+
11
+ <!-- A clear and concise description of what the bug is. -->
12
+
13
+ ## Steps to reproduce
14
+
15
+ <!-- Steps to reproduce the issue -->
16
+
17
+ ## Screenshots
18
+
19
+ <!-- If applicable, add screenshots to help explain your problem. -->
20
+
21
+ ## Context
22
+
23
+ ### Logs
24
+
25
+ <!-- Add any logs that are relevant to your issue. Could be browser or server logs. Wrap in code blocks. -->
26
+
27
+ ```
28
+ // logs here if relevant
29
+ ```
30
+
31
+ ### Specs
32
+
33
+ - **OS**:
34
+ - **Browser**:
35
+ - **chat-ui commit**:
36
+
37
+ ### Config
38
+
39
+ <!-- Add the environment variables you've used to setup chat-ui, making sure to redact any secrets. -->
40
+
41
+ ## Notes
42
+
43
+ <!-- Anything else relevant to help the issue get solved -->
.github/ISSUE_TEMPLATE/config-support.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Config Support
3
+ about: Help with setting up chat-ui locally
4
+ title: ""
5
+ labels: support
6
+ assignees: ""
7
+ ---
8
+
9
+ **Please use the discussions on GitHub** for getting help with setting things up instead of opening an issue: https://github.com/huggingface/chat-ui/discussions
.github/ISSUE_TEMPLATE/feature-request--chat-ui-.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Feature Request (chat-ui)
3
+ about: Suggest new features to be added to chat-ui
4
+ title: ""
5
+ labels: enhancement
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Describe your feature request
10
+
11
+ <!-- Short description of what this is about -->
12
+
13
+ ## Screenshots (if relevant)
14
+
15
+ ## Implementation idea
16
+
17
+ <!-- If you know how this should be implemented in the codebase, share your thoughts. Let us know if you feel like implementing it yourself as well! -->
.github/ISSUE_TEMPLATE/huggingchat.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: HuggingChat
3
+ about: Requests & reporting outages on HuggingChat, the hosted version of chat-ui.
4
+ title: ""
5
+ labels: huggingchat
6
+ assignees: ""
7
+ ---
8
+
9
+ **Do not use GitHub issues** for requesting models on HuggingChat or reporting issues with HuggingChat being down/overloaded.
10
+
11
+ **Use the discussions page on the hub instead:** https://huggingface.co/spaces/huggingchat/chat-ui/discussions
.github/release.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ changelog:
2
+ exclude:
3
+ labels:
4
+ - huggingchat
5
+ - CI/CD
6
+ - documentation
7
+ categories:
8
+ - title: Features
9
+ labels:
10
+ - enhancement
11
+ - title: Bugfixes
12
+ labels:
13
+ - bug
14
+ - title: Other changes
15
+ labels:
16
+ - "*"
.github/workflows/build-docs.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build documentation
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ - v*-release
8
+
9
+ jobs:
10
+ build:
11
+ uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
12
+ with:
13
+ commit_sha: ${{ github.sha }}
14
+ package: chat-ui
15
+ additional_args: --not_python_module
16
+ secrets:
17
+ token: ${{ secrets.HUGGINGFACE_PUSH }}
18
+ hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
.github/workflows/build-image.yml ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build and Publish Image
2
+
3
+ permissions:
4
+ packages: write
5
+
6
+ on:
7
+ push:
8
+ branches:
9
+ - "main"
10
+ pull_request:
11
+ branches:
12
+ - "*"
13
+ paths:
14
+ - "Dockerfile"
15
+ - "entrypoint.sh"
16
+ workflow_dispatch:
17
+ release:
18
+ types: [published, edited]
19
+
20
+ jobs:
21
+ build-and-publish-image-with-db:
22
+ runs-on:
23
+ group: aws-general-8-plus
24
+ steps:
25
+ - name: Checkout
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Extract package version
29
+ id: package-version
30
+ run: |
31
+ VERSION=$(jq -r .version package.json)
32
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
33
+ MAJOR=$(echo $VERSION | cut -d '.' -f1)
34
+ echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
35
+ MINOR=$(echo $VERSION | cut -d '.' -f1).$(echo $VERSION | cut -d '.' -f2)
36
+ echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
37
+
38
+ - name: Docker metadata
39
+ id: meta
40
+ uses: docker/metadata-action@v5
41
+ with:
42
+ images: |
43
+ ghcr.io/huggingface/chat-ui-db
44
+ tags: |
45
+ type=raw,value=${{ steps.package-version.outputs.VERSION }},enable=${{github.event_name == 'release'}}
46
+ type=raw,value=${{ steps.package-version.outputs.MAJOR }},enable=${{github.event_name == 'release'}}
47
+ type=raw,value=${{ steps.package-version.outputs.MINOR }},enable=${{github.event_name == 'release'}}
48
+ type=raw,value=latest,enable={{is_default_branch}}
49
+ type=sha,enable={{is_default_branch}}
50
+
51
+ - name: Set up QEMU
52
+ uses: docker/setup-qemu-action@v3
53
+
54
+ - name: Set up Docker Buildx
55
+ uses: docker/setup-buildx-action@v3
56
+
57
+ - name: Login to GitHub Container Registry
58
+ if: github.event_name != 'pull_request'
59
+ uses: docker/login-action@v3
60
+ with:
61
+ registry: ghcr.io
62
+ username: ${{ github.repository_owner }}
63
+ password: ${{ secrets.GITHUB_TOKEN }}
64
+
65
+ - name: Inject slug/short variables
66
+ uses: rlespinasse/[email protected]
67
+
68
+ - name: Build and Publish Docker Image with DB
69
+ uses: docker/build-push-action@v5
70
+ with:
71
+ context: .
72
+ file: Dockerfile
73
+ push: ${{ github.event_name != 'pull_request' }}
74
+ tags: ${{ steps.meta.outputs.tags }}
75
+ labels: ${{ steps.meta.outputs.labels }}
76
+ platforms: linux/amd64,linux/arm64
77
+ cache-from: type=gha
78
+ cache-to: type=gha,mode=max
79
+ build-args: |
80
+ INCLUDE_DB=true
81
+ PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
82
+ build-and-publish-image-nodb:
83
+ runs-on:
84
+ group: aws-general-8-plus
85
+ steps:
86
+ - name: Checkout
87
+ uses: actions/checkout@v4
88
+
89
+ - name: Extract package version
90
+ id: package-version
91
+ run: |
92
+ VERSION=$(jq -r .version package.json)
93
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
94
+ MAJOR=$(echo $VERSION | cut -d '.' -f1)
95
+ echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
96
+ MINOR=$(echo $VERSION | cut -d '.' -f1).$(echo $VERSION | cut -d '.' -f2)
97
+ echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
98
+
99
+ - name: Docker metadata
100
+ id: meta
101
+ uses: docker/metadata-action@v5
102
+ with:
103
+ images: |
104
+ ghcr.io/huggingface/chat-ui
105
+ tags: |
106
+ type=raw,value=${{ steps.package-version.outputs.VERSION }},enable=${{github.event_name == 'release'}}
107
+ type=raw,value=${{ steps.package-version.outputs.MAJOR }},enable=${{github.event_name == 'release'}}
108
+ type=raw,value=${{ steps.package-version.outputs.MINOR }},enable=${{github.event_name == 'release'}}
109
+ type=raw,value=latest,enable={{is_default_branch}}
110
+ type=sha,enable={{is_default_branch}}
111
+
112
+ - name: Set up QEMU
113
+ uses: docker/setup-qemu-action@v3
114
+
115
+ - name: Set up Docker Buildx
116
+ uses: docker/setup-buildx-action@v3
117
+
118
+ - name: Login to GitHub Container Registry
119
+ if: github.event_name != 'pull_request'
120
+ uses: docker/login-action@v3
121
+ with:
122
+ registry: ghcr.io
123
+ username: ${{ github.repository_owner }}
124
+ password: ${{ secrets.GITHUB_TOKEN }}
125
+
126
+ - name: Inject slug/short variables
127
+ uses: rlespinasse/[email protected]
128
+
129
+ - name: Build and Publish Docker Image without DB
130
+ uses: docker/build-push-action@v5
131
+ with:
132
+ context: .
133
+ file: Dockerfile
134
+ push: ${{ github.event_name != 'pull_request' }}
135
+ tags: ${{ steps.meta.outputs.tags }}
136
+ labels: ${{ steps.meta.outputs.labels }}
137
+ platforms: linux/amd64,linux/arm64
138
+ cache-from: type=gha
139
+ cache-to: type=gha,mode=max
140
+ build-args: |
141
+ INCLUDE_DB=false
142
+ PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
.github/workflows/build-pr-docs.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build PR Documentation
2
+
3
+ on:
4
+ pull_request:
5
+ paths:
6
+ - "docs/source/**"
7
+ - ".github/workflows/build-pr-docs.yml"
8
+
9
+ concurrency:
10
+ group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ build:
15
+ uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
16
+ with:
17
+ commit_sha: ${{ github.event.pull_request.head.sha }}
18
+ pr_number: ${{ github.event.number }}
19
+ package: chat-ui
20
+ additional_args: --not_python_module
.github/workflows/deploy-dev.yml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy to ephemeral
2
+ on:
3
+ pull_request:
4
+
5
+ jobs:
6
+ branch-slug:
7
+ uses: ./.github/workflows/slugify.yaml
8
+ with:
9
+ value: ${{ github.head_ref }}
10
+
11
+ deploy-dev:
12
+ if: contains(github.event.pull_request.labels.*.name, 'preview')
13
+ runs-on: ubuntu-latest
14
+ needs: branch-slug
15
+ environment:
16
+ name: dev
17
+ url: https://${{ needs.branch-slug.outputs.slug }}.chat-dev.huggingface.tech/chat/
18
+ steps:
19
+ - name: Checkout
20
+ uses: actions/checkout@v4
21
+
22
+ - name: Login to Registry
23
+ uses: docker/login-action@v3
24
+ with:
25
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
26
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
27
+
28
+ - name: Inject slug/short variables
29
+ uses: rlespinasse/[email protected]
30
+
31
+ - name: Set GITHUB_SHA_SHORT from PR
32
+ if: env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT != null
33
+ run: echo "GITHUB_SHA_SHORT=${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT }}" >> $GITHUB_ENV
34
+
35
+ - name: Docker metadata
36
+ id: meta
37
+ uses: docker/metadata-action@v5
38
+ with:
39
+ images: |
40
+ huggingface/chat-ui
41
+ tags: |
42
+ type=raw,value=dev-${{ env.GITHUB_SHA_SHORT }}
43
+
44
+ - name: Set up Docker Buildx
45
+ uses: docker/setup-buildx-action@v3
46
+
47
+ - name: Build and Publish HuggingChat image
48
+ uses: docker/build-push-action@v5
49
+ with:
50
+ context: .
51
+ file: Dockerfile
52
+ push: true
53
+ tags: ${{ steps.meta.outputs.tags }}
54
+ labels: ${{ steps.meta.outputs.labels }}
55
+ platforms: linux/amd64
56
+ cache-to: type=gha,mode=max,scope=amd64
57
+ cache-from: type=gha,scope=amd64
58
+ provenance: false
59
+ build-args: |
60
+ INCLUDE_DB=false
61
+ APP_BASE=/chat
62
+ PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
.github/workflows/deploy-prod.yml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy to k8s
2
+ on:
3
+ # run this workflow manually from the Actions tab
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ build-and-publish-huggingchat-image:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - name: Checkout
11
+ uses: actions/checkout@v4
12
+
13
+ - name: Login to Registry
14
+ uses: docker/login-action@v3
15
+ with:
16
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
17
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
18
+
19
+ - name: Docker metadata
20
+ id: meta
21
+ uses: docker/metadata-action@v5
22
+ with:
23
+ images: |
24
+ huggingface/chat-ui
25
+ tags: |
26
+ type=raw,value=latest,enable={{is_default_branch}}
27
+ type=sha,enable=true,prefix=sha-,format=short,sha-len=8
28
+
29
+ - name: Set up Docker Buildx
30
+ uses: docker/setup-buildx-action@v3
31
+
32
+ - name: Inject slug/short variables
33
+ uses: rlespinasse/[email protected]
34
+
35
+ - name: Build and Publish HuggingChat image
36
+ uses: docker/build-push-action@v5
37
+ with:
38
+ context: .
39
+ file: Dockerfile
40
+ push: ${{ github.event_name != 'pull_request' }}
41
+ tags: ${{ steps.meta.outputs.tags }}
42
+ labels: ${{ steps.meta.outputs.labels }}
43
+ platforms: linux/amd64
44
+ cache-to: type=gha,mode=max,scope=amd64
45
+ cache-from: type=gha,scope=amd64
46
+ provenance: false
47
+ build-args: |
48
+ INCLUDE_DB=false
49
+ APP_BASE=/chat
50
+ PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
51
+ deploy:
52
+ name: Deploy on prod
53
+ runs-on: ubuntu-latest
54
+ needs: ["build-and-publish-huggingchat-image"]
55
+ steps:
56
+ - name: Inject slug/short variables
57
+ uses: rlespinasse/[email protected]
58
+
59
+ - name: Gen values
60
+ run: |
61
+ VALUES=$(cat <<-END
62
+ image:
63
+ tag: "sha-${{ env.GITHUB_SHA_SHORT }}"
64
+ END
65
+ )
66
+ echo "VALUES=$(echo "$VALUES" | yq -o=json | jq tostring)" >> $GITHUB_ENV
67
+
68
+ - name: Deploy on infra-deployments
69
+ uses: aurelien-baudet/workflow-dispatch@v2
70
+ with:
71
+ workflow: Update application single value
72
+ repo: huggingface/infra-deployments
73
+ wait-for-completion: true
74
+ wait-for-completion-interval: 10s
75
+ display-workflow-run-url-interval: 10s
76
+ ref: refs/heads/main
77
+ token: ${{ secrets.GIT_TOKEN_INFRA_DEPLOYMENT }}
78
+ inputs: '{"path": "hub/chat-ui/chat-ui.yaml", "value": ${{ env.VALUES }}, "url": "${{ github.event.head_commit.url }}"}'
.github/workflows/lint-and-test.yml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Lint and test
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ branches:
7
+ - main
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ timeout-minutes: 10
13
+
14
+ steps:
15
+ - uses: actions/checkout@v3
16
+
17
+ - uses: actions/setup-node@v3
18
+ with:
19
+ node-version: "20"
20
+ cache: "npm"
21
+ - run: |
22
+ npm install ci
23
+ - name: "Checking lint/format errors"
24
+ run: |
25
+ npm run lint
26
+ - name: "Checking type errors"
27
+ run: |
28
+ npm run check
29
+
30
+ test:
31
+ runs-on: ubuntu-latest
32
+ timeout-minutes: 10
33
+
34
+ steps:
35
+ - uses: actions/checkout@v3
36
+ - uses: actions/setup-node@v3
37
+ with:
38
+ node-version: "20"
39
+ cache: "npm"
40
+ - run: |
41
+ npm ci
42
+ npx playwright install
43
+ - name: "Tests"
44
+ run: |
45
+ npm run test
46
+
47
+ build-check:
48
+ runs-on:
49
+ group: aws-general-8-plus
50
+ timeout-minutes: 10
51
+ steps:
52
+ - uses: actions/checkout@v3
53
+ - name: Build Docker image
54
+ run: |
55
+ docker build \
56
+ --build-arg INCLUDE_DB=true \
57
+ -t chat-ui-test:latest .
58
+
59
+ - name: Run Docker container
60
+ run: |
61
+ export DOTENV_LOCAL=$(<.env.ci)
62
+ docker run -d --rm --network=host \
63
+ --name chat-ui-test \
64
+ -e DOTENV_LOCAL="$DOTENV_LOCAL" \
65
+ chat-ui-test:latest
66
+
67
+ - name: Wait for server to start
68
+ run: |
69
+ for i in {1..10}; do
70
+ if curl -s -o /dev/null -w "%{http_code}" http://localhost:3000/ | grep -q "200"; then
71
+ echo "Server is up"
72
+ exit 0
73
+ fi
74
+ echo "Waiting for server..."
75
+ sleep 2
76
+ done
77
+ echo "Server did not start in time"
78
+ docker logs chat-ui-test
79
+ exit 1
80
+
81
+ - name: Stop Docker container
82
+ if: always()
83
+ run: |
84
+ docker stop chat-ui-test || true
.github/workflows/slugify.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Generate Branch Slug
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ value:
7
+ description: 'Value to slugify'
8
+ required: true
9
+ type: string
10
+ outputs:
11
+ slug:
12
+ description: 'Slugified value'
13
+ value: ${{ jobs.generate-slug.outputs.slug }}
14
+
15
+ jobs:
16
+ generate-slug:
17
+ runs-on: ubuntu-latest
18
+ outputs:
19
+ slug: ${{ steps.slugify.outputs.slug }}
20
+
21
+ steps:
22
+ - name: Setup Go
23
+ uses: actions/setup-go@v5
24
+ with:
25
+ go-version: '1.21'
26
+
27
+ - name: Generate slug
28
+ id: slugify
29
+ run: |
30
+ # Create working directory
31
+ mkdir -p $HOME/slugify
32
+ cd $HOME/slugify
33
+
34
+ # Create Go script
35
+ cat > main.go << 'EOF'
36
+ package main
37
+
38
+ import (
39
+ "fmt"
40
+ "os"
41
+ "github.com/gosimple/slug"
42
+ )
43
+
44
+ func main() {
45
+ if len(os.Args) < 2 {
46
+ fmt.Println("Usage: slugify <text>")
47
+ os.Exit(1)
48
+ }
49
+
50
+ text := os.Args[1]
51
+ slugged := slug.Make(text)
52
+ fmt.Println(slugged)
53
+ }
54
+ EOF
55
+
56
+ # Initialize module and install dependency
57
+ go mod init slugify
58
+ go mod tidy
59
+ go get github.com/gosimple/slug
60
+
61
+ # Build
62
+ go build -o slugify main.go
63
+
64
+ # Generate slug
65
+ VALUE="${{ inputs.value }}"
66
+ echo "Input value: $VALUE"
67
+
68
+ SLUG=$(./slugify "$VALUE")
69
+ echo "Generated slug: $SLUG"
70
+
71
+ # Export
72
+ echo "slug=$SLUG" >> $GITHUB_OUTPUT
.github/workflows/trufflehog.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ on:
2
+ push:
3
+
4
+ name: Secret Leaks
5
+
6
+ jobs:
7
+ trufflehog:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - name: Checkout code
11
+ uses: actions/checkout@v4
12
+ with:
13
+ fetch-depth: 0
14
+ - name: Secret Scanning
15
+ uses: trufflesecurity/trufflehog@main
16
+ with:
17
+ extra_args: --results=verified,unknown
.github/workflows/upload-pr-documentation.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Upload PR Documentation
2
+
3
+ on:
4
+ workflow_run:
5
+ workflows: ["Build PR Documentation"]
6
+ types:
7
+ - completed
8
+
9
+ jobs:
10
+ build:
11
+ uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
12
+ with:
13
+ package_name: chat-ui
14
+ secrets:
15
+ hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
16
+ comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
.gitignore ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ node_modules
3
+ /build
4
+ /.svelte-kit
5
+ /package
6
+ .env
7
+ .env.*
8
+ vite.config.js.timestamp-*
9
+ vite.config.ts.timestamp-*
10
+ SECRET_CONFIG
11
+ .idea
12
+ !.env.ci
13
+ !.env
14
+ gcp-*.json
15
+ db
16
+ models/*
17
+ !models/add-your-models-here.txt
.husky/lint-stage-config.js ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ export default {
2
+ "*.{js,jsx,ts,tsx}": ["prettier --write", "eslint --fix", "eslint"],
3
+ "*.json": ["prettier --write"],
4
+ };
.husky/pre-commit ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ set -e
2
+ npx lint-staged --config ./.husky/lint-stage-config.js
.npmrc ADDED
@@ -0,0 +1 @@
 
 
1
+ engine-strict=true
.prettierignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ node_modules
3
+ /build
4
+ /.svelte-kit
5
+ /package
6
+ /chart
7
+ .env
8
+ .env.*
9
+ !.env.example
10
+
11
+ # Ignore files for PNPM, NPM and YARN
12
+ pnpm-lock.yaml
13
+ package-lock.json
14
+ yarn.lock
.prettierrc ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "useTabs": true,
3
+ "trailingComma": "es5",
4
+ "printWidth": 100,
5
+ "plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"],
6
+ "overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
7
+ }
.vscode/launch.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.2.0",
3
+ "configurations": [
4
+ {
5
+ "command": "npm run dev",
6
+ "name": "Run development server",
7
+ "request": "launch",
8
+ "type": "node-terminal"
9
+ }
10
+ ]
11
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "editor.formatOnSave": true,
3
+ "editor.defaultFormatter": "esbenp.prettier-vscode",
4
+ "editor.codeActionsOnSave": {
5
+ "source.fixAll": "explicit"
6
+ },
7
+ "eslint.validate": ["javascript", "svelte"],
8
+ "[svelte]": {
9
+ "editor.defaultFormatter": "esbenp.prettier-vscode"
10
+ },
11
+ "[typescript]": {
12
+ "editor.defaultFormatter": "esbenp.prettier-vscode"
13
+ }
14
+ }
Dockerfile ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# syntax=docker/dockerfile:1
ARG INCLUDE_DB=false

FROM node:24-slim AS base

# install dotenv-cli globally so the entrypoint can load .env files
RUN npm install -g dotenv-cli

# switch to a user that works for spaces (HF Spaces expects uid 1000)
RUN userdel -r node
RUN useradd -m -u 1000 user
USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR /app

# add a .env.local if the user doesn't bind a volume to it
RUN touch /app/.env.local

USER root
# update + install + cleanup in a SINGLE layer: a standalone `apt-get update`
# layer gets cached and goes stale relative to later installs, and leaving
# /var/lib/apt/lists in the image bloats it for no benefit.
RUN apt-get update && \
    apt-get install -y --no-install-recommends libgomp1 libcurl4 curl dnsutils nano && \
    rm -rf /var/lib/apt/lists/*

# ensure npm cache dir exists before adjusting ownership
RUN mkdir -p /home/user/.npm && chown -R 1000:1000 /home/user/.npm

USER user

COPY --chown=1000 .env /app/.env
COPY --chown=1000 entrypoint.sh /app/entrypoint.sh
COPY --chown=1000 package.json /app/package.json
COPY --chown=1000 package-lock.json /app/package-lock.json

RUN chmod +x /app/entrypoint.sh

FROM node:24 AS builder

WORKDIR /app

COPY --link --chown=1000 package-lock.json package.json ./

# svelte requires APP_BASE at build time so it must be passed as a build arg
ARG APP_BASE=
ARG PUBLIC_APP_COLOR=
ENV BODY_SIZE_LIMIT=15728640

RUN --mount=type=cache,target=/app/.npm \
    npm set cache /app/.npm && \
    npm ci

COPY --link --chown=1000 . .

RUN git config --global --add safe.directory /app && \
    npm run build

# mongo image
FROM mongo:7 AS mongo

# image to be used if INCLUDE_DB is false
FROM base AS local_db_false

# image to be used if INCLUDE_DB is true
FROM base AS local_db_true

# copy mongo binaries from the official image stage
COPY --from=mongo /usr/bin/mongo* /usr/bin/

ENV MONGODB_URL=mongodb://localhost:27017
USER root
# single layer: create the data dir and hand it to uid 1000 together
RUN mkdir -p /data/db && chown -R 1000:1000 /data/db
USER user

# final image
FROM local_db_${INCLUDE_DB} AS final

# build arg to determine if the database should be included
ARG INCLUDE_DB=false
ENV INCLUDE_DB=${INCLUDE_DB}

# svelte requires APP_BASE at build time so it must be passed as a build arg
ARG APP_BASE=
ARG PUBLIC_APP_COLOR=
ARG PUBLIC_COMMIT_SHA=
ENV PUBLIC_COMMIT_SHA=${PUBLIC_COMMIT_SHA}
ENV BODY_SIZE_LIMIT=15728640

# import the build & dependencies
COPY --from=builder --chown=1000 /app/build /app/build
COPY --from=builder --chown=1000 /app/node_modules /app/node_modules

CMD ["/bin/bash", "-c", "/app/entrypoint.sh"]
LICENSE ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2018- The Hugging Face team. All rights reserved.
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright [yyyy] [name of copyright owner]
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
PRIVACY.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Privacy
2
+
3
+ > Last updated: Sep 15, 2025
4
+
5
+ Basics:
6
+
7
+ - Sign-in: You authenticate with your Hugging Face account.
8
+ - Conversation history: Stored so you can access past chats; you can delete any conversation at any time from the UI.
9
+
10
+ 🗓 Please also consult huggingface.co's main privacy policy at <https://huggingface.co/privacy>. To exercise any of your legal privacy rights, please send an email to <[email protected]>.
11
+
12
+ ## Data handling and processing
13
+
14
+ HuggingChat uses Hugging Face’s Inference Providers to access models from multiple partners via a single API. Depending on the model and availability, inference runs with the corresponding provider.
15
+
16
+ - Inference Providers documentation: <https://huggingface.co/docs/inference-providers>
17
+ - Security & Compliance: <https://huggingface.co/docs/inference-providers/security>
18
+
19
+ Security and routing facts
20
+
21
+ - Hugging Face does not store any user data for training purposes.
22
+ - Hugging Face does not store the request body or the response when routing requests through Hugging Face.
23
+ - Logs are kept for debugging purposes for up to 30 days, but no user data or tokens are stored in those logs.
24
+ - Inference Provider routing uses TLS/SSL to encrypt data in transit.
25
+ - The Hugging Face Hub (which Inference Providers is a feature of) is SOC 2 Type 2 certified. See <https://huggingface.co/docs/hub/security>.
26
+
27
+ External providers are responsible for their own security and data handling. Please consult each provider’s respective security and privacy policies via the Inference Providers documentation linked above.
28
+
29
+ ## Technical details
30
+
31
+ [![chat-ui](https://img.shields.io/github/stars/huggingface/chat-ui)](https://github.com/huggingface/chat-ui)
32
+
33
+ The app is completely open source, and further development takes place on the [huggingface/chat-ui](https://github.com/huggingface/chat-ui) GitHub repo. We're always open to contributions!
34
+
35
+ You can find the production configuration for HuggingChat [here](https://github.com/huggingface/chat-ui/blob/main/chart/env/prod.yaml).
36
+
37
+ HuggingChat connects to the OpenAI‑compatible Inference Providers router at `https://router.huggingface.co/v1` to access models across multiple providers. Provider selection may be automatic or fixed depending on the model configuration.
38
+
39
+ We welcome any feedback on this app: please participate in the public discussion at <https://huggingface.co/spaces/huggingchat/chat-ui/discussions>
40
+
41
+ <a target="_blank" href="https://huggingface.co/spaces/huggingchat/chat-ui/discussions"><img src="https://huggingface.co/datasets/huggingface/badges/raw/main/open-a-discussion-xl.svg" title="open a discussion"></a>
README.md ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Chat Ui
3
+ emoji: 🐠
4
+ colorFrom: pink
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 3000
9
+ ---
10
+
11
+ # Chat UI
12
+
13
+ ![Chat UI repository thumbnail](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/chat-ui/chat-ui-2026.png)
14
+
15
+ A chat interface for LLMs. It is a SvelteKit app and it powers the [HuggingChat app on hf.co/chat](https://huggingface.co/chat).
16
+
17
+ 0. [Quickstart](#quickstart)
18
+ 1. [Database Options](#database-options)
19
+ 2. [Launch](#launch)
20
+ 3. [Optional Docker Image](#optional-docker-image)
21
+ 4. [Extra parameters](#extra-parameters)
22
+ 5. [Building](#building)
23
+
24
+ > [!NOTE]
25
+ > Chat UI only supports OpenAI-compatible APIs via `OPENAI_BASE_URL` and the `/models` endpoint. Provider-specific integrations (legacy `MODELS` env var, GGUF discovery, embeddings, web-search helpers, etc.) are removed, but any service that speaks the OpenAI protocol (llama.cpp server, Ollama, OpenRouter, etc.) will work by default.
26
+
27
+ > [!NOTE]
28
+ > The old version is still available on the [legacy branch](https://github.com/huggingface/chat-ui/tree/legacy)
29
+
30
+ ## Quickstart
31
+
32
+ Chat UI speaks to OpenAI-compatible APIs only. The fastest way to get running is with the Hugging Face Inference Providers router plus your personal Hugging Face access token.
33
+
34
+ **Step 1 – Create `.env.local`:**
35
+
36
+ ```env
37
+ OPENAI_BASE_URL=https://router.huggingface.co/v1
38
+ OPENAI_API_KEY=hf_************************
39
+ # Fill in once you pick a database option below
40
+ MONGODB_URL=
41
+ ```
42
+
43
+ `OPENAI_API_KEY` can come from any OpenAI-compatible endpoint you plan to call. Pick the combo that matches your setup and drop the values into `.env.local`:
44
+
45
+ | Provider | Example `OPENAI_BASE_URL` | Example key env |
46
+ | --------------------------------------------- | ---------------------------------- | ----------------------------------------------------------------------- |
47
+ | Hugging Face Inference Providers router | `https://router.huggingface.co/v1` | `OPENAI_API_KEY=hf_xxx` (or `HF_TOKEN` legacy alias) |
48
+ | llama.cpp server (`llama.cpp --server --api`) | `http://127.0.0.1:8080/v1` | `OPENAI_API_KEY=sk-local-demo` (any string works; llama.cpp ignores it) |
49
+ | Ollama (with OpenAI-compatible bridge) | `http://127.0.0.1:11434/v1` | `OPENAI_API_KEY=ollama` |
50
+ | OpenRouter | `https://openrouter.ai/api/v1` | `OPENAI_API_KEY=sk-or-v1-...` |
51
+ | Poe | `https://api.poe.com/v1` | `OPENAI_API_KEY=pk_...` |
52
+
53
+ Check the root [`.env` template](./.env) for the full list of optional variables you can override.
54
+
55
+ **Step 2 – Choose where MongoDB lives:** Either provision a managed cluster (for example MongoDB Atlas) or run a local container. Both approaches are described in [Database Options](#database-options). After you have the URI, drop it into `MONGODB_URL` (and, if desired, set `MONGODB_DB_NAME`).
56
+
57
+ **Step 3 – Install and launch the dev server:**
58
+
59
+ ```bash
60
+ git clone https://github.com/huggingface/chat-ui
61
+ cd chat-ui
62
+ npm install
63
+ npm run dev -- --open
64
+ ```
65
+
66
+ You now have Chat UI running against the Hugging Face router without needing to host MongoDB yourself.
67
+
68
+ ## Database Options
69
+
70
+ Chat history, users, settings, files, and stats all live in MongoDB. You can point Chat UI at any MongoDB 6/7 deployment.
71
+
72
+ ### MongoDB Atlas (managed)
73
+
74
+ 1. Create a free cluster at [mongodb.com](https://www.mongodb.com/pricing).
75
+ 2. Add your IP (or `0.0.0.0/0` for development) to the network access list.
76
+ 3. Create a database user and copy the connection string.
77
+ 4. Paste that string into `MONGODB_URL` in `.env.local`. Keep the default `MONGODB_DB_NAME=chat-ui` or change it per environment.
78
+
79
+ Atlas keeps MongoDB off your laptop, which is ideal for teams or cloud deployments.
80
+
81
+ ### Local MongoDB (container)
82
+
83
+ If you prefer to run MongoDB locally:
84
+
85
+ ```bash
86
+ docker run -d -p 27017:27017 --name mongo-chatui mongo:latest
87
+ ```
88
+
89
+ Then set `MONGODB_URL=mongodb://localhost:27017` in `.env.local`. You can also supply `MONGO_STORAGE_PATH` if you want Chat UI’s fallback in-memory server to persist under a specific folder.
90
+
91
+ ## Launch
92
+
93
+ After configuring your environment variables, start Chat UI with:
94
+
95
+ ```bash
96
+ npm install
97
+ npm run dev
98
+ ```
99
+
100
+ The dev server listens on `http://localhost:5173` by default. Use `npm run build` / `npm run preview` for production builds.
101
+
102
+ ## Optional Docker Image
103
+
104
+ Prefer containerized setup? You can run everything in one container as long as you supply a MongoDB URI (local or hosted):
105
+
106
+ ```bash
107
+ docker run \
108
+ -p 3000 \
109
+ -e MONGODB_URL=mongodb://host.docker.internal:27017 \
110
+ -e OPENAI_BASE_URL=https://router.huggingface.co/v1 \
111
+ -e OPENAI_API_KEY=hf_*** \
112
+ -v db:/data \
113
+ ghcr.io/huggingface/chat-ui-db:latest
114
+ ```
115
+
116
+ `host.docker.internal` lets the container reach a MongoDB instance on your host machine; swap it for your Atlas URI if you use the hosted option. All environment variables accepted in `.env.local` can be provided as `-e` flags.
117
+
118
+ ## Extra parameters
119
+
120
+ ### Theming
121
+
122
+ You can use a few environment variables to customize the look and feel of chat-ui. These are by default:
123
+
124
+ ```env
125
+ PUBLIC_APP_NAME=ChatUI
126
+ PUBLIC_APP_ASSETS=chatui
127
+ PUBLIC_APP_DESCRIPTION="Making the community's best AI chat models available to everyone."
128
+ PUBLIC_APP_DATA_SHARING=
129
+ ```
130
+
131
+ - `PUBLIC_APP_NAME` The name used as a title throughout the app.
132
+ - `PUBLIC_APP_ASSETS` Is used to find logos & favicons in `static/$PUBLIC_APP_ASSETS`, current options are `chatui` and `huggingchat`.
133
+ - `PUBLIC_APP_DATA_SHARING` Can be set to 1 to add a toggle in the user settings that lets your users opt-in to data sharing with models creator.
134
+
135
+ ### Models
136
+
137
+ This build does not use the `MODELS` env var or GGUF discovery. Configure models via `OPENAI_BASE_URL` only; Chat UI will fetch `${OPENAI_BASE_URL}/models` and populate the list automatically. Authorization uses `OPENAI_API_KEY` (preferred). `HF_TOKEN` remains a legacy alias.
138
+
139
+ ### LLM Router (Optional)
140
+
141
+ Chat UI can perform client-side routing using [katanemo/Arch-Router-1.5B](https://huggingface.co/katanemo/Arch-Router-1.5B) as the routing model without running a separate router service. The UI exposes a virtual model alias called "Omni" (configurable) that, when selected, chooses the best route/model for each message.
142
+
143
+ - Provide a routes policy JSON via `LLM_ROUTER_ROUTES_PATH`. No sample file ships with this branch, so you must point the variable to a JSON array you create yourself (for example, commit one in your project like `config/routes.chat.json`). Each route entry needs `name`, `description`, `primary_model`, and optional `fallback_models`.
144
+ - Configure the Arch router selection endpoint with `LLM_ROUTER_ARCH_BASE_URL` (OpenAI-compatible `/chat/completions`) and `LLM_ROUTER_ARCH_MODEL` (e.g. `router/omni`). The Arch call reuses `OPENAI_API_KEY` for auth.
145
+ - Map `other` to a concrete route via `LLM_ROUTER_OTHER_ROUTE` (default: `casual_conversation`). If Arch selection fails, calls fall back to `LLM_ROUTER_FALLBACK_MODEL`.
146
+ - Selection timeout can be tuned via `LLM_ROUTER_ARCH_TIMEOUT_MS` (default 10000).
147
+ - Omni alias configuration: `PUBLIC_LLM_ROUTER_ALIAS_ID` (default `omni`), `PUBLIC_LLM_ROUTER_DISPLAY_NAME` (default `Omni`), and optional `PUBLIC_LLM_ROUTER_LOGO_URL`.
148
+
149
+ When you select Omni in the UI, Chat UI will:
150
+
151
+ - Call the Arch endpoint once (non-streaming) to pick the best route for the last turns.
152
+ - Emit RouterMetadata immediately (route and actual model used) so the UI can display it.
153
+ - Stream from the selected model via your configured `OPENAI_BASE_URL`. On errors, it tries route fallbacks.
154
+
155
+ ## Building
156
+
157
+ To create a production version of your app:
158
+
159
+ ```bash
160
+ npm run build
161
+ ```
162
+
163
+ You can preview the production build with `npm run preview`.
164
+
165
+ > To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment.
chart/Chart.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ apiVersion: v2
2
+ name: chat-ui
3
+ version: 0.0.1-latest
4
+ type: application
5
+ icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
chart/env/dev.yaml ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image:
2
+ repository: huggingface
3
+ name: chat-ui
4
+
5
+ #nodeSelector:
6
+ # role-huggingchat: "true"
7
+ #
8
+ #tolerations:
9
+ # - key: "huggingface.co/huggingchat"
10
+ # operator: "Equal"
11
+ # value: "true"
12
+ # effect: "NoSchedule"
13
+
14
+ serviceAccount:
15
+ enabled: true
16
+ create: true
17
+ name: huggingchat-ephemeral
18
+
19
+ ingress:
20
+ enabled: false
21
+
22
+ ingressInternal:
23
+ enabled: true
24
+ path: "/chat"
25
+ annotations:
26
+ external-dns.alpha.kubernetes.io/hostname: "*.chat-dev.huggingface.tech"
27
+ alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
28
+ alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
29
+ alb.ingress.kubernetes.io/group.name: "chat-dev-internal-public"
30
+ alb.ingress.kubernetes.io/load-balancer-name: "chat-dev-internal-public"
31
+ alb.ingress.kubernetes.io/ssl-redirect: "443"
32
+ alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
33
+ alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30
34
+ alb.ingress.kubernetes.io/target-type: "ip"
35
+ alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/bc3eb446-1c04-432c-ac6b-946a88d725da"
36
+ kubernetes.io/ingress.class: "alb"
37
+
38
+ envVars:
39
+ TEST: "test"
40
+ COUPLE_SESSION_WITH_COOKIE_NAME: "token"
41
+ OPENID_SCOPES: "openid profile inference-api"
42
+ USE_USER_TOKEN: "true"
43
+ AUTOMATIC_LOGIN: "false"
44
+
45
+ ADDRESS_HEADER: "X-Forwarded-For"
46
+ APP_BASE: "/chat"
47
+ ALLOW_IFRAME: "false"
48
+ COOKIE_SAMESITE: "lax"
49
+ COOKIE_SECURE: "true"
50
+ EXPOSE_API: "true"
51
+ METRICS_ENABLED: "true"
52
+ LOG_LEVEL: "debug"
53
+ NODE_LOG_STRUCTURED_DATA: "true"
54
+
55
+ OPENAI_BASE_URL: "https://router.huggingface.co/v1"
56
+ PUBLIC_APP_ASSETS: "huggingchat"
57
+ PUBLIC_APP_NAME: "HuggingChat"
58
+ PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone"
59
+ PUBLIC_ORIGIN: "https://huggingface.co"
60
+ PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js"
61
+
62
+ TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507"
63
+ LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1"
64
+ LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json"
65
+ LLM_ROUTER_ARCH_MODEL: "katanemo/Arch-Router-1.5B"
66
+ LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
67
+ LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
68
+ LLM_ROUTER_ENABLE_MULTIMODAL: "true"
69
+ LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-235B-A22B-Thinking"
70
+ PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni"
71
+ PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png"
72
+ PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
73
+ MODELS: >
74
+ [
75
+ { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." },
76
+ { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." },
77
+ { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." },
78
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." },
79
+ { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." },
80
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." },
81
+ { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." },
82
+ { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." },
83
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." },
84
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." },
85
+ { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." },
86
+ { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." },
87
+ { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." },
88
+ { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." },
89
+ { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." },
90
+ { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." },
91
+ { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." },
92
+ { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." },
93
+ { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." },
94
+ { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." },
95
+ { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." },
96
+ { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." },
97
+ { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." },
98
+ { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." },
99
+ { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." },
100
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." },
101
+ { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." },
102
+ { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." },
103
+ { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." },
104
+ { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." },
105
+ { "id": "zai-org/GLM-4.5", "description": "Hybrid reasoning model top choice for intelligent agent applications." },
106
+ { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." },
107
+ { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." },
108
+ { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." },
109
+ { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." },
110
+ { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." },
111
+ { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." },
112
+ { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." },
113
+ { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." },
114
+ { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." },
115
+ { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." },
116
+ { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." },
117
+ { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." },
118
+ { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." },
119
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." },
120
+ { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." },
121
+ { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." },
122
+ { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." },
123
+ { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." },
124
+ { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." },
125
+ { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." },
126
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." },
127
+ { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." },
128
+ { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." },
129
+ { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." },
130
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." },
131
+ { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." },
132
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." },
133
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." },
134
+ { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." },
135
+ { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } },
136
+ { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." },
137
+ { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." },
138
+ { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." },
139
+ { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." },
140
+ { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." },
141
+ { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." },
142
+ { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." },
143
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." },
144
+ { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." },
145
+ { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." },
146
+ { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." },
147
+ { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." },
148
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." },
149
+ { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." },
150
+ { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." },
151
+ { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." },
152
+ { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." },
153
+ { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." },
154
+ { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." },
155
+ { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." },
156
+ { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." },
157
+ { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." },
158
+ { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." },
159
+ { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." },
160
+ { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." },
161
+ { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." },
162
+ { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." },
163
+ { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." },
164
+ { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." },
165
+ { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." },
166
+ { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." },
167
+ { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." },
168
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." },
169
+ { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." },
170
+ { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." },
171
+ { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." },
172
+ { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." },
173
+ { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." },
174
+ { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." },
175
+ { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." },
176
+ { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." },
177
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." },
178
+ { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." },
179
+ { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." },
180
+ { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." },
181
+ { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." },
182
+ { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." },
183
+ { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." },
184
+ { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." },
185
+ { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." },
186
+ { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." },
187
+ { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." },
188
+ { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." }
189
+ ]
190
+
191
+ infisical:
192
+ enabled: true
193
+ env: "ephemeral-us-east-1"
194
+
195
+ replicas: 1
196
+ autoscaling:
197
+ enabled: false
198
+
199
+ resources:
200
+ requests:
201
+ cpu: 2
202
+ memory: 4Gi
203
+ limits:
204
+ cpu: 4
205
+ memory: 8Gi
chart/env/prod.yaml ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image:
2
+ repository: huggingface
3
+ name: chat-ui
4
+
5
+ nodeSelector:
6
+ role-huggingchat: "true"
7
+
8
+ tolerations:
9
+ - key: "huggingface.co/huggingchat"
10
+ operator: "Equal"
11
+ value: "true"
12
+ effect: "NoSchedule"
13
+
14
+ serviceAccount:
15
+ enabled: true
16
+ create: true
17
+ name: huggingchat-prod
18
+
19
+ ingress:
20
+ path: "/chat"
21
+ annotations:
22
+ alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
23
+ alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
24
+ alb.ingress.kubernetes.io/load-balancer-name: "hub-utils-prod-cloudfront"
25
+ alb.ingress.kubernetes.io/group.name: "hub-utils-prod-cloudfront"
26
+ alb.ingress.kubernetes.io/scheme: "internal"
27
+ alb.ingress.kubernetes.io/ssl-redirect: "443"
28
+ alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
29
+ alb.ingress.kubernetes.io/target-group-attributes: "deregistration_delay.timeout_seconds=30"
30
+ alb.ingress.kubernetes.io/target-type: "ip"
31
+ alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/5b25b145-75db-4837-b9f3-7f238ba8a9c7,arn:aws:acm:us-east-1:707930574880:certificate/bfdf509c-f44b-400f-b9e1-6f7a861abe91"
32
+ kubernetes.io/ingress.class: "alb"
33
+
34
+ ingressInternal:
35
+ enabled: true
36
+ path: "/chat"
37
+ annotations:
38
+ alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
39
+ alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
40
+ alb.ingress.kubernetes.io/group.name: "hub-prod-internal-public"
41
+ alb.ingress.kubernetes.io/load-balancer-name: "hub-prod-internal-public"
42
+ alb.ingress.kubernetes.io/ssl-redirect: "443"
43
+ alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
44
+ alb.ingress.kubernetes.io/target-group-attributes: "deregistration_delay.timeout_seconds=30"
45
+ alb.ingress.kubernetes.io/target-type: "ip"
46
+ alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/5b25b145-75db-4837-b9f3-7f238ba8a9c7,arn:aws:acm:us-east-1:707930574880:certificate/bfdf509c-f44b-400f-b9e1-6f7a861abe91"
47
+ kubernetes.io/ingress.class: "alb"
48
+
49
+ envVars:
50
+ COUPLE_SESSION_WITH_COOKIE_NAME: "token"
51
+ OPENID_SCOPES: "openid profile inference-api"
52
+ USE_USER_TOKEN: "true"
53
+ AUTOMATIC_LOGIN: "false"
54
+
55
+ ADDRESS_HEADER: "X-Forwarded-For"
56
+ APP_BASE: "/chat"
57
+ ALLOW_IFRAME: "false"
58
+ COOKIE_SAMESITE: "lax"
59
+ COOKIE_SECURE: "true"
60
+ EXPOSE_API: "true"
61
+ METRICS_ENABLED: "true"
62
+ LOG_LEVEL: "debug"
63
+ NODE_LOG_STRUCTURED_DATA: "true"
64
+
65
+ OPENAI_BASE_URL: "https://router.huggingface.co/v1"
66
+ PUBLIC_APP_ASSETS: "huggingchat"
67
+ PUBLIC_APP_NAME: "HuggingChat"
68
+ PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone"
69
+ PUBLIC_ORIGIN: "https://huggingface.co"
70
+ PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js"
71
+
72
+ TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507"
73
+ LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1"
74
+ LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json"
75
+ LLM_ROUTER_ARCH_MODEL: "katanemo/Arch-Router-1.5B"
76
+ LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
77
+ LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
78
+ LLM_ROUTER_ENABLE_MULTIMODAL: "true"
79
+ LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-235B-A22B-Thinking"
80
+ PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni"
81
+ PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png"
82
+ PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
83
+ MODELS: >
84
+ [
85
+ { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." },
86
+ { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." },
87
+ { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." },
88
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." },
89
+ { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." },
90
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." },
91
+ { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." },
92
+ { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." },
93
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." },
94
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." },
95
+ { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." },
96
+ { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." },
97
+ { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." },
98
+ { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." },
99
+ { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." },
100
+ { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." },
101
+ { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." },
102
+ { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." },
103
+ { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." },
104
+ { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." },
105
+ { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." },
106
+ { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." },
107
+ { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." },
108
+ { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." },
109
+ { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." },
110
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." },
111
+ { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." },
112
+ { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." },
113
+ { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." },
114
+ { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." },
115
+ { "id": "zai-org/GLM-4.5", "description": "Hybrid reasoning model top choice for intelligent agent applications." },
116
+ { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." },
117
+ { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." },
118
+ { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." },
119
+ { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." },
120
+ { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." },
121
+ { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." },
122
+ { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." },
123
+ { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." },
124
+ { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." },
125
+ { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." },
126
+ { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." },
127
+ { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." },
128
+ { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." },
129
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." },
130
+ { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." },
131
+ { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." },
132
+ { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." },
133
+ { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." },
134
+ { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." },
135
+ { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." },
136
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." },
137
+ { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." },
138
+ { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." },
139
+ { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." },
140
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." },
141
+ { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." },
142
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." },
143
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." },
144
+ { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." },
145
+ { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } },
146
+ { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." },
147
+ { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." },
148
+ { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." },
149
+ { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." },
150
+ { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." },
151
+ { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." },
152
+ { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." },
153
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." },
154
+ { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." },
155
+ { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." },
156
+ { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." },
157
+ { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." },
158
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." },
159
+ { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." },
160
+ { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." },
161
+ { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." },
162
+ { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." },
163
+ { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." },
164
+ { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." },
165
+ { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." },
166
+ { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." },
167
+ { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." },
168
+ { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." },
169
+ { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." },
170
+ { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." },
171
+ { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." },
172
+ { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." },
173
+ { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." },
174
+ { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." },
175
+ { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." },
176
+ { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." },
177
+ { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." },
178
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." },
179
+ { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." },
180
+ { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." },
181
+ { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." },
182
+ { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." },
183
+ { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." },
184
+ { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." },
185
+ { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." },
186
+ { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." },
187
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." },
188
+ { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." },
189
+ { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." },
190
+ { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." },
191
+ { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." },
192
+ { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." },
193
+ { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." },
194
+ { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." },
195
+ { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." },
196
+ { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." },
197
+ { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." },
198
+ { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." }
199
+ ]
200
+
201
# Infisical-managed secrets for the production environment.
infisical:
  enabled: true
  env: "prod-us-east-1"

# HPA bounds. The percentage targets are quoted strings on purpose: the
# chart's hpa template treats an empty string as "metric disabled".
autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 30
  targetMemoryUtilizationPercentage: "50"
  targetCPUUtilizationPercentage: "50"

# Per-pod compute; CPU may burst to twice the request, memory to twice as well.
resources:
  requests:
    cpu: 2
    memory: 4Gi
  limits:
    cpu: 4
    memory: 8Gi
chart/templates/_helpers.tpl ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Release-scoped resource name, truncated to the 63-character DNS label limit.
(The original piped through a single-argument `default`, which is a no-op in
sprig — it simply returned its argument — so it is dropped here.)
*/}}
{{- define "name" -}}
{{- $.Release.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/* Static application name used for the `app` label. */}}
{{- define "app.name" -}}
chat-ui
{{- end -}}

{{/* Standard label set shared by every resource in this chart. */}}
{{- define "labels.standard" -}}
release: {{ $.Release.Name | quote }}
heritage: {{ $.Release.Service | quote }}
chart: "{{ include "name" . }}"
app: "{{ include "app.name" . }}"
{{- end -}}

{{/* Label set for the resolver variant of the app (kept for external use;
not referenced by the templates visible in this chart directory). */}}
{{- define "labels.resolver" -}}
release: {{ $.Release.Name | quote }}
heritage: {{ $.Release.Service | quote }}
chart: "{{ include "name" . }}"
app: "{{ include "app.name" . }}-resolver"
{{- end -}}
chart/templates/config.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
ConfigMap exposing every key/value pair from .Values.envVars as plain
environment data; consumed by the Deployment via envFrom.
*/}}
apiVersion: v1
kind: ConfigMap
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
data:
  {{- range $key, $value := .Values.envVars }}
  {{ $key }}: {{ $value | quote }}
  {{- end }}
chart/templates/deployment.yaml ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Main chat-ui Deployment. Environment comes from the chart ConfigMap and,
when Infisical is enabled, from the operator-managed secret "<name>-secs".
*/}}
apiVersion: apps/v1
kind: Deployment
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
  {{- if .Values.infisical.enabled }}
  annotations:
    secrets.infisical.com/auto-reload: "true"
  {{- end }}
spec:
  progressDeadlineSeconds: 600
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicas }}
  {{- end }}
  revisionHistoryLimit: 10
  selector:
    matchLabels: {{ include "labels.standard" . | nindent 6 }}
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels: {{ include "labels.standard" . | nindent 8 }}
      annotations:
        # Roll pods whenever the rendered ConfigMap changes.
        checksum/config: {{ include (print $.Template.BasePath "/config.yaml") . | sha256sum }}
        {{- if .Values.envVars.NODE_LOG_STRUCTURED_DATA }}
        co.elastic.logs/json.expand_keys: "true"
        {{- end }}
    spec:
      {{- if .Values.serviceAccount.enabled }}
      serviceAccountName: "{{ .Values.serviceAccount.name | default (include "name" .) }}"
      {{- end }}
      containers:
        - name: chat-ui
          image: "{{ .Values.image.repository }}/{{ .Values.image.name }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          readinessProbe:
            failureThreshold: 30
            periodSeconds: 10
            httpGet:
              path: {{ .Values.envVars.APP_BASE | default "" }}/healthcheck
              port: {{ .Values.envVars.APP_PORT | default 3000 | int }}
          livenessProbe:
            failureThreshold: 30
            periodSeconds: 10
            httpGet:
              path: {{ .Values.envVars.APP_BASE | default "" }}/healthcheck
              port: {{ .Values.envVars.APP_PORT | default 3000 | int }}
          ports:
            - containerPort: {{ .Values.envVars.APP_PORT | default 3000 | int }}
              name: http
              protocol: TCP
            {{- /* `| default ""` guards `eq` against a nil METRICS_ENABLED
                 (the chart default, envVars: {}), which would otherwise fail
                 with "incompatible types for comparison". */}}
            {{- if eq "true" (.Values.envVars.METRICS_ENABLED | default "") }}
            - containerPort: {{ .Values.envVars.METRICS_PORT | default 5565 | int }}
              name: metrics
              protocol: TCP
            {{- end }}
          resources: {{ toYaml .Values.resources | nindent 12 }}
          {{- with .Values.extraEnv }}
          env:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          envFrom:
            - configMapRef:
                name: {{ include "name" . }}
            {{- if .Values.infisical.enabled }}
            - secretRef:
                name: {{ include "name" . }}-secs
            {{- end }}
            {{- /* nindent matches the explicit list items above so appended
                 entries land in the same YAML sequence. */}}
            {{- with .Values.extraEnvFrom }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
      nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }}
      tolerations: {{ toYaml .Values.tolerations | nindent 8 }}
      # NOTE(review): this volume is declared but never mounted by the
      # container — confirm whether it can be dropped.
      volumes:
        - name: config
          configMap:
            name: {{ include "name" . }}
chart/templates/hpa.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.autoscaling.enabled }}
{{/*
HPA for the chat-ui Deployment. Each metric is emitted only when its target
is set. A plain truthiness test replaces the original `ne "" ...`, which
raised "incompatible types for comparison" whenever the value was unset
(nil) or given as a number instead of a string; for the documented string
values ("" disabled, "50" enabled) the behavior is identical.
*/}}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "name" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage | int }}
    {{- end }}
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage | int }}
    {{- end }}
  behavior:
    # Scale down cautiously: at most 10% of pods per minute, after a
    # 10-minute stabilization window.
    scaleDown:
      stabilizationWindowSeconds: 600
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    # Scale up promptly: one pod every 30 seconds, no stabilization delay.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Pods
          value: 1
          periodSeconds: 30
{{- end }}
chart/templates/infisical.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.infisical.enabled }}
{{/*
InfisicalSecret CR: the Infisical operator syncs secrets from the configured
project/environment into the Kubernetes secret "<name>-secs", which the
Deployment consumes via envFrom. Nesting follows the v1alpha1 CRD
(secretsScope under universalAuth) — exact indentation was lost in the
source dump, so verify against the operator's schema.
*/}}
apiVersion: secrets.infisical.com/v1alpha1
kind: InfisicalSecret
metadata:
  name: {{ include "name" . }}-infisical-secret
  namespace: {{ .Release.Namespace }}
spec:
  authentication:
    universalAuth:
      credentialsRef:
        secretName: {{ .Values.infisical.operatorSecretName | quote }}
        secretNamespace: {{ .Values.infisical.operatorSecretNamespace | quote }}
      secretsScope:
        envSlug: {{ .Values.infisical.env | quote }}
        projectSlug: {{ .Values.infisical.project | quote }}
        secretsPath: /
  hostAPI: {{ .Values.infisical.url | quote }}
  managedSecretReference:
    creationPolicy: Owner
    secretName: {{ include "name" . }}-secs
    secretNamespace: {{ .Release.Namespace | quote }}
    secretType: Opaque
  resyncInterval: {{ .Values.infisical.resyncInterval }}
{{- end }}
chart/templates/ingress-internal.yaml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.ingressInternal.enabled }}
{{/*
Internal-facing Ingress (suffix "-internal"). The className guard uses a
chomped `with` block: the original unchomped `{{ if }}` / `{{ end }}` emitted
stray whitespace-only lines into the rendered manifest.
*/}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations: {{ toYaml .Values.ingressInternal.annotations | nindent 4 }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}-internal
  namespace: {{ .Release.Namespace }}
spec:
  {{- with .Values.ingressInternal.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- with .Values.ingressInternal.tls }}
  tls:
    - hosts:
        - {{ $.Values.domain | quote }}
      {{- with .secretName }}
      secretName: {{ . }}
      {{- end }}
  {{- end }}
  rules:
    - host: {{ .Values.domain }}
      http:
        paths:
          - backend:
              service:
                name: {{ include "name" . }}
                port:
                  name: http
            path: {{ .Values.ingressInternal.path | default "/" }}
            pathType: Prefix
{{- end }}
chart/templates/ingress.yaml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.ingress.enabled }}
{{/*
Public Ingress for chat-ui. The className guard uses a chomped `with` block:
the original unchomped `{{ if }}` / `{{ end }}` emitted stray
whitespace-only lines into the rendered manifest.
*/}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations: {{ toYaml .Values.ingress.annotations | nindent 4 }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- with .Values.ingress.tls }}
  tls:
    - hosts:
        - {{ $.Values.domain | quote }}
      {{- with .secretName }}
      secretName: {{ . }}
      {{- end }}
  {{- end }}
  rules:
    - host: {{ .Values.domain }}
      http:
        paths:
          - backend:
              service:
                name: {{ include "name" . }}
                port:
                  name: http
            path: {{ .Values.ingress.path | default "/" }}
            pathType: Prefix
{{- end }}
chart/templates/network-policy.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if .Values.networkPolicy.enabled }}
{{/*
Egress-only NetworkPolicy for the chat-ui pods: DNS to kube-dns, optional
allow-listed CIDR blocks, and the public internet minus private ranges and
the cloud metadata endpoint.
*/}}
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  egress:
    # DNS (UDP/53) to kube-dns only.
    - ports:
        - port: 53
          protocol: UDP
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
    {{- /* Guarded with `with`: when allowedBlocks is empty the original
         rendered a rule with `to: null`, which the NetworkPolicy API treats
         as "allow traffic to ALL destinations" — silently defeating the
         policy. */}}
    {{- with .Values.networkPolicy.allowedBlocks }}
    - to:
        {{- range $ip := . }}
        - ipBlock:
            cidr: {{ $ip | quote }}
        {{- end }}
    {{- end }}
    # Public internet, excluding RFC1918 ranges and the metadata endpoint.
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.0.0.0/8
              - 172.16.0.0/12
              - 192.168.0.0/16
              - 169.254.169.254/32
  podSelector:
    matchLabels: {{ include "labels.standard" . | nindent 6 }}
  policyTypes:
    - Egress
{{- end }}
chart/templates/service-account.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- if and .Values.serviceAccount.enabled .Values.serviceAccount.create }}
{{/*
Optional ServiceAccount; created only when both serviceAccount.enabled and
serviceAccount.create are set. The name falls back to the release name.
*/}}
apiVersion: v1
kind: ServiceAccount
automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }}
metadata:
  name: "{{ .Values.serviceAccount.name | default (include "name" .) }}"
  namespace: {{ .Release.Namespace }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}
chart/templates/service-monitor.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{- /* `| default ""` guards `eq` against a nil METRICS_ENABLED (the chart
     default, envVars: {}), which would otherwise fail rendering with
     "incompatible types for comparison". */}}
{{- if eq "true" (.Values.envVars.METRICS_ENABLED | default "") }}
{{/* Prometheus-operator ServiceMonitor scraping the Service's `metrics` port. */}}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  selector:
    matchLabels: {{ include "labels.standard" . | nindent 6 }}
  endpoints:
    - port: metrics
      path: /metrics
      interval: 10s
      scheme: http
      scrapeTimeout: 10s
{{- end }}
chart/templates/service.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Service fronting the chat-ui pods: HTTP on port 80, plus a metrics port when
METRICS_ENABLED is "true".
*/}}
apiVersion: v1
kind: Service
metadata:
  name: "{{ include "name" . }}"
  namespace: {{ .Release.Namespace }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  annotations: {{ toYaml .Values.service.annotations | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  selector: {{ include "labels.standard" . | nindent 4 }}
  ports:
    - name: http
      port: 80
      protocol: TCP
      targetPort: http
    {{- /* `| default ""` guards `eq` against a nil METRICS_ENABLED (the
         chart default, envVars: {}), which would otherwise fail rendering
         with "incompatible types for comparison". */}}
    {{- if eq "true" (.Values.envVars.METRICS_ENABLED | default "") }}
    - name: metrics
      port: {{ .Values.envVars.METRICS_PORT | default 5565 | int }}
      protocol: TCP
      targetPort: metrics
    {{- end }}
chart/values.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
image:
  repository: ghcr.io/huggingface
  name: chat-ui
  tag: 0.0.0-latest
  pullPolicy: IfNotPresent

# Fixed replica count; ignored when autoscaling.enabled is true.
replicas: 3

domain: huggingface.co

networkPolicy:
  enabled: false
  allowedBlocks: []

service:
  type: NodePort
  annotations: {}

serviceAccount:
  enabled: false
  create: false
  name: ""
  automountServiceAccountToken: true
  annotations: {}

ingress:
  enabled: true
  path: "/"
  annotations: {}
  # className: "nginx"
  tls: {}
  # secretName: XXX

ingressInternal:
  enabled: false
  path: "/"
  annotations: {}
  # className: "nginx"
  tls: {}

resources:
  requests:
    cpu: 2
    memory: 4Gi
  limits:
    cpu: 2
    memory: 4Gi
nodeSelector: {}
tolerations: []

# Plain environment variables rendered into the chart ConfigMap.
envVars: {}

infisical:
  enabled: false
  env: ""
  project: "huggingchat-v2-a1"
  url: ""
  resyncInterval: 60
  operatorSecretName: "huggingchat-operator-secrets"
  operatorSecretNamespace: "hub-utils"

# Allow environment injections on top of, or instead of, Infisical.
extraEnvFrom: []
extraEnv: []

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 2
  # Quoted strings on purpose; an empty string disables the metric.
  targetMemoryUtilizationPercentage: ""
  targetCPUUtilizationPercentage: ""

# Metrics are toggled via envVars.METRICS_ENABLED / METRICS_PORT; there is
# no separate monitoring section in these values.
docker-compose.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# For development only.
# Set MONGODB_URL=mongodb://localhost:27017 in .env.local to use this container.
services:
  mongo:
    image: mongo:8
    hostname: mongodb
    ports:
      # Quoted: unquoted colon-separated numbers in Compose short port syntax
      # can hit YAML's base-60 (sexagesimal) integer parsing; quoting keeps
      # the mapping a string regardless of the variable's expansion.
      - "${LOCAL_MONGO_PORT:-27017}:27017"
    command: --replSet rs0 --bind_ip_all #--setParameter notablescan=1
    mem_limit: "5g"
    mem_reservation: "3g"
    healthcheck:
      # Initializes the single-node replica set on first probe. The hostname
      # is pinned to 127.0.0.1 because the default would be the container
      # name, and the app runs outside of Docker.
      test: test $$(mongosh --quiet --eval 'try {rs.status().ok} catch(e) {rs.initiate({_id:"rs0",members:[{_id:0,host:"127.0.0.1:${LOCAL_MONGO_PORT:-27017}"}]}).ok}') -eq 1
      interval: 5s
    volumes:
      - mongodb-data:/data/db
    restart: always

volumes:
  mongodb-data: