Compare commits

..

1 Commits

Author SHA1 Message Date
Juan Calderon-Perez
a6a69a17f7
Add support for dynamic threads 2023-09-19 23:06:35 -04:00
61 changed files with 4950 additions and 6740 deletions

View File

@ -12,6 +12,7 @@ CONTRIBUTING.md
Dockerfile
docker-compose.yml
docker-compose.dev.yml
/vendor
.vscode/
**/node_modules/

View File

@ -1,15 +0,0 @@
name: Sweep Issue
title: 'Sweep: '
description: For small bugs, features, refactors, and tests to be handled by Sweep, an AI-powered junior developer.
labels: sweep
body:
- type: textarea
id: description
attributes:
label: Details
description: Tell Sweep where and what to edit and provide enough context for a new developer to the codebase
placeholder: |
Unit Tests: Write unit tests for <FILE>. Test each function in the file. Make sure to test edge cases.
Bugs: The bug might be in <FILE>. Here are the logs: ...
Features: the new endpoint should use the ... class from <FILE> because it contains ... logic.
Refactors: We are migrating this function to ... version because ...

View File

@ -13,17 +13,12 @@ categories:
- title: '📚 Documentation:'
labels:
- '📒 Documentation'
- title: '🧠 Models'
labels:
- '🧠 Models'
- title: '🧹 Updates:'
labels:
- '🧹 Updates'
- title: '🤖 Dependencies:'
labels:
- '🤖 Dependencies'
change-template: '- $TITLE (#$NUMBER)'
change-title-escapes: '\<*_&'
change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
exclude-contributors:
- dependabot
- dependabot[bot]
@ -43,7 +38,6 @@ version-resolver:
- '☢️ Bug'
- '🤖 Dependencies'
- '🧹 Updates'
- '🧠 Models'
default: patch
template: |
$CHANGES
@ -62,18 +56,15 @@ autolabeler:
- '*.md'
title:
- '/(docs|doc:|\[doc\]|typos|comment|documentation)/i'
- label: '🧠 Models'
files:
- 'api/src/serge/data/*.json'
- label: '☢️ Bug'
title:
- '/(fix|bug|missing|correct)/i'
- '/(fix|race|bug|missing|correct)/i'
- label: '🧹 Updates'
title:
- '/(improve|update|migrate|refactor|deprecated|remove|unused|test)/i'
- '/(improve|update|update|refactor|deprecated|remove|unused|test)/i'
- label: '🤖 Dependencies'
title:
- '/(bump|dependencies)/i'
- label: '✏️ Feature'
title:
- '/(feature|feat|create|implement)/i'
- '/(feature|feat|create|implement|add)/i'

View File

@ -1,4 +1,4 @@
name: CI Checks
name: CI/CD Process
on:
push:
@ -49,10 +49,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: actions/setup-python@v4
with:
python-version: "3.11"
cache: 'pip' # caching pip dependencies
- name: Install dependencies with poetry
working-directory: ./api
run: |
@ -61,15 +60,14 @@ jobs:
- name: Run unit tests
working-directory: ./api
run: |
poetry run python -m pytest -v --color=yes
poetry run python -m pytest
check-sh-files:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: luizm/action-sh-checker@v0.8.0
- uses: luizm/action-sh-checker@v0.7.0
env:
SHFMT_OPTS: "-s"
SHELLCHECK_OPTS: "-P scripts/ -e SC1091"
with:
sh_checker_only_diff: false
sh_checker_comment: false
@ -77,25 +75,24 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: actions/setup-python@v4
with:
python-version: "3.11"
cache: 'pip' # caching pip dependencies
- name: Run ruff check
uses: chartboost/ruff-action@v1
with:
src: "./api"
args: "check --verbose"
- name: Run ruff format check
uses: chartboost/ruff-action@v1
args: "--verbose"
- name: Run black check
uses: psf/black@stable
with:
options: "--check --diff --verbose"
src: "./api"
args: "format --check --verbose"
check-web-code:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
- uses: actions/setup-node@v3
with:
node-version: '20'
- name: Install Web

View File

@ -1,4 +1,4 @@
name: Docker
name: CI/CD Docker Build/Publish
on:
push:
@ -58,7 +58,7 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and Publish Docker Image
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: .
push: ${{ github.event_name != 'pull_request' }}

View File

@ -1,4 +1,4 @@
name: Helm
name: Lint and Test Helm Chart
on:
push:
@ -37,17 +37,17 @@ jobs:
fetch-depth: 0
- name: Set up Helm
uses: azure/setup-helm@v4
uses: azure/setup-helm@v3
with:
version: v3.12.3
- uses: actions/setup-python@v5
- uses: actions/setup-python@v4
with:
python-version: '3.10'
check-latest: true
- name: Set up chart-testing
uses: helm/chart-testing-action@v2.6.1
uses: helm/chart-testing-action@v2.4.0
- name: Run chart-testing (list-changed)
id: list-changed
@ -63,7 +63,7 @@ jobs:
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@v1.10.0
uses: helm/kind-action@v1.8.0
- name: Run chart-testing (install)
if: steps.list-changed.outputs.changed == 'true'

View File

@ -1,4 +1,4 @@
name: LLM Healthcheck
name: LLM Models Healthcheck
on:
push:
@ -34,7 +34,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies with poetry
@ -45,4 +45,4 @@ jobs:
- name: Run model health check
working-directory: ./api
run: |
poetry run python -m pytest -v --color=yes test/healthcheck_models.py
poetry run python -m pytest test/healthcheck_models.py

View File

@ -17,6 +17,6 @@ jobs:
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: release-drafter/release-drafter@v6
- uses: release-drafter/release-drafter@v5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

1
.gitignore vendored
View File

@ -12,4 +12,3 @@ api/static/*
**/node_modules/
**/dist
**/.mypy_cache/
.vscode

View File

@ -29,14 +29,11 @@ COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
COPY ./api /usr/src/app/api
COPY scripts/deploy.sh /usr/src/app/deploy.sh
COPY scripts/serge.env /usr/src/app/serge.env
COPY vendor/requirements.txt /usr/src/app/requirements.txt
# Install api dependencies
RUN apt-get update \
&& apt-get install -y --no-install-recommends dumb-init libgomp1 musl-dev \
&& apt-get install -y --no-install-recommends cmake build-essential dumb-init curl \
&& pip install --no-cache-dir ./api \
&& pip install -r /usr/src/app/requirements.txt \
&& apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* \
&& chmod 755 /usr/src/app/deploy.sh \
&& chmod 755 /usr/local/bin/redis-server \
@ -45,8 +42,7 @@ RUN apt-get update \
&& mkdir -p /data/db \
&& mkdir -p /usr/src/app/weights \
&& echo "appendonly yes" >> /etc/redis/redis.conf \
&& echo "dir /data/db/" >> /etc/redis/redis.conf \
&& ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
&& echo "dir /data/db/" >> /etc/redis/redis.conf
EXPOSE 8008
ENTRYPOINT ["/usr/bin/dumb-init", "--"]

View File

@ -17,15 +17,13 @@ ENV NODE_ENV='development'
# Install dependencies
RUN apt-get update \
&& apt-get install -y --no-install-recommends dumb-init musl-dev
&& apt-get install -y --no-install-recommends cmake build-essential dumb-init curl
# Copy database, source code, and scripts
COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
COPY --from=node_base /usr/local /usr/local
COPY scripts/dev.sh /usr/src/app/dev.sh
COPY scripts/serge.env /usr/src/app/serge.env
COPY vendor/requirements.txt /usr/src/app/requirements.txt
COPY ./web/package.json ./web/package-lock.json ./
RUN npm ci \
@ -36,8 +34,7 @@ RUN npm ci \
&& mkdir -p /data/db \
&& mkdir -p /usr/src/app/weights \
&& echo "appendonly yes" >> /etc/redis/redis.conf \
&& echo "dir /data/db/" >> /etc/redis/redis.conf \
&& ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
&& echo "dir /data/db/" >> /etc/redis/redis.conf
EXPOSE 8008
EXPOSE 9124

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023-present Nathan Sarrazin and Contributors
Copyright (c) 2023 Nathan Sarrazin and contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.

View File

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2023 Nathan Sarrazin and contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

154
README.md
View File

@ -3,7 +3,7 @@
![License](https://img.shields.io/github/license/serge-chat/serge)
[![Discord](https://img.shields.io/discord/1088427963801948201?label=Discord)](https://discord.gg/62Hc6FEYQH)
Serge is a chat interface crafted with [llama.cpp](https://github.com/ggerganov/llama.cpp) for running GGUF models. No API keys, entirely self-hosted!
Serge is a chat interface crafted with [llama.cpp](https://github.com/ggerganov/llama.cpp) for running Alpaca models. No API keys, entirely self-hosted!
- 🌐 **SvelteKit** frontend
- 💾 **[Redis](https://github.com/redis/redis)** for storing chat history & parameters
@ -43,24 +43,13 @@ volumes:
datadb:
```
Then, just visit http://localhost:8008. You can find the API documentation at http://localhost:8008/api/docs
Then, just visit http://localhost:8008/. You can find the API documentation at http://localhost:8008/api/docs
### 🌍 Environment Variables
## 🖥️ Windows Setup
The following Environment Variables are available:
Ensure you have Docker Desktop installed, WSL2 configured, and enough free RAM to run models.
| Variable Name | Description | Default Value |
|-----------------------|---------------------------------------------------------|--------------------------------------|
| `SERGE_DATABASE_URL` | Database connection string | `sqlite:////data/db/sql_app.db` |
| `SERGE_JWT_SECRET` | Key for auth token encryption. Use a random string | `uF7FGN5uzfGdFiPzR` |
| `SERGE_SESSION_EXPIRY`| Duration in minutes before a user must reauthenticate | `60` |
| `NODE_ENV` | Node.js running environment | `production` |
## 🖥️ Windows
Ensure you have Docker Desktop installed, WSL2 configured, and enough free RAM to run models.
## ☁️ Kubernetes
## ☁️ Kubernetes & Docker Compose Setup
Instructions for setting up Serge on Kubernetes can be found in the [wiki](https://github.com/serge-chat/serge/wiki/Integrating-Serge-in-your-orchestration#kubernetes-example).
@ -68,57 +57,79 @@ Instructions for setting up Serge on Kubernetes can be found in the [wiki](https
| Category | Models |
|:-------------:|:-------|
| **Alfred** | 40B-1023 |
| **BioMistral** | 7B |
| **Code** | 13B, 33B |
| **CodeLLaMA** | 7B, 7B-Instruct, 7B-Python, 13B, 13B-Instruct, 13B-Python, 34B, 34B-Instruct, 34B-Python |
| **Codestral** | 22B v0.1 |
| **Gemma** | 2B, 1.1-2B-Instruct, 7B, 1.1-7B-Instruct |
| **Gorilla** | Falcon-7B-HF-v0, 7B-HF-v1, Openfunctions-v1, Openfunctions-v2 |
| **Falcon** | 7B, 7B-Instruct, 40B, 40B-Instruct |
| **LLaMA 2** | 7B, 7B-Chat, 7B-Coder, 13B, 13B-Chat, 70B, 70B-Chat, 70B-OASST |
| **LLaMA 3** | 11B-Instruct, 13B-Instruct, 16B-Instruct |
| **LLaMA Pro** | 8B, 8B-Instruct |
| **Med42** | 70B |
| **Medalpaca** | 13B |
| **Medicine** | Chat, LLM |
| **Meditron** | 7B, 7B-Chat, 70B |
| **Meta-LlaMA-3** | 8B, 8B-Instruct, 70B, 70B-Instruct |
| **Mistral** | 7B-V0.1, 7B-Instruct-v0.2, 7B-OpenOrca |
| **MistralLite** | 7B |
| **Mixtral** | 8x7B-v0.1, 8x7B-Dolphin-2.7, 8x7B-Instruct-v0.1 |
| **Neural-Chat** | 7B-v3.3 |
| **Notus** | 7B-v1 |
| **Notux** | 8x7b-v1 |
| **Nous-Hermes 2** | Mistral-7B-DPO, Mixtral-8x7B-DPO, Mistral-8x7B-SFT |
| **OpenChat** | 7B-v3.5-1210 |
| **OpenCodeInterpreter** | DS-6.7B, DS-33B, CL-7B, CL-13B, CL-70B |
| **OpenLLaMA** | 3B-v2, 7B-v2, 13B-v2 |
| **Orca 2** | 7B, 13B |
| **Phi 2** | 2.7B |
| **Phi 3** | mini-4k-instruct, medium-4k-instruct, medium-128k-instruct |
| **Python Code** | 13B, 33B |
| **PsyMedRP** | 13B-v1, 20B-v1 |
| **Starling LM** | 7B-Alpha |
| **SOLAR** | 10.7B-v1.0, 10.7B-instruct-v1.0 |
| **TinyLlama** | 1.1B |
| **Vicuna** | 7B-v1.5, 13B-v1.5, 33B-v1.3, 33B-Coder |
| **WizardLM** | 2-7B, 13B-v1.2, 70B-v1.0 |
| **Zephyr** | 3B, 7B-Alpha, 7B-Beta |
| **Alpaca 🦙** | Alpaca-LoRA-65B, GPT4-Alpaca-LoRA-30B |
| **Chronos 🌑**| Chronos-13B, Chronos-33B, Chronos-Hermes-13B |
| **GPT4All 🌍**| GPT4All-13B |
| **Koala 🐨** | Koala-7B, Koala-13B |
| **LLaMA 🦙** | FinLLaMA-33B, LLaMA-Supercot-30B, LLaMA2 7B, LLaMA2 13B, LLaMA2 70B |
| **Lazarus 💀**| Lazarus-30B |
| **Nous 🧠** | Nous-Hermes-13B |
| **OpenAssistant 🎙️** | OpenAssistant-30B |
| **Orca 🐬** | Orca-Mini-v2-7B, Orca-Mini-v2-13B, OpenOrca-Preview1-13B |
| **Samantha 👩**| Samantha-7B, Samantha-13B, Samantha-33B |
| **Vicuna 🦙** | Stable-Vicuna-13B, Vicuna-CoT-7B, Vicuna-CoT-13B, Vicuna-v1.1-7B, Vicuna-v1.1-13B, VicUnlocked-30B, VicUnlocked-65B |
| **Wizard 🧙** | Wizard-Mega-13B, WizardLM-Uncensored-7B, WizardLM-Uncensored-13B, WizardLM-Uncensored-30B, WizardCoder-Python-13B-V1.0 |
Additional models can be requested by opening a GitHub issue. Other models are also available at [Serge Models](https://github.com/Smartappli/serge-models).
Additional weights can be added to the `serge_weights` volume using `docker cp`:
```bash
docker cp ./my_weight.bin serge:/usr/src/app/weights/
```
## ⚠️ Memory Usage
LLaMA will crash if you don't have enough available memory for the model
LLaMA will crash if you don't have enough available memory for the model:
| Model | Max RAM Required |
|-------------|------------------|
| 7B | 4.5GB |
| 7B-q2_K | 5.37GB |
| 7B-q3_K_L | 6.10GB |
| 7B-q4_1 | 6.71GB |
| 7B-q4_K_M | 6.58GB |
| 7B-q5_1 | 7.56GB |
| 7B-q5_K_M | 7.28GB |
| 7B-q6_K | 8.03GB |
| 7B-q8_0 | 9.66GB |
| 13B | 12GB |
| 13B-q2_K | 8.01GB |
| 13B-q3_K_L | 9.43GB |
| 13B-q4_1 | 10.64GB |
| 13B-q4_K_M | 10.37GB |
| 13B-q5_1 | 12.26GB |
| 13B-q5_K_M | 11.73GB |
| 13B-q6_K | 13.18GB |
| 13B-q8_0 | 16.33GB |
| 33B | 20GB |
| 33B-q2_K | 16.21GB |
| 33B-q3_K_L | 19.78GB |
| 33B-q4_1 | 22.83GB |
| 33B-q4_K_M | 22.12GB |
| 33B-q5_1 | 26.90GB |
| 33B-q5_K_M | 25.55GB |
| 33B-q6_K | 29.19GB |
| 33B-q8_0 | 37.06GB |
| 65B | 50GB |
| 65B-q2_K | 29.95GB |
| 65B-q3_K_L | 37.15GB |
| 65B-q4_1 | 43.31GB |
| 65B-q4_K_M | 41.85GB |
| 65B-q5_1 | 51.47GB |
| 65B-q5_K_M | 48.74GB |
| 65B-q6_K | 56.06GB |
| 65B-q8_0 | 71.87GB |
## 💬 Support
Need help? Join our [Discord](https://discord.gg/62Hc6FEYQH)
## ⭐️ Stargazers
<img src="https://starchart.cc/serge-chat/serge.svg" alt="Stargazers over time" style="max-width: 100%">
## 🧾 License
[Nathan Sarrazin](https://github.com/nsarrazin) and [Contributors](https://github.com/serge-chat/serge/graphs/contributors). `Serge` is free and open-source software licensed under the [MIT License](https://github.com/serge-chat/serge/blob/main/LICENSE-MIT) and [Apache-2.0](https://github.com/serge-chat/serge/blob/main/LICENSE-APACHE).
[Nathan Sarrazin](https://github.com/nsarrazin) and [Contributors](https://github.com/serge-chat/serge/graphs/contributors). `Serge` is free and open-source software licensed under the [MIT License](https://github.com/serge-chat/serge/blob/master/LICENSE).
## 🤝 Contributing
@ -127,32 +138,5 @@ If you discover a bug or have a feature idea, feel free to open an issue or PR.
To run Serge in development mode:
```bash
git clone https://github.com/serge-chat/serge.git
cd serge/
docker compose -f docker-compose.dev.yml up --build
```
The solution will accept a Python debugger session on port 5678. Example `launch.json` for VS Code:
```json
{
"version": "0.2.0",
"configurations": [
{
"name": "Remote Debug",
"type": "python",
"request": "attach",
"connect": {
"host": "localhost",
"port": 5678
},
"pathMappings": [
{
"localRoot": "${workspaceFolder}/api",
"remoteRoot": "/usr/src/app/api/"
}
],
"justMyCode": false
}
]
}
```
docker compose -f docker-compose.dev.yml up -d --build
```

View File

@ -1,2 +1 @@
./weights/*.bin**
./weights/*.gguf**
./weights/*.bin**

4
api/.gitignore vendored
View File

@ -157,6 +157,4 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.db
#.idea/

2615
api/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -21,29 +21,53 @@ requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.dependencies]
python=">=3.10,<4.0"
python=">=3.9,<4.0"
asyncio = "^3.4.3"
packaging = "^24.1"
pydantic = "^1.10.17"
sse-starlette = "^1.8.2"
packaging = "^23.1"
pydantic = "^1.10.12"
python-dotenv = "^1.0.0"
python-multipart = "^0.0.6"
pyyaml = "^6.0"
rfc3986 = "^2.0.0"
sentencepiece = "^0.1.99"
sniffio = "^1.3.0"
sse-starlette = "^1.6.5"
starlette = "^0.26.1"
typing-extensions = "^4.12.2"
urllib3 = "^2.2.2"
toml = "^0.10.2"
tqdm = "^4.66.1"
typing-extensions = "^4.8.0"
ujson = "^5.8.0"
urllib3 = "^2.0.4"
uvicorn = "^0.23.2"
uvloop = "^0.17.0"
watchfiles = "^0.20.0"
websockets = "^11.0"
anyio = "^4.0.0"
certifi = "^2023.7.22"
charset-normalizer = "^3.2.0"
click = "^8.1.7"
email-validator = "^2.0.0"
fastapi = "^0.95.1"
huggingface-hub = "^0.24.5"
requests = "^2.32.3"
filelock = "^3.12.4"
h11 = "^0.14.0"
httpcore = "^0.18.0"
httptools = "^0.6.0"
huggingface-hub = "^0.16.4"
idna = "^3.4"
itsdangerous = "^2.1.2"
jinja2 = "^3.1.2"
markupsafe = "^2.1.3"
motor = "^3.3.1"
orjson = "^3.9.7"
dnspython = "^2.4.2"
lazy-model = "^0.2.0"
requests = "^2.31.0"
numpy = "^1.25.2"
langchain = "^0.0.180"
loguru = "^0.7.2"
redis = {extras = ["hiredis"], version = "^5.0.8"}
pytest = "^8.3.2"
hypercorn = {extras = ["trio"], version = "^0.17.3"}
redis = {extras = ["hiredis"], version = "^5.0.0"}
pytest = "^7.4.2"
pyjwt = "^2.9.0"
python-jose = {extras = ["cryptography"], version = "^3.3.0"}
aiofiles = "^24.1.0"
python-multipart = "^0.0.9"
debugpy = "^1.8.5"
sqlalchemy = "^2.0.32"
[tool.ruff]
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
select = ["E", "F"]
@ -94,3 +118,6 @@ target-version = "py311"
# Unlike Flake8, default to a complexity level of 10.
max-complexity = 10
[tool.black]
line-length = 150
target-version = ['py311']

View File

@ -1,110 +0,0 @@
import logging
import uuid
from typing import List, Optional
from serge.schema import user as user_schema
from serge.utils.security import get_password_hash
from sqlalchemy.orm import Session
from serge.models import user as user_model
def get_user(db: Session, username: str) -> Optional[user_schema.User]:
    """Fetch the user whose username equals ``username``, with auth entries.

    The first row matching the username filter is handed to
    ``Mappers.user_db_to_view`` with ``include_auth=True``; a falsy row is
    mapped to ``None`` by that helper.
    """
    db_row = db.query(user_model.User).filter(user_model.User.username == username).first()
    return Mappers.user_db_to_view(db_row, include_auth=True)
def get_user_by_email(db: Session, email: str) -> Optional[user_schema.User]:
    """Fetch the user whose email equals ``email`` as a view model.

    Unlike ``get_user``, the mapper is called with its default
    ``include_auth`` value, so auth entries are not requested.
    """
    db_row = db.query(user_model.User).filter(user_model.User.email == email).first()
    return Mappers.user_db_to_view(db_row)
def get_users(db: Session, skip: int = 0, limit: int = 100) -> List[user_schema.User]:
    """Return one page of users as view models.

    ``skip`` is passed to the query's ``offset`` and ``limit`` to its
    ``limit``; each resulting row is converted with
    ``Mappers.user_db_to_view`` (without auth entries).
    """
    page = db.query(user_model.User).offset(skip).limit(limit).all()
    views = []
    for db_row in page:
        views.append(Mappers.user_db_to_view(db_row))
    return views
def create_user(db: Session, ua: user_schema.UserAuth) -> Optional[user_schema.User]:
    """Create a user from the credentials in ``ua`` and return its view model.

    Returns ``None`` without touching the database when a user with
    ``ua.username`` already exists (an error is logged) or when
    ``ua.auth_type`` is anything other than ``1``.

    Side effect: for auth type ``1`` the caller's ``ua.secret`` is replaced in
    place by the result of ``get_password_hash``.
    """
    # Refuse duplicates up front.
    if get_user(db, ua.username):
        logging.error(f"Tried to create new user, but already exists: {ua.username}")
        return None

    if ua.auth_type == 1:
        ua.secret = get_password_hash(ua.secret)
    else:  # Todo: More auth types
        return None

    new_user, new_auth = Mappers.user_view_to_db(None, ua)
    # The auth record is added before the user record, then both are committed together.
    for record in (new_auth, new_user):
        db.add(record)
    db.commit()
    return Mappers.user_db_to_view(new_user)
# Copy the fields of the view model `u` onto the stored user that has the same
# username, commit, and return that stored row. Returns None when no such user
# is found.
# NOTE(review): the value returned is the row obtained from
# db.query(user_model.User), not a user_schema.User built through
# Mappers.user_db_to_view as create_user does, although the annotation says
# Optional[user_schema.User] -- confirm which one callers expect.
def update_user(db: Session, u: user_schema.User) -> Optional[user_schema.User]:
user = db.query(user_model.User).filter(user_model.User.username == u.username).first()
if not user:
return None
# Every key of u.dict() is set on the row except "auth" and "chats".
for k, v in u.dict().items():
if k in ["auth", "chats"]:
continue
setattr(user, k, v)
# NOTE(review): indentation is lost in this view, so it cannot be told from
# here whether the commit runs once after the loop or once per field.
db.commit()
return user
def create_chat(db: Session, chat: user_schema.Chat):
    """Persist a chat record carrying the owner and chat id of ``chat``.

    The new ``user_model.Chat`` is added to the session and committed; nothing
    is returned.
    """
    db.add(user_model.Chat(owner=chat.owner, chat_id=chat.chat_id))
    db.commit()
def remove_chat(db: Session, chat: user_schema.Chat):
    """Delete the stored chat whose ``chat_id`` equals ``chat.chat_id``.

    The row is fetched with ``.one()``, deleted from the session and the
    deletion is committed; nothing is returned.
    """
    matching = db.query(user_model.Chat).filter(user_model.Chat.chat_id == chat.chat_id)
    stored_chat = matching.one()
    db.delete(stored_chat)
    db.commit()
# Static helpers that convert between the database models (user_model.*) and
# the view/schema models (user_schema.*).
# NOTE(review): indentation is lost in this view, so the nesting of a few
# statements below cannot be confirmed; those spots are marked.
class Mappers:
# Build a user_schema.User from a database row. A falsy row yields None.
# The schema object is constructed from the row's __dict__ entries whose key
# does not start with "_" and is not "chats" or "auth"; its auth and chats
# lists are then filled from `auths` and `chats`.
@staticmethod
def user_db_to_view(u: user_model.User, include_auth=False) -> user_schema.User:
if not u:
return None
# Both names start out bound to the same empty list; they are only rebound
# below, never mutated.
auths = chats = []
if include_auth:
auths = u.auth
# u.auth = []
# NOTE(review): cannot tell from here whether this assignment is inside the
# `if include_auth` branch or runs unconditionally -- verify against the file.
chats = u.chats
# u.chats = []
app_user = user_schema.User(**{k: v for k, v in u.__dict__.items() if not k.startswith("_") and k not in ["chats", "auth"]})
app_user.auth = [user_schema.UserAuth(username=u.username, secret=x.secret, auth_type=x.auth_type) for x in auths]
app_user.chats = [user_schema.Chat(chat_id=x.chat_id, owner=x.owner) for x in chats]
return app_user
# Build the database user (and, when `ua` is given, its auth record) from the
# view models. At least one of `u` / `ua` must be truthy (enforced by assert).
# Returns the pair (user, auth).
# NOTE(review): the return annotation is a tuple expression rather than a
# typing construct, and when `ua` is falsy the second element returned is the
# empty list assigned below, not None as Optional[...] suggests.
@staticmethod
def user_view_to_db(
u: Optional[user_schema.User] = None, ua: Optional[user_schema.UserAuth] = None
) -> (user_model.User, Optional[user_model.UserAuth]):
assert u or ua, "One of User or UserAuth must be passed"
if not u: # Creating a new user
# A fresh id is generated with uuid4; the username is taken from `ua`.
u = user_schema.User(id=uuid.uuid4(), username=ua.username)
auth = []
if ua:
auth = Mappers.user_auth_view_to_db(ua, u.id)
user = user_model.User(**u.dict())
if auth:
user.auth.append(auth)
# Each chat on the view model becomes a user_model.Chat built from its
# chat_id only.
# NOTE(review): presumably this loop sits outside the `if auth` branch -- confirm.
for chat in u.chats:
user.chats.append(user_model.Chat(chat_id=chat.chat_id))
return (user, auth)
# Build a user_model.UserAuth carrying the secret and auth type of `ua` for
# the given user id. A falsy `ua` yields None.
@staticmethod
def user_auth_view_to_db(ua: user_schema.UserAuth, user_id: uuid.UUID) -> user_model.UserAuth:
if not ua:
return None
return user_model.UserAuth(secret=ua.secret, auth_type=ua.auth_type, user_id=user_id)

File diff suppressed because it is too large Load Diff

View File

@ -1,31 +0,0 @@
import logging
import uuid
from serge.models.settings import Settings
from serge.models.user import User, UserAuth
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
settings = Settings()
engine = create_engine(settings.SERGE_DATABASE_URL, connect_args={"check_same_thread": False})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
def seed_db(db: Session):
    """Insert the default ``system`` user unless one already exists.

    When no user named ``system`` is found, a new one is created with a single
    auth entry (empty secret, ``auth_type=0``), the session is committed and the
    creation is logged. When it already exists the function does nothing.
    """
    already_seeded = db.query(User).filter(User.username == "system").first()
    if not already_seeded:
        db.add(
            User(
                id=uuid.uuid4(),
                username="system",
                email="",
                full_name="Default User",
                theme_light=False,
                default_prompt="Below is an instruction that describes a task. Write a response that appropriately completes the request.",
                is_active=True,
                auth=[UserAuth(secret="", auth_type=0)],
            )
        )
        db.commit()
        logging.info("System user created")

View File

@ -5,21 +5,18 @@ from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from loguru import logger
from serge.database import SessionLocal, engine, seed_db
from starlette.responses import FileResponse
from serge.models.settings import Settings
from serge.routers.auth import auth_router
from serge.routers.chat import chat_router
from serge.routers.model import model_router
from serge.routers.ping import ping_router
from serge.routers.user import user_router
from starlette.responses import FileResponse
from serge.models import user as user_models
from serge.utils.convert import convert_all
# Configure logging settings
# Define a logger for the current module
logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
logger.add(sys.stderr, format="{time} {level} {message}", level="DEBUG")
settings = Settings()
@ -45,17 +42,12 @@ origins = [
"http://localhost:9124",
]
# Seed the database
user_models.Base.metadata.create_all(bind=engine)
app = FastAPI(title="Serge", version="0.0.1", description=description, tags_metadata=tags_metadata)
api_app = FastAPI(title="Serge API")
api_app.include_router(chat_router)
api_app.include_router(ping_router)
api_app.include_router(model_router)
api_app.include_router(auth_router)
api_app.include_router(user_router)
app.mount("/api", api_app)
# handle serving the frontend as static files in production
@ -92,8 +84,8 @@ async def start_database():
for file in files:
os.remove(WEIGHTS + file)
db = SessionLocal()
seed_db(db)
logger.info("initializing models")
convert_all("/usr/src/app/weights/", "/usr/src/app/weights/tokenizer.model")
app.add_middleware(

View File

@ -14,7 +14,6 @@ class ChatParameters(BaseModel):
# logits_all: bool
# vocab_only: bool
# use_mlock: bool
n_threads: int
# n_batch: int
last_n_tokens_size: int
max_tokens: int
@ -32,5 +31,5 @@ class ChatParameters(BaseModel):
class Chat(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
created: datetime = Field(default_factory=datetime.now)
owner: str = Field("system")
params: ChatParameters

View File

@ -1,13 +1,8 @@
from os import getenv
from pydantic import BaseSettings
# Application configuration. Each getenv(...) default below is evaluated once, when the class
# body runs.
class Settings(BaseSettings):
# Database URL; the fallback is a SQLite file under /data/db.
SERGE_DATABASE_URL: str = getenv("SERGE_DATABASE_URL", "sqlite:////data/db/sql_app.db")
NODE_ENV: str = "development"
# NOTE(review): the fallback JWT secret is a constant committed to the source tree; any
# deployment that does not set SERGE_JWT_SECRET uses this publicly known value.
SERGE_JWT_SECRET: str = getenv("SERGE_JWT_SECRET", "uF7FGN5uzfGdFiPzR")
# The auth router passes this value to timedelta(minutes=...), i.e. it is a number of minutes.
# NOTE(review): getenv returns a str when the variable is set, although the field is typed int.
SERGE_SESSION_EXPIRY: int = getenv("SERGE_SESSION_EXPIRY", 60)
class Config:
orm_mode = True

View File

@ -1,40 +0,0 @@
from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, Uuid
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
Base = declarative_base()
# ORM mapping for the "users" table.
class User(Base):
__tablename__ = "users"
id = Column(Uuid, primary_key=True)
# Unique and indexed; Chat.owner references this column (not id) as its foreign key.
username = Column(String, unique=True, index=True)
email = Column(String)
full_name = Column(String)
theme_light = Column(Boolean)
default_prompt = Column(String)
is_active = Column(Boolean, default=True)
# Both relationships are bidirectional (back_populates="user") and use the "joined" loader
# strategy.
auth = relationship("UserAuth", back_populates="user", lazy="joined")
chats = relationship("Chat", back_populates="user", lazy="joined")
# ORM mapping for the "chats" table: records which user owns which chat id.
class Chat(Base):
__tablename__ = "chats"
# Surrogate integer key; chat_id below is the identifier other code looks chats up by.
id = Column(Integer, primary_key=True)
chat_id = Column(String, index=True)
# Foreign key to users.username (the owner's name, not the user's UUID).
owner = Column(String, ForeignKey("users.username"))
user = relationship("User", back_populates="chats")
# ORM mapping for the "auth" table: one row per authentication method of a user.
class UserAuth(Base):
__tablename__ = "auth"
id = Column(Integer, primary_key=True)
secret = Column(String)
# Integer code for the authentication method; the auth router treats 0 as passwordless and
# 1 as password-based.
auth_type = Column(Integer)
user_id = Column(Uuid, ForeignKey("users.id"))
user = relationship("User", back_populates="auth")

View File

@ -1,108 +0,0 @@
import logging
from datetime import timedelta
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
from jose import JWTError
from serge.crud import get_user
from serge.database import SessionLocal
from serge.schema.user import Token, User
from serge.models.settings import Settings
from serge.utils.security import create_access_token, decode_access_token, verify_password
from sqlalchemy.orm import Session
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
settings = Settings()
auth_router = APIRouter(
prefix="/auth",
tags=["auth"],
)
def get_db():
    """Yield a new ``SessionLocal`` session and close it when the consumer is finished.

    Written as a generator so it can be passed to ``Depends``; the ``finally``
    clause guarantees the session is closed even if the consumer raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
def authenticate_user(username: str, password: str, db: Session) -> Optional[User]:
    """Check the supplied credentials and return the matching user.

    Args:
        username: Name of the account to look up.
        password: Plain-text password supplied by the caller.
        db: Database session used for the lookup.

    Returns:
        The user when authentication succeeds, otherwise ``None``. Every failure
        path now returns ``None`` (previously one returned ``False``), which
        matches the ``Optional[User]`` annotation and stays falsy for callers.
    """
    user = get_user(db, username)
    if not user:
        return None

    # Users may have multiple ways to authenticate
    auth_types = {a.auth_type for a in user.auth}

    if 0 in auth_types:  # Default user, passwordless
        return user

    if 1 in auth_types:  # Password auth
        password_auth = next(a for a in user.auth if a.auth_type == 1)
        if verify_password(password, password_auth.secret):
            return user

    # auth_type 2 and above are reserved for future authentication methods.
    return None
@auth_router.post("/token", response_model=Token)
async def login_for_access_token(
    response: Response,
    form_data: OAuth2PasswordRequestForm = Depends(),
    db: Session = Depends(get_db),
):
    """Exchange form credentials for an access token.

    Responds with 401 when ``authenticate_user`` rejects the credentials.
    Otherwise a token whose subject is the username is created with a lifetime of
    ``settings.SERGE_SESSION_EXPIRY`` minutes, stored in the ``token`` cookie
    (HTTP-only, secure, SameSite=strict) and also returned in the body.
    """
    account = authenticate_user(form_data.username, form_data.password, db)
    if not account:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    lifetime = timedelta(minutes=settings.SERGE_SESSION_EXPIRY)
    jwt_token = create_access_token(data={"sub": account.username}, expires_delta=lifetime)
    response.set_cookie(key="token", value=jwt_token, httponly=True, secure=True, samesite="strict")
    return {"access_token": jwt_token, "token_type": "bearer"}
@auth_router.post("/logout")
async def logout(response: Response):
    """Log the caller out by deleting the ``token`` cookie on the outgoing response."""
    response.delete_cookie(key="token")
    confirmation = {"message": "Logged out successfully"}
    return confirmation
async def get_current_user(token: str = Depends(oauth2_scheme), db: Session = Depends(get_db)) -> User:
    """Resolve an access token to the user it was issued for.

    Raises a 401 ``HTTPException`` when the token cannot be decoded, carries no
    username, or names a user that does not exist.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    try:
        username = decode_access_token(token)
    except JWTError as e:
        logging.exception(e)
        raise credentials_exception
    if username is None:
        raise credentials_exception

    found = get_user(db, username)
    if found is None:
        raise credentials_exception
    return found
async def get_current_active_user(request: Request, response: Response, db: Session = Depends(get_db)) -> User:
    """Return the user identified by the ``token`` cookie, defaulting to ``system``.

    With no cookie the ``system`` user is returned directly. With a cookie that
    fails validation the cookie is cleared via ``logout`` and the ``system`` user
    is returned instead.
    """
    cookie_token = request.cookies.get("token")
    if cookie_token:
        try:
            return await get_current_user(cookie_token, db)
        except HTTPException:
            await logout(response)
    return get_user(db, "system")

View File

@ -1,60 +1,25 @@
import os
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status
from typing import Optional
from fastapi import APIRouter
from langchain.memory import RedisChatMessageHistory
from langchain.schema import AIMessage, HumanMessage, SystemMessage, messages_to_dict
from langchain.schema import SystemMessage, messages_to_dict, AIMessage, HumanMessage
from llama_cpp import Llama
from loguru import logger
from redis import Redis
from serge.crud import create_chat, remove_chat, update_user
from serge.database import SessionLocal
from serge.models.chat import Chat, ChatParameters
from serge.routers.auth import get_current_active_user
from serge.schema.user import Chat as UserChat
from serge.schema.user import User
from serge.utils.stream import get_prompt
from sqlalchemy.orm import Session
from sse_starlette.sse import EventSourceResponse
from serge.models.chat import Chat, ChatParameters
from serge.utils.stream import get_prompt
chat_router = APIRouter(
prefix="/chat",
tags=["chat"],
)
unauth_error = HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Unauthorized",
headers={"WWW-Authenticate": "Bearer"},
)
def get_db():
    """Generator dependency: hand out one ``SessionLocal`` session, then close it."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
def _try_get_chat(client, chat_id):
    """Load and parse the chat stored under ``chat:<chat_id>``.

    Raises ``ValueError`` when ``chat_id`` is not a member of the ``chats`` set.
    """
    if client.sismember("chats", chat_id):
        chat = Chat.parse_raw(client.get(f"chat:{chat_id}"))
        # backwards compat
        if not hasattr(chat, "owner"):
            chat.owner = "system"
        return chat
    raise ValueError("Chat does not exist")
@chat_router.post("/")
async def create_new_chat(
u: User = Depends(get_current_active_user),
db: Session = Depends(get_db),
model: str = "7B",
temperature: float = 0.1,
top_k: int = 50,
@ -65,12 +30,17 @@ async def create_new_chat(
repeat_last_n: int = 64,
repeat_penalty: float = 1.3,
init_prompt: str = "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
n_threads: int = 4,
):
if not os.path.exists(f"/usr/src/app/weights/{model}.bin"):
raise ValueError(f"Model can't be found: /usr/src/app/weights/{model}.bin")
try:
client = Llama(
model_path="/usr/src/app/weights/" + model + ".bin",
)
del client
except Exception as exc:
raise ValueError(f"Model can't be found: {exc}")
client = Redis(host="localhost", port=6379, decode_responses=False)
logger.info(f"Connected to Redis? {client.ping()}")
params = ChatParameters(
model_path=model,
@ -82,20 +52,15 @@ async def create_new_chat(
n_gpu_layers=gpu_layers,
last_n_tokens_size=repeat_last_n,
repeat_penalty=repeat_penalty,
n_threads=n_threads,
n_threads=len(os.sched_getaffinity(0)),
init_prompt=init_prompt,
)
# create the chat
chat = Chat(owner=u.username, params=params)
chat = Chat(params=params)
# store the parameters
client.set(f"chat:{chat.id}", chat.json())
uc = UserChat(chat_id=chat.id, owner=u.username)
create_chat(db, uc)
u.chats.append(uc)
update_user(db, u)
# create the message history
history = RedisChatMessageHistory(chat.id)
history.append(SystemMessage(content=init_prompt))
@ -107,11 +72,15 @@ async def create_new_chat(
@chat_router.get("/")
async def get_all_chats(u: User = Depends(get_current_active_user)):
async def get_all_chats():
res = []
client = Redis(host="localhost", port=6379, decode_responses=False)
logger.info(f"Connected to Redis? {client.ping()}")
ids = client.smembers("chats")
chats = sorted(
[await get_specific_chat(x.chat_id, u) for x in u.chats],
[await get_specific_chat(id.decode()) for id in ids],
key=lambda x: x["created"],
reverse=True,
)
@ -134,39 +103,47 @@ async def get_all_chats(u: User = Depends(get_current_active_user)):
@chat_router.get("/{chat_id}")
async def get_specific_chat(chat_id: str, u: User = Depends(get_current_active_user)):
async def get_specific_chat(chat_id: str):
client = Redis(host="localhost", port=6379, decode_responses=False)
logger.info(f"Connected to Redis? {client.ping()}")
if chat_id not in [x.chat_id for x in u.chats]:
raise unauth_error
if not client.sismember("chats", chat_id):
raise ValueError("Chat does not exist")
chat = _try_get_chat(client, chat_id)
chat_raw = client.get(f"chat:{chat_id}")
chat = Chat.parse_raw(chat_raw)
history = RedisChatMessageHistory(chat.id)
chat_dict = chat.dict()
chat_dict["history"] = messages_to_dict(history.messages)
return chat_dict
@chat_router.get("/{chat_id}/history")
async def get_chat_history(chat_id: str, u: User = Depends(get_current_active_user)):
if chat_id not in [x.chat_id for x in u.chats]:
raise unauth_error
async def get_chat_history(chat_id: str):
client = Redis(host="localhost", port=6379, decode_responses=False)
logger.info(f"Connected to Redis? {client.ping()}")
if not client.sismember("chats", chat_id):
raise ValueError("Chat does not exist")
history = RedisChatMessageHistory(chat_id)
return messages_to_dict(history.messages)
@chat_router.delete("/{chat_id}/prompt")
async def delete_prompt(chat_id: str, idx: int, u: User = Depends(get_current_active_user)):
if chat_id not in [x.chat_id for x in u.chats]:
raise unauth_error
async def delete_prompt(chat_id: str, idx: int):
client = Redis(host="localhost", port=6379, decode_responses=False)
logger.info(f"Connected to Redis? {client.ping()}")
if not client.sismember("chats", chat_id):
raise ValueError("Chat does not exist")
history = RedisChatMessageHistory(chat_id)
if idx >= len(history.messages):
logger.error("Unable to delete message, chat in progress")
raise HTTPException(status_code=202, detail="Unable to delete message, chat in progress")
raise ValueError("Index out of range")
messages = history.messages.copy()[:idx]
history.clear()
@ -178,17 +155,13 @@ async def delete_prompt(chat_id: str, idx: int, u: User = Depends(get_current_ac
@chat_router.delete("/{chat_id}")
async def delete_chat(chat_id: str, u: User = Depends(get_current_active_user), db: Session = Depends(get_db)):
async def delete_chat(chat_id: str):
client = Redis(host="localhost", port=6379, decode_responses=False)
if chat_id not in [x.chat_id for x in u.chats]:
raise unauth_error
logger.info(f"Connected to Redis? {client.ping()}")
if not client.sismember("chats", chat_id):
raise ValueError("Chat does not exist")
if cid := next((x for x in u.chats if x.chat_id == chat_id), None):
remove_chat(db, cid)
RedisChatMessageHistory(chat_id).clear()
client.delete(f"chat:{chat_id}")
@ -198,25 +171,28 @@ async def delete_chat(chat_id: str, u: User = Depends(get_current_active_user),
@chat_router.delete("/delete/all")
async def delete_all_chats(u: User = Depends(get_current_active_user), db: Session = Depends(get_db)):
[delete_chat(x.chat_id, u, db) for x in u.chats]
async def delete_all_chats():
client = Redis(host="localhost", port=6379, decode_responses=False)
logger.info(f"Connected to Redis? {client.ping()}")
client.flushdb()
client.flushall()
return True
@chat_router.get("/{chat_id}/question")
async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_current_active_user)):
if chat_id not in [x.chat_id for x in u.chats]:
raise unauth_error
def stream_ask_a_question(chat_id: str, prompt: str):
logger.info("Starting redis client")
client = Redis(host="localhost", port=6379, decode_responses=False)
logger.info(f"Connected to Redis? {client.ping()}")
if not client.sismember("chats", chat_id):
raise ValueError("Chat does not exist")
logger.debug("creating chat")
chat = _try_get_chat(client, chat_id)
chat_raw = client.get(f"chat:{chat_id}")
chat = Chat.parse_raw(chat_raw)
logger.debug(chat.params)
logger.debug("creating history")
@ -232,7 +208,7 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get
logger.debug("creating Llama client")
try:
client = Llama(
model_path=f"/usr/src/app/weights/{chat.params.model_path}.bin",
model_path="/usr/src/app/weights/" + chat.params.model_path + ".bin",
n_ctx=len(chat.params.init_prompt) + chat.params.n_ctx,
n_gpu_layers=chat.params.n_gpu_layers,
n_threads=chat.params.n_threads,
@ -262,7 +238,7 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get
yield {"event": "message", "data": txt}
except Exception as e:
if type(e) is UnicodeDecodeError:
if type(e) == UnicodeDecodeError:
pass
else:
error = e.__str__()
@ -281,16 +257,16 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get
@chat_router.post("/{chat_id}/question")
async def ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_current_active_user)):
if chat_id not in [x.chat_id for x in u.chats]:
raise unauth_error
async def ask_a_question(chat_id: str, prompt: str):
client = Redis(host="localhost", port=6379, decode_responses=False)
logger.info(f"Connected to Redis? {client.ping()}")
if not client.sismember("chats", chat_id):
raise ValueError("Chat does not exist")
chat = _try_get_chat(client, chat_id)
chat_raw = client.get(f"chat:{chat_id}")
chat = Chat.parse_raw(chat_raw)
history = RedisChatMessageHistory(chat.id)
if len(prompt) > 0:
@ -302,7 +278,7 @@ async def ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_curren
try:
client = Llama(
model_path=f"/usr/src/app/weights/{chat.params.model_path}.bin",
model_path="/usr/src/app/weights/" + chat.params.model_path + ".bin",
n_ctx=len(chat.params.init_prompt) + chat.params.n_ctx,
n_threads=chat.params.n_threads,
n_gpu_layers=chat.params.n_gpu_layers,

View File

@ -1,12 +1,14 @@
import asyncio
import os
import shutil
import urllib.request
import requests
import huggingface_hub
import aiohttp
from typing import Annotated
from fastapi import APIRouter, HTTPException
from huggingface_hub import hf_hub_url
from fastapi import APIRouter, HTTPException, Form
from serge.models.models import Families
from serge.utils.convert import convert_one_file
from serge.utils.migrate import migrate
from pathlib import Path
@ -15,8 +17,6 @@ model_router = APIRouter(
tags=["model"],
)
active_downloads = {}
WEIGHTS = "/usr/src/app/weights/"
models_file_path = Path(__file__).parent.parent / "data" / "models.json"
@ -34,212 +34,144 @@ for family in families.__root__:
)
# Helper functions
async def is_model_installed(model_name: str) -> bool:
    """Tell whether ``<model_name>.bin`` appears among the installed models.

    Entries whose name starts with a dot are never counted as installed.
    """
    wanted = f"{model_name}.bin"
    for file_name in await list_of_installed_models():
        if file_name == wanted and not file_name.startswith("."):
            return True
    return False
@model_router.post("/refresh")
async def refresh_models(url: Annotated[str, Form()]):
    """
    Refreshes the list of models available for download.

    The JSON document at ``url`` is fetched, parsed as ``Families`` and used to
    rebuild the module-level ``models_info`` mapping of model name to
    ``(repo, filename, disk_space)``. When a model lists several files, the last
    one wins because every file is stored under the same model name.

    Raises:
        HTTPException: 500 when the URL cannot be fetched (network failure,
            timeout or non-success status).
    """
    global models_info

    try:
        # Bound the request: without a timeout an unresponsive host would block this handler forever.
        r = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        raise HTTPException(status_code=500, detail="Could not refresh models using the link provided.") from exc

    if not r.ok:
        raise HTTPException(status_code=500, detail="Could not refresh models using the link provided.")

    families = Families.parse_obj(r.json())

    # Build the new mapping completely before publishing it, so a failure part-way
    # through never leaves the global half-populated.
    refreshed = {}
    for family in families.__root__:
        for model in family.models:
            for file in model.files:
                refreshed[model.name] = (
                    model.repo,
                    file.filename,
                    file.disk_space,
                )
    models_info = refreshed
    return
async def get_file_size(file_path: str) -> int:
    """Return the size in bytes of ``file_path`` as reported by ``os.stat``."""
    file_stat = os.stat(file_path)
    return file_stat.st_size
async def cleanup_model_resources(model_name: str):
    """Best-effort removal of leftovers for ``model_name`` under ``WEIGHTS``.

    Removes the hidden temporary ``.<model_name>.bin`` file plus the lock and
    cache directories named after the model's repository. Each removal is
    attempted independently; an ``OSError`` is printed and otherwise ignored.
    Models that have no entry in ``models_info`` are skipped with a message.
    """
    model_repo, _, _ = models_info.get(model_name, (None, None, None))
    if not model_repo:
        print(f"No model repo found for {model_name}, cleanup may be incomplete.")
        return

    repo_dir_name = f"models--{model_repo.replace('/', '--')}"
    # (path, removal function, description used in the error message)
    targets = (
        (os.path.join(WEIGHTS, f".{model_name}.bin"), os.remove, "temporary file"),
        (os.path.join(WEIGHTS, ".locks", repo_dir_name), shutil.rmtree, "lock directory"),
        (os.path.join(WEIGHTS, repo_dir_name), shutil.rmtree, "cache directory"),
    )

    for path, remove, description in targets:
        if not os.path.exists(path):
            continue
        try:
            remove(path)
        except OSError as e:
            print(f"Error removing {description} for {model_name}: {e}")
async def download_file(session: aiohttp.ClientSession, url: str, path: str) -> None:
    """Stream the body of ``url`` into the file at ``path``.

    The response is read in 1024-byte chunks until an empty chunk is returned.
    Raises a 500 ``HTTPException`` when the response status is not 200.
    """
    async with session.get(url) as response:
        if response.status != 200:
            raise HTTPException(status_code=500, detail="Error downloading model")

        # Write response content to file asynchronously
        with open(path, "wb") as f:
            chunk = await response.content.read(1024)
            while chunk:
                f.write(chunk)
                chunk = await response.content.read(1024)
# Handlers
@model_router.get("/all")
async def list_of_all_models():
res = []
installed_models = await list_of_installed_models()
resp = []
for model in models_info.keys():
if await is_model_installed(model):
progress = await download_status(model)
if f"{model}.bin" in installed_models:
available = True
# if model exists in WEIGHTS directory remove it from the list
installed_models.remove(f"{model}.bin")
else:
available = False
resp.append(
res.append(
{
"name": model,
"size": models_info[model][2],
"available": available,
"progress": await download_status(model),
"progress": progress,
}
)
# append the rest of the models
for model in installed_models:
resp.append(
# .bin is removed for compatibility with generate.py
res.append(
{
"name": model.replace(".bin", "").lstrip("/"),
"size": await get_file_size(WEIGHTS + model),
"size": os.stat(WEIGHTS + model).st_size,
"available": True,
"progress": 100.0,
"progress": None,
}
)
return resp
return res
@model_router.get("/downloadable")
async def list_of_downloadable_models():
    """List the known models that have no ``<name>.bin`` file directly inside ``WEIGHTS``.

    Returns:
        The keys of ``models_info`` (in their existing order) whose model file is
        not present.
    """
    suffix = ".bin"
    # Slice the suffix off instead of using str.rstrip(".bin"): rstrip removes any trailing
    # run of the characters ".", "b", "i", "n", which corrupts names such as "vicuna-13b".
    installed_models = {name[: -len(suffix)] for name in os.listdir(WEIGHTS) if name.endswith(suffix)}
    return [name for name in models_info if name not in installed_models]
@model_router.get("/installed")
async def list_of_installed_models():
# Iterate through the WEIGHTS directory and return filenames that end with .bin and do not start with a dot
# after iterating through the WEIGHTS directory, return location and filename
files = [
os.path.join(model_location.replace(WEIGHTS, "").lstrip("/"), bin_file)
for model_location, _, filenames in os.walk(WEIGHTS)
model_location.replace(WEIGHTS, "") + "/" + bin_file
for model_location, directory, filenames in os.walk(WEIGHTS)
for bin_file in filenames
if bin_file.endswith(".bin") and not bin_file.startswith(".")
if os.path.splitext(bin_file)[1] == ".bin"
]
files = [i.lstrip("/") for i in files]
return files
@model_router.post("/{model_name}/download")
async def download_model(model_name: str):
if model_name not in models_info:
def download_model(model_name: str):
models = list(models_info.keys())
if model_name not in models:
raise HTTPException(status_code=404, detail="Model not found")
try:
model_repo, filename, _ = models_info[model_name]
model_url = hf_hub_url(repo_id=model_repo, filename=filename)
temp_model_path = os.path.join(WEIGHTS, f".{model_name}.bin")
model_path = os.path.join(WEIGHTS, f"{model_name}.bin")
if not os.path.exists(WEIGHTS + "tokenizer.model"):
print("Downloading tokenizer...")
url = huggingface_hub.hf_hub_url(
"nsarrazin/alpaca",
"alpaca-7B-ggml/tokenizer.model",
repo_type="model",
revision="main",
)
urllib.request.urlretrieve(url, WEIGHTS + "tokenizer.model")
# Create an aiohttp session with timeout settings
timeout = aiohttp.ClientTimeout(total=None, connect=300, sock_read=300)
async with aiohttp.ClientSession(timeout=timeout) as session:
# Start the download and add to active_downloads
download_task = asyncio.create_task(download_file(session, model_url, temp_model_path))
active_downloads[model_name] = download_task
await download_task
repo_id, filename, _ = models_info[model_name]
# Rename the dotfile to its final name
os.rename(temp_model_path, model_path)
print(f"Downloading {model_name} model from {repo_id}...")
url = huggingface_hub.hf_hub_url(repo_id, filename, repo_type="model", revision="main")
urllib.request.urlretrieve(url, WEIGHTS + f"{model_name}.bin.tmp")
# Remove the entry from active_downloads after successful download
active_downloads.pop(model_name, None)
os.rename(WEIGHTS + f"{model_name}.bin.tmp", WEIGHTS + f"{model_name}.bin")
convert_one_file(WEIGHTS + f"{model_name}.bin", WEIGHTS + "tokenizer.model")
migrate(WEIGHTS + f"{model_name}.bin")
return {"message": f"Model {model_name} downloaded"}
except asyncio.CancelledError:
await cleanup_model_resources(model_name)
raise HTTPException(status_code=200, detail="Download cancelled")
except Exception as exc:
await cleanup_model_resources(model_name)
raise HTTPException(status_code=500, detail=f"Error downloading model: {exc}")
@model_router.post("/{model_name}/download/cancel")
async def cancel_download(model_name: str):
    """Cancel the running download of ``model_name`` and clean up its partial files.

    Raises:
        HTTPException: 404 when no download is registered for the model, 500 when
            cancelling or cleaning up fails.
    """
    task = active_downloads.get(model_name)
    if not task:
        # Raised outside the try block below: inside it, the generic handler would
        # catch this 404 and report it to the client as a 500.
        raise HTTPException(status_code=404, detail="No active download for this model")

    try:
        task.cancel()

        # Remove entry from active downloads
        active_downloads.pop(model_name, None)

        try:
            # Wait for the task to respond to cancellation
            print(f"Waiting for download for {model_name} to be cancelled")
            await task
        except asyncio.CancelledError:
            # Handle the expected cancellation exception
            pass

        # Cleanup resources
        await cleanup_model_resources(model_name)

        print(f"Download for {model_name} cancelled")
        return {"message": f"Download for {model_name} cancelled"}
    except HTTPException:
        # Keep the status code chosen further down the call chain.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error cancelling model download: {str(e)}")
return {"message": f"Model {model_name} downloaded"}
@model_router.get("/{model_name}/download/status")
async def download_status(model_name: str):
if model_name not in models_info:
models = list(models_info.keys())
if model_name not in models:
raise HTTPException(status_code=404, detail="Model not found")
filesize = models_info[model_name][2]
model_repo, _, _ = models_info[model_name]
# Construct the path to the blobs directory
temp_model_path = os.path.join(WEIGHTS, f".{model_name}.bin")
model_path = os.path.join(WEIGHTS, f"{model_name}.bin")
bin_path = WEIGHTS + f"{model_name}.bin.tmp"
# Check if the model is currently being downloaded
task = active_downloads.get(model_name)
if os.path.exists(model_path):
currentsize = os.path.getsize(model_path)
progress = min(round(currentsize / filesize * 100, 1), 100)
return progress
elif task and not task.done():
# If the task is still running, check for incomplete files
if os.path.exists(temp_model_path):
currentsize = os.path.getsize(temp_model_path)
return min(round(currentsize / filesize * 100, 1), 100)
# If temp_model_path doesn't exist, the download is likely just starting, progress is 0
return 0
else:
# No active download and the file does not exist
return None
if os.path.exists(bin_path):
currentsize = os.path.getsize(bin_path)
return min(round(currentsize / filesize * 100, 1), 100)
return None
@model_router.delete("/{model_name}")
async def delete_model(model_name: str):
if f"{model_name}.bin" not in await list_of_installed_models():
if model_name + ".bin" not in await list_of_installed_models():
raise HTTPException(status_code=404, detail="Model not found")
try:
os.remove(os.path.join(WEIGHTS, f"{model_name}.bin"))
except OSError as e:
print(f"Error removing model file: {e}")
if os.path.exists(WEIGHTS + f"{model_name}.bin"):
os.remove(WEIGHTS + f"{model_name}.bin")
return {"message": f"Model {model_name} deleted"}
await cleanup_model_resources(model_name)
return {"message": f"Model {model_name} deleted"}
raise HTTPException(status_code=404, detail="Model file not found")

View File

@ -1,63 +0,0 @@
import logging
from fastapi import APIRouter, Depends, HTTPException, status
from serge.crud import create_user, update_user
from serge.database import SessionLocal
from serge.routers.auth import get_current_active_user
from serge.schema import user as user_schema
from sqlalchemy.orm import Session
user_router = APIRouter(
prefix="/user",
tags=["user"],
)
def get_db():
    """Provide a ``SessionLocal`` session for one request; it is always closed afterwards."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
@user_router.get("/", response_model=user_schema.User)
async def get_user(u: user_schema.User = Depends(get_current_active_user)):
    """Return the public representation of the current user, or 401 when there is none."""
    if u:
        return u.to_public_dict()
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password",
        headers={"WWW-Authenticate": "Bearer"},
    )
@user_router.post("/create", response_model=user_schema.User)
async def create_user_with_pass(ua: user_schema.UserAuth, db: Session = Depends(get_db)):
    """Create a user from the submitted credentials and return its public representation.

    Any exception raised by ``create_user`` is logged and reported as 409; a
    falsy result from ``create_user`` is reported as 405.
    """
    try:
        created = create_user(db, ua)
    except Exception as e:
        logging.exception(e)
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Failed to create. {e}",
        )

    if created:
        return created.to_public_dict()
    raise HTTPException(
        status_code=status.HTTP_405_METHOD_NOT_ALLOWED,
        detail="Could not create user",
    )
@user_router.put("/", response_model=user_schema.User)
async def self_update_user(
    new_data: user_schema.User,
    current: user_schema.User = Depends(get_current_active_user),
    db: Session = Depends(get_db),
):
    """Update the current user's own profile.

    Only ``email``, ``full_name`` and ``default_prompt`` are taken from
    ``new_data``; the result is saved with ``update_user`` and returned in its
    public form.
    """
    for field_name in ("email", "full_name", "default_prompt"):
        setattr(current, field_name, getattr(new_data, field_name))
    update_user(db, current)
    return current.to_public_dict()

View File

@ -73,35 +73,16 @@
"Name": {
"type": "string",
"enum": [
"fp16",
"iq1_M",
"iq1_S",
"iq2_M",
"iq2_S",
"iq2_XS",
"iq2_XXS",
"iq3_M",
"iq3_S",
"iq3_XS",
"iq3_XXS",
"iq4_NL",
"iq4_XS",
"q2_K",
"q3_K_L",
"q3_K_M",
"q3_K_S",
"q4_0",
"q4_1",
"q4_K_M",
"q4_K_S",
"q5_0",
"q5_1",
"q5_K_M",
"q5_K_S",
"q6_K",
"q8_0",
"q8_1",
"q8_K"
"q8_0"
],
"title": "Name"
}

View File

@ -1,42 +0,0 @@
import uuid
from pydantic import BaseModel
# Common base for the user schemas below: just the username.
class UserBase(BaseModel):
username: str
# One authentication method of a user: the username (inherited) plus a secret and a type code.
class UserAuth(UserBase):
secret: str
# Integer code of the authentication method.
auth_type: int
# Ownership record for a chat: its id and the owner.
class Chat(BaseModel):
chat_id: str
owner: str
# Full user schema: profile fields plus the user's auth entries and chats.
class User(UserBase):
id: uuid.UUID
is_active: bool = True
email: str = ""
full_name: str = ""
theme_light: bool = False
default_prompt: str = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
auth: list[UserAuth] = []
chats: list[Chat] = []
class Config:
orm_mode = True
def to_public_dict(self):
# Dict form of the user that is safe to return to clients: every auth entry's secret is
# replaced by a fixed mask. The masking is done on the dict produced by self.dict(), not on
# the model itself.
user_dict = self.dict()
for auth in user_dict["auth"]:
auth["secret"] = "********"
return user_dict
# Response body of the token endpoint: the token string and its type.
class Token(BaseModel):
access_token: str
token_type: str

View File

@ -0,0 +1,127 @@
# code from @eiz
# Thanks! See the relevant comment here:
# https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818
#!/usr/bin/env python3
import argparse
import glob
import os
import struct
import sys
from sentencepiece import SentencePieceProcessor
from serge.utils.migrate import migrate
HPARAMS = keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"]
def parse_args():
    """Parse the command line: two positionals, ``dir_model`` and ``tokenizer_model``."""
    cli = argparse.ArgumentParser(description="Upgrade old ggml model files to the current format")
    positionals = (
        ("dir_model", "directory containing ggml .bin files"),
        ("tokenizer_model", "path to LLaMA tokenizer.model file"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    return cli.parse_args()
def read_header(f_in):
    """Read the fixed-size header from ``f_in`` and return it as a tuple of ints.

    The header holds ``3 + len(HPARAMS)`` native ``int`` values; ``struct.unpack``
    raises when fewer bytes are available.
    """
    fmt = "i" * (3 + len(HPARAMS))
    raw = f_in.read(struct.calcsize(fmt))
    return struct.unpack(fmt, raw)
def write_header(f_out, header):
    """Write the new-format header derived from an old-format one.

    ``header`` must unpack into eight values: the old magic followed by
    vocab_size, dim, multiple_of, n_heads, n_layers, rot and ftype. The output
    is nine native ints: the new magic, the file version and the seven
    hyper-parameters unchanged.

    Raises:
        Exception: when the old magic is not 0x67676D6C (ASCII "ggml").
    """
    magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype = header

    if magic != 0x67676D6C:
        raise Exception("Invalid file magic. Must be an old style ggml file.")

    fields = (
        0x67676D66,  # new magic: ASCII "ggmf"
        1,  # file version
        vocab_size,
        dim,
        multiple_of,
        n_heads,
        n_layers,
        rot,
        ftype,
    )
    f_out.write(struct.pack(f"{len(fields)}i", *fields))
def write_tokens(fout, tokenizer):
    """Write one record per vocabulary entry of ``tokenizer`` to ``fout``.

    Each record is the byte length of the token text (native int), the text
    itself and the token score (native float). Unknown tokens are written as
    " \u2047 ", control tokens as empty text, byte tokens as their single raw
    byte, and all other pieces with "\u2581" replaced by a space, UTF-8 encoded.
    A byte token whose piece is not six characters long terminates the process
    with exit status 1.
    """

    def piece_bytes(token_id):
        if tokenizer.is_unknown(token_id):
            return " \u2047 ".encode()
        if tokenizer.is_control(token_id):
            return b""
        if tokenizer.is_byte(token_id):
            piece = tokenizer.id_to_piece(token_id)
            if len(piece) != 6:
                print(f"Invalid token: {piece}")
                sys.exit(1)
            # Characters 3..4 of the six-character piece hold the byte value in hex.
            return struct.pack("B", int(piece[3:-1], 16))
        return tokenizer.id_to_piece(token_id).replace("\u2581", " ").encode("utf-8")

    for token_id in range(tokenizer.vocab_size()):
        text = piece_bytes(token_id)
        fout.write(struct.pack("i", len(text)))
        fout.write(text)
        fout.write(struct.pack("f", tokenizer.get_score(token_id)))
def read_tokens(f_in, tokenizer):
    """Skip over the token table in ``f_in``.

    Reads ``tokenizer.vocab_size()`` entries, each a native-int length followed
    by that many bytes of text, and discards them. Afterwards the stream is
    positioned just past the table.
    """
    remaining = tokenizer.vocab_size()
    while remaining > 0:
        (length,) = struct.unpack("i", f_in.read(4))
        f_in.read(length)
        remaining -= 1
def copy_all_data(f_out, f_in):
    """Copy everything left in ``f_in`` to ``f_out`` in chunks of 1 MiB."""
    chunk_size = 1024 * 1024
    chunk = f_in.read(chunk_size)
    while chunk:
        f_out.write(chunk)
        chunk = f_in.read(chunk_size)
def convert_one_file(path_in, tokenizer):
    """Upgrade one old-style ggml file in place, keeping the original as ``<path_in>.old``.

    The converted data is written to ``<path_in>.tmp`` first. On success the
    original is renamed to ``<path_in>.old`` and the temporary file takes its
    place. On failure the file is left untouched and the temporary file is
    removed.

    Args:
        path_in: Path of the model file to convert.
        tokenizer: Object providing the tokenizer methods used by
            ``read_tokens`` and ``write_tokens``.

    Conversion stays best-effort: no exception from the conversion itself is
    propagated. The reason is now printed, because a file that is already in the
    new format is only one of the failures that end up here.
    """
    path_tmp = f"{path_in}.tmp"

    print(f"converting {path_in}")
    try:
        with open(path_in, "rb") as f_in, open(path_tmp, "wb") as f_out:
            write_header(f_out, read_header(f_in))
            read_tokens(f_in, tokenizer)
            write_tokens(f_out, tokenizer)
            copy_all_data(f_out, f_in)
    except Exception as exc:
        print(f"File {path_in} already converted")
        # Surface the real cause so unrelated failures are not mistaken for a no-op.
        print(f"Conversion of {path_in} skipped: {exc}")
        # Discard the partial output; it may not exist if opening the input failed.
        try:
            os.remove(path_tmp)
        except OSError:
            pass
    else:
        os.rename(path_in, path_in + ".old")
        os.rename(path_tmp, path_in)
def convert_all(dir_model: str, tokenizer_model: str):
    """Convert and migrate every ``*.bin`` file found directly in ``dir_model``.

    Each file is passed to ``convert_one_file`` and then to ``migrate``.
    Any exception stops the run and is printed rather than raised.

    Args:
        dir_model: Directory containing the ggml ``.bin`` files.
        tokenizer_model: Path to the tokenizer model file.
    """
    # Glob once: listing the same pattern twice made every file be processed
    # a second time.
    files = glob.glob(f"{dir_model}/*.bin")

    try:
        tokenizer = SentencePieceProcessor(tokenizer_model)
        for file in files:
            print("Converting file: ", file)
            convert_one_file(file, tokenizer)
            migrate(file)
    except Exception as e:
        print(f"Error: {e}")
# Command-line entry point: convert every model in the given directory.
if __name__ == "__main__":
    cli_args = parse_args()
    convert_all(dir_model=cli_args.dir_model, tokenizer_model=cli_args.tokenizer_model)

245
api/src/serge/utils/llm.py Normal file
View File

@ -0,0 +1,245 @@
"""Wrapper around llama.cpp."""
from typing import Any
from langchain.llms.base import LLM
from pydantic import Extra, Field, root_validator
class LlamaCpp(LLM):
    """Wrapper around the llama.cpp model.

    To use, you should have the llama-cpp-python library installed, and provide the
    name of the Llama model as a named parameter to the constructor.
    Check out: https://github.com/abetlen/llama-cpp-python

    Example:
        .. code-block:: python

            from serge.utils.llm import LlamaCpp
            llm = LlamaCpp(model_path="gpt4all")
    """

    client: Any  #: :meta private:

    model_path: str
    """Model name; loaded from ``/usr/src/app/weights/<model_path>.bin`` on each call."""

    n_ctx: int = Field(2048, alias="n_ctx")
    """Token context window."""

    n_gpu_layers: int = Field(0, alias="n_gpu_layers")
    """The number of layers to put on the GPU. The rest will be on the CPU."""

    n_parts: int = Field(-1, alias="n_parts")
    """Number of parts to split the model into.
    If -1, the number of parts is automatically determined."""

    seed: int = Field(-1, alias="seed")
    """Seed. If -1, a random seed is used."""

    f16_kv: bool = Field(False, alias="f16_kv")
    """Use half-precision for key/value cache."""

    logits_all: bool = Field(False, alias="logits_all")
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = Field(False, alias="vocab_only")
    """Only load the vocabulary, no weights."""

    use_mlock: bool = Field(False, alias="use_mlock")
    """Force system to keep model in RAM."""

    n_threads: int | None = Field(None, alias="n_threads")
    """Number of threads to use.
    If None, the number of threads is automatically determined."""

    n_batch: int | None = Field(8, alias="n_batch")
    """Number of tokens to process in parallel.
    Should be a number between 1 and n_ctx."""

    max_tokens: int | None = 2048
    """The maximum number of tokens to generate."""

    temperature: float | None = 0.8
    """The temperature to use for sampling."""

    top_p: float | None = 0.95
    """The top-p value to use for sampling."""

    logprobs: int | None = Field(None)
    """The number of logprobs to return. If None, no logprobs are returned."""

    echo: bool | None = False
    """Whether to echo the prompt."""

    stop_sequences: list[str] | None = []
    """A list of strings to stop generation when encountered."""

    repeat_penalty: float | None = 1.1
    """The penalty to apply to repeated tokens."""

    top_k: int | None = 40
    """The top-k value to use for sampling."""

    last_n_tokens_size: int | None = 64
    """The number of tokens to look back when applying the repeat_penalty."""

    streaming: bool = False
    """Whether to stream tokens to the callback manager while generating."""

    class Config:
        extra = Extra.ignore

    @root_validator()
    def validate_environment(cls, values: dict) -> dict:
        """Validate that llama-cpp-python library is installed.

        The model itself is not loaded here; ``_call`` builds the client.

        Raises:
            ModuleNotFoundError: If ``llama_cpp`` cannot be imported.
        """
        try:
            # Import only to prove availability; the previous ``try: pass``
            # body made the handlers below unreachable.
            import llama_cpp  # noqa: F401
        except ImportError:
            raise ModuleNotFoundError(
                "Could not import llama-cpp-python library. "
                "Please install the llama-cpp-python library to "
                "use this model: pip install llama-cpp-python"
            )
        return values

    @property
    def _default_params(self) -> dict[str, Any]:
        """Get the default parameters for calling llama_cpp."""
        return {
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "logprobs": self.logprobs,
            "echo": self.echo,
            "stop_sequences": self.stop_sequences,
            "repeat_penalty": self.repeat_penalty,
            "top_k": self.top_k,
            "n_ctx": self.n_ctx,
            "n_gpu_layers": self.n_gpu_layers,
            "n_parts": self.n_parts,
            "seed": self.seed,
            "f16_kv": self.f16_kv,
            "logits_all": self.logits_all,
            "vocab_only": self.vocab_only,
            "use_mlock": self.use_mlock,
            "n_batch": self.n_batch,
            "last_n_tokens_size": self.last_n_tokens_size,
            "streaming": self.streaming,
        }

    @property
    def _identifying_params(self) -> dict[str, Any]:
        """Get the identifying parameters."""
        return {**{"model_path": self.model_path}, **self._default_params}

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "llama.cpp"

    def _call(self, prompt: str, stop: list[str] | None = None) -> str:
        """Call the Llama model and return the output.

        A new ``llama_cpp.Llama`` client is created on every call.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
                Used only when the instance has no ``stop_sequences``.

        Returns:
            The generated text. When ``streaming`` is enabled, each token is
            also reported to the callback manager as it is produced.

        Raises:
            ValueError: If stop sequences are given both on the instance and
                through ``stop``.

        Example:
            .. code-block:: python

                from serge.utils.llm import LlamaCpp
                llm = LlamaCpp(model_path="gpt4all")
                llm("This is a prompt.")
        """
        from llama_cpp import Llama

        params = self._identifying_params
        client = Llama(
            model_path="/usr/src/app/weights/" + self.model_path + ".bin",
            n_ctx=self.n_ctx,
            n_gpu_layers=self.n_gpu_layers,
            n_parts=self.n_parts,
            seed=self.seed,
            f16_kv=self.f16_kv,
            logits_all=self.logits_all,
            vocab_only=self.vocab_only,
            use_mlock=self.use_mlock,
            n_threads=self.n_threads,
            n_batch=self.n_batch,
            last_n_tokens_size=self.last_n_tokens_size,
        )

        if self.stop_sequences and stop is not None:
            raise ValueError("`stop_sequences` found in both the input and default params.")
        # Instance-level stop sequences win; otherwise honour the per-call
        # ``stop`` argument, which used to be silently discarded.
        params["stop_sequences"] = self.stop_sequences or stop or []

        # Sampling arguments shared by the streaming and non-streaming paths.
        completion_kwargs = {
            "prompt": prompt,
            "max_tokens": params["max_tokens"],
            "temperature": params["temperature"],
            "top_p": params["top_p"],
            "logprobs": params["logprobs"],
            "echo": params["echo"],
            "stop": params["stop_sequences"],
            "repeat_penalty": params["repeat_penalty"],
            "top_k": params["top_k"],
        }

        if not self.streaming:
            output = client(**completion_kwargs)
            return output["choices"][0]["text"]

        response = ""
        for stream_resp in client(**completion_kwargs, stream=True):
            try:
                token = stream_resp["choices"][0]["text"]
            except (KeyError, IndexError, TypeError):
                # Chunk without text: treat it as an empty token. Catching
                # BaseException here also hid KeyboardInterrupt/SystemExit.
                token = ""
            response += token
            self.callback_manager.on_llm_new_token(token, verbose=self.verbose)
        return response
# Manual smoke test: waits for Enter, then streams one completion through the
# Redis callback handler.
if __name__ == "__main__":
    from langchain.callbacks.base import CallbackManager

    from serge.utils.stream import ChainRedisHandler

    redis_callbacks = CallbackManager([ChainRedisHandler("1")])
    llm = LlamaCpp(
        model_path="gpt4all",
        streaming=True,
        verbose=True,
        temperature=0.1,
        max_tokens=128,
        callback_manager=redis_callbacks,
    )
    input()
    resp = llm("Write a paragraph about France please.")

View File

@ -0,0 +1,308 @@
# Migrate ggml file(s) with ggmf magic to ggml file with ggjt magic
#
# We caused a breaking change to the file format on 2023-03-30 in:
# https://github.com/ggerganov/llama.cpp/pull/613
#
# (1) If you still have the Meta LLaMA .pth files, then close this
# file now; you can just run `convert-pth-to-ggml.py` again to
# migrate to the new format. The tool is easier to use too. It
# isn't necessary anymore to manage split output files because
# the new format always combines things into a single file.
#
# (2) If you deleted the Meta LLaMA .pth files to save disk
# space, then this tool is intended to help you. Please check
# out the instructions below.
#
# USAGE
#
# python migrate-ggml-2023-03-30-pr613.py INPUT OUTPUT
#
# PREREQUISITES
#
# pip install numpy
# cd llama.cpp
# make -j4
#
# EXAMPLE (7B MODEL)
#
# # you can replace all the 'f16' with 'q4_0' if you're using quantized weights
# python migrate-ggml-2023-03-30-pr613.py models/7B/ggml-model-f16.bin models/7B/ggml-model-f16-ggjt.bin
#
# # check that it works
# ./main -m models/7B/ggml-model-f16-ggjt.bin -p 'Question: Do you love me?'
#
# # you can delete the old files
# rm -f models/7B/ggml-model-f16.bin
# mv models/7B/ggml-model-f16-ggjt.bin models/7B/ggml-model-f16.bin
#
# EXAMPLE (13B MODEL)
#
# # you can replace all the 'f16' with 'q4_0' if you're using quantized weights
# python migrate-ggml-2023-03-30-pr613.py models/13B/ggml-model-f16.bin models/13B/ggml-model-f16-ggjt.bin
#
# # check that it works
# ./main -m models/13B/ggml-model-f16-ggjt.bin -p 'Question: Do you love me?'
#
# # you can delete the old files
# rm -f models/13B/ggml-model-f16.bin*
# mv models/13B/ggml-model-f16-ggjt.bin models/13B/ggml-model-f16.bin
#
import os
import struct
# Number of elements per quantization block for the Q4 types below; also used
# by copy_tensors as the byte alignment of tensor data in the output file.
QK = 32

# ggml tensor type identifiers, used as keys of the two size tables below.
GGML_TYPE_Q4_0 = 0
GGML_TYPE_Q4_1 = 1
GGML_TYPE_I8 = 2
GGML_TYPE_I16 = 3
GGML_TYPE_I32 = 4
GGML_TYPE_F16 = 5
GGML_TYPE_F32 = 6

# Display names for the ftype code read from each tensor header (log output only).
WTYPE_NAMES = {
    0: "F32",
    1: "F16",
    2: "Q4_0",
    3: "Q4_1",
}

# Maps the ftype code read from each tensor header to a ggml type identifier.
WTYPES = {
    0: GGML_TYPE_F32,
    1: GGML_TYPE_F16,
    2: GGML_TYPE_Q4_0,
    3: GGML_TYPE_Q4_1,
}

# Elements per block for each ggml type (divisor in ggml_nbytes).
GGML_BLCK_SIZE = {
    GGML_TYPE_Q4_0: QK,
    GGML_TYPE_Q4_1: QK,
    GGML_TYPE_I8: 1,
    GGML_TYPE_I16: 1,
    GGML_TYPE_I32: 1,
    GGML_TYPE_F16: 1,
    GGML_TYPE_F32: 1,
}

# Bytes per block for each ggml type (multiplier in ggml_nbytes).
GGML_TYPE_SIZE = {
    GGML_TYPE_Q4_0: 4 + QK // 2,
    GGML_TYPE_Q4_1: 4 * 2 + QK // 2,
    GGML_TYPE_I8: 1,
    GGML_TYPE_I16: 2,
    GGML_TYPE_I32: 4,
    GGML_TYPE_F16: 2,
    GGML_TYPE_F32: 4,
}

# Header fields in on-disk order; read_hparams/write_hparams store each one as
# a native int ("i").
HPARAMS = [
    "magic",  # int32
    "version",  # int32
    "n_vocab",  # int32
    "n_embd",  # int32
    "n_mult",  # int32
    "n_head",  # int32
    "n_layer",  # int32
    "n_rot",  # int32
    "f16",  # int32
]
def read_hparams(fin):
    """Read the file header into a dict keyed by the names in ``HPARAMS``.

    Args:
        fin: Binary file object positioned at the start of the header.

    Returns:
        Dict mapping each ``HPARAMS`` name to the integer read for it.

    Raises:
        struct.error: If the header is truncated.
    """
    layout = struct.Struct("i" * len(HPARAMS))
    values = layout.unpack(fin.read(layout.size))
    return dict(zip(HPARAMS, values))
def write_hparams(fout, hparams):
    """Write the header fields to ``fout`` in ``HPARAMS`` order.

    Args:
        fout: Binary file object to write to.
        hparams: Dict holding an integer for every name in ``HPARAMS``.

    Raises:
        KeyError: If a field named in ``HPARAMS`` is missing from ``hparams``.
    """
    # The former ``struct.calcsize`` call discarded its result and was removed.
    struct_fmt = "i" * len(HPARAMS)
    ints = [hparams[h] for h in HPARAMS]
    fout.write(struct.pack(struct_fmt, *ints))
def read_tokens(fin, hparams):
    """Read the vocabulary section that follows the header.

    Each record is a native int byte length, the token bytes, then a native
    float score.

    Args:
        fin: Binary file object positioned at the first token record.
        hparams: Header dict; ``hparams["n_vocab"]`` records are read.

    Returns:
        List of ``(word_bytes, score)`` tuples in file order.

    Raises:
        struct.error: If a length or score field is truncated.
    """

    def _next_token():
        (n_bytes,) = struct.unpack("i", fin.read(4))
        word = fin.read(n_bytes)
        (score,) = struct.unpack("f", fin.read(4))
        return word, score

    return [_next_token() for _ in range(hparams["n_vocab"])]
def write_tokens(fout, tokens):
    """Write vocabulary records in the layout ``read_tokens`` expects.

    Args:
        fout: Binary file object to write to.
        tokens: Iterable of ``(word_bytes, score)`` pairs.
    """
    length_field = struct.Struct("i")
    score_field = struct.Struct("f")
    for text, weight in tokens:
        fout.write(length_field.pack(len(text)))
        fout.write(text)
        fout.write(score_field.pack(weight))
def ggml_nelements(shape):
    """Return the product of all dimensions in ``shape`` (1 for an empty shape)."""
    total = 1
    for extent in shape:
        total = total * extent
    return total
def ggml_nbytes(shape, ftype):
    """Return the number of bytes occupied by a tensor's data.

    Args:
        shape: Sequence of dimension sizes.
        ftype: Tensor type code; must be a key of ``WTYPES``.

    Returns:
        Element count times bytes-per-block, floor-divided by
        elements-per-block for the tensor's ggml type.
    """
    element_count = ggml_nelements(shape)
    ggml_type = WTYPES[ftype]
    return element_count * GGML_TYPE_SIZE[ggml_type] // GGML_BLCK_SIZE[ggml_type]
def copy_tensors(fin, fout, part_id, n_parts):
    """Copy every tensor record of one input part into the merged output file.

    Reads tensor records from ``fin`` until end of file. For each record the
    tensor header (with the full, un-sharded shape) is written to ``fout``,
    the data offset is padded to a multiple of ``QK`` bytes, and this part's
    data is written at its place inside the full tensor.

    Args:
        fin: Binary input positioned at the first tensor record.
        fout: Seekable binary output positioned where tensors start.
        part_id: Zero-based index of the part being read.
        n_parts: Total number of parts the model is split into.

    Raises:
        AssertionError: If a tensor has a dimension count other than 1 or 2.
    """
    while True:
        # Record header: three native ints (n_dims, name length, ftype).
        b = fin.read(4)
        if not b:
            # Nothing left to read: this part has no more tensor records.
            break
        (n_dims,) = struct.unpack("i", b)
        b = fin.read(4)
        (length,) = struct.unpack("i", b)
        b = fin.read(4)
        (ftype,) = struct.unpack("i", b)
        assert n_dims in (1, 2)
        # Read the n_dims sizes, then flip them so that index 0 is the row
        # count and index 1 the column count used further down.
        partshape = list(range(n_dims))
        for i in range(n_dims):
            b = fin.read(4)
            partshape[i] = struct.unpack("i", b)[0]
        partshape = list(reversed(partshape))
        name = fin.read(length)
        data = fin.read(ggml_nbytes(partshape, ftype))
        blck_size = GGML_BLCK_SIZE[WTYPES[ftype]]
        type_size = GGML_TYPE_SIZE[WTYPES[ftype]]
        print(f"Processing tensor {name} with shape: {partshape} and type: {WTYPE_NAMES[ftype]}")
        # determine dimension along which multipart tensor is sharded
        #
        # split_dim 0 regex:
        # - output.*
        # - layers.*.attention.wq.weight
        # - layers.*.attention.wk.weight
        # - layers.*.attention.wv.weight
        # - layers.*.feed_forward.w1.weight
        # - layers.*.feed_forward.w3.weight
        #
        # split_dim 1 regex:
        # - tok_embeddings.*
        # - layers.*.attention.wo.weight
        # - layers.*.feed_forward.w2.weight
        #
        # split_dim is only assigned for 2-D tensors; 1-D tensors never reach
        # the branches below that read it.
        if n_dims > 1:
            split_dim = 1
            if b"tok_embeddings" in name:
                split_dim = 1
            elif b"layers" in name:
                if b"attention.wo.weight" in name:
                    split_dim = 1
                elif b"feed_forward.w2.weight" in name:
                    split_dim = 1
                else:
                    split_dim = 0
            elif b"output" in name:
                split_dim = 0
        # output tensor header
        fullshape = list(partshape)
        if n_dims > 1:
            fullshape[split_dim] *= n_parts
        fout.write(struct.pack("iii", n_dims, len(name), ftype))
        # Dimensions go back out in the same (reversed) order they were read in.
        for dim in reversed(fullshape):
            fout.write(struct.pack("i", dim))
        fout.write(name)
        # ensure tensor data is aligned
        tensor_data_offset = fout.tell()
        while tensor_data_offset % QK != 0:
            fout.write(struct.pack("B", 0))
            tensor_data_offset += 1
        # output unified mappable tensor data
        if n_dims == 1 or n_parts == 1:
            # copy tensor which we thankfully received in one piece
            # (only written from part 0; the data read from later parts is dropped)
            if part_id == 0:
                fout.write(data)
        elif split_dim == 0:
            # reassemble multifile tensor containing some of the rows
            rows_per_chunk = partshape[0]
            current_row = part_id * rows_per_chunk
            bytes_per_row = fullshape[1] // blck_size * type_size
            offset = current_row * bytes_per_row
            fout.seek(tensor_data_offset + offset)
            fout.write(data)
        elif split_dim == 1:
            # reassemble multifile tensor containing some of the cols
            cols_per_chunk = partshape[1]
            current_col = part_id * cols_per_chunk
            # bpr: bytes per row of this part; bytes_per_row: bytes per row of the full tensor
            bpr = partshape[1] // blck_size * type_size
            bytes_per_row = fullshape[1] // blck_size * type_size
            offset_current_col = current_col // blck_size * type_size
            for row in range(partshape[0]):
                offset_row = row * bytes_per_row
                offset = offset_row + offset_current_col
                fout.seek(tensor_data_offset + offset)
                fout.write(data[row * bpr : row * bpr + bpr])
        # advance file position to next tensor
        fout.seek(tensor_data_offset + ggml_nbytes(fullshape, ftype))
def migrate(fin_path):
    """Migrate a 'ggmf' model file to the 'ggjt' format in place.

    The output is assembled in ``<fin_path>.migrated`` and then moved over
    ``fin_path``. Files that already carry the 'ggjt' magic, or that do not
    carry the 'ggmf' magic, are reported and left untouched. Additional parts
    named ``<fin_path>.1``, ``<fin_path>.2``, ... are merged into the single
    output file; the part files themselves are not removed.

    Args:
        fin_path: Path to the first (or only) file of the model.
    """
    assert fin_path
    assert os.path.exists(fin_path)

    with open(fin_path, "rb") as fin:
        hparams = read_hparams(fin)
        # Check the magic before parsing the token table, so a file in another
        # format is rejected without interpreting its bytes as a vocabulary.
        if hparams["magic"] == 0x67676A74:  # ggjt
            print(f"{fin_path}: input ggml has already been converted to 'ggjt' magic\n")
            return
        if hparams["magic"] != 0x67676D66:  # ggmf
            print(f"{fin_path}: input ggml file doesn't have expected 'ggmf' magic: {hparams['magic']:#x}\n")
            return
        tokens = read_tokens(fin, hparams)
    hparams["magic"] = 0x67676A74  # ggjt

    # count number of multipart files by convention
    n_parts = 1
    while os.path.exists("%s.%d" % (fin_path, n_parts)):
        n_parts += 1

    # we output a single file for ggml
    migrated_path = fin_path + ".migrated"
    with open(migrated_path, "wb") as fout:
        write_hparams(fout, hparams)
        write_tokens(fout, tokens)
        offset_of_tensors = fout.tell()
        # the tensors we load could be split across multiple files
        for part_id in range(n_parts):
            fout.seek(offset_of_tensors)
            print(f"Processing part {part_id+1} of {n_parts}\n")
            # Build each part name from the unmodified base path. Appending to
            # fin_path itself produced names such as "x.1.2" and made the final
            # rename act on the last part instead of the model file.
            part_path = fin_path if part_id == 0 else "%s.%d" % (fin_path, part_id)
            with open(part_path, "rb") as fin:
                read_tokens(fin, read_hparams(fin))
                copy_tensors(fin, fout, part_id, n_parts)

    # Swap the migrated file into place in one step.
    os.replace(migrated_path, fin_path)
    print(f"Done. Output file: {fin_path}\n")

View File

@ -1,56 +0,0 @@
import base64
import hashlib
import os
from datetime import datetime, timedelta
from typing import Optional
from fastapi import HTTPException, status
from jose import JWTError, jwt
from serge.models.settings import Settings
# Signing algorithm passed to jose when encoding and decoding access tokens.
ALGORITHM = "HS256"
# Application settings; this module reads SERGE_JWT_SECRET and
# SERGE_SESSION_EXPIRY from it.
settings = Settings()
# Shared 401 error raised by decode_access_token when a token cannot be
# decoded or carries no "sub" claim.
credentials_exception = HTTPException(
    status_code=status.HTTP_401_UNAUTHORIZED,
    detail="Could not validate credentials",
    headers={"WWW-Authenticate": "Bearer"},
)
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check a plaintext password against a hash from ``get_password_hash``.

    Args:
        plain_password: Password supplied by the user.
        hashed_password: Base64 text of a 16-byte salt followed by the
            scrypt-derived key.

    Returns:
        True if the password derives to the stored key, False otherwise.
    """
    import hmac

    salt_and_hash = base64.b64decode(hashed_password.encode("utf-8"))
    salt = salt_and_hash[:16]
    stored_password = salt_and_hash[16:]
    new_hashed_password = hashlib.scrypt(plain_password.encode("utf-8"), salt=salt, n=8192, r=8, p=1, dklen=64)
    # Constant-time comparison: ``==`` can leak, through timing, how many
    # leading bytes of the derived key matched.
    return hmac.compare_digest(new_hashed_password, stored_password)
def get_password_hash(password: str) -> str:
    """Hash a password with scrypt under a fresh random 16-byte salt.

    Args:
        password: Plaintext password.

    Returns:
        Base64 text of the salt followed by the 64-byte derived key.
    """
    salt = os.urandom(16)
    derived_key = hashlib.scrypt(password.encode("utf-8"), salt=salt, n=8192, r=8, p=1, dklen=64)
    return base64.b64encode(salt + derived_key).decode("utf-8")
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
    """Create a signed JWT containing ``data`` plus an ``exp`` claim.

    Args:
        data: Claims to embed; copied, so the caller's dict is not modified.
        expires_delta: Lifetime of the token. When None, the lifetime is
            ``settings.SERGE_SESSION_EXPIRY`` minutes.

    Returns:
        The encoded token as returned by ``jwt.encode``.
    """
    to_encode = data.copy()
    # Compare against None rather than truthiness: a zero timedelta is falsy
    # and used to be replaced by the default session lifetime.
    if expires_delta is not None:
        expire = datetime.utcnow() + expires_delta
    else:
        expire = datetime.utcnow() + timedelta(minutes=settings.SERGE_SESSION_EXPIRY)
    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, settings.SERGE_JWT_SECRET, algorithm=ALGORITHM)
    return encoded_jwt
def decode_access_token(token: str):
    """Decode an access token and return the username from its ``sub`` claim.

    Args:
        token: Encoded JWT.

    Returns:
        The value of the ``sub`` claim.

    Raises:
        HTTPException: ``credentials_exception`` (401) if decoding raises
            ``JWTError`` or the token has no ``sub`` claim.
    """
    try:
        claims = jwt.decode(token, settings.SERGE_JWT_SECRET, algorithms=[ALGORITHM])
    except JWTError:
        raise credentials_exception
    subject: str = claims.get("sub")
    if subject is None:
        raise credentials_exception
    return subject

View File

@ -1,8 +1,53 @@
import re
from typing import Any
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.memory import RedisChatMessageHistory
from langchain.schema import LLMResult
from loguru import logger
from redis import Redis
# Not used yet. WIP
class ChainRedisHandler(StreamingStdOutCallbackHandler):
    """Callback handler for streaming. Only works with LLMs that support streaming.

    Besides the stdout behaviour of the parent class, the tokens are
    accumulated in the Redis string stored under ``stream:<id>``.
    """

    def __init__(self, id: str):
        """Connect to the Redis server on localhost:6379.

        Args:
            id: Identifier used to build the Redis key for this stream.
        """
        logger.debug(f"Setting up ChainRedisHandler with id {id}")
        super().__init__()
        self.id = id
        self.client = Redis(host="localhost", port=6379, decode_responses=False)
        logger.info(f"Connected to Redis? {self.client.ping()}")
        logger.info(f"Stream key : {self.stream_key}")

    @property
    def stream_key(self):
        """Redis key under which the streamed text is stored."""
        return "stream:" + self.id

    def on_llm_start(self, serialized: dict[str, Any], prompts: list[str], **kwargs: Any) -> None:
        """Run when LLM starts running."""
        super().on_llm_start(serialized, prompts, **kwargs)
        logger.info("starting")
        # Start every generation from an empty value.
        self.client.set(self.stream_key, "")

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Run on new LLM token. Only available when streaming is enabled."""
        # Forward to the matching parent hook; this used to call
        # ``on_llm_start`` with the token as its only positional argument.
        super().on_llm_new_token(token, **kwargs)
        logger.info(token)
        self.client.append(self.stream_key, token)

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Run when LLM ends running."""
        super().on_llm_end(response, **kwargs)
        self.client.set(self.stream_key, "")

    def on_llm_error(self, error: Exception | KeyboardInterrupt, **kwargs: Any) -> None:
        """Run when LLM errors."""
        super().on_llm_error(error, **kwargs)
        # Store the error text in place of the streamed value.
        self.client.set(self.stream_key, str(error))
def get_prompt(history: RedisChatMessageHistory, params):
@ -51,7 +96,7 @@ def get_prompt(history: RedisChatMessageHistory, params):
else:
stop = True
if len(next_prompt) > 0:
prompts.append(f"{instruction + next_prompt}\n")
prompts.append(instruction + next_prompt + "\n")
if stop:
break
@ -60,6 +105,6 @@ def get_prompt(history: RedisChatMessageHistory, params):
for next_prompt in prompts:
message_prompt += next_prompt
final_prompt = f"{params.init_prompt}\n{message_prompt[:params.n_ctx]}"
final_prompt = params.init_prompt + "\n" + message_prompt[: params.n_ctx]
logger.debug(final_prompt)
return final_prompt

View File

@ -1,36 +1,26 @@
import json
from pathlib import Path
import requests
from huggingface_hub import hf_hub_url
import huggingface_hub
import pytest
import requests
def load_model_data(file_path):
    """Return the parsed JSON content of ``file_path``."""
    with open(file_path, "r") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def flatten_model_data(families):
    """Yield a ``(repo, filename)`` pair for every file of every model.

    Args:
        families: Iterable of dicts with a ``"models"`` list; each model has
            a ``"repo"`` and a ``"files"`` list of dicts with ``"filename"``.
    """
    yield from (
        (model["repo"], file["filename"])
        for family in families
        for model in family["models"]
        for file in model["files"]
    )
def check_model_availability(repo, filename):
    """Return True when a HEAD request for the model file succeeds.

    Args:
        repo: Hugging Face repository id.
        filename: Name of the file inside the repository.

    Returns:
        True if the response's ``ok`` flag is truthy, False otherwise.
    """
    download_url = hf_hub_url(repo, filename, repo_type="model", revision="main")
    return bool(requests.head(download_url).ok)
# this test file specifically doesn't start with test_* so it's not picked up by pytest
test_dir = Path(__file__).parent
model_data = load_model_data(test_dir.parent / "src/serge/data/models.json")
checks = list(flatten_model_data(model_data))
with open(test_dir.parent / "src/serge/data/models.json", "r") as models_file:
families = json.load(models_file)
# generate list of checks
checks = []
for family in families:
for model in family["models"]:
for file in model["files"]:
checks.append((model["repo"], file["filename"]))
@pytest.mark.parametrize("repo,filename", checks)
def test_model_available(repo, filename):
assert check_model_availability(repo, filename), f"Model {repo}/{filename} not available"
url = huggingface_hub.hf_hub_url(repo, filename, repo_type="model", revision="main")
r = requests.head(url)
assert r.ok, f"Model {repo}/{filename} not available"

View File

@ -9,7 +9,7 @@ image:
image:
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "main"
tag: ""
imagePullSecrets: []
nameOverride: ""

View File

@ -1,19 +1,19 @@
services:
serge:
restart: on-failure
build:
build:
context: .
dockerfile: Dockerfile.dev
volumes:
- ./web:/usr/src/app/web/:z
- ./api:/usr/src/app/api/:z
- ./web:/usr/src/app/web/
- ./api:/usr/src/app/api/
- datadb:/data/db
- weights:/usr/src/app/weights/
- /etc/localtime:/etc/localtime:ro
ports:
- 8008:8008
- 9124:9124
- 5678:5678
- "8008:8008"
- "9124:9124"
volumes:
datadb:
weights:

42
scripts/deploy.sh Executable file → Normal file
View File

@ -1,36 +1,6 @@
#!/bin/bash
set -x
source serge.env
# Get CPU Architecture
cpu_arch=$(uname -m)
# Function to detect CPU features
detect_cpu_features() {
cpu_info=$(lscpu)
if echo "$cpu_info" | grep -q "avx512"; then
echo "AVX512"
elif echo "$cpu_info" | grep -q "avx2"; then
echo "AVX2"
elif echo "$cpu_info" | grep -q "avx"; then
echo "AVX"
else
echo "basic"
fi
}
# Check if the CPU architecture is aarch64/arm64
if [ "$cpu_arch" = "aarch64" ] || [ "$cpu_arch" = "arm64" ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
else
# Use @smartappli provided wheels
#cpu_feature=$(detect_cpu_features)
#pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu-$cpu_feature/"
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
fi
echo "Recommended install command for llama-cpp-python: $pip_command"
# Handle termination signals
_term() {
@ -40,7 +10,7 @@ _term() {
}
# Install python bindings
eval "$pip_command" || {
pip install llama-cpp-python==0.1.78 || {
echo 'Failed to install llama-cpp-python'
exit 1
}
@ -51,18 +21,10 @@ redis_process=$!
# Start the API
cd /usr/src/app/api || exit 1
hypercorn_cmd="hypercorn src.serge.main:app --bind 0.0.0.0:8008"
if [ "$SERGE_ENABLE_IPV6" = true ] && [ "$SERGE_ENABLE_IPV4" != true ]; then
hypercorn_cmd="hypercorn src.serge.main:app --bind [::]:8008"
elif [ "$SERGE_ENABLE_IPV4" = true ] && [ "$SERGE_ENABLE_IPV6" = true ]; then
hypercorn_cmd="hypercorn src.serge.main:app --bind 0.0.0.0:8008 --bind [::]:8008"
fi
$hypercorn_cmd || {
uvicorn src.serge.main:app --host 0.0.0.0 --port 8008 || {
echo 'Failed to start main app'
exit 1
} &
serge_process=$!
# Set up a signal trap and wait for processes to finish

49
scripts/dev.sh Executable file → Normal file
View File

@ -1,42 +1,6 @@
#!/bin/bash
set -x
source serge.env
# Get CPU Architecture
cpu_arch=$(uname -m)
# Function to detect CPU features
detect_cpu_features() {
cpu_info=$(lscpu)
if echo "$cpu_info" | grep -q "avx512"; then
echo "AVX512"
elif echo "$cpu_info" | grep -q "avx2"; then
echo "AVX2"
elif echo "$cpu_info" | grep -q "avx"; then
echo "AVX"
else
echo "basic"
fi
}
# Check if the CPU architecture is aarch64/arm64
if [ "$cpu_arch" = "aarch64" ] || [ "$cpu_arch" = "arm64" ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
else
# Use @smartappli provided wheels
#cpu_feature=$(detect_cpu_features)
#pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu-$cpu_feature/"
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
fi
echo "Recommended install command for llama-cpp-python: $pip_command"
# Install python vendor dependencies
pip install -r /usr/src/app/requirements.txt || {
echo 'Failed to install python dependencies from requirements.txt'
exit 1
}
# Install python dependencies
pip install -e ./api || {
@ -45,7 +9,7 @@ pip install -e ./api || {
}
# Install python bindings
eval "$pip_command" || {
pip install llama-cpp-python==0.1.78 || {
echo 'Failed to install llama-cpp-python'
exit 1
}
@ -57,18 +21,9 @@ redis-server /etc/redis/redis.conf &
cd /usr/src/app/web || exit 1
npm run dev -- --host 0.0.0.0 --port 8008 &
python -m pip install debugpy -t /tmp
# Start the API
cd /usr/src/app/api || exit 1
hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind 0.0.0.0:9124"
if [ "$SERGE_ENABLE_IPV6" = true ] && [ "$SERGE_ENABLE_IPV4" != true ]; then
hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind [::]:9124"
elif [ "$SERGE_ENABLE_IPV4" = true ] && [ "$SERGE_ENABLE_IPV6" = true ]; then
hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind 0.0.0.0:9124 --bind [::]:9124"
fi
$hypercorn_cmd || {
uvicorn src.serge.main:api_app --reload --host 0.0.0.0 --port 9124 --root-path /api/ || {
echo 'Failed to start main app'
exit 1
}

View File

@ -1,3 +0,0 @@
LLAMA_PYTHON_VERSION=0.2.87
SERGE_ENABLE_IPV4=true
SERGE_ENABLE_IPV6=false

View File

@ -1,3 +0,0 @@
typing-extensions>=4.12.2
numpy>=1.26.0,<2.0.0
diskcache>=5.6.3

2941
web/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -12,41 +12,39 @@
"format": "prettier --write ."
},
"devDependencies": {
"@sveltejs/adapter-auto": "^3.2.2",
"@sveltejs/adapter-node": "^5.2.0",
"@sveltejs/adapter-static": "^3.0.2",
"@sveltejs/kit": "^2.5.20",
"@sveltejs/vite-plugin-svelte": "^3.1.1",
"@types/markdown-it": "^14.1.2",
"@typescript-eslint/eslint-plugin": "^7.17.0",
"@typescript-eslint/parser": "^7.18.0",
"autoprefixer": "^10.4.20",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-import": "^2.29.1",
"eslint-plugin-prettier": "^5.2.1",
"eslint-plugin-svelte": "^2.43.0",
"eslint-plugin-vue": "^9.27.0",
"postcss": "^8.4.40",
"prettier": "3.3.3",
"prettier-plugin-svelte": "^3.2.6",
"svelte": "^4.2.18",
"svelte-check": "^3.8.5",
"tailwindcss": "^3.4.7",
"tslib": "^2.6.3",
"typescript": "^5.5.4",
"vite": "^5.4.1"
"@sveltejs/adapter-auto": "^2.1.0",
"@sveltejs/adapter-node": "^1.3.1",
"@sveltejs/adapter-static": "^2.0.3",
"@sveltejs/kit": "^1.25.0",
"@types/markdown-it": "^13.0.1",
"@typescript-eslint/eslint-plugin": "^6.7.2",
"@typescript-eslint/parser": "^6.7.2",
"autoprefixer": "^10.4.15",
"eslint": "^8.49.0",
"eslint-config-prettier": "^9.0.0",
"eslint-plugin-import": "^2.28.1",
"eslint-plugin-prettier": "^5.0.0",
"eslint-plugin-svelte": "^2.33.1",
"eslint-plugin-vue": "^9.17.0",
"postcss": "^8.4.30",
"prettier": "3.0.3",
"svelte": "^4.2.0",
"svelte-check": "^3.5.1",
"tailwindcss": "^3.3.3",
"tslib": "^2.6.2",
"typescript": "^5.2.2",
"vite": "^4.4.9"
},
"type": "module",
"dependencies": {
"@iconify/svelte": "^4.0.2",
"@iconify/svelte": "^3.1.4",
"@sveltestack/svelte-query": "^1.6.0",
"clipboard": "^2.0.11",
"daisyui": "^4.12.10",
"highlight.js": "^11.10.0",
"ioredis": "^5.4.1",
"markdown-it": "^14.1.0",
"markdown-it-highlightjs": "^4.1.0",
"prettier-plugin-tailwindcss": "^0.6.5"
"daisyui": "^3.7.5",
"highlight.js": "^11.8.0",
"markdown-it": "^13.0.1",
"markdown-it-highlightjs": "^4.0.1",
"prettier-plugin-svelte": "^3.0.3",
"prettier-plugin-tailwindcss": "^0.5.4"
}
}

View File

@ -18,7 +18,7 @@
width: auto;
}
markdown .hljs {
markdown .hljs {
background: hsl(var(--b3)) !important;
}
@ -85,93 +85,3 @@ markdown .hljs {
.ie-edge-no-scrollbar {
-ms-overflow-style: none;
}
/* Models Grid Layout */
.models-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
gap: 25px;
padding-left: 80px;
padding-right: 40px;
padding-top: 40px;
padding-bottom: 10px;
}
/* Model Accordion Styles */
.model-accordion {
border-radius: 8px;
box-shadow: 0 5px 8px rgba(0, 0, 0, 0.1);
overflow: hidden;
background-color: "bg-base-200";
}
.model-accordion button {
width: 100%;
padding: 15px;
text-align: left;
border: none;
outline: none;
transition: background-color 0.3s ease;
cursor: pointer;
}
.model-details {
padding: 10px;
border-top: 1px solid #ddd;
}
.model-details p {
margin: 10px 0;
}
.top-section {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
margin-bottom: 20px;
padding-top: 10px;
}
.search-row {
position: fixed;
top: 5px;
left: 0;
display: flex;
justify-content: space-between;
align-items: center;
width: 100%;
padding-left: 80px;
padding-right: 40px;
padding-bottom: 0px;
}
main {
max-width: 600px;
margin: 0 auto;
padding: 1rem;
}
form {
display: flex;
flex-direction: column;
}
div {
margin-bottom: 1rem;
}
label {
display: block;
margin-bottom: 0.5rem;
}
input {
width: 100%;
padding: 0.5rem;
box-sizing: border-box;
}
button {
padding: 0.5rem 1rem;
}

View File

@ -0,0 +1,57 @@
<script lang="ts">
import { invalidate, invalidateAll } from "$app/navigation";
let dialogTag: HTMLDialogElement;
let isLoading = false;
let link =
"https://raw.githubusercontent.com/serge-chat/serge/main/api/src/serge/data/models.json";
/**
 * Submit the refresh form to the API and, on success, reload the model list
 * and close the dialog.
 *
 * @param e - Submit event whose target is the refresh form.
 */
const handleRefresh = async (e: Event) => {
  isLoading = true;
  try {
    const r = await fetch("/api/model/refresh", {
      method: "POST",
      body: new FormData(e.target as HTMLFormElement),
    });
    if (r.ok) {
      await invalidate("/api/model/all");
      dialogTag.close();
    } else {
      console.error("Error refreshing models");
    }
  } catch (err) {
    // A rejected fetch (e.g. network failure) used to escape as an unhandled
    // rejection and leave the submit button disabled.
    console.error("Error refreshing models", err);
  } finally {
    isLoading = false;
  }
};
</script>
<button class="btn-outline btn" on:click={() => dialogTag.showModal()}
>Refresh Models</button
>
<dialog bind:this={dialogTag} class="modal">
<form method="dialog" class="modal-box">
<button class="btn-ghost btn-sm btn-circle btn absolute right-2 top-2"
>✕</button
>
<form on:submit|preventDefault={handleRefresh}>
<h3 class="text-lg font-bold">Model refresh</h3>
<p class="py-4">
Enter the URL of the JSON file containing the models below
</p>
<input
type="text"
name="url"
class="input-bordered input-primary input mb-4 w-full"
bind:value={link}
/>
<div class="modal-action">
<!-- if there is a button in form, it will close the modal -->
<button type="submit" class="btn" disabled={isLoading}>
{#if isLoading}
<span class="loading loading-spinner" />
{/if}
Refresh
</button>
</div>
</form>
</form>
</dialog>

View File

@ -2,6 +2,8 @@ import { writable, type Writable } from "svelte/store";
const themeStore = writable("dark");
const barVisible = writable(true);
const newChat: Writable<object | null> = writable(null);
export { newChat, themeStore };
export { barVisible, newChat, themeStore };

View File

@ -1,52 +1,30 @@
<script lang="ts">
import "../app.css";
import type { PageData } from "./$types";
import type { LayoutData } from "./$types";
import { invalidate, goto } from "$app/navigation";
import { onMount, onDestroy } from "svelte";
import { page } from "$app/stores";
import { newChat, themeStore } from "$lib/stores.js";
import { barVisible, newChat, themeStore } from "$lib/stores.js";
import { fly } from "svelte/transition";
export let data: PageData;
export let data: LayoutData;
export let isSidebarOpen: boolean = true;
let models;
let modelAvailable: boolean;
const isLoading = false;
let deleteConfirm = false;
let deleteAllConfirm = false;
let theme: string;
let bar_visible: boolean;
let dataCht: Response | any = null;
const unsubscribe = newChat.subscribe((value) => (dataCht = value));
function toggleSidebar(): void {
isSidebarOpen = !isSidebarOpen;
}
function hideSidebar(): void {
isSidebarOpen = false;
}
const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
const unsubscribe1 = newChat.subscribe((value) => (dataCht = value));
onMount(() => {
bar_visible = window.innerWidth > 768;
barVisible.set(bar_visible);
theme = localStorage.getItem("data-theme") || "dark";
document.documentElement.setAttribute("data-theme", theme);
});
$: if (data && data.models) {
models = data.models.filter((el) => el.available);
modelAvailable = models.length > 0;
} else {
models = [];
modelAvailable = false;
}
$: id = $page.params.id || "";
async function goToHome() {
await goto("/");
}
async function deleteChat(chatID: string) {
const response = await fetch("/api/chat/" + chatID, { method: "DELETE" });
if (response.status === 200) {
@ -115,147 +93,159 @@
localStorage.setItem("data-theme", $themeStore);
}
/**
 * Invert the sidebar visibility flag and publish the new value to the
 * shared `barVisible` store.
 */
function toggleBar() {
  const nextVisible = !bar_visible;
  bar_visible = nextVisible;
  barVisible.set(nextVisible);
}
/**
 * Create a new chat that reuses the settings of the chat currently held in
 * `dataCht`, then navigate to it. Does nothing when no chat is loaded.
 *
 * The query string is serialised with URLSearchParams so that characters
 * such as `&`, `#`, `+` or newlines inside the initial prompt (or the model
 * path) are escaped instead of truncating or corrupting the request.
 */
async function createSameSession() {
  if (!dataCht) {
    return;
  }

  const { params, history } = dataCht;
  const query = new URLSearchParams({
    model: params.model_path,
    temperature: params.temperature,
    top_k: params.top_k,
    top_p: params.top_p,
    max_length: params.max_tokens,
    context_window: params.n_ctx,
    repeat_last_n: params.last_n_tokens_size,
    repeat_penalty: params.repeat_penalty,
    // The first history entry carries the prompt the chat was started with.
    init_prompt: history[0].data.content,
    gpu_layers: params.n_gpu_layers,
  });

  const response = await fetch(`/api/chat/?${query.toString()}`, {
    method: "POST",
    headers: {
      accept: "application/json",
    },
  });
  const newData = await response.json();

  // Refresh the chat list before moving to the newly created chat.
  await invalidate("/api/chat/");
  await goto("/chat/" + newData);
}
// Release both store subscriptions when the component is destroyed. The
// functions returned by `subscribe` have to be invoked; merely referencing
// them (as a bare expression statement) leaves the subscriptions active.
onDestroy(() => {
  unsubscribe();
  unsubscribe1();
});
</script>
<button
on:click={toggleSidebar}
class="border-base-content/[.2] btn btn-square z-10 my-1 mx-2 fixed border"
>
<svg
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 24 24"
class="inline-block w-5 h-5 stroke-current"
><path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M4 6h16M4 12h16M4 18h16"
></path></svg
>
</button>
<aside
class="border-base-content/[.2] fixed top-0 z-40 min-h-full border-r transition-all overflow-hidden aria-label=Sidebar"
class:left-0={isSidebarOpen}
class:-left-80={!isSidebarOpen}
id="default-sidebar"
class={"border-base-content/[.2] fixed left-0 top-0 z-40 h-screen w-80 -translate-x-full border-r transition-transform overflow-hidden" +
(bar_visible ? " translate-x-0" : "")}
aria-label="Sidebar"
>
<div
class="bg-base-200 relative h-screen py-1 px-2 overflow-hidden flex flex-col items-center justify-between"
>
<div class="w-full flex items-center pb-1">
<div
class="w-full flex items-center justify-between border-b border-base-content/[.2] pb-1"
>
<button
on:click={toggleSidebar}
class="border-base-content/[.2] btn btn-square border"
>
<svg
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 24 24"
class="inline-block w-5 h-5 stroke-current"
><path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M4 6h16M4 12h16M4 18h16"
></path></svg
>
</button>
<button
disabled={isLoading || !modelAvailable}
class="btn btn-ghost flex-grow h-6 font-semibold text-left text-sm capitalize"
disabled={isLoading}
class="btn btn-ghost h-6 w-4/5 justify-between font-semibold text-left text-sm capitalize"
class:loading={isLoading}
on:click|preventDefault={() => goto("/")}
style="justify-content: flex-start;"
on:click|preventDefault={() => createSameSession()}
>
<span>New chat</span>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
fill="currentColor"
class="w-4 h-4 mr-2"
class="w-4 h-4"
>
<path
d="M1 2.75C1 1.784 1.784 1 2.75 1h10.5c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 13.25 12H9.06l-2.573 2.573A1.458 1.458 0 0 1 4 13.543V12H2.75A1.75 1.75 0 0 1 1 10.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h4.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"
>
</path>
</svg>
<span>New Chat</span>
</button>
<button class="btn btn-ghost flex-shrink-0" on:click={goToHome}>
<button
class="btn btn-ghost flex h-6 w-1/6 items-center justify-center font-semibold z-40"
on:click={toggleBar}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
fill="currentColor"
class="w-5 h-5"
class="w-6 h-6"
>
<path d="M10 20v-6h4v6h5v-8h3L12 3 2 12h3v8z" />
<path
d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
>
</path>
<path
d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
>
</path>
</svg>
<span class="sr-only">Home</span>
</button>
</div>
<ul
class="my-1 w-full flex-grow overflow-y-auto no-scrollbar firefox-no-scrollbar ie-edge-no-scrollbar"
class="my-1 w-full h-[85%] overflow-y-auto no-scrollbar firefox-no-scrollbar ie-edge-no-scrollbar"
>
{#if data && Symbol.iterator in Object(data.chats)}
{#each data.chats as chat (chat.id)}
<li in:fly={{ x: -100, duration: 900 }}>
<a
href={"/chat/" + chat.id}
class="group hover:from-base-100 hover:text-base-content flex items-center rounded-lg py-2 pl-2 text-base font-normal hover:bg-gradient-to-r hover:to-transparent"
class:bg-base-300={id === chat.id}
>
<div
class="flex w-full flex-col space-y-2 p-2 border-b border-gray-200 relative"
>
{#each data.chats as chat (chat.id)}
<li in:fly={{ x: -100, duration: 900 }}>
<a
href={"/chat/" + chat.id}
class="group hover:from-base-100 hover:text-base-content flex items-center rounded-lg py-2 pl-2 text-base font-normal hover:bg-gradient-to-r hover:to-transparent"
class:bg-base-300={id === chat.id}
>
<div class="flex w-full flex-col">
<div class="flex w-full flex-col items-start justify-start">
<div
class="flex w-full flex-col items-start justify-start space-y-1"
class="relative flex w-full flex-row items-center justify-between"
>
<div
class="flex w-full flex-row items-center justify-between"
>
<div class="flex flex-col space-y-1.5">
<p class="text-sm font-light max-w-[25ch] break-words">
{truncate(chat.subtitle, 100)}
</p>
<span
class="text-xs font-semibold max-w-[25ch] break-words"
>{chat.model}</span
>
<span class="text-xs"
>{timeSince(chat.created) + " ago"}</span
>
</div>
<div class="flex flex-col">
<p class="text-sm font-light">
{truncate(chat.subtitle, 42)}
</p>
<span class="text-xs font-semibold">{chat.model}</span>
<span class="text-xs"
>{timeSince(chat.created) + " ago"}</span
>
</div>
</div>
<div
class="absolute bottom-1.5 right-2 opacity-0 group-hover:opacity-100 transition-opacity duration-300"
>
{#if deleteConfirm}
<div class="flex flex-row items-center space-x-2">
<button
name="confirm-delete"
class="btn btn-sm btn"
on:click|preventDefault={() => deleteChat(chat.id)}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
<div
class="absolute right-0 opacity-0 group-hover:opacity-100 transition"
>
<!-- {#if $page.params.id === chat.id} -->
{#if deleteConfirm}
<div class="flex flex-row items-center">
<button
name="confirm-delete"
class="btn-ghost btn-sm btn"
on:click|preventDefault={() => deleteChat(chat.id)}
>
<path
class="fill-base-content"
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
/>
</svg>
</button>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
/>
</svg>
</button>
<button
name="cancel-delete"
class="btn-ghost btn-sm btn"
on:click|preventDefault={toggleDeleteConfirm}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
/>
</svg>
</button>
</div>
{:else}
<button
name="cancel-delete"
class="btn btn-sm btn"
class="btn-ghost btn-sm btn"
on:click|preventDefault={toggleDeleteConfirm}
>
<svg
@ -266,189 +256,26 @@
>
<path
class="fill-base-content"
d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
/>
</svg>
</button>
</div>
{:else}
<button
class="btn btn-sm btn"
on:click|preventDefault={toggleDeleteConfirm}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
/>
</svg>
</button>
{/if}
{/if}
<!-- {/if} -->
</div>
</div>
</div>
</a>
</li>
{/each}
{/if}
</div>
</a>
</li>
{/each}
</ul>
<div class="w-full border-t border-base-content/[.2] pt-1">
{#if data.userData?.username === "system"}
{#if deleteAllConfirm}
<button
name="login-btn"
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
on:click={() => goto("/login")}
class="btn btn-ghost w-full flex flex-row justify-between items-center p-2.5 text-left text-sm capitalize"
>
<svg
xmlns="http://www.w3.org/2000/svg"
width="18"
height="18"
fill="currentColor"
class="mr-3"
viewBox="0 0 16 16"
>
<path
d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m1.679-4.493-1.335 2.226a.75.75 0 0 1-1.174.144l-.774-.773a.5.5 0 0 1 .708-.708l.547.548 1.17-1.951a.5.5 0 1 1 .858.514M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
/>
<path
d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
/>
</svg>
<span>Login</span>
</button>
<button
name="create-btn"
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
on:click={() => goto("/signup")}
>
<svg
xmlns="http://www.w3.org/2000/svg"
width="18"
height="18"
fill="currentColor"
class="mr-3"
viewBox="0 0 16 16"
>
<path
d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m.5-5v1h1a.5.5 0 0 1 0 1h-1v1a.5.5 0 0 1-1 0v-1h-1a.5.5 0 0 1 0-1h1v-1a.5.5 0 0 1 1 0m-2-6a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
/>
<path
d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
/>
</svg>
<span>Create Account</span>
</button>
{:else}
<button
name="logout-btn"
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
on:click={async () => {
const response = await fetch("/api/auth/logout", {
method: "POST",
});
data.userData = null;
window.location.href = "/";
}}
>
<svg
xmlns="http://www.w3.org/2000/svg"
width="18"
height="18"
fill="currentColor"
class="mr-3"
viewBox="0 0 16 16"
>
<path
d="M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4m0 5.996V14H3s-1 0-1-1 1-4 6-4q.845.002 1.544.107a4.5 4.5 0 0 0-.803.918A11 11 0 0 0 8 10c-2.29 0-3.516.68-4.168 1.332-.678.678-.83 1.418-.832 1.664zM9 13a1 1 0 0 1 1-1v-1a2 2 0 1 1 4 0v1a1 1 0 0 1 1 1v2a1 1 0 0 1-1 1h-4a1 1 0 0 1-1-1zm3-3a1 1 0 0 0-1 1v1h2v-1a1 1 0 0 0-1-1"
/>
</svg>
<span>Log Out</span>
</button>
<a
href="/account"
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="18"
height="18"
fill="currentColor"
class="mr-3"
>
<path
d="M8 0a8.2 8.2 0 0 1 .701.031C9.444.095 9.99.645 10.16 1.29l.288 1.107c.018.066.079.158.212.224.231.114.454.243.668.386.123.082.233.09.299.071l1.103-.303c.644-.176 1.392.021 1.82.63.27.385.506.792.704 1.218.315.675.111 1.422-.364 1.891l-.814.806c-.049.048-.098.147-.088.294.016.257.016.515 0 .772-.01.147.038.246.088.294l.814.806c.475.469.679 1.216.364 1.891a7.977 7.977 0 0 1-.704 1.217c-.428.61-1.176.807-1.82.63l-1.102-.302c-.067-.019-.177-.011-.3.071a5.909 5.909 0 0 1-.668.386c-.133.066-.194.158-.211.224l-.29 1.106c-.168.646-.715 1.196-1.458 1.26a8.006 8.006 0 0 1-1.402 0c-.743-.064-1.289-.614-1.458-1.26l-.289-1.106c-.018-.066-.079-.158-.212-.224a5.738 5.738 0 0 1-.668-.386c-.123-.082-.233-.09-.299-.071l-1.103.303c-.644.176-1.392-.021-1.82-.63a8.12 8.12 0 0 1-.704-1.218c-.315-.675-.111-1.422.363-1.891l.815-.806c.05-.048.098-.147.088-.294a6.214 6.214 0 0 1 0-.772c.01-.147-.038-.246-.088-.294l-.815-.806C.635 6.045.431 5.298.746 4.623a7.92 7.92 0 0 1 .704-1.217c.428-.61 1.176-.807 1.82-.63l1.102.302c.067.019.177.011.3-.071.214-.143.437-.272.668-.386.133-.066.194-.158.211-.224l.29-1.106C6.009.645 6.556.095 7.299.03 7.53.01 7.764 0 8 0Zm-.571 1.525c-.036.003-.108.036-.137.146l-.289 1.105c-.147.561-.549.967-.998 1.189-.173.086-.34.183-.5.29-.417.278-.97.423-1.529.27l-1.103-.303c-.109-.03-.175.016-.195.045-.22.312-.412.644-.573.99-.014.031-.021.11.059.19l.815.806c.411.406.562.957.53 1.456a4.709 4.709 0 0 0 0 .582c.032.499-.119 1.05-.53 1.456l-.815.806c-.081.08-.073.159-.059.19.162.346.353.677.573.989.02.03.085.076.195.046l1.102-.303c.56-.153 1.113-.008 1.53.27.161.107.328.204.501.29.447.222.85.629.997 1.189l.289 1.105c.029.109.101.143.137.146a6.6 6.6 0 0 0 1.142 0c.036-.003.108-.036.137-.146l.289-1.105c.147-.561.549-.967.998-1.189.173-.086.34-.183.5-.29.417-.278.97-.423 1.529-.27l1.103.303c.109.029.175-.016.195-.045.22-.313.411-.644.573-.99.014-.031.021-.11-.059-.19l-.815-.806c-.411-.406-.562-.957-.53-1.456a4.709 4.709 0 0 0 
0-.582c-.032-.499.119-1.05.53-1.456l.815-.806c.081-.08.073-.159.059-.19a6.464 6.464 0 0 0-.573-.989c-.02-.03-.085-.076-.195-.046l-1.102.303c-.56.153-1.113.008-1.53-.27a4.44 4.44 0 0 0-.501-.29c-.447-.222-.85-.629-.997-1.189l-.289-1.105c-.029-.11-.101-.143-.137-.146a6.6 6.6 0 0 0-1.142 0ZM11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0ZM9.5 8a1.5 1.5 0 1 0-3.001.001A1.5 1.5 0 0 0 9.5 8Z"
>
</path>
</svg>
<span>Settings</span>
</a>
{#if deleteAllConfirm}
<button
class="btn btn-ghost w-full flex flex-row justify-between items-center p-2.5 text-left text-sm capitalize"
>
<div class="h-6 flex flex-row items-center">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="18"
height="18"
fill="currentColor"
class="mr-3"
>
<path
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
>
</path>
</svg>
<span>Clear Chats</span>
</div>
<div class="h-6 flex flex-row items-center">
<button
name="confirm-delete"
class="btn-ghost btn-sm btn"
on:click|preventDefault={() => deleteAllChat()}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
/>
</svg>
</button>
<button
name="cancel-delete"
class="btn-ghost btn-sm btn"
on:click|preventDefault={toggleDeleteAllConfirm}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
/>
</svg>
</button>
</div>
</button>
{:else}
<button
on:click|preventDefault={toggleDeleteAllConfirm}
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
>
<div class="h-6 flex flex-row items-center">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
@ -463,14 +290,71 @@
</path>
</svg>
<span>Clear Chats</span>
</button>
{/if}
</div>
<div class="h-6 flex flex-row items-center">
<button
name="confirm-delete"
class="btn-ghost btn-sm btn"
on:click|preventDefault={() => deleteAllChat()}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
/>
</svg>
</button>
<button
name="cancel-delete"
class="btn-ghost btn-sm btn"
on:click|preventDefault={toggleDeleteAllConfirm}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
/>
</svg>
</button>
</div>
</button>
{:else}
<button
on:click|preventDefault={toggleDeleteAllConfirm}
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="18"
height="18"
fill="currentColor"
class="mr-3"
>
<path
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
>
</path>
</svg>
<span>Clear Chats</span>
</button>
{/if}
<button
on:click={toggleTheme}
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
>
<label class="swap swap-rotate" for="theme-toggle">
<label class="swap swap-rotate">
<input type="checkbox" />
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
@ -500,12 +384,31 @@
</label>
<span>{theme == "dark" ? "Light" : "Dark"} theme</span>
</button>
<a
href="/"
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="18"
height="18"
fill="currentColor"
class="mr-3"
>
<path
d="M8 0a8.2 8.2 0 0 1 .701.031C9.444.095 9.99.645 10.16 1.29l.288 1.107c.018.066.079.158.212.224.231.114.454.243.668.386.123.082.233.09.299.071l1.103-.303c.644-.176 1.392.021 1.82.63.27.385.506.792.704 1.218.315.675.111 1.422-.364 1.891l-.814.806c-.049.048-.098.147-.088.294.016.257.016.515 0 .772-.01.147.038.246.088.294l.814.806c.475.469.679 1.216.364 1.891a7.977 7.977 0 0 1-.704 1.217c-.428.61-1.176.807-1.82.63l-1.102-.302c-.067-.019-.177-.011-.3.071a5.909 5.909 0 0 1-.668.386c-.133.066-.194.158-.211.224l-.29 1.106c-.168.646-.715 1.196-1.458 1.26a8.006 8.006 0 0 1-1.402 0c-.743-.064-1.289-.614-1.458-1.26l-.289-1.106c-.018-.066-.079-.158-.212-.224a5.738 5.738 0 0 1-.668-.386c-.123-.082-.233-.09-.299-.071l-1.103.303c-.644.176-1.392-.021-1.82-.63a8.12 8.12 0 0 1-.704-1.218c-.315-.675-.111-1.422.363-1.891l.815-.806c.05-.048.098-.147.088-.294a6.214 6.214 0 0 1 0-.772c.01-.147-.038-.246-.088-.294l-.815-.806C.635 6.045.431 5.298.746 4.623a7.92 7.92 0 0 1 .704-1.217c.428-.61 1.176-.807 1.82-.63l1.102.302c.067.019.177.011.3-.071.214-.143.437-.272.668-.386.133-.066.194-.158.211-.224l.29-1.106C6.009.645 6.556.095 7.299.03 7.53.01 7.764 0 8 0Zm-.571 1.525c-.036.003-.108.036-.137.146l-.289 1.105c-.147.561-.549.967-.998 1.189-.173.086-.34.183-.5.29-.417.278-.97.423-1.529.27l-1.103-.303c-.109-.03-.175.016-.195.045-.22.312-.412.644-.573.99-.014.031-.021.11.059.19l.815.806c.411.406.562.957.53 1.456a4.709 4.709 0 0 0 0 .582c.032.499-.119 1.05-.53 1.456l-.815.806c-.081.08-.073.159-.059.19.162.346.353.677.573.989.02.03.085.076.195.046l1.102-.303c.56-.153 1.113-.008 1.53.27.161.107.328.204.501.29.447.222.85.629.997 1.189l.289 1.105c.029.109.101.143.137.146a6.6 6.6 0 0 0 1.142 0c.036-.003.108-.036.137-.146l.289-1.105c.147-.561.549-.967.998-1.189.173-.086.34-.183.5-.29.417-.278.97-.423 1.529-.27l1.103.303c.109.029.175-.016.195-.045.22-.313.411-.644.573-.99.014-.031.021-.11-.059-.19l-.815-.806c-.411-.406-.562-.957-.53-1.456a4.709 4.709 0 0 0 
0-.582c-.032-.499.119-1.05.53-1.456l.815-.806c.081-.08.073-.159.059-.19a6.464 6.464 0 0 0-.573-.989c-.02-.03-.085-.076-.195-.046l-1.102.303c-.56.153-1.113.008-1.53-.27a4.44 4.44 0 0 0-.501-.29c-.447-.222-.85-.629-.997-1.189l-.289-1.105c-.029-.11-.101-.143-.137-.146a6.6 6.6 0 0 0-1.142 0ZM11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0ZM9.5 8a1.5 1.5 0 1 0-3.001.001A1.5 1.5 0 0 0 9.5 8Z"
>
</path>
</svg>
<span>Settings</span>
</a>
</div>
</div>
</aside>
<button on:click={hideSidebar} type="button"></button>
<div id="main_content" class="h-full w-full">
<div
class={"relative h-full transition-all" + (bar_visible ? " md:ml-80" : "")}
>
<slot />
</div>

View File

@ -1,47 +1,16 @@
import type { LayoutLoad } from "./$types";
interface ChatMetadata {
interface t {
id: string;
created: string;
model: string;
subtitle: string;
}
export const ssr = false; // off for now because ssr with auth is broken
/** One entry of the array parsed from the `/api/model/all` response. */
export interface ModelStatus {
name: string;
// NOTE(review): unit is not visible here (presumably bytes) — confirm against the API.
size: number;
// Consumers in this project keep only entries where this flag is true.
available: boolean;
// NOTE(review): optional; presumably a download progress value — confirm semantics and range.
progress?: number;
}
/** User record parsed from a successful `/api/user/` response. */
export interface User {
id: string;
username: string;
email: string;
// Preferred UI theme; restricted to the two literal values below.
pref_theme: "light" | "dark";
full_name: string;
// Prompt text the user wants new chats to start from.
default_prompt: string;
}
export const load: LayoutLoad = async ({ fetch }) => {
let userData: User | null = null;
const api_chat = await fetch("/api/chat/");
const chats = (await api_chat.json()) as ChatMetadata[];
const model_api = await fetch("/api/model/all");
const models = (await model_api.json()) as ModelStatus[];
const userData_api = await fetch("/api/user/");
if (userData_api.ok) {
userData = (await userData_api.json()) as User;
}
const r = await fetch("/api/chat/");
const chats = (await r.json()) as t[];
return {
chats,
models,
userData,
};
};

View File

@ -1,12 +1,16 @@
<script lang="ts">
import type { PageData } from "./$types";
import { goto, invalidate } from "$app/navigation";
import { barVisible } from "$lib/stores";
import { onDestroy } from "svelte";
export let data: PageData;
const models = data.models.filter((el) => el.available);
const modelAvailable = models.length > 0;
const modelsLabels = models.map((el) => el.name);
let bar_visible: boolean;
const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
let temp = 0.1;
let top_k = 50;
@ -17,10 +21,8 @@
let repeat_penalty = 1.3;
let init_prompt =
data.userData?.default_prompt ??
"Below is an instruction that describes a task. Write a response that appropriately completes the request.";
let n_threads = 4;
let context_window = 2048;
let gpu_layers = 0;
@ -45,21 +47,43 @@
await invalidate("/api/chat/");
}
}
/**
 * Switch the sidebar to the opposite visibility state and push that state
 * into the shared `barVisible` store.
 */
function toggleBar() {
  const flipped = !bar_visible;
  bar_visible = flipped;
  barVisible.set(flipped);
}
onDestroy(unsubscribe);
</script>
{#if !bar_visible}
<button
class="absolute p-0 top-1 left-2 md:left-16 h-10 w-10 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
on:click={toggleBar}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
fill="currentColor"
class="w-4 h-4"
>
<path
d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
>
</path>
<path
d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
>
</path>
</svg>
</button>
{/if}
<div class="flex flex-col items-center justify-center pt-5">
<h1 class="pb-2 text-3xl font-bold">Say Hi to Serge</h1>
</div>
<h1 class="pb-5 pt-2 text-center text-xl font-light">
An easy way to chat with LLaMA based models.
An easy way to chat with Alpaca & other LLaMA based models.
</h1>
<form
on:submit|preventDefault={onCreateChat}
id="form-create-chat"
class="p-5"
aria-label="Model Settings"
>
<form on:submit|preventDefault={onCreateChat} id="form-create-chat" class="p-5">
<div class="w-full pb-20">
<div class="mx-auto w-fit pt-5 flex flex-col lg:flex-row justify-center">
<button
@ -74,181 +98,161 @@
>
</div>
</div>
<div class="flex justify-center">
<div class="grid grid-cols-3 gap-4 p-3 bg-base-200" id="model_settings">
<div class="col-span-3 text-xl font-medium">Model settings</div>
<div
class="tooltip tooltip-bottom col-span-2"
data-tip="Controls how random the generated text is. Higher temperatures lead to more random and creative text, while lower temperatures lead to more predictable and conservative text."
>
<label for="temperature" class="label-text"
>Temperature - [{temp}]</label
<div tabindex="-1" class="collapse-arrow rounded-box collapse bg-base-200">
<input type="checkbox" />
<div class="collapse-title text-xl font-medium">Model settings</div>
<div class="collapse-content">
<div class="grid grid-cols-3 gap-4 p-3">
<div
class="tooltip tooltip-bottom col-span-2"
data-tip="The higher the temperature, the more random the model output."
>
<input
id="temperature"
name="temperature"
type="range"
bind:value={temp}
min="0.05"
max="2"
step="0.05"
class="range range-sm mt-auto"
/>
</div>
<div
class="tooltip tooltip-bottom flex flex-col"
data-tip="Controls the number of tokens that are considered when generating the next token. Higher values of top_k lead to more predictable text, while lower values of top_k lead to more creative text."
>
<label for="top_k" class="label-text pb-1">top_k</label>
<input
id="top_k"
class="input-bordered input w-full"
name="top_k"
type="number"
bind:value={top_k}
min="0"
max="100"
/>
</div>
<div
class="tooltip tooltip-bottom col-span-2"
data-tip="The maximum number of tokens that the model will generate. This parameter can be used to control the length of the generated text."
>
<label for="max_length" class="label-text"
>Maximum generated tokens - [{max_length}]</label
<label for="temperature" class="label-text"
>Temperature - [{temp}]</label
>
<input
name="temperature"
type="range"
bind:value={temp}
min="0.05"
max="2"
step="0.05"
class="range range-sm mt-auto"
/>
</div>
<div
class="tooltip tooltip-bottom flex flex-col"
data-tip="The number of samples to consider for top_k sampling."
>
<input
id="max_length"
name="max_length"
type="range"
bind:value={max_length}
min="32"
max="32768"
step="16"
class="range range-sm mt-auto"
/>
</div>
<div
class="tooltip flex flex-col"
data-tip="Controls the diversity of the generated text. Higher values of top_p lead to more diverse text, while lower values of top_p lead to less diverse text."
>
<label for="top_p" class="label-text pb-1">top_p</label>
<input
class="input-bordered input w-full"
id="top_p"
name="top_p"
type="number"
bind:value={top_p}
min="0"
max="1"
step="0.025"
/>
</div>
<div
class="tooltip col-span-2"
data-tip="The number of previous tokens that are considered when generating the next token. A longer context length can help the model to generate more coherent and informative text."
>
<label for="context_window" class="label-text"
>Context Length - [{context_window}]</label
<label for="top_k" class="label-text pb-1">top_k</label>
<input
class="input-bordered input w-full max-w-xs"
name="top_k"
type="number"
bind:value={top_k}
min="0"
max="100"
/>
</div>
<div
class="tooltip tooltip-bottom col-span-2"
data-tip="Max text generated token"
>
<input
id="context_window"
name="context_window"
type="range"
bind:value={context_window}
min="16"
max="2048"
step="16"
class="range range-sm mt-auto"
/>
</div>
<div
class="tooltip col-span-2"
data-tip="Number of layers to put on the GPU. The rest will be on the CPU."
>
<label for="gpu_layers" class="label-text"
>GPU Layers - [{gpu_layers}]</label
<label for="max_length" class="label-text"
>Maximum generated tokens - [{max_length}]</label
>
<input
name="max_length"
type="range"
bind:value={max_length}
min="32"
max="32768"
step="16"
class="range range-sm mt-auto"
/>
</div>
<div
class="tooltip flex flex-col"
data-tip="The cumulative probability of the tokens to keep for nucleus sampling."
>
<input
id="gpu_layers"
name="gpu_layers"
type="range"
bind:value={gpu_layers}
min="0"
max="100"
step="1"
class="range range-sm mt-auto"
/>
</div>
<div
class="tooltip flex flex-col"
data-tip="Defines the penalty associated with repeating the last 'n' tokens in a generated text sequence."
>
<label for="repeat_last_n" class="label-text pb-1">repeat_last_n</label>
<input
id="repeat_last_n"
class="input-bordered input w-full"
name="repeat_last_n"
type="number"
bind:value={repeat_last_n}
min="0"
max="100"
/>
</div>
<div class="flex flex-col">
<label for="model" class="label-text pb-1"> Model choice</label>
<select
name="model"
id="models"
class="select-bordered select w-full"
aria-haspopup="menu"
<label for="top_p" class="label-text pb-1">top_p</label>
<input
class="input-bordered input w-full max-w-xs"
name="top_p"
type="number"
bind:value={top_p}
min="0"
max="1"
step="0.025"
/>
</div>
<div
class="tooltip col-span-2"
data-tip="Size of the prompt context. Will determine how far the model will read back. Increases memory consumption."
>
{#each modelsLabels as model}
<option id={model} value={model}>{model}</option>
{/each}
</select>
</div>
<div
class="tooltip flex flex-col"
data-tip="Number of threads to run LLaMA on."
>
<label for="n_threads" class="label-text pb-1">n_threads</label>
<input
id="n_threads"
class="input-bordered input w-full"
name="n_threads"
type="number"
bind:value={n_threads}
min="0"
max="64"
/>
</div>
<div
class="tooltip flex flex-col"
data-tip="Defines the penalty assigned to the model when it repeats certain tokens or patterns in the generated text."
>
<label for="repeat_penalty" class="label-text pb-1">
repeat_penalty
</label>
<input
id="repeat_penalty"
class="input-bordered input w-full"
name="repeat_penalty"
type="number"
bind:value={repeat_penalty}
min="0"
max="2"
step="0.05"
/>
</div>
<div class="col-span-3 flex flex-col">
<label for="init_prompt" class="label-text pb-1">Prompt Template</label>
<textarea
class="textarea-bordered textarea h-24 w-full"
name="init_prompt"
bind:value={init_prompt}
placeholder="Enter your prompt here"
/>
<label for="context_window" class="label-text"
>Context Length - [{context_window}]</label
>
<input
name="context_window"
type="range"
bind:value={context_window}
min="16"
max="2048"
step="16"
class="range range-sm mt-auto"
/>
</div>
<div
class="tooltip col-span-2"
data-tip="Number of layers to put on the GPU. The rest will be on the CPU."
>
<label for="gpu_layers" class="label-text"
>GPU Layers - [{gpu_layers}]</label
>
<input
name="gpu_layers"
type="range"
bind:value={gpu_layers}
min="0"
max="100"
step="1"
class="range range-sm mt-auto"
/>
</div>
<div
class="tooltip flex flex-col"
data-tip="Number of tokens to look back on for deciding to apply the repeat penalty."
>
<label for="repeat_last_n" class="label-text pb-1"
>repeat_last_n</label
>
<input
class="input-bordered input w-full max-w-xs"
name="repeat_last_n"
type="number"
bind:value={repeat_last_n}
min="0"
max="100"
/>
</div>
<div class="flex flex-col">
<label for="model" class="label-text pb-1"> Model choice</label>
<select name="model" class="select-bordered select w-full max-w-xs">
{#each modelsLabels as model}
<option value={model}>{model}</option>
{/each}
</select>
</div>
<div
class="tooltip flex flex-col"
data-tip="The weight of the penalty to avoid repeating the last repeat_last_n tokens."
>
<label for="repeat_penalty" class="label-text pb-1">
repeat_penalty
</label>
<input
class="input-bordered input w-full max-w-xs"
name="repeat_penalty"
type="number"
bind:value={repeat_penalty}
min="0"
max="2"
step="0.05"
/>
</div>
<div class="col-span-3 flex flex-col">
<label for="init_prompt" class="label-text pb-1"
>Pre-Prompt for initializing a conversation.</label
>
<textarea
class="textarea-bordered textarea h-24 w-full"
name="init_prompt"
bind:value={init_prompt}
placeholder="Enter your prompt here"
/>
</div>
</div>
</div>
</div>

View File

@ -1,6 +1,6 @@
import type { PageLoad } from "./$types";
export interface ModelStatus {
interface ModelStatus {
name: string;
size: number;
available: boolean;
@ -8,8 +8,8 @@ export interface ModelStatus {
}
export const load: PageLoad = async ({ fetch }) => {
const api_model = await fetch("/api/model/all");
const models = (await api_model.json()) as ModelStatus[];
const r = await fetch("/api/model/all");
const models = (await r.json()) as ModelStatus[];
return {
models,
};

View File

@ -1,106 +0,0 @@
<script context="module" lang="ts">
export { load } from "./+page";
</script>
<script lang="ts">
import { writable } from "svelte/store";
import { goto } from "$app/navigation";
export let data: {
user: {
id: string;
username: string;
email: string;
full_name: string;
pref_theme: "light" | "dark";
default_prompt: string;
} | null;
};
let user = data.user;
let id: string = user?.id ?? "";
let username: string = user?.username ?? "";
let email: string = user?.email ?? "";
let full_name: string = user?.full_name ?? "";
let pref_theme: "light" | "dark" = user?.pref_theme ?? "light";
let default_prompt: string = user?.default_prompt ?? "";
let status = writable<string | null>(null);
/**
 * Form submit handler: sends the edited preferences to the API with a PUT
 * request and reports the outcome through the `status` store.
 *
 * On success the status message is set and the app navigates to "/" with all
 * load data invalidated. On any failure (network error or non-2xx response)
 * the status store receives an error message and no navigation happens.
 *
 * @param event - The form submit event; its default action is prevented.
 */
async function handleSubmit(event: Event) {
  event.preventDefault();

  try {
    const response = await fetch("/api/user/", {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id,
        username,
        email,
        full_name,
        pref_theme,
        default_prompt,
      }),
    });

    // fetch() only rejects on network failures; an HTTP error status still
    // resolves, so it has to be turned into an error explicitly.
    if (!response.ok) {
      throw new Error(`Failed to update preferences (HTTP ${response.status})`);
    }

    status.set("Preferences updated successfully");
    goto("/", { invalidateAll: true });
  } catch (error) {
    status.set(
      error instanceof Error ? error.message : "Failed to update preferences",
    );
  }
}
</script>
<main>
<div class="card-group">
<div class="card">
<div class="card-title p-3 text-3xl justify-center font-bold">
User Preferences
</div>
<div class="card-body">
{#if user}
<form on:submit={handleSubmit}>
<div class="input-group">
<div class="input-group-prepend">
<span class="input-group-text">Username</span>
</div>
<input type="text" bind:value={username} disabled />
</div>
<div class="input-group">
<div class="input-group-prepend">
<span class="input-group-text">Full Name</span>
</div>
<input id="full_name" type="text" bind:value={full_name} />
</div>
<div class="input-group">
<div class="input-group-prepend">
<span class="input-group-text">Email</span>
</div>
<input id="email" type="email" bind:value={email} />
</div>
<div class="input-group">
<div class="input-group-prepend">
<span class="input-group-text">Default Prompt</span>
</div>
<textarea
id="default_prompt"
bind:value={default_prompt}
style="resize:both; width:100%;"
/>
</div>
{#if $status}
<p>{$status}</p>
{/if}
<button class="btn" type="submit">Save Preferences</button>
</form>
{:else}
<p>Loading...</p>
{/if}
</div>
</div>
</div>
</main>

View File

@ -1,27 +0,0 @@
import type { Load } from "@sveltejs/kit";
/**
 * Shape of a user account record for this route.
 * NOTE(review): presumably mirrors the JSON returned by GET /api/user/ —
 * confirm against the API; the visible `load` function does not reference
 * this type.
 */
interface User {
id: string;
username: string;
email: string;
pref_theme: "light" | "dark"; // the type allows exactly these two theme names
full_name: string;
default_prompt: string;
}
/**
 * Page load function: requests the current user from GET /api/user/.
 *
 * A 401 response sends the browser to "/" before the body is parsed. Any
 * error raised while fetching or parsing is logged and also redirects to "/",
 * in which case `user` is returned as undefined.
 *
 * @returns An object holding the parsed response body under `user`.
 */
export const load: Load = async () => {
  let user;

  try {
    const response = await fetch("/api/user/", {
      method: "GET",
    });
    if (response.status == 401) {
      window.location.href = "/";
    }
    user = await response.json();
  } catch (error) {
    console.log(error);
    window.location.href = "/";
  }

  return { user };
};

View File

@ -2,7 +2,7 @@
import type { PageData } from "./$types";
import { invalidate, goto } from "$app/navigation";
import { page } from "$app/stores";
import { newChat, themeStore } from "$lib/stores";
import { barVisible, newChat, themeStore } from "$lib/stores";
import { onMount, onDestroy } from "svelte";
import ClipboardJS from "clipboard";
import hljs from "highlight.js";
@ -11,6 +11,7 @@
import css from "highlight.js/lib/languages/css";
import cpp from "highlight.js/lib/languages/cpp";
import dockerfile from "highlight.js/lib/languages/dockerfile";
import graphql from "highlight.js/lib/languages/graphql";
import go from "highlight.js/lib/languages/go";
import javascript from "highlight.js/lib/languages/javascript";
import json from "highlight.js/lib/languages/json";
@ -31,6 +32,7 @@
hljs.registerLanguage("bash", bash);
hljs.registerLanguage("css", css);
hljs.registerLanguage("cpp", cpp);
hljs.registerLanguage("graphql", graphql);
hljs.registerLanguage("dockerfile", dockerfile);
hljs.registerLanguage("go", go);
hljs.registerLanguage("javascript", javascript);
@ -59,6 +61,8 @@
messageContainer.scrollBottom = messageContainer.scrollHeight;
}
let prompt = "";
let bar_visible: boolean;
const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
async function askQuestion() {
const data = new URLSearchParams();
@ -102,7 +106,10 @@
});
eventSource.onerror = async (error) => {
console.log("error", error);
eventSource.close();
//history[history.length - 1].data.content = "A server error occurred.";
//await invalidate("/api/chat/" + $page.params.id);
};
}
@ -118,7 +125,7 @@
`/api/chat/?model=${data.chat.params.model_path}&temperature=${data.chat.params.temperature}&top_k=${data.chat.params.top_k}` +
`&top_p=${data.chat.params.top_p}&max_length=${data.chat.params.max_tokens}&context_window=${data.chat.params.n_ctx}` +
`&repeat_last_n=${data.chat.params.last_n_tokens_size}&repeat_penalty=${data.chat.params.repeat_penalty}` +
`&n_threads=${data.chat.params.n_threads}&init_prompt=${data.chat.history[0].data.content}` +
`&init_prompt=${data.chat.history[0].data.content}` +
`&gpu_layers=${data.chat.params.n_gpu_layers}`,
{
@ -127,23 +134,17 @@
accept: "application/json",
},
},
)
.then((response) => {
if (response.status == 401) {
console.log("Not authorized");
window.location.href = "/";
} else {
return response.json();
}
})
.catch((error) => {
console.log(error);
window.location.href = "/";
});
).then((response) => response.json());
await invalidate("/api/chat/");
await goto("/chat/" + newData);
}
document.addEventListener("keydown", async (event) => {
if (event.key === "n" && event.altKey) {
await createSameSession();
}
});
async function deletePrompt(chatID: string, idx: number) {
const response = await fetch(
`/api/chat/${chatID}/prompt?idx=${idx.toString()}`,
@ -152,36 +153,11 @@
if (response.status === 200) {
await invalidate("/api/chat/" + $page.params.id);
} else if (response.status === 202) {
showToast("Chat in progress!");
} else if (response.status === 401) {
window.location.href = "/";
} else {
showToast("An error occurred: " + response.statusText);
console.error("Error " + response.status + ": " + response.statusText);
}
}
/**
 * Shows a transient info toast inside the element with id "toast-container".
 *
 * If the container is missing, an error is logged and nothing is shown.
 * Otherwise the toast is appended and removed again after three seconds.
 *
 * @param message - Text to display in the toast.
 */
function showToast(message: string) {
  const notice = document.createElement("div");
  notice.className = "alert alert-info";
  notice.textContent = message;

  const host = document.getElementById("toast-container");
  if (!host) {
    console.error("Toast container not found?");
    return;
  }
  host.appendChild(notice);

  // Self-dismiss after a short delay.
  setTimeout(() => notice.remove(), 3000);
}
const md: MarkdownIt = new MarkdownIt({
html: true,
linkify: true,
@ -249,12 +225,6 @@
themeStore.subscribe((newTheme) => {
updateThemeStyle(newTheme);
});
document.addEventListener("keydown", async (event) => {
if (event.key === "n" && event.altKey) {
await createSameSession();
}
});
});
function updateThemeStyle(currentTheme: string) {
@ -273,6 +243,10 @@
const onMouseLeave = () => {
sendBottomHovered = false;
};
// Flips the local `bar_visible` flag and writes the new value to the
// `barVisible` store.
const toggleBar = () => {
bar_visible = !bar_visible;
barVisible.set(bar_visible);
};
const scrollToBottom = (node: Element, history: any[]) => {
const scroll = () =>
node.scroll({
@ -284,22 +258,41 @@
return { update: scroll };
};
// Release what this component registered while it was mounted.
onDestroy(() => {
  // `barVisible.subscribe(...)` returned this function; it has to be invoked
  // (not merely referenced) for the store subscription to be dropped.
  unsubscribe();
  styleElement && styleElement.remove();
});
</script>
<!-- svelte-ignore a11y-no-static-element-interactions -->
<div
class="relative h-full max-h-screen overflow-hidden"
class="relative mx-auto h-full max-h-screen w-full overflow-hidden"
on:keydown={handleKeyDown}
>
<div class="mx-20">
<div class="h-8 justify-content border-b border-base-content/[.2]">
<div class="h-full relative flex items-center justify-center">
<div
class="flex flex-row items-center justify-center color-base-300"
title="Model"
>
<div class="w-full border-b border-base-content/[.2]">
<div class="h-8 px-2 md:container md:mx-auto md:px-0">
<div class="w-full h-full relative flex items-center justify-center">
{#if !bar_visible}
<button
class="absolute p-0 top-0 bottom-0 left-0 w-10 h-8 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
on:click={toggleBar}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
class="w-4 h-4 fill-base-content"
>
<path
d="M7.22 14.47 9.69 12 7.22 9.53a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l3 3a.75.75 0 0 1 0 1.06l-3 3a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Z"
>
</path>
<path
d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
>
</path>
</svg>
</button>
{/if}
<div class="flex flex-row items-center justify-center color-base-300">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 11.12744 16"
@ -314,10 +307,7 @@
{data.chat.params.model_path}
</span>
</div>
<div
class="pl-4 hidden sm:flex flex-row items-center justify-center"
title="Temperature"
>
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 5.31286 16"
@ -332,10 +322,7 @@
{data.chat.params.temperature}
</span>
</div>
<div
class="pl-4 hidden sm:flex flex-row items-center justify-center"
title="Context Length/Maximum Generated Tokens"
>
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
@ -350,35 +337,8 @@
{data.chat.params.n_ctx}/{data.chat.params.max_tokens}
</span>
</div>
{#if data.chat.params.n_threads > 0}
<div
class="pl-4 hidden sm:flex flex-row items-center justify-center"
title="Threads"
>
<svg
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 24 24"
stroke-width="1.5"
stroke="currentColor"
class="w-4 h-4"
>
<path
stroke-linecap="round"
stroke-linejoin="round"
d="M8.25 3v1.5M4.5 8.25H3m18 0h-1.5M4.5 12H3m18 0h-1.5m-15 3.75H3m18 0h-1.5M8.25 19.5V21M12 3v1.5m0 15V21m3.75-18v1.5m0 15V21m-9-1.5h10.5a2.25 2.25 0 002.25-2.25V6.75a2.25 2.25 0 00-2.25-2.25H6.75A2.25 2.25 0 004.5 6.75v10.5a2.25 2.25 0 002.25 2.25zm.75-12h9v9h-9v-9z"
/>
</svg>
<span class="ml-2 inline-block text-center text-sm font-semibold">
{data.chat.params.n_threads}
</span>
</div>
{/if}
{#if data.chat.params.n_gpu_layers > 0}
<div
class="pl-4 hidden sm:flex flex-row items-center justify-center"
title="GPU Layers"
>
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
@ -394,10 +354,7 @@
</span>
</div>
{/if}
<div
class="pl-4 hidden sm:flex flex-row items-center justify-center"
title="Repeat Penalty"
>
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
@ -416,10 +373,7 @@
{data.chat.params.repeat_penalty}
</span>
</div>
<div
class="pl-4 hidden sm:flex flex-row items-center justify-center"
title="Top_k-Top_p"
>
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
@ -444,46 +398,49 @@
<div class="h-max pb-4">
{#each history as question, i}
{#if question.type === "human"}
<div class="w-10/12 mx-auto sm:w-10/12 chat chat-end py-4">
<div class="chat-image self-start pl-1 pt-1">
<div
class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
>
<span class="text-xs text-neutral-content">I</span>
</div>
</div>
<div
class="chat-bubble whitespace-normal break-words bg-base-300 text-base font-light text-base-content"
>
<!-- {question.data.content} -->
<div class="w-full overflow-hidden break-words">
{@html renderMarkdown(question.data.content)}
</div>
</div>
{#if i === history.length - 1 && !isLoading}
<div style="width: 100%; text-align: right;">
<button
disabled={isLoading}
class="btn-ghost btn-sm btn"
on:click|preventDefault={() => deletePrompt(data.chat.id, i)}
<div class="w-full border-y border-base-content/[.2] bg-base-300">
<div class="w-11/12 mx-auto sm:w-10/12 chat chat-start py-4">
<div class="chat-image self-start pl-1 pt-1">
<div
class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
/>
</svg>
</button>
<span class="text-xs text-neutral-content">I</span>
</div>
</div>
{/if}
<div
class="chat-bubble whitespace-normal break-words bg-base-300 text-base font-light text-base-content"
>
<!-- {question.data.content} -->
<div class="w-full overflow-hidden break-words">
{@html renderMarkdown(question.data.content)}
</div>
</div>
{#if i === history.length - 1 && !isLoading}
<div style="width: 100%; text-align: right;">
<button
disabled={isLoading}
class="btn-ghost btn-sm btn"
on:click|preventDefault={() =>
deletePrompt(data.chat.id, i)}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="16"
height="16"
>
<path
class="fill-base-content"
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
/>
</svg>
</button>
</div>
{/if}
</div>
</div>
{:else if question.type === "ai"}
<div class="w-10/12 mx-auto sm:w-10/12 chat chat-start py-4">
<div class="w-11/12 mx-auto sm:w-10/12 chat chat-start py-4">
<div class="chat-image self-start pl-1 pt-1">
<div
class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
@ -524,7 +481,6 @@
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
/>
</svg>
<span class="sr-only">Delete</span>
</button>
</div>
{/if}
@ -564,7 +520,7 @@
class="btn btn-ghost h-10 w-14 rounded-l-none rounded-r-lg border-0 text-lg"
class:loading={isLoading}
on:click|preventDefault={askQuestion}
><span class="sr-only">Send</span>
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
@ -580,7 +536,4 @@
</button>
</div>
</div>
<div id="toast-container" class="toast">
<!-- Toast notifications will be added here -->
</div>
</div>

View File

@ -15,7 +15,6 @@ interface Params {
model_path: string;
n_ctx: number;
n_gpu_layers: number;
n_threads: number;
last_n_tokens_size: number;
max_tokens: number;
temperature: number;
@ -28,22 +27,12 @@ interface Response {
id: string;
created: string;
params: Params;
owner: string;
history: Message[];
}
export const load: PageLoad = async ({ fetch, params }) => {
const data = await fetch("/api/chat/" + params.id)
.then((response) => {
if (response.status == 401) {
window.location.href = "/";
}
return response.json();
})
.catch((error) => {
console.log(error);
window.location.href = "/";
});
const r = await fetch("/api/chat/" + params.id);
const data = (await r.json()) as Response;
return {
chat: data,

View File

@ -1,69 +0,0 @@
<script lang="ts">
import { goto } from "$app/navigation";
import { writable } from "svelte/store";
let username = "";
let password = "";
let error = writable<string | null>(null);
/**
 * Form submit handler for the sign-in form.
 *
 * Posts the username and password as form-encoded data to /api/auth/token.
 * On an OK response the returned `access_token` is stored in localStorage
 * under "token" and the app navigates to "/". Otherwise the `error` store is
 * set from the response's `detail` field, or a generic message.
 *
 * @param event - The form submit event; its default action is prevented.
 */
async function handleSubmit(event: Event) {
  event.preventDefault();

  try {
    const response = await fetch("/api/auth/token", {
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
      },
      body: new URLSearchParams({
        username,
        password,
      }),
    });

    if (!response.ok) {
      const errorData = await response.json();
      error.set(errorData.detail || "Login failed");
      return;
    }

    const data = await response.json();
    localStorage.setItem("token", data.access_token);
    goto("/", { invalidateAll: true });
  } catch (err) {
    error.set("An error occurred");
  }
}
</script>
<main>
<div class="card-group">
<div class="card">
<div class="card-title p-3 text-3xl justify-center font-bold">
Sign In
</div>
<div class="card-body">
<form on:submit={handleSubmit}>
<div class="form-control">
<input
type="text"
placeholder="Username"
bind:value={username}
required
/>
</div>
<div class="form-control">
<input
type="password"
placeholder="Password"
bind:value={password}
required
/>
</div>
{#if $error}
<p style="color: red;">{$error}</p>
{/if}
<button class="btn" type="submit">Authenticate</button>
</form>
</div>
</div>
</div>
</main>

View File

@ -1,359 +1,166 @@
<script lang="ts">
import { invalidate } from "$app/navigation";
import type { ModelStatus } from "../+page";
import type { PageData } from "./$types";
import Icon from "@iconify/svelte";
import { onMount } from "svelte";
import RefreshModal from "../../lib/components/models/RefreshModal.svelte";
import { barVisible } from "$lib/stores";
import { onDestroy } from "svelte";
export let data: PageData;
let searchQuery = "";
let selectedVariant: Record<string, string> = {};
// Add a reactive statement to keep track of downloading models
$: downloadingModels = new Set(
data.models
.filter(
(model) =>
(model.progress > 0 && model.progress < 100) || !model.available,
)
.map((model) => model.name),
);
/**
 * Resumes progress tracking for every model name saved in localStorage under
 * "downloadingModels" (an empty list is assumed when the key is absent).
 */
function onComponentMount() {
  const stored = localStorage.getItem("downloadingModels") || "[]";
  const persistedNames = JSON.parse(stored);

  persistedNames.forEach((name: string) => {
    downloadingModels.add(name);
    checkDownloadProgress(name);
  });
}
onMount(() => {
onComponentMount();
});
/**
 * Fetches the download progress of a model and mirrors it into `data.models`.
 * @param modelName - The model name.
 * @returns The progress parsed from the response text, or 0 when the request
 * did not succeed.
 */
async function fetchDownloadProgress(modelName: string) {
  const response = await fetch(`/api/model/${modelName}/download/status`);
  if (!response.ok) {
    return 0;
  }

  const body = await response.text();
  const progressNumber = parseFloat(body);

  const position = data.models.findIndex((m) => m.name === modelName);
  if (position !== -1) {
    data.models[position].progress = progressNumber;
    // Reassign with a fresh array so Svelte picks up the change.
    data.models = [...data.models];
  }

  return progressNumber;
}
/**
 * Records a model as downloading (in localStorage and in the in-memory set)
 * and starts polling its progress.
 * @param modelName - The model name.
 */
function startDownload(modelName: string) {
  const storedList = localStorage.getItem("downloadingModels") || "[]";
  const tracked = JSON.parse(storedList);

  const alreadyTracked = tracked.includes(modelName);
  if (!alreadyTracked) {
    tracked.push(modelName);
    localStorage.setItem("downloadingModels", JSON.stringify(tracked));
  }

  downloadingModels.add(modelName);
  checkDownloadProgress(modelName);
}
/**
 * Wraps a function so that it only runs once calls have stopped for `wait`
 * milliseconds; each new call restarts the timer and replaces the arguments.
 * @param func - The function to be debounced.
 * @param wait - The time to wait in milliseconds.
 * @returns A debounced version of the given function.
 */
function debounce(func: (...args: any[]) => void, wait: number) {
  let pending: ReturnType<typeof setTimeout>;

  return (...args: any[]) => {
    // Drop any call that is still waiting, then schedule this one.
    clearTimeout(pending);
    pending = setTimeout(() => {
      clearTimeout(pending);
      func(...args);
    }, wait);
  };
}
// Update search query with debounce to improve performance
const updateSearch = debounce((query: string) => {
searchQuery = query;
}, 300);
/**
* Wrapper function for fetch to include invalidate call on successful response.
* @param url - The URL to fetch.
* @param options - Fetch request options.
* @returns The fetch response.
*/
async function fetchWithInvalidate(url: string, options: any) {
const response = await fetch(url, options);
if (response.ok) {
let downloading = false;
let bar_visible: boolean;
const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
console.log(data);
setInterval(async () => {
if (downloading) {
await invalidate("/api/model/all");
}
return response;
}
}, 2500);
/**
 * Truncates a string so the result, including the ellipsis, never exceeds
 * `maxLength` characters.
 *
 * Strings that already fit are returned unchanged. When `maxLength` is too
 * small to hold any text plus the ellipsis, as much of the ellipsis as fits
 * is returned.
 *
 * @param str - The string to truncate.
 * @param maxLength - The maximum length of the returned string.
 * @returns The original string, or a shortened copy ending in "...".
 */
function truncateString(str: string, maxLength: number): string {
  if (str.length <= maxLength) {
    return str;
  }

  const ellipsis = "...";
  if (maxLength <= ellipsis.length) {
    return ellipsis.slice(0, Math.max(0, maxLength));
  }

  // Reserve room for the ellipsis so the total stays within maxLength.
  return str.slice(0, maxLength - ellipsis.length) + ellipsis;
}
/**
* Handles the action (download/delete) on a model.
* @param model - The model name.
* @param isAvailable - Boolean indicating if the model is available.
*/
async function handleModelAction(
model: string,
isAvailable: boolean,
isDownloading: boolean = false,
) {
if (isDownloading) {
await cancelDownload(model);
async function onClick(model: string) {
if (downloading) {
return;
}
const url = `/api/model/${model}${isAvailable ? "" : "/download"}`;
const method = isAvailable ? "DELETE" : "POST";
console.log("Before fetch invalidate");
fetchWithInvalidate(url, { method }).then((response) => {
console.log(`After fetch for ${url}`);
downloading = true;
const r = await fetch(`/api/model/${model}/download`, {
method: "POST",
});
if (method === "POST") {
// Start tracking download progress for the model
console.log(`Calling startDownload() for ${model}`);
startDownload(model);
if (r.ok) {
await invalidate("/api/model/all");
}
downloading = false;
}
/**
 * Sends a DELETE request for the given model and, if it succeeds, invalidates
 * the "/api/model/all" data so the list is reloaded.
 * @param model - The model name.
 */
async function deleteModel(model: string) {
  const response = await fetch(`/api/model/${model}`, { method: "DELETE" });
  if (!response.ok) {
    return;
  }
  await invalidate("/api/model/all");
}
/**
 * Polls the download progress of a model every 1.5 seconds until it is no
 * longer below 100, then removes the model from localStorage and from the
 * in-memory tracking set.
 * @param modelName - The model name.
 */
async function checkDownloadProgress(modelName: string) {
  const progress = await fetchDownloadProgress(modelName);
  console.log(`Download status for ${modelName} ${progress}/100.0%`);

  if (progress < 100) {
    // Not finished yet: look again shortly.
    setTimeout(() => checkDownloadProgress(modelName), 1500);
    return;
  }

  console.log(`Stopping tracker for ${modelName}`);
  const tracked = JSON.parse(localStorage.getItem("downloadingModels") || "[]");
  const remaining = tracked.filter((model: string) => model !== modelName);
  localStorage.setItem("downloadingModels", JSON.stringify(remaining));
  downloadingModels.delete(modelName);
}
/**
 * Buckets models by the part of their name before the first "-".
 * @param models - Array of ModelStatus objects.
 * @returns An object mapping each prefix to the models carrying it, in input
 * order.
 */
function groupModelsByPrefix(
  models: ModelStatus[],
): Record<string, ModelStatus[]> {
  const groups = {} as Record<string, ModelStatus[]>;

  models.forEach((model) => {
    const [prefix] = model.name.split("-");
    groups[prefix] = groups[prefix] || [];
    groups[prefix].push(model);
  });

  return groups;
}
/**
 * Stores the variant chosen in a model group's select element.
 * @param modelPrefix - The prefix of the model.
 * @param event - The change event raised by the select element.
 */
function handleVariantChange(modelPrefix: string, event: Event) {
  const { value } = event.target as HTMLSelectElement;
  selectedVariant[modelPrefix] = value;
}
/**
 * Looks up the model matching the variant selected for a group, falling back
 * to the group's first model when nothing matches.
 * @param models - Array of ModelStatus objects.
 * @param prefix - The prefix of the model group.
 * @returns The selected or default ModelStatus object.
 */
function getModelDetails(models: ModelStatus[], prefix: string): ModelStatus {
  const chosenName = selectedVariant[prefix];
  const chosen = models.find((candidate) => candidate.name === chosenName);
  return chosen || models[0];
}
// Reactive statements to filter and group models based on search query
$: filteredModels = data.models
.filter(
(model) =>
!downloadedOrDownloadingModels.includes(model) &&
model.name.toLowerCase().includes(searchQuery.toLowerCase()),
)
.sort((a, b) => a.name.localeCompare(b.name));
// Reactive statement with models grouped by prefix
$: groupedModels = groupModelsByPrefix(filteredModels);
// Reactive statement to filter models that are downloaded or downloading
$: downloadedOrDownloadingModels = data.models
.filter((model) => model.progress > 0 || model.available)
.sort((a, b) => a.name.localeCompare(b.name));
/**
 * Asks the API to cancel an in-progress model download.
 *
 * On an OK response the model's entry in `data.models` is reset (progress 0,
 * not available) and the model is removed from both the in-memory tracking
 * set and the "downloadingModels" list in localStorage. A non-OK response or
 * a thrown error is only logged to the console.
 *
 * @param modelName - The model name.
 */
async function cancelDownload(modelName: string) {
try {
const response = await fetch(`/api/model/${modelName}/download/cancel`, {
method: "POST",
});
if (response.ok) {
console.log(`Download for ${modelName} cancelled successfully.`);
// Update UI based on successful cancellation
const modelIndex = data.models.findIndex((m) => m.name === modelName);
if (modelIndex !== -1) {
data.models[modelIndex].progress = 0;
data.models[modelIndex].available = false;
data.models = [...data.models]; // trigger reactivity
}
// Remove model from tracking and local storage
downloadingModels.delete(modelName);
// A missing localStorage key is treated as an empty list.
const currentDownloads = JSON.parse(
localStorage.getItem("downloadingModels") || "[]",
);
const updatedDownloads = currentDownloads.filter(
(model: string) => model !== modelName,
);
localStorage.setItem(
"downloadingModels",
JSON.stringify(updatedDownloads),
);
} else {
console.error(`Failed to cancel download for ${modelName}`);
}
} catch (error) {
// Errors thrown by the request or by the bookkeeping above end up here.
console.error(`Error cancelling download for ${modelName}:`, error);
}
}
/**
 * Flips the local `bar_visible` flag and writes the new value to the
 * `barVisible` store.
 */
function toggleBar() {
bar_visible = !bar_visible;
barVisible.set(bar_visible);
}
onDestroy(unsubscribe);
</script>
<div class="ml-12 pt-1">
<div class="search-row">
<input
type="text"
bind:value={searchQuery}
class="input input-bordered flex-grow"
placeholder="Search models..."
on:input={(e) => {
const target = e.target;
if (target instanceof HTMLInputElement) {
updateSearch(target.value);
}
}}
{#if !bar_visible}
<button
class="absolute p-0 top-1 left-2 md:left-16 h-10 w-10 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
on:click={toggleBar}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
fill="currentColor"
class="w-4 h-4"
>
<path
d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
>
</path>
<path
d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
>
</path>
</svg>
</button>
{/if}
<div class="flex flex-row items-center justify-center pt-5">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="24"
height="24"
>
<path
class="fill-warning"
d="M9.504.43a1.516 1.516 0 0 1 2.437 1.713L10.415 5.5h2.123c1.57 0 2.346 1.909 1.22 3.004l-7.34 7.142a1.249 1.249 0 0 1-.871.354h-.302a1.25 1.25 0 0 1-1.157-1.723L5.633 10.5H3.462c-1.57 0-2.346-1.909-1.22-3.004L9.503.429Zm1.047 1.074L3.286 8.571A.25.25 0 0 0 3.462 9H6.75a.75.75 0 0 1 .694 1.034l-1.713 4.188 6.982-6.793A.25.25 0 0 0 12.538 7H9.25a.75.75 0 0 1-.683-1.06l2.008-4.418.003-.006a.036.036 0 0 0-.004-.009l-.006-.006-.008-.001c-.003 0-.006.002-.009.004Z"
/>
</svg>
<h1 class="px-2 text-center text-3xl font-bold">Download a model</h1>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="24"
height="24"
>
<path
class="fill-warning"
d="M9.504.43a1.516 1.516 0 0 1 2.437 1.713L10.415 5.5h2.123c1.57 0 2.346 1.909 1.22 3.004l-7.34 7.142a1.249 1.249 0 0 1-.871.354h-.302a1.25 1.25 0 0 1-1.157-1.723L5.633 10.5H3.462c-1.57 0-2.346-1.909-1.22-3.004L9.503.429Zm1.047 1.074L3.286 8.571A.25.25 0 0 0 3.462 9H6.75a.75.75 0 0 1 .694 1.034l-1.713 4.188 6.982-6.793A.25.25 0 0 0 12.538 7H9.25a.75.75 0 0 1-.683-1.06l2.008-4.418.003-.006a.036.036 0 0 0-.004-.009l-.006-.006-.008-.001c-.003 0-.006.002-.009.004Z"
/>
</svg>
</div>
<h1 class="pb-5 pt-2 text-center text-xl font-light">
Make sure you have enough disk space and available RAM to run them.<br />
7B requires about 4.5GB of free RAM, 13B requires about 12GB free, 30B requires
about 20GB free
</h1>
<div class="mx-auto w-fit">
<RefreshModal />
</div>
<div class="mt-30 mx-auto flex flex-col">
<div class="mx-auto w-full max-w-4xl">
<div class="divider" />
{#each data.models as model}
<div class="my-5 flex flex-col content-around">
<div
class="mx-auto flex flex-row items-center justify-center text-3xl font-semibold"
>
<span class="mr-2">{model.name}</span>
{#if model.available}
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
width="24"
height="24"
>
<path
class="fill-info"
d="m9.585.52.929.68c.153.112.331.186.518.215l1.138.175a2.678 2.678 0 0 1 2.24 2.24l.174 1.139c.029.187.103.365.215.518l.68.928a2.677 2.677 0 0 1 0 3.17l-.68.928a1.174 1.174 0 0 0-.215.518l-.175 1.138a2.678 2.678 0 0 1-2.241 2.241l-1.138.175a1.17 1.17 0 0 0-.518.215l-.928.68a2.677 2.677 0 0 1-3.17 0l-.928-.68a1.174 1.174 0 0 0-.518-.215L3.83 14.41a2.678 2.678 0 0 1-2.24-2.24l-.175-1.138a1.17 1.17 0 0 0-.215-.518l-.68-.928a2.677 2.677 0 0 1 0-3.17l.68-.928c.112-.153.186-.331.215-.518l.175-1.14a2.678 2.678 0 0 1 2.24-2.24l1.139-.175c.187-.029.365-.103.518-.215l.928-.68a2.677 2.677 0 0 1 3.17 0ZM7.303 1.728l-.927.68a2.67 2.67 0 0 1-1.18.489l-1.137.174a1.179 1.179 0 0 0-.987.987l-.174 1.136a2.677 2.677 0 0 1-.489 1.18l-.68.928a1.18 1.18 0 0 0 0 1.394l.68.927c.256.348.424.753.489 1.18l.174 1.137c.078.509.478.909.987.987l1.136.174a2.67 2.67 0 0 1 1.18.489l.928.68c.414.305.979.305 1.394 0l.927-.68a2.67 2.67 0 0 1 1.18-.489l1.137-.174a1.18 1.18 0 0 0 .987-.987l.174-1.136a2.67 2.67 0 0 1 .489-1.18l.68-.928a1.176 1.176 0 0 0 0-1.394l-.68-.927a2.686 2.686 0 0 1-.489-1.18l-.174-1.137a1.179 1.179 0 0 0-.987-.987l-1.136-.174a2.677 2.677 0 0 1-1.18-.489l-.928-.68a1.176 1.176 0 0 0-1.394 0ZM11.28 6.78l-3.75 3.75a.75.75 0 0 1-1.06 0L4.72 8.78a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018L7 8.94l3.22-3.22a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
/>
</svg>
{/if}
</div>
<p class="mx-auto pb-2 text-xl font-light">
({model.size / 1e9}GB)
</p>
{#if model.progress}
<div class="mx-auto my-5 w-56 justify-center">
<p class="w-full text-center font-light">{model.progress}%</p>
<progress
class="progress-primary progress mx-auto h-5 w-56"
value={model.progress}
max="100"
/>
</div>
{/if}
{#if model.available}
<button
on:click={() => deleteModel(model.name)}
class="btn-warning btn-outline btn mx-auto">Delete</button
>
{:else}
<button
on:click={() => onClick(model.name)}
class="btn-primary btn mx-auto"
class:model.available={() => "btn-outline"}
disabled={model.available ||
!!(model.progress && model.progress > 0)}
>
Download
</button>
{/if}
</div>
<div class="divider" />
{/each}
</div>
</div>
<div class="models-grid grid">
{#each downloadedOrDownloadingModels as model}
<div class="model card card-bordered">
<div class="card-body">
<h2 class="card-title">{truncateString(model.name, 24)}</h2>
<div class="model-details">
{#if model.progress < 100}
<div class="progress-bar">
<progress value={model.progress} max="100"></progress> / {model.progress}%
</div>
{/if}
{#if model.progress >= 100}
<p>Size: {(model.size / 1e9).toFixed(2)} GB</p>
<button
on:click={() => handleModelAction(model.name, model.available)}
class="btn btn-error mt-2"
>
<Icon icon="mdi:trash" width="32" height="32" />
</button>
{:else}
<button
on:click={() =>
handleModelAction(
model.name,
model.available,
model.progress > 0 && model.progress < 100,
)}
class="btn btn-error mt-2"
>
<Icon icon="mdi:cancel" width="32" height="32" />
</button>
{/if}
</div>
</div>
</div>
{/each}
</div>
<div class="models-grid grid">
{#each Object.entries(groupedModels) as [prefix, models]}
<div class="model-group card card-bordered">
<div class="card-body">
<h2 class="card-title">{truncateString(prefix, 24)}</h2>
<div class="model-details">
{#if models.length > 1}
<select
class="select-bordered select w-full"
bind:value={selectedVariant[prefix]}
on:change={(event) => handleVariantChange(prefix, event)}
>
{#each models as model}
<option value={model.name}
>{truncateString(model.name, 32)}</option
>
{/each}
</select>
{/if}
{#if models.length === 1 || selectedVariant[prefix]}
{@const model = getModelDetails(models, prefix)}
{#if models.length === 1}
<h3>{truncateString(model.name, 24)}</h3>
{/if}
<p>Size: {(model.size / 1e9).toFixed(2)} GB</p>
<button
on:click={() => handleModelAction(model.name, model.available)}
class="btn btn-primary mt-2"
>
<Icon icon="ic:baseline-download" width="32" height="32" />
</button>
{/if}
</div>
</div>
</div>
{/each}
</div>

View File

@ -4,7 +4,7 @@ interface ModelStatus {
name: string;
size: number;
available: boolean;
progress: number;
progress?: number;
}
export const load: PageLoad = async ({ fetch }) => {

View File

@ -1,165 +0,0 @@
<script lang="ts">
import { onMount } from "svelte";
import { goto } from "$app/navigation";
let username = "";
let secret = "";
let full_name = "";
let email = "";
let auth_type = 1;
let error = "";
let success = "";
/**
 * Submit handler for the registration form.
 *
 * POSTs the form fields to `/api/user/create`. On success it sets the
 * `success` message, logs the new user in via `authAfterCreate`, and
 * navigates to `/account`. On failure it stores a message in `error`.
 *
 * @param event - the submit event; its default action is suppressed.
 */
async function handleSubmit(event: Event) {
  event.preventDefault();
  // Clear messages left over from a previous attempt.
  error = "";
  success = "";

  try {
    const response = await fetch("/api/user/create", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        username,
        secret,
        full_name,
        email,
        auth_type,
      }),
    });

    if (!response.ok) {
      // The error body is not guaranteed to be JSON; fall back to the
      // generic message instead of throwing while reporting the failure.
      const data = await response.json().catch(() => null);
      error = data?.detail || "An error occurred";
      return;
    }

    success = "User created successfully!";
    await authAfterCreate(event);
    // NOTE(review): authAfterCreate already navigates to "/" when the login
    // succeeds and only sets `error` when it fails; this navigation runs in
    // both cases — confirm that is intended.
    goto("/account");
  } catch (err) {
    // fetch rejects on network failure; surface it the same way
    // authAfterCreate does rather than leaving an unhandled rejection.
    error = err instanceof Error ? err.message : "An unknown error occurred";
  }
}
/**
 * Logs in with the credentials currently held in `username` / `secret` by
 * POSTing them form-encoded to `/api/auth/token`.
 *
 * Navigates to `/` (invalidating all loaded data) when the request succeeds;
 * otherwise stores a message in `error`.
 *
 * @param event - the originating event; its default action is suppressed.
 */
async function authAfterCreate(event: Event) {
  event.preventDefault();
  try {
    const credentials = new URLSearchParams({
      username: username,
      password: secret,
    });
    const tokenResponse = await fetch("/api/auth/token", {
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
      },
      body: credentials,
    });

    if (!tokenResponse.ok) {
      const payload = await tokenResponse.json();
      error = payload.detail || "Login failed";
      return;
    }

    goto("/", { invalidateAll: true });
  } catch (failure) {
    error =
      failure instanceof Error ? failure.message : "An unknown error occurred";
  }
}
</script>
<main>
<div class="card-group">
<!--
  Registration card: a form with username and password inputs, bound to the
  `username` and `secret` variables, that calls `handleSubmit` on submit.
-->
<div class="card">
<div class="card-title p-3 text-3xl justify-center font-bold">
Register a new user
</div>
<div class="card-body">
<form on:submit={handleSubmit}>
<div class="form-control">
<input
type="text"
placeholder="Username"
bind:value={username}
required
/>
</div>
<div class="form-control">
<input
type="password"
placeholder="Password"
bind:value={secret}
required
/>
</div>
<!-- Feedback messages are rendered only when the corresponding string is non-empty. -->
{#if error}
<p class="error-message">{error}</p>
{/if}
{#if success}
<p class="success-message">{success}</p>
{/if}
<button class="btn" type="submit">Submit</button>
</form>
</div>
</div>
<div class="card">
<div class="card-title p-3 text-3xl justify-center font-bold">
Or link an account (coming soon)
</div>
<div class="card-body">
<button name="google-btn" class="btn" disabled={true}>
<svg
xmlns="http://www.w3.org/2000/svg"
width="18"
height="18"
fill="currentColor"
viewBox="0 0 16 16"
>
<path
d="M15.545 6.558a9.4 9.4 0 0 1 .139 1.626c0 2.434-.87 4.492-2.384 5.885h.002C11.978 15.292 10.158 16 8 16A8 8 0 1 1 8 0a7.7 7.7 0 0 1 5.352 2.082l-2.284 2.284A4.35 4.35 0 0 0 8 3.166c-2.087 0-3.86 1.408-4.492 3.304a4.8 4.8 0 0 0 0 3.063h.003c.635 1.893 2.405 3.301 4.492 3.301 1.078 0 2.004-.276 2.722-.764h-.003a3.7 3.7 0 0 0 1.599-2.431H8v-3.08z"
/>
</svg>
<span>Link Google Account</span>
</button>
<button name="reddit-btn" class="btn" disabled={true}>
<svg
xmlns="http://www.w3.org/2000/svg"
width="18"
height="18"
fill="currentColor"
viewBox="0 0 16 16"
>
<path
d="M6.167 8a.83.83 0 0 0-.83.83c0 .459.372.84.83.831a.831.831 0 0 0 0-1.661m1.843 3.647c.315 0 1.403-.038 1.976-.611a.23.23 0 0 0 0-.306.213.213 0 0 0-.306 0c-.353.363-1.126.487-1.67.487-.545 0-1.308-.124-1.671-.487a.213.213 0 0 0-.306 0 .213.213 0 0 0 0 .306c.564.563 1.652.61 1.977.61zm.992-2.807c0 .458.373.83.831.83s.83-.381.83-.83a.831.831 0 0 0-1.66 0z"
/>
<path
d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0m-3.828-1.165c-.315 0-.602.124-.812.325-.801-.573-1.9-.945-3.121-.993l.534-2.501 1.738.372a.83.83 0 1 0 .83-.869.83.83 0 0 0-.744.468l-1.938-.41a.2.2 0 0 0-.153.028.2.2 0 0 0-.086.134l-.592 2.788c-1.24.038-2.358.41-3.17.992-.21-.2-.496-.324-.81-.324a1.163 1.163 0 0 0-.478 2.224q-.03.17-.029.353c0 1.795 2.091 3.256 4.669 3.256s4.668-1.451 4.668-3.256c0-.114-.01-.238-.029-.353.401-.181.688-.592.688-1.069 0-.65-.525-1.165-1.165-1.165"
/>
</svg>
<span>Link Reddit Account</span>
</button>
</div>
</div>
<!-- Card for existing users: a single button that navigates to `/login` via `goto`. -->
<div class="card">
<div class="card-title pt-3 text-3xl justify-center font-bold">
Already have an account?
</div>
<div class="card-body">
<button name="login-btn" class="btn" on:click={() => goto("/login")}>
<!-- Inline SVG icon shown before the button label. -->
<svg
xmlns="http://www.w3.org/2000/svg"
width="18"
height="18"
fill="currentColor"
class="mr-3"
viewBox="0 0 16 16"
>
<path
d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m1.679-4.493-1.335 2.226a.75.75 0 0 1-1.174.144l-.774-.773a.5.5 0 0 1 .708-.708l.547.548 1.17-1.951a.5.5 0 1 1 .858.514M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
/>
<path
d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
/>
</svg>
<span>Login Instead</span>
</button>
</div>
</div>
</div>
</main>

View File

@ -1,5 +1,5 @@
import adapter from "@sveltejs/adapter-static";
import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
import { vitePreprocess } from "@sveltejs/kit/vite";
/** @type {import('@sveltejs/kit').Config} */
const config = {