Compare commits
1 Commits
main
...
dynamic-th
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a6a69a17f7 |
@ -12,6 +12,7 @@ CONTRIBUTING.md
|
||||
Dockerfile
|
||||
docker-compose.yml
|
||||
docker-compose.dev.yml
|
||||
/vendor
|
||||
.vscode/
|
||||
|
||||
**/node_modules/
|
||||
|
||||
15
.github/ISSUE_TEMPLATE/sweep-template.yml
vendored
15
.github/ISSUE_TEMPLATE/sweep-template.yml
vendored
@ -1,15 +0,0 @@
|
||||
name: Sweep Issue
|
||||
title: 'Sweep: '
|
||||
description: For small bugs, features, refactors, and tests to be handled by Sweep, an AI-powered junior developer.
|
||||
labels: sweep
|
||||
body:
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Details
|
||||
description: Tell Sweep where and what to edit and provide enough context for a new developer to the codebase
|
||||
placeholder: |
|
||||
Unit Tests: Write unit tests for <FILE>. Test each function in the file. Make sure to test edge cases.
|
||||
Bugs: The bug might be in <FILE>. Here are the logs: ...
|
||||
Features: the new endpoint should use the ... class from <FILE> because it contains ... logic.
|
||||
Refactors: We are migrating this function to ... version because ...
|
||||
17
.github/release-drafter.yml
vendored
17
.github/release-drafter.yml
vendored
@ -13,17 +13,12 @@ categories:
|
||||
- title: '📚 Documentation:'
|
||||
labels:
|
||||
- '📒 Documentation'
|
||||
- title: '🧠 Models'
|
||||
labels:
|
||||
- '🧠 Models'
|
||||
- title: '🧹 Updates:'
|
||||
labels:
|
||||
- '🧹 Updates'
|
||||
- title: '🤖 Dependencies:'
|
||||
labels:
|
||||
- '🤖 Dependencies'
|
||||
change-template: '- $TITLE (#$NUMBER)'
|
||||
change-title-escapes: '\<*_&'
|
||||
change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
|
||||
exclude-contributors:
|
||||
- dependabot
|
||||
- dependabot[bot]
|
||||
@ -43,7 +38,6 @@ version-resolver:
|
||||
- '☢️ Bug'
|
||||
- '🤖 Dependencies'
|
||||
- '🧹 Updates'
|
||||
- '🧠 Models'
|
||||
default: patch
|
||||
template: |
|
||||
$CHANGES
|
||||
@ -62,18 +56,15 @@ autolabeler:
|
||||
- '*.md'
|
||||
title:
|
||||
- '/(docs|doc:|\[doc\]|typos|comment|documentation)/i'
|
||||
- label: '🧠 Models'
|
||||
files:
|
||||
- 'api/src/serge/data/*.json'
|
||||
- label: '☢️ Bug'
|
||||
title:
|
||||
- '/(fix|bug|missing|correct)/i'
|
||||
- '/(fix|race|bug|missing|correct)/i'
|
||||
- label: '🧹 Updates'
|
||||
title:
|
||||
- '/(improve|update|migrate|refactor|deprecated|remove|unused|test)/i'
|
||||
- '/(improve|update|update|refactor|deprecated|remove|unused|test)/i'
|
||||
- label: '🤖 Dependencies'
|
||||
title:
|
||||
- '/(bump|dependencies)/i'
|
||||
- label: '✏️ Feature'
|
||||
title:
|
||||
- '/(feature|feat|create|implement)/i'
|
||||
- '/(feature|feat|create|implement|add)/i'
|
||||
|
||||
23
.github/workflows/ci.yml
vendored
23
.github/workflows/ci.yml
vendored
@ -1,4 +1,4 @@
|
||||
name: CI Checks
|
||||
name: CI/CD Process
|
||||
|
||||
on:
|
||||
push:
|
||||
@ -49,10 +49,9 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: 'pip' # caching pip dependencies
|
||||
- name: Install dependencies with poetry
|
||||
working-directory: ./api
|
||||
run: |
|
||||
@ -61,15 +60,14 @@ jobs:
|
||||
- name: Run unit tests
|
||||
working-directory: ./api
|
||||
run: |
|
||||
poetry run python -m pytest -v --color=yes
|
||||
poetry run python -m pytest
|
||||
check-sh-files:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: luizm/action-sh-checker@v0.8.0
|
||||
- uses: luizm/action-sh-checker@v0.7.0
|
||||
env:
|
||||
SHFMT_OPTS: "-s"
|
||||
SHELLCHECK_OPTS: "-P scripts/ -e SC1091"
|
||||
with:
|
||||
sh_checker_only_diff: false
|
||||
sh_checker_comment: false
|
||||
@ -77,25 +75,24 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: 'pip' # caching pip dependencies
|
||||
- name: Run ruff check
|
||||
uses: chartboost/ruff-action@v1
|
||||
with:
|
||||
src: "./api"
|
||||
args: "check --verbose"
|
||||
- name: Run ruff format check
|
||||
uses: chartboost/ruff-action@v1
|
||||
args: "--verbose"
|
||||
- name: Run black check
|
||||
uses: psf/black@stable
|
||||
with:
|
||||
options: "--check --diff --verbose"
|
||||
src: "./api"
|
||||
args: "format --check --verbose"
|
||||
check-web-code:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '20'
|
||||
- name: Install Web
|
||||
|
||||
4
.github/workflows/docker.yml
vendored
4
.github/workflows/docker.yml
vendored
@ -1,4 +1,4 @@
|
||||
name: Docker
|
||||
name: CI/CD Docker Build/Publish
|
||||
|
||||
on:
|
||||
push:
|
||||
@ -58,7 +58,7 @@ jobs:
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and Publish Docker Image
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
|
||||
10
.github/workflows/helm-test.yml
vendored
10
.github/workflows/helm-test.yml
vendored
@ -1,4 +1,4 @@
|
||||
name: Helm
|
||||
name: Lint and Test Helm Chart
|
||||
|
||||
on:
|
||||
push:
|
||||
@ -37,17 +37,17 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@v4
|
||||
uses: azure/setup-helm@v3
|
||||
with:
|
||||
version: v3.12.3
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
check-latest: true
|
||||
|
||||
- name: Set up chart-testing
|
||||
uses: helm/chart-testing-action@v2.6.1
|
||||
uses: helm/chart-testing-action@v2.4.0
|
||||
|
||||
- name: Run chart-testing (list-changed)
|
||||
id: list-changed
|
||||
@ -63,7 +63,7 @@ jobs:
|
||||
|
||||
- name: Create kind cluster
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
uses: helm/kind-action@v1.10.0
|
||||
uses: helm/kind-action@v1.8.0
|
||||
|
||||
- name: Run chart-testing (install)
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
|
||||
6
.github/workflows/model-check.yml
vendored
6
.github/workflows/model-check.yml
vendored
@ -1,4 +1,4 @@
|
||||
name: LLM Healthcheck
|
||||
name: LLM Models Healthcheck
|
||||
|
||||
on:
|
||||
push:
|
||||
@ -34,7 +34,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: Install dependencies with poetry
|
||||
@ -45,4 +45,4 @@ jobs:
|
||||
- name: Run model health check
|
||||
working-directory: ./api
|
||||
run: |
|
||||
poetry run python -m pytest -v --color=yes test/healthcheck_models.py
|
||||
poetry run python -m pytest test/healthcheck_models.py
|
||||
|
||||
2
.github/workflows/release-drafter.yml
vendored
2
.github/workflows/release-drafter.yml
vendored
@ -17,6 +17,6 @@ jobs:
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: release-drafter/release-drafter@v6
|
||||
- uses: release-drafter/release-drafter@v5
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -12,4 +12,3 @@ api/static/*
|
||||
**/node_modules/
|
||||
**/dist
|
||||
**/.mypy_cache/
|
||||
.vscode
|
||||
@ -29,14 +29,11 @@ COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
|
||||
COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
|
||||
COPY ./api /usr/src/app/api
|
||||
COPY scripts/deploy.sh /usr/src/app/deploy.sh
|
||||
COPY scripts/serge.env /usr/src/app/serge.env
|
||||
COPY vendor/requirements.txt /usr/src/app/requirements.txt
|
||||
|
||||
# Install api dependencies
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends dumb-init libgomp1 musl-dev \
|
||||
&& apt-get install -y --no-install-recommends cmake build-essential dumb-init curl \
|
||||
&& pip install --no-cache-dir ./api \
|
||||
&& pip install -r /usr/src/app/requirements.txt \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* \
|
||||
&& chmod 755 /usr/src/app/deploy.sh \
|
||||
&& chmod 755 /usr/local/bin/redis-server \
|
||||
@ -45,8 +42,7 @@ RUN apt-get update \
|
||||
&& mkdir -p /data/db \
|
||||
&& mkdir -p /usr/src/app/weights \
|
||||
&& echo "appendonly yes" >> /etc/redis/redis.conf \
|
||||
&& echo "dir /data/db/" >> /etc/redis/redis.conf \
|
||||
&& ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
|
||||
&& echo "dir /data/db/" >> /etc/redis/redis.conf
|
||||
|
||||
EXPOSE 8008
|
||||
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||
|
||||
@ -17,15 +17,13 @@ ENV NODE_ENV='development'
|
||||
|
||||
# Install dependencies
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends dumb-init musl-dev
|
||||
&& apt-get install -y --no-install-recommends cmake build-essential dumb-init curl
|
||||
|
||||
# Copy database, source code, and scripts
|
||||
COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
|
||||
COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
|
||||
COPY --from=node_base /usr/local /usr/local
|
||||
COPY scripts/dev.sh /usr/src/app/dev.sh
|
||||
COPY scripts/serge.env /usr/src/app/serge.env
|
||||
COPY vendor/requirements.txt /usr/src/app/requirements.txt
|
||||
COPY ./web/package.json ./web/package-lock.json ./
|
||||
|
||||
RUN npm ci \
|
||||
@ -36,8 +34,7 @@ RUN npm ci \
|
||||
&& mkdir -p /data/db \
|
||||
&& mkdir -p /usr/src/app/weights \
|
||||
&& echo "appendonly yes" >> /etc/redis/redis.conf \
|
||||
&& echo "dir /data/db/" >> /etc/redis/redis.conf \
|
||||
&& ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
|
||||
&& echo "dir /data/db/" >> /etc/redis/redis.conf
|
||||
|
||||
EXPOSE 8008
|
||||
EXPOSE 9124
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023-present Nathan Sarrazin and Contributors
|
||||
Copyright (c) 2023 Nathan Sarrazin and contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
SOFTWARE.
|
||||
201
LICENSE-APACHE
201
LICENSE-APACHE
@ -1,201 +0,0 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2023 Nathan Sarrazin and contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
154
README.md
154
README.md
@ -3,7 +3,7 @@
|
||||

|
||||
[](https://discord.gg/62Hc6FEYQH)
|
||||
|
||||
Serge is a chat interface crafted with [llama.cpp](https://github.com/ggerganov/llama.cpp) for running GGUF models. No API keys, entirely self-hosted!
|
||||
Serge is a chat interface crafted with [llama.cpp](https://github.com/ggerganov/llama.cpp) for running Alpaca models. No API keys, entirely self-hosted!
|
||||
|
||||
- 🌐 **SvelteKit** frontend
|
||||
- 💾 **[Redis](https://github.com/redis/redis)** for storing chat history & parameters
|
||||
@ -43,24 +43,13 @@ volumes:
|
||||
datadb:
|
||||
```
|
||||
|
||||
Then, just visit http://localhost:8008. You can find the API documentation at http://localhost:8008/api/docs
|
||||
Then, just visit http://localhost:8008/. You can find the API documentation at http://localhost:8008/api/docs
|
||||
|
||||
### 🌍 Environment Variables
|
||||
## 🖥️ Windows Setup
|
||||
|
||||
The following Environment Variables are available:
|
||||
Ensure you have Docker Desktop installed, WSL2 configured, and enough free RAM to run models.
|
||||
|
||||
| Variable Name | Description | Default Value |
|
||||
|-----------------------|---------------------------------------------------------|--------------------------------------|
|
||||
| `SERGE_DATABASE_URL` | Database connection string | `sqlite:////data/db/sql_app.db` |
|
||||
| `SERGE_JWT_SECRET` | Key for auth token encryption. Use a random string | `uF7FGN5uzfGdFiPzR` |
|
||||
| `SERGE_SESSION_EXPIRY`| Duration in minutes before a user must reauthenticate | `60` |
|
||||
| `NODE_ENV` | Node.js running environment | `production` |
|
||||
|
||||
## 🖥️ Windows
|
||||
|
||||
Ensure you have Docker Desktop installed, WSL2 configured, and enough free RAM to run models.
|
||||
|
||||
## ☁️ Kubernetes
|
||||
## ☁️ Kubernetes & Docker Compose Setup
|
||||
|
||||
Instructions for setting up Serge on Kubernetes can be found in the [wiki](https://github.com/serge-chat/serge/wiki/Integrating-Serge-in-your-orchestration#kubernetes-example).
|
||||
|
||||
@ -68,57 +57,79 @@ Instructions for setting up Serge on Kubernetes can be found in the [wiki](https
|
||||
|
||||
| Category | Models |
|
||||
|:-------------:|:-------|
|
||||
| **Alfred** | 40B-1023 |
|
||||
| **BioMistral** | 7B |
|
||||
| **Code** | 13B, 33B |
|
||||
| **CodeLLaMA** | 7B, 7B-Instruct, 7B-Python, 13B, 13B-Instruct, 13B-Python, 34B, 34B-Instruct, 34B-Python |
|
||||
| **Codestral** | 22B v0.1 |
|
||||
| **Gemma** | 2B, 1.1-2B-Instruct, 7B, 1.1-7B-Instruct |
|
||||
| **Gorilla** | Falcon-7B-HF-v0, 7B-HF-v1, Openfunctions-v1, Openfunctions-v2 |
|
||||
| **Falcon** | 7B, 7B-Instruct, 40B, 40B-Instruct |
|
||||
| **LLaMA 2** | 7B, 7B-Chat, 7B-Coder, 13B, 13B-Chat, 70B, 70B-Chat, 70B-OASST |
|
||||
| **LLaMA 3** | 11B-Instruct, 13B-Instruct, 16B-Instruct |
|
||||
| **LLaMA Pro** | 8B, 8B-Instruct |
|
||||
| **Med42** | 70B |
|
||||
| **Medalpaca** | 13B |
|
||||
| **Medicine** | Chat, LLM |
|
||||
| **Meditron** | 7B, 7B-Chat, 70B |
|
||||
| **Meta-LlaMA-3** | 8B, 8B-Instruct, 70B, 70B-Instruct |
|
||||
| **Mistral** | 7B-V0.1, 7B-Instruct-v0.2, 7B-OpenOrca |
|
||||
| **MistralLite** | 7B |
|
||||
| **Mixtral** | 8x7B-v0.1, 8x7B-Dolphin-2.7, 8x7B-Instruct-v0.1 |
|
||||
| **Neural-Chat** | 7B-v3.3 |
|
||||
| **Notus** | 7B-v1 |
|
||||
| **Notux** | 8x7b-v1 |
|
||||
| **Nous-Hermes 2** | Mistral-7B-DPO, Mixtral-8x7B-DPO, Mistral-8x7B-SFT |
|
||||
| **OpenChat** | 7B-v3.5-1210 |
|
||||
| **OpenCodeInterpreter** | DS-6.7B, DS-33B, CL-7B, CL-13B, CL-70B |
|
||||
| **OpenLLaMA** | 3B-v2, 7B-v2, 13B-v2 |
|
||||
| **Orca 2** | 7B, 13B |
|
||||
| **Phi 2** | 2.7B |
|
||||
| **Phi 3** | mini-4k-instruct, medium-4k-instruct, medium-128k-instruct |
|
||||
| **Python Code** | 13B, 33B |
|
||||
| **PsyMedRP** | 13B-v1, 20B-v1 |
|
||||
| **Starling LM** | 7B-Alpha |
|
||||
| **SOLAR** | 10.7B-v1.0, 10.7B-instruct-v1.0 |
|
||||
| **TinyLlama** | 1.1B |
|
||||
| **Vicuna** | 7B-v1.5, 13B-v1.5, 33B-v1.3, 33B-Coder |
|
||||
| **WizardLM** | 2-7B, 13B-v1.2, 70B-v1.0 |
|
||||
| **Zephyr** | 3B, 7B-Alpha, 7B-Beta |
|
||||
| **Alpaca 🦙** | Alpaca-LoRA-65B, GPT4-Alpaca-LoRA-30B |
|
||||
| **Chronos 🌑**| Chronos-13B, Chronos-33B, Chronos-Hermes-13B |
|
||||
| **GPT4All 🌍**| GPT4All-13B |
|
||||
| **Koala 🐨** | Koala-7B, Koala-13B |
|
||||
| **LLaMA 🦙** | FinLLaMA-33B, LLaMA-Supercot-30B, LLaMA2 7B, LLaMA2 13B, LLaMA2 70B |
|
||||
| **Lazarus 💀**| Lazarus-30B |
|
||||
| **Nous 🧠** | Nous-Hermes-13B |
|
||||
| **OpenAssistant 🎙️** | OpenAssistant-30B |
|
||||
| **Orca 🐬** | Orca-Mini-v2-7B, Orca-Mini-v2-13B, OpenOrca-Preview1-13B |
|
||||
| **Samantha 👩**| Samantha-7B, Samantha-13B, Samantha-33B |
|
||||
| **Vicuna 🦙** | Stable-Vicuna-13B, Vicuna-CoT-7B, Vicuna-CoT-13B, Vicuna-v1.1-7B, Vicuna-v1.1-13B, VicUnlocked-30B, VicUnlocked-65B |
|
||||
| **Wizard 🧙** | Wizard-Mega-13B, WizardLM-Uncensored-7B, WizardLM-Uncensored-13B, WizardLM-Uncensored-30B, WizardCoder-Python-13B-V1.0 |
|
||||
|
||||
Additional models can be requested by opening a GitHub issue. Other models are also available at [Serge Models](https://github.com/Smartappli/serge-models).
|
||||
Additional weights can be added to the `serge_weights` volume using `docker cp`:
|
||||
|
||||
```bash
|
||||
docker cp ./my_weight.bin serge:/usr/src/app/weights/
|
||||
```
|
||||
|
||||
## ⚠️ Memory Usage
|
||||
|
||||
LLaMA will crash if you don't have enough available memory for the model:
|
||||
LLaMA will crash if you don't have enough available memory for the model:
|
||||
|
||||
| Model | Max RAM Required |
|
||||
|-------------|------------------|
|
||||
| 7B | 4.5GB |
|
||||
| 7B-q2_K | 5.37GB |
|
||||
| 7B-q3_K_L | 6.10GB |
|
||||
| 7B-q4_1 | 6.71GB |
|
||||
| 7B-q4_K_M | 6.58GB |
|
||||
| 7B-q5_1 | 7.56GB |
|
||||
| 7B-q5_K_M | 7.28GB |
|
||||
| 7B-q6_K | 8.03GB |
|
||||
| 7B-q8_0 | 9.66GB |
|
||||
| 13B | 12GB |
|
||||
| 13B-q2_K | 8.01GB |
|
||||
| 13B-q3_K_L | 9.43GB |
|
||||
| 13B-q4_1 | 10.64GB |
|
||||
| 13B-q4_K_M | 10.37GB |
|
||||
| 13B-q5_1 | 12.26GB |
|
||||
| 13B-q5_K_M | 11.73GB |
|
||||
| 13B-q6_K | 13.18GB |
|
||||
| 13B-q8_0 | 16.33GB |
|
||||
| 33B | 20GB |
|
||||
| 33B-q2_K | 16.21GB |
|
||||
| 33B-q3_K_L | 19.78GB |
|
||||
| 33B-q4_1 | 22.83GB |
|
||||
| 33B-q4_K_M | 22.12GB |
|
||||
| 33B-q5_1 | 26.90GB |
|
||||
| 33B-q5_K_M | 25.55GB |
|
||||
| 33B-q6_K | 29.19GB |
|
||||
| 33B-q8_0 | 37.06GB |
|
||||
| 65B | 50GB |
|
||||
| 65B-q2_K | 29.95GB |
|
||||
| 65B-q3_K_L | 37.15GB |
|
||||
| 65B-q4_1 | 43.31GB |
|
||||
| 65B-q4_K_M | 41.85GB |
|
||||
| 65B-q5_1 | 51.47GB |
|
||||
| 65B-q5_K_M | 48.74GB |
|
||||
| 65B-q6_K | 56.06GB |
|
||||
| 65B-q8_0 | 71.87GB |
|
||||
|
||||
## 💬 Support
|
||||
|
||||
Need help? Join our [Discord](https://discord.gg/62Hc6FEYQH)
|
||||
|
||||
## ⭐️ Stargazers
|
||||
|
||||
<img src="https://starchart.cc/serge-chat/serge.svg" alt="Stargazers over time" style="max-width: 100%">
|
||||
|
||||
## 🧾 License
|
||||
|
||||
[Nathan Sarrazin](https://github.com/nsarrazin) and [Contributors](https://github.com/serge-chat/serge/graphs/contributors). `Serge` is free and open-source software licensed under the [MIT License](https://github.com/serge-chat/serge/blob/main/LICENSE-MIT) and [Apache-2.0](https://github.com/serge-chat/serge/blob/main/LICENSE-APACHE).
|
||||
[Nathan Sarrazin](https://github.com/nsarrazin) and [Contributors](https://github.com/serge-chat/serge/graphs/contributors). `Serge` is free and open-source software licensed under the [MIT License](https://github.com/serge-chat/serge/blob/master/LICENSE).
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
@ -127,32 +138,5 @@ If you discover a bug or have a feature idea, feel free to open an issue or PR.
|
||||
To run Serge in development mode:
|
||||
```bash
|
||||
git clone https://github.com/serge-chat/serge.git
|
||||
cd serge/
|
||||
docker compose -f docker-compose.dev.yml up --build
|
||||
```
|
||||
|
||||
The solution will accept a python debugger session on port 5678. Example launch.json for VSCode:
|
||||
|
||||
```json
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Remote Debug",
|
||||
"type": "python",
|
||||
"request": "attach",
|
||||
"connect": {
|
||||
"host": "localhost",
|
||||
"port": 5678
|
||||
},
|
||||
"pathMappings": [
|
||||
{
|
||||
"localRoot": "${workspaceFolder}/api",
|
||||
"remoteRoot": "/usr/src/app/api/"
|
||||
}
|
||||
],
|
||||
"justMyCode": false
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
docker compose -f docker-compose.dev.yml up -d --build
|
||||
```
|
||||
@ -1,2 +1 @@
|
||||
./weights/*.bin**
|
||||
./weights/*.gguf**
|
||||
./weights/*.bin**
|
||||
4
api/.gitignore
vendored
4
api/.gitignore
vendored
@ -157,6 +157,4 @@ cython_debug/
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
*.db
|
||||
#.idea/
|
||||
2615
api/poetry.lock
generated
2615
api/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -21,29 +21,53 @@ requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python=">=3.10,<4.0"
|
||||
python=">=3.9,<4.0"
|
||||
asyncio = "^3.4.3"
|
||||
packaging = "^24.1"
|
||||
pydantic = "^1.10.17"
|
||||
sse-starlette = "^1.8.2"
|
||||
packaging = "^23.1"
|
||||
pydantic = "^1.10.12"
|
||||
python-dotenv = "^1.0.0"
|
||||
python-multipart = "^0.0.6"
|
||||
pyyaml = "^6.0"
|
||||
rfc3986 = "^2.0.0"
|
||||
sentencepiece = "^0.1.99"
|
||||
sniffio = "^1.3.0"
|
||||
sse-starlette = "^1.6.5"
|
||||
starlette = "^0.26.1"
|
||||
typing-extensions = "^4.12.2"
|
||||
urllib3 = "^2.2.2"
|
||||
toml = "^0.10.2"
|
||||
tqdm = "^4.66.1"
|
||||
typing-extensions = "^4.8.0"
|
||||
ujson = "^5.8.0"
|
||||
urllib3 = "^2.0.4"
|
||||
uvicorn = "^0.23.2"
|
||||
uvloop = "^0.17.0"
|
||||
watchfiles = "^0.20.0"
|
||||
websockets = "^11.0"
|
||||
anyio = "^4.0.0"
|
||||
certifi = "^2023.7.22"
|
||||
charset-normalizer = "^3.2.0"
|
||||
click = "^8.1.7"
|
||||
email-validator = "^2.0.0"
|
||||
fastapi = "^0.95.1"
|
||||
huggingface-hub = "^0.24.5"
|
||||
requests = "^2.32.3"
|
||||
filelock = "^3.12.4"
|
||||
h11 = "^0.14.0"
|
||||
httpcore = "^0.18.0"
|
||||
httptools = "^0.6.0"
|
||||
huggingface-hub = "^0.16.4"
|
||||
idna = "^3.4"
|
||||
itsdangerous = "^2.1.2"
|
||||
jinja2 = "^3.1.2"
|
||||
markupsafe = "^2.1.3"
|
||||
motor = "^3.3.1"
|
||||
orjson = "^3.9.7"
|
||||
dnspython = "^2.4.2"
|
||||
lazy-model = "^0.2.0"
|
||||
requests = "^2.31.0"
|
||||
numpy = "^1.25.2"
|
||||
langchain = "^0.0.180"
|
||||
loguru = "^0.7.2"
|
||||
redis = {extras = ["hiredis"], version = "^5.0.8"}
|
||||
pytest = "^8.3.2"
|
||||
hypercorn = {extras = ["trio"], version = "^0.17.3"}
|
||||
redis = {extras = ["hiredis"], version = "^5.0.0"}
|
||||
pytest = "^7.4.2"
|
||||
|
||||
pyjwt = "^2.9.0"
|
||||
python-jose = {extras = ["cryptography"], version = "^3.3.0"}
|
||||
aiofiles = "^24.1.0"
|
||||
python-multipart = "^0.0.9"
|
||||
debugpy = "^1.8.5"
|
||||
sqlalchemy = "^2.0.32"
|
||||
[tool.ruff]
|
||||
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
|
||||
select = ["E", "F"]
|
||||
@ -94,3 +118,6 @@ target-version = "py311"
|
||||
# Unlike Flake8, default to a complexity level of 10.
|
||||
max-complexity = 10
|
||||
|
||||
[tool.black]
|
||||
line-length = 150
|
||||
target-version = ['py311']
|
||||
|
||||
@ -1,110 +0,0 @@
|
||||
import logging
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
|
||||
from serge.schema import user as user_schema
|
||||
from serge.utils.security import get_password_hash
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from serge.models import user as user_model
|
||||
|
||||
|
||||
def get_user(db: Session, username: str) -> Optional[user_schema.User]:
    """Look up a user by username and return it as a schema object.

    The first row whose ``username`` matches is handed to
    ``Mappers.user_db_to_view`` with ``include_auth=True``, so the result
    carries the user's auth entries and chats. The mapper's result is
    returned unchanged (it yields ``None`` for a missing row).
    """
    matching = db.query(user_model.User).filter(user_model.User.username == username)
    db_user = matching.first()
    return Mappers.user_db_to_view(db_user, include_auth=True)
|
||||
|
||||
|
||||
def get_user_by_email(db: Session, email: str) -> Optional[user_schema.User]:
    """Look up a user by e-mail address and return it as a schema object.

    The mapper is called with its default ``include_auth``, so the returned
    object has empty ``auth`` and ``chats`` lists.
    """
    matching = db.query(user_model.User).filter(user_model.User.email == email)
    db_user = matching.first()
    return Mappers.user_db_to_view(db_user)
|
||||
|
||||
|
||||
def get_users(db: Session, skip: int = 0, limit: int = 100) -> List[user_schema.User]:
    """Return one page of users as schema objects.

    Args:
        db: Open SQLAlchemy session.
        skip: Number of rows to skip (query ``offset``).
        limit: Maximum number of rows to return.
    """
    page = db.query(user_model.User).offset(skip).limit(limit).all()
    return list(map(Mappers.user_db_to_view, page))
|
||||
|
||||
|
||||
def create_user(db: Session, ua: user_schema.UserAuth) -> Optional[user_schema.User]:
    """Create a user (and its auth row) from a ``UserAuth`` payload.

    Returns the new user as a schema object, or ``None`` when the username
    is already taken or the auth type is not supported.

    Note: ``ua.secret`` is overwritten in place with its hash before the
    rows are built.
    """
    already_registered = get_user(db, ua.username)
    if already_registered:
        logging.error(f"Tried to create new user, but already exists: {ua.username}")
        return None

    # Only auth type 1 (password) is handled so far; everything else is rejected.
    if ua.auth_type != 1:
        return None
    ua.secret = get_password_hash(ua.secret)

    new_rows = Mappers.user_view_to_db(None, ua)
    db_user, db_user_auth = new_rows
    db.add(db_user_auth)
    db.add(db_user)
    db.commit()
    return Mappers.user_db_to_view(db_user)
|
||||
|
||||
|
||||
def update_user(db: Session, u: user_schema.User) -> Optional[user_schema.User]:
    """Copy the scalar fields of ``u`` onto the stored row with the same username.

    The ``auth`` and ``chats`` entries of ``u.dict()`` are skipped; every
    other field is assigned to the row and the session is committed.

    Returns the updated row itself (it is not passed through ``Mappers``),
    or ``None`` when no row has that username.
    """
    row = db.query(user_model.User).filter(user_model.User.username == u.username).first()
    if not row:
        return None

    relationship_fields = ("auth", "chats")
    for field, value in u.dict().items():
        if field not in relationship_fields:
            setattr(row, field, value)

    db.commit()
    return row
|
||||
|
||||
|
||||
def create_chat(db: Session, chat: user_schema.Chat):
    """Insert a ``chats`` row linking ``chat.chat_id`` to ``chat.owner`` and commit."""
    chat_row = user_model.Chat(chat_id=chat.chat_id, owner=chat.owner)
    db.add(chat_row)
    db.commit()
|
||||
|
||||
|
||||
def remove_chat(db: Session, chat: user_schema.Chat):
    """Delete the ``chats`` row whose ``chat_id`` equals ``chat.chat_id`` and commit.

    The lookup uses ``.one()``, so it raises unless exactly one row matches.
    """
    by_chat_id = db.query(user_model.Chat).filter(user_model.Chat.chat_id == chat.chat_id)
    chat_row = by_chat_id.one()
    db.delete(chat_row)
    db.commit()
|
||||
|
||||
|
||||
class Mappers:
    """Conversions between database rows (``user_model``) and schema objects (``user_schema``)."""

    @staticmethod
    def user_db_to_view(u: user_model.User, include_auth=False) -> user_schema.User:
        """Convert a ``User`` row into a schema ``User``.

        Returns ``None`` for a falsy ``u``. The row's auth entries and chats
        are copied only when ``include_auth`` is true; otherwise both lists
        on the result are empty.
        """
        if not u:
            return None

        if include_auth:
            db_auths, db_chats = u.auth, u.chats
        else:
            db_auths, db_chats = [], []

        # Keep only the plain attributes: skip private/ORM state ("_..."-prefixed)
        # and the two relationship attributes, which are converted separately below.
        plain_fields = {}
        for key, value in u.__dict__.items():
            if key.startswith("_") or key in ("chats", "auth"):
                continue
            plain_fields[key] = value
        app_user = user_schema.User(**plain_fields)

        app_user.auth = [
            user_schema.UserAuth(username=u.username, secret=entry.secret, auth_type=entry.auth_type)
            for entry in db_auths
        ]
        app_user.chats = [user_schema.Chat(chat_id=entry.chat_id, owner=entry.owner) for entry in db_chats]
        return app_user

    @staticmethod
    def user_view_to_db(
        u: Optional[user_schema.User] = None, ua: Optional[user_schema.UserAuth] = None
    ) -> (user_model.User, Optional[user_model.UserAuth]):
        """Build a ``User`` row (and, when ``ua`` is given, its ``UserAuth`` row).

        When ``u`` is missing, a fresh schema user with a random UUID and
        ``ua.username`` is created first. Returns ``(user_row, auth_row)``;
        ``auth_row`` is an empty list when no ``ua`` was passed.
        """
        assert u or ua, "One of User or UserAuth must be passed"
        if not u:  # Creating a new user
            u = user_schema.User(id=uuid.uuid4(), username=ua.username)

        auth = Mappers.user_auth_view_to_db(ua, u.id) if ua else []

        user = user_model.User(**u.dict())
        if auth:
            user.auth.append(auth)
        for chat in u.chats:
            chat_row = user_model.Chat(chat_id=chat.chat_id)
            user.chats.append(chat_row)
        return (user, auth)

    @staticmethod
    def user_auth_view_to_db(ua: user_schema.UserAuth, user_id: uuid.UUID) -> user_model.UserAuth:
        """Convert a schema ``UserAuth`` into a ``UserAuth`` row owned by ``user_id`` (``None`` for a falsy ``ua``)."""
        return user_model.UserAuth(secret=ua.secret, auth_type=ua.auth_type, user_id=user_id) if ua else None
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,31 +0,0 @@
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from serge.models.settings import Settings
|
||||
from serge.models.user import User, UserAuth
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
settings = Settings()
|
||||
|
||||
engine = create_engine(settings.SERGE_DATABASE_URL, connect_args={"check_same_thread": False})
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
|
||||
def seed_db(db: Session):
    """Create the built-in ``system`` user unless a user with that name already exists.

    The seeded user gets a random UUID, an empty e-mail, the default prompt
    and a single auth entry with an empty secret and ``auth_type=0``.
    """
    if db.query(User).filter(User.username == "system").first():
        return

    system_auth = UserAuth(secret="", auth_type=0)
    system_user = User(
        id=uuid.uuid4(),
        username="system",
        email="",
        full_name="Default User",
        theme_light=False,
        default_prompt="Below is an instruction that describes a task. Write a response that appropriately completes the request.",
        is_active=True,
        auth=[system_auth],
    )
    db.add(system_user)
    db.commit()
    logging.info("System user created")
|
||||
@ -5,21 +5,18 @@ from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from loguru import logger
|
||||
from serge.database import SessionLocal, engine, seed_db
|
||||
from starlette.responses import FileResponse
|
||||
|
||||
from serge.models.settings import Settings
|
||||
from serge.routers.auth import auth_router
|
||||
from serge.routers.chat import chat_router
|
||||
from serge.routers.model import model_router
|
||||
from serge.routers.ping import ping_router
|
||||
from serge.routers.user import user_router
|
||||
from starlette.responses import FileResponse
|
||||
|
||||
from serge.models import user as user_models
|
||||
from serge.utils.convert import convert_all
|
||||
|
||||
# Configure logging settings
|
||||
|
||||
# Define a logger for the current module
|
||||
logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
|
||||
logger.add(sys.stderr, format="{time} {level} {message}", level="DEBUG")
|
||||
|
||||
settings = Settings()
|
||||
|
||||
@ -45,17 +42,12 @@ origins = [
|
||||
"http://localhost:9124",
|
||||
]
|
||||
|
||||
# Seed the database
|
||||
user_models.Base.metadata.create_all(bind=engine)
|
||||
|
||||
app = FastAPI(title="Serge", version="0.0.1", description=description, tags_metadata=tags_metadata)
|
||||
|
||||
api_app = FastAPI(title="Serge API")
|
||||
api_app.include_router(chat_router)
|
||||
api_app.include_router(ping_router)
|
||||
api_app.include_router(model_router)
|
||||
api_app.include_router(auth_router)
|
||||
api_app.include_router(user_router)
|
||||
app.mount("/api", api_app)
|
||||
|
||||
# handle serving the frontend as static files in production
|
||||
@ -92,8 +84,8 @@ async def start_database():
|
||||
for file in files:
|
||||
os.remove(WEIGHTS + file)
|
||||
|
||||
db = SessionLocal()
|
||||
seed_db(db)
|
||||
logger.info("initializing models")
|
||||
convert_all("/usr/src/app/weights/", "/usr/src/app/weights/tokenizer.model")
|
||||
|
||||
|
||||
app.add_middleware(
|
||||
|
||||
@ -14,7 +14,6 @@ class ChatParameters(BaseModel):
|
||||
# logits_all: bool
|
||||
# vocab_only: bool
|
||||
# use_mlock: bool
|
||||
n_threads: int
|
||||
# n_batch: int
|
||||
last_n_tokens_size: int
|
||||
max_tokens: int
|
||||
@ -32,5 +31,5 @@ class ChatParameters(BaseModel):
|
||||
class Chat(BaseModel):
|
||||
id: str = Field(default_factory=lambda: str(uuid4()))
|
||||
created: datetime = Field(default_factory=datetime.now)
|
||||
owner: str = Field("system")
|
||||
|
||||
params: ChatParameters
|
||||
|
||||
@ -1,13 +1,8 @@
|
||||
from os import getenv
|
||||
|
||||
from pydantic import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
SERGE_DATABASE_URL: str = getenv("SERGE_DATABASE_URL", "sqlite:////data/db/sql_app.db")
|
||||
NODE_ENV: str = "development"
|
||||
SERGE_JWT_SECRET: str = getenv("SERGE_JWT_SECRET", "uF7FGN5uzfGdFiPzR")
|
||||
SERGE_SESSION_EXPIRY: int = getenv("SERGE_SESSION_EXPIRY", 60)
|
||||
|
||||
class Config:
|
||||
orm_mode = True
|
||||
|
||||
@ -1,40 +0,0 @@
|
||||
from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, Uuid
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
class User(Base):
|
||||
__tablename__ = "users"
|
||||
|
||||
id = Column(Uuid, primary_key=True)
|
||||
username = Column(String, unique=True, index=True)
|
||||
email = Column(String)
|
||||
full_name = Column(String)
|
||||
theme_light = Column(Boolean)
|
||||
default_prompt = Column(String)
|
||||
is_active = Column(Boolean, default=True)
|
||||
|
||||
auth = relationship("UserAuth", back_populates="user", lazy="joined")
|
||||
chats = relationship("Chat", back_populates="user", lazy="joined")
|
||||
|
||||
|
||||
class Chat(Base):
|
||||
__tablename__ = "chats"
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
chat_id = Column(String, index=True)
|
||||
owner = Column(String, ForeignKey("users.username"))
|
||||
user = relationship("User", back_populates="chats")
|
||||
|
||||
|
||||
class UserAuth(Base):
|
||||
__tablename__ = "auth"
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
secret = Column(String)
|
||||
auth_type = Column(Integer)
|
||||
user_id = Column(Uuid, ForeignKey("users.id"))
|
||||
|
||||
user = relationship("User", back_populates="auth")
|
||||
@ -1,108 +0,0 @@
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
|
||||
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
||||
from jose import JWTError
|
||||
from serge.crud import get_user
|
||||
from serge.database import SessionLocal
|
||||
from serge.schema.user import Token, User
|
||||
from serge.models.settings import Settings
|
||||
from serge.utils.security import create_access_token, decode_access_token, verify_password
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
||||
settings = Settings()
|
||||
|
||||
auth_router = APIRouter(
|
||||
prefix="/auth",
|
||||
tags=["auth"],
|
||||
)
|
||||
|
||||
|
||||
def get_db():
|
||||
db = SessionLocal()
|
||||
try:
|
||||
yield db
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def authenticate_user(username: str, password: str, db: Session) -> Optional[User]:
    """Check a username/password pair against the stored auth entries.

    Args:
        username: Name of the user to authenticate.
        password: Plain-text password supplied by the caller.
        db: Open SQLAlchemy session used for the lookup.

    Returns:
        The user when authentication succeeds, otherwise ``None`` (unknown
        user, wrong password, or no supported auth type). The failure value
        used to be ``False``; it is now ``None`` to match the declared
        ``Optional[User]`` return type. Callers testing ``if not user`` are
        unaffected.
    """
    user = get_user(db, username)
    if not user:
        return None

    # Users may have multiple ways to authenticate
    auth_types = {entry.auth_type for entry in user.auth}

    if 0 in auth_types:  # Default user, passwordless
        return user

    if 1 in auth_types:  # Password auth
        # Use the first password entry, as before.
        secret = next(entry.secret for entry in user.auth if entry.auth_type == 1)
        if verify_password(password, secret):
            return user

    # todo future auths (auth_type 2 and above are not implemented yet)
    return None
|
||||
|
||||
|
||||
@auth_router.post("/token", response_model=Token)
|
||||
async def login_for_access_token(
|
||||
response: Response,
|
||||
form_data: OAuth2PasswordRequestForm = Depends(),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
user = authenticate_user(form_data.username, form_data.password, db)
|
||||
if not user:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Incorrect username or password",
|
||||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
access_token_expires = timedelta(minutes=settings.SERGE_SESSION_EXPIRY)
|
||||
access_token = create_access_token(data={"sub": user.username}, expires_delta=access_token_expires)
|
||||
response.set_cookie(key="token", value=access_token, httponly=True, secure=True, samesite="strict")
|
||||
return {"access_token": access_token, "token_type": "bearer"}
|
||||
|
||||
|
||||
@auth_router.post("/logout")
|
||||
async def logout(response: Response):
|
||||
# Clear the token cookie by setting it to expire immediately
|
||||
response.delete_cookie(key="token")
|
||||
return {"message": "Logged out successfully"}
|
||||
|
||||
|
||||
async def get_current_user(token: str = Depends(oauth2_scheme), db: Session = Depends(get_db)) -> User:
|
||||
credentials_exception = HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Could not validate credentials",
|
||||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
try:
|
||||
username = decode_access_token(token)
|
||||
if username is None:
|
||||
raise credentials_exception
|
||||
except JWTError as e:
|
||||
logging.exception(e)
|
||||
raise credentials_exception
|
||||
|
||||
user = get_user(db, username)
|
||||
|
||||
if user is None:
|
||||
raise credentials_exception
|
||||
return user
|
||||
|
||||
|
||||
async def get_current_active_user(request: Request, response: Response, db: Session = Depends(get_db)) -> User:
    """Resolve the user for the current request from the ``token`` cookie.

    Falls back to the ``system`` user when the cookie is missing or empty.
    When the token is rejected (``get_current_user`` raises ``HTTPException``),
    the cookie is cleared via ``logout`` and the ``system`` user is returned.
    """
    token = request.cookies.get("token")

    if token:
        try:
            return await get_current_user(token, db)
        except HTTPException:
            # Invalid or expired token: drop the cookie, then fall through to the default user.
            await logout(response)

    return get_user(db, "system")
|
||||
@ -1,60 +1,25 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter
|
||||
from langchain.memory import RedisChatMessageHistory
|
||||
from langchain.schema import AIMessage, HumanMessage, SystemMessage, messages_to_dict
|
||||
from langchain.schema import SystemMessage, messages_to_dict, AIMessage, HumanMessage
|
||||
from llama_cpp import Llama
|
||||
from loguru import logger
|
||||
from redis import Redis
|
||||
from serge.crud import create_chat, remove_chat, update_user
|
||||
from serge.database import SessionLocal
|
||||
from serge.models.chat import Chat, ChatParameters
|
||||
from serge.routers.auth import get_current_active_user
|
||||
from serge.schema.user import Chat as UserChat
|
||||
from serge.schema.user import User
|
||||
from serge.utils.stream import get_prompt
|
||||
from sqlalchemy.orm import Session
|
||||
from sse_starlette.sse import EventSourceResponse
|
||||
|
||||
from serge.models.chat import Chat, ChatParameters
|
||||
from serge.utils.stream import get_prompt
|
||||
|
||||
chat_router = APIRouter(
|
||||
prefix="/chat",
|
||||
tags=["chat"],
|
||||
)
|
||||
|
||||
unauth_error = HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Unauthorized",
|
||||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
|
||||
|
||||
def get_db():
|
||||
db = SessionLocal()
|
||||
try:
|
||||
yield db
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def _try_get_chat(client, chat_id):
|
||||
if not client.sismember("chats", chat_id):
|
||||
raise ValueError("Chat does not exist")
|
||||
|
||||
chat_raw = client.get(f"chat:{chat_id}")
|
||||
chat = Chat.parse_raw(chat_raw)
|
||||
|
||||
# backwards compat
|
||||
if not hasattr(chat, "owner"):
|
||||
chat.owner = "system"
|
||||
|
||||
return chat
|
||||
|
||||
|
||||
@chat_router.post("/")
|
||||
async def create_new_chat(
|
||||
u: User = Depends(get_current_active_user),
|
||||
db: Session = Depends(get_db),
|
||||
model: str = "7B",
|
||||
temperature: float = 0.1,
|
||||
top_k: int = 50,
|
||||
@ -65,12 +30,17 @@ async def create_new_chat(
|
||||
repeat_last_n: int = 64,
|
||||
repeat_penalty: float = 1.3,
|
||||
init_prompt: str = "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
|
||||
n_threads: int = 4,
|
||||
):
|
||||
if not os.path.exists(f"/usr/src/app/weights/{model}.bin"):
|
||||
raise ValueError(f"Model can't be found: /usr/src/app/weights/{model}.bin")
|
||||
try:
|
||||
client = Llama(
|
||||
model_path="/usr/src/app/weights/" + model + ".bin",
|
||||
)
|
||||
del client
|
||||
except Exception as exc:
|
||||
raise ValueError(f"Model can't be found: {exc}")
|
||||
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
params = ChatParameters(
|
||||
model_path=model,
|
||||
@ -82,20 +52,15 @@ async def create_new_chat(
|
||||
n_gpu_layers=gpu_layers,
|
||||
last_n_tokens_size=repeat_last_n,
|
||||
repeat_penalty=repeat_penalty,
|
||||
n_threads=n_threads,
|
||||
n_threads=len(os.sched_getaffinity(0)),
|
||||
init_prompt=init_prompt,
|
||||
)
|
||||
# create the chat
|
||||
chat = Chat(owner=u.username, params=params)
|
||||
chat = Chat(params=params)
|
||||
|
||||
# store the parameters
|
||||
client.set(f"chat:{chat.id}", chat.json())
|
||||
|
||||
uc = UserChat(chat_id=chat.id, owner=u.username)
|
||||
create_chat(db, uc)
|
||||
u.chats.append(uc)
|
||||
update_user(db, u)
|
||||
|
||||
# create the message history
|
||||
history = RedisChatMessageHistory(chat.id)
|
||||
history.append(SystemMessage(content=init_prompt))
|
||||
@ -107,11 +72,15 @@ async def create_new_chat(
|
||||
|
||||
|
||||
@chat_router.get("/")
|
||||
async def get_all_chats(u: User = Depends(get_current_active_user)):
|
||||
async def get_all_chats():
|
||||
res = []
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
ids = client.smembers("chats")
|
||||
|
||||
chats = sorted(
|
||||
[await get_specific_chat(x.chat_id, u) for x in u.chats],
|
||||
[await get_specific_chat(id.decode()) for id in ids],
|
||||
key=lambda x: x["created"],
|
||||
reverse=True,
|
||||
)
|
||||
@ -134,39 +103,47 @@ async def get_all_chats(u: User = Depends(get_current_active_user)):
|
||||
|
||||
|
||||
@chat_router.get("/{chat_id}")
|
||||
async def get_specific_chat(chat_id: str, u: User = Depends(get_current_active_user)):
|
||||
async def get_specific_chat(chat_id: str):
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
if chat_id not in [x.chat_id for x in u.chats]:
|
||||
raise unauth_error
|
||||
if not client.sismember("chats", chat_id):
|
||||
raise ValueError("Chat does not exist")
|
||||
|
||||
chat = _try_get_chat(client, chat_id)
|
||||
chat_raw = client.get(f"chat:{chat_id}")
|
||||
chat = Chat.parse_raw(chat_raw)
|
||||
|
||||
history = RedisChatMessageHistory(chat.id)
|
||||
|
||||
chat_dict = chat.dict()
|
||||
chat_dict["history"] = messages_to_dict(history.messages)
|
||||
return chat_dict
|
||||
|
||||
|
||||
@chat_router.get("/{chat_id}/history")
|
||||
async def get_chat_history(chat_id: str, u: User = Depends(get_current_active_user)):
|
||||
if chat_id not in [x.chat_id for x in u.chats]:
|
||||
raise unauth_error
|
||||
async def get_chat_history(chat_id: str):
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
if not client.sismember("chats", chat_id):
|
||||
raise ValueError("Chat does not exist")
|
||||
|
||||
history = RedisChatMessageHistory(chat_id)
|
||||
return messages_to_dict(history.messages)
|
||||
|
||||
|
||||
@chat_router.delete("/{chat_id}/prompt")
|
||||
async def delete_prompt(chat_id: str, idx: int, u: User = Depends(get_current_active_user)):
|
||||
if chat_id not in [x.chat_id for x in u.chats]:
|
||||
raise unauth_error
|
||||
async def delete_prompt(chat_id: str, idx: int):
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
if not client.sismember("chats", chat_id):
|
||||
raise ValueError("Chat does not exist")
|
||||
|
||||
history = RedisChatMessageHistory(chat_id)
|
||||
|
||||
if idx >= len(history.messages):
|
||||
logger.error("Unable to delete message, chat in progress")
|
||||
raise HTTPException(status_code=202, detail="Unable to delete message, chat in progress")
|
||||
raise ValueError("Index out of range")
|
||||
|
||||
messages = history.messages.copy()[:idx]
|
||||
history.clear()
|
||||
@ -178,17 +155,13 @@ async def delete_prompt(chat_id: str, idx: int, u: User = Depends(get_current_ac
|
||||
|
||||
|
||||
@chat_router.delete("/{chat_id}")
|
||||
async def delete_chat(chat_id: str, u: User = Depends(get_current_active_user), db: Session = Depends(get_db)):
|
||||
async def delete_chat(chat_id: str):
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
if chat_id not in [x.chat_id for x in u.chats]:
|
||||
raise unauth_error
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
if not client.sismember("chats", chat_id):
|
||||
raise ValueError("Chat does not exist")
|
||||
|
||||
if cid := next((x for x in u.chats if x.chat_id == chat_id), None):
|
||||
remove_chat(db, cid)
|
||||
|
||||
RedisChatMessageHistory(chat_id).clear()
|
||||
|
||||
client.delete(f"chat:{chat_id}")
|
||||
@ -198,25 +171,28 @@ async def delete_chat(chat_id: str, u: User = Depends(get_current_active_user),
|
||||
|
||||
|
||||
@chat_router.delete("/delete/all")
|
||||
async def delete_all_chats(u: User = Depends(get_current_active_user), db: Session = Depends(get_db)):
|
||||
[delete_chat(x.chat_id, u, db) for x in u.chats]
|
||||
async def delete_all_chats():
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
client.flushdb()
|
||||
client.flushall()
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@chat_router.get("/{chat_id}/question")
|
||||
async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_current_active_user)):
|
||||
if chat_id not in [x.chat_id for x in u.chats]:
|
||||
raise unauth_error
|
||||
|
||||
def stream_ask_a_question(chat_id: str, prompt: str):
|
||||
logger.info("Starting redis client")
|
||||
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
if not client.sismember("chats", chat_id):
|
||||
raise ValueError("Chat does not exist")
|
||||
|
||||
logger.debug("creating chat")
|
||||
chat = _try_get_chat(client, chat_id)
|
||||
chat_raw = client.get(f"chat:{chat_id}")
|
||||
chat = Chat.parse_raw(chat_raw)
|
||||
|
||||
logger.debug(chat.params)
|
||||
logger.debug("creating history")
|
||||
@ -232,7 +208,7 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get
|
||||
logger.debug("creating Llama client")
|
||||
try:
|
||||
client = Llama(
|
||||
model_path=f"/usr/src/app/weights/{chat.params.model_path}.bin",
|
||||
model_path="/usr/src/app/weights/" + chat.params.model_path + ".bin",
|
||||
n_ctx=len(chat.params.init_prompt) + chat.params.n_ctx,
|
||||
n_gpu_layers=chat.params.n_gpu_layers,
|
||||
n_threads=chat.params.n_threads,
|
||||
@ -262,7 +238,7 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get
|
||||
yield {"event": "message", "data": txt}
|
||||
|
||||
except Exception as e:
|
||||
if type(e) is UnicodeDecodeError:
|
||||
if type(e) == UnicodeDecodeError:
|
||||
pass
|
||||
else:
|
||||
error = e.__str__()
|
||||
@ -281,16 +257,16 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get
|
||||
|
||||
|
||||
@chat_router.post("/{chat_id}/question")
|
||||
async def ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_current_active_user)):
|
||||
if chat_id not in [x.chat_id for x in u.chats]:
|
||||
raise unauth_error
|
||||
|
||||
async def ask_a_question(chat_id: str, prompt: str):
|
||||
client = Redis(host="localhost", port=6379, decode_responses=False)
|
||||
logger.info(f"Connected to Redis? {client.ping()}")
|
||||
|
||||
if not client.sismember("chats", chat_id):
|
||||
raise ValueError("Chat does not exist")
|
||||
|
||||
chat = _try_get_chat(client, chat_id)
|
||||
chat_raw = client.get(f"chat:{chat_id}")
|
||||
chat = Chat.parse_raw(chat_raw)
|
||||
|
||||
history = RedisChatMessageHistory(chat.id)
|
||||
|
||||
if len(prompt) > 0:
|
||||
@ -302,7 +278,7 @@ async def ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_curren
|
||||
|
||||
try:
|
||||
client = Llama(
|
||||
model_path=f"/usr/src/app/weights/{chat.params.model_path}.bin",
|
||||
model_path="/usr/src/app/weights/" + chat.params.model_path + ".bin",
|
||||
n_ctx=len(chat.params.init_prompt) + chat.params.n_ctx,
|
||||
n_threads=chat.params.n_threads,
|
||||
n_gpu_layers=chat.params.n_gpu_layers,
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
import asyncio
|
||||
import os
|
||||
import shutil
|
||||
import urllib.request
|
||||
import requests
|
||||
import huggingface_hub
|
||||
|
||||
import aiohttp
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from huggingface_hub import hf_hub_url
|
||||
from fastapi import APIRouter, HTTPException, Form
|
||||
from serge.models.models import Families
|
||||
from serge.utils.convert import convert_one_file
|
||||
from serge.utils.migrate import migrate
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
@ -15,8 +17,6 @@ model_router = APIRouter(
|
||||
tags=["model"],
|
||||
)
|
||||
|
||||
active_downloads = {}
|
||||
|
||||
WEIGHTS = "/usr/src/app/weights/"
|
||||
|
||||
models_file_path = Path(__file__).parent.parent / "data" / "models.json"
|
||||
@ -34,212 +34,144 @@ for family in families.__root__:
|
||||
)
|
||||
|
||||
|
||||
# Helper functions
|
||||
async def is_model_installed(model_name: str) -> bool:
    """Return True when ``<model_name>.bin`` is among the installed model files.

    File names starting with a dot never count as installed.
    """
    wanted = f"{model_name}.bin"
    installed_models = await list_of_installed_models()
    return wanted in installed_models and not wanted.startswith(".")
|
||||
@model_router.post("/refresh")
async def refresh_models(url: Annotated[str, Form()]):
    """
    Refreshes the list of models available for download.

    Fetches a model catalogue (JSON) from ``url``, validates it with
    ``Families`` and replaces the module-level ``models_info`` mapping of
    model name -> ``(repo, filename, disk_space)``.

    Raises:
        HTTPException: status 500 when the URL cannot be fetched, answers
            with a non-OK status, or returns a body that is not a valid
            catalogue. ``models_info`` is left untouched in those cases.
    """
    global models_info

    failure_detail = "Could not refresh models using the link provided."

    # NOTE(review): `url` is caller-supplied and fetched server-side; restrict
    # the endpoint or validate the host if it can be reached by untrusted clients.
    try:
        # A timeout keeps an unresponsive host from blocking this handler forever.
        r = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        raise HTTPException(status_code=500, detail=failure_detail) from exc

    if not r.ok:
        raise HTTPException(status_code=500, detail=failure_detail)

    try:
        # Both the JSON decoding error and pydantic's validation error derive from ValueError.
        families = Families.parse_obj(r.json())
    except ValueError as exc:
        raise HTTPException(status_code=500, detail=failure_detail) from exc

    # Build the new mapping completely before publishing it.
    refreshed = {}
    for family in families.__root__:
        for model in family.models:
            for file in model.files:
                # When a model lists several files, the last one wins (as before).
                refreshed[model.name] = (
                    model.repo,
                    file.filename,
                    file.disk_space,
                )
    models_info = refreshed

    return
|
||||
|
||||
|
||||
async def get_file_size(file_path: str) -> int:
    """Return the size in bytes of the file at ``file_path``."""
    return os.path.getsize(file_path)
|
||||
|
||||
|
||||
async def cleanup_model_resources(model_name: str):
    """Best-effort removal of the leftovers of a model download.

    Removes, when present, the hidden temporary file ``.<model_name>.bin``
    and the ``models--<repo>`` directories under ``WEIGHTS/.locks`` and
    ``WEIGHTS``. ``OSError`` failures are printed and otherwise ignored.
    Nothing is removed when ``model_name`` has no repo in ``models_info``.
    """
    model_repo, _, _ = models_info.get(model_name, (None, None, None))
    if not model_repo:
        print(f"No model repo found for {model_name}, cleanup may be incomplete.")
        return

    repo_dir_name = f"models--{model_repo.replace('/', '--')}"

    # (path, removal function, label used in the error message), in removal order.
    leftovers = (
        (os.path.join(WEIGHTS, f".{model_name}.bin"), os.remove, "temporary file"),
        (os.path.join(WEIGHTS, ".locks", repo_dir_name), shutil.rmtree, "lock directory"),
        (os.path.join(WEIGHTS, repo_dir_name), shutil.rmtree, "cache directory"),
    )

    for path, remove, label in leftovers:
        if not os.path.exists(path):
            continue
        try:
            remove(path)
        except OSError as e:
            print(f"Error removing {label} for {model_name}: {e}")
|
||||
|
||||
|
||||
async def download_file(session: aiohttp.ClientSession, url: str, path: str) -> None:
    """Stream the body of ``url`` into the file at ``path``.

    Args:
        session: aiohttp client session used for the GET request.
        url: Address to download from.
        path: Destination file, opened in binary write mode.

    Raises:
        HTTPException: status 500 when the response status is not 200.
    """
    async with session.get(url) as response:
        if response.status != 200:
            raise HTTPException(status_code=500, detail="Error downloading model")

        # Copy the body to disk in 1 KiB pieces until the stream is exhausted.
        with open(path, "wb") as out_file:
            async for piece in response.content.iter_chunked(1024):
                out_file.write(piece)
|
||||
|
||||
|
||||
# Handlers
|
||||
@model_router.get("/all")
|
||||
async def list_of_all_models():
|
||||
res = []
|
||||
installed_models = await list_of_installed_models()
|
||||
resp = []
|
||||
|
||||
for model in models_info.keys():
|
||||
if await is_model_installed(model):
|
||||
progress = await download_status(model)
|
||||
if f"{model}.bin" in installed_models:
|
||||
available = True
|
||||
# if model exists in WEIGHTS directory remove it from the list
|
||||
installed_models.remove(f"{model}.bin")
|
||||
else:
|
||||
available = False
|
||||
resp.append(
|
||||
res.append(
|
||||
{
|
||||
"name": model,
|
||||
"size": models_info[model][2],
|
||||
"available": available,
|
||||
"progress": await download_status(model),
|
||||
"progress": progress,
|
||||
}
|
||||
)
|
||||
# append the rest of the models
|
||||
for model in installed_models:
|
||||
resp.append(
|
||||
# .bin is removed for compatibility with generate.py
|
||||
res.append(
|
||||
{
|
||||
"name": model.replace(".bin", "").lstrip("/"),
|
||||
"size": await get_file_size(WEIGHTS + model),
|
||||
"size": os.stat(WEIGHTS + model).st_size,
|
||||
"available": True,
|
||||
"progress": 100.0,
|
||||
"progress": None,
|
||||
}
|
||||
)
|
||||
return resp
|
||||
|
||||
return res
|
||||
|
||||
|
||||
@model_router.get("/downloadable")
async def list_of_downloadable_models():
    """Return the names in ``models_info`` that have no ``<name>.bin`` in ``WEIGHTS``.

    Only the top level of ``WEIGHTS`` is listed. The result keeps the key order
    of ``models_info``.
    """
    suffix = ".bin"
    # Slice the suffix off rather than calling ``str.rstrip(".bin")``: rstrip
    # removes any trailing run of the characters '.', 'b', 'i' and 'n', so
    # "alpaca-7b.bin" would become "alpaca-7" and an installed model would
    # still be reported as downloadable.
    installed_models = {
        entry[: -len(suffix)] for entry in os.listdir(WEIGHTS) if entry.endswith(suffix)
    }
    return [name for name in models_info.keys() if name not in installed_models]
|
||||
|
||||
|
||||
@model_router.get("/installed")
|
||||
async def list_of_installed_models():
|
||||
# Iterate through the WEIGHTS directory and return filenames that end with .bin and do not start with a dot
|
||||
# after iterating through the WEIGHTS directory, return location and filename
|
||||
files = [
|
||||
os.path.join(model_location.replace(WEIGHTS, "").lstrip("/"), bin_file)
|
||||
for model_location, _, filenames in os.walk(WEIGHTS)
|
||||
model_location.replace(WEIGHTS, "") + "/" + bin_file
|
||||
for model_location, directory, filenames in os.walk(WEIGHTS)
|
||||
for bin_file in filenames
|
||||
if bin_file.endswith(".bin") and not bin_file.startswith(".")
|
||||
if os.path.splitext(bin_file)[1] == ".bin"
|
||||
]
|
||||
files = [i.lstrip("/") for i in files]
|
||||
return files
|
||||
|
||||
|
||||
@model_router.post("/{model_name}/download")
|
||||
async def download_model(model_name: str):
|
||||
if model_name not in models_info:
|
||||
def download_model(model_name: str):
|
||||
models = list(models_info.keys())
|
||||
if model_name not in models:
|
||||
raise HTTPException(status_code=404, detail="Model not found")
|
||||
|
||||
try:
|
||||
model_repo, filename, _ = models_info[model_name]
|
||||
model_url = hf_hub_url(repo_id=model_repo, filename=filename)
|
||||
temp_model_path = os.path.join(WEIGHTS, f".{model_name}.bin")
|
||||
model_path = os.path.join(WEIGHTS, f"{model_name}.bin")
|
||||
if not os.path.exists(WEIGHTS + "tokenizer.model"):
|
||||
print("Downloading tokenizer...")
|
||||
url = huggingface_hub.hf_hub_url(
|
||||
"nsarrazin/alpaca",
|
||||
"alpaca-7B-ggml/tokenizer.model",
|
||||
repo_type="model",
|
||||
revision="main",
|
||||
)
|
||||
urllib.request.urlretrieve(url, WEIGHTS + "tokenizer.model")
|
||||
|
||||
# Create an aiohttp session with timeout settings
|
||||
timeout = aiohttp.ClientTimeout(total=None, connect=300, sock_read=300)
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
# Start the download and add to active_downloads
|
||||
download_task = asyncio.create_task(download_file(session, model_url, temp_model_path))
|
||||
active_downloads[model_name] = download_task
|
||||
await download_task
|
||||
repo_id, filename, _ = models_info[model_name]
|
||||
|
||||
# Rename the dotfile to its final name
|
||||
os.rename(temp_model_path, model_path)
|
||||
print(f"Downloading {model_name} model from {repo_id}...")
|
||||
url = huggingface_hub.hf_hub_url(repo_id, filename, repo_type="model", revision="main")
|
||||
urllib.request.urlretrieve(url, WEIGHTS + f"{model_name}.bin.tmp")
|
||||
|
||||
# Remove the entry from active_downloads after successful download
|
||||
active_downloads.pop(model_name, None)
|
||||
os.rename(WEIGHTS + f"{model_name}.bin.tmp", WEIGHTS + f"{model_name}.bin")
|
||||
convert_one_file(WEIGHTS + f"{model_name}.bin", WEIGHTS + "tokenizer.model")
|
||||
migrate(WEIGHTS + f"{model_name}.bin")
|
||||
|
||||
return {"message": f"Model {model_name} downloaded"}
|
||||
except asyncio.CancelledError:
|
||||
await cleanup_model_resources(model_name)
|
||||
raise HTTPException(status_code=200, detail="Download cancelled")
|
||||
except Exception as exc:
|
||||
await cleanup_model_resources(model_name)
|
||||
raise HTTPException(status_code=500, detail=f"Error downloading model: {exc}")
|
||||
|
||||
|
||||
@model_router.post("/{model_name}/download/cancel")
async def cancel_download(model_name: str):
    """Cancel the active download task for ``model_name`` and clean up after it.

    Raises:
        HTTPException: 404 when no download task is registered for the model,
            500 when cancelling or cleaning up fails.
    """
    task = active_downloads.get(model_name)
    if not task:
        # Raised outside the try block below: HTTPException is an Exception
        # subclass, so inside it the 404 would be re-wrapped as a 500.
        raise HTTPException(status_code=404, detail="No active download for this model")

    try:
        task.cancel()

        # Remove entry from active downloads
        active_downloads.pop(model_name, None)

        try:
            # Wait for the task to respond to cancellation
            print(f"Waiting for download for {model_name} to be cancelled")
            await task
        except asyncio.CancelledError:
            # Handle the expected cancellation exception
            pass

        # Cleanup resources
        await cleanup_model_resources(model_name)

        print(f"Download for {model_name} cancelled")
        return {"message": f"Download for {model_name} cancelled"}
    except HTTPException:
        # Keep the status code of any HTTP error raised while waiting or cleaning up.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error cancelling model download: {str(e)}")
|
||||
return {"message": f"Model {model_name} downloaded"}
|
||||
|
||||
|
||||
@model_router.get("/{model_name}/download/status")
|
||||
async def download_status(model_name: str):
|
||||
if model_name not in models_info:
|
||||
models = list(models_info.keys())
|
||||
|
||||
if model_name not in models:
|
||||
raise HTTPException(status_code=404, detail="Model not found")
|
||||
|
||||
filesize = models_info[model_name][2]
|
||||
model_repo, _, _ = models_info[model_name]
|
||||
|
||||
# Construct the path to the blobs directory
|
||||
temp_model_path = os.path.join(WEIGHTS, f".{model_name}.bin")
|
||||
model_path = os.path.join(WEIGHTS, f"{model_name}.bin")
|
||||
bin_path = WEIGHTS + f"{model_name}.bin.tmp"
|
||||
|
||||
# Check if the model is currently being downloaded
|
||||
task = active_downloads.get(model_name)
|
||||
|
||||
if os.path.exists(model_path):
|
||||
currentsize = os.path.getsize(model_path)
|
||||
progress = min(round(currentsize / filesize * 100, 1), 100)
|
||||
return progress
|
||||
elif task and not task.done():
|
||||
# If the task is still running, check for incomplete files
|
||||
if os.path.exists(temp_model_path):
|
||||
currentsize = os.path.getsize(temp_model_path)
|
||||
return min(round(currentsize / filesize * 100, 1), 100)
|
||||
# If temp_model_path doesn't exist, the download is likely just starting, progress is 0
|
||||
return 0
|
||||
else:
|
||||
# No active download and the file does not exist
|
||||
return None
|
||||
if os.path.exists(bin_path):
|
||||
currentsize = os.path.getsize(bin_path)
|
||||
return min(round(currentsize / filesize * 100, 1), 100)
|
||||
return None
|
||||
|
||||
|
||||
@model_router.delete("/{model_name}")
|
||||
async def delete_model(model_name: str):
|
||||
if f"{model_name}.bin" not in await list_of_installed_models():
|
||||
if model_name + ".bin" not in await list_of_installed_models():
|
||||
raise HTTPException(status_code=404, detail="Model not found")
|
||||
|
||||
try:
|
||||
os.remove(os.path.join(WEIGHTS, f"{model_name}.bin"))
|
||||
except OSError as e:
|
||||
print(f"Error removing model file: {e}")
|
||||
if os.path.exists(WEIGHTS + f"{model_name}.bin"):
|
||||
os.remove(WEIGHTS + f"{model_name}.bin")
|
||||
return {"message": f"Model {model_name} deleted"}
|
||||
|
||||
await cleanup_model_resources(model_name)
|
||||
|
||||
return {"message": f"Model {model_name} deleted"}
|
||||
raise HTTPException(status_code=404, detail="Model file not found")
|
||||
|
||||
@ -1,63 +0,0 @@
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from serge.crud import create_user, update_user
|
||||
from serge.database import SessionLocal
|
||||
from serge.routers.auth import get_current_active_user
|
||||
from serge.schema import user as user_schema
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
user_router = APIRouter(
|
||||
prefix="/user",
|
||||
tags=["user"],
|
||||
)
|
||||
|
||||
|
||||
def get_db():
    """Yield a fresh ``SessionLocal`` session and always close it afterwards."""
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs when the generator is exhausted or closed, including on error.
        session.close()
|
||||
|
||||
|
||||
@user_router.get("/", response_model=user_schema.User)
async def get_user(u: user_schema.User = Depends(get_current_active_user)):
    """Return the public dict of the current user, or 401 when there is none."""
    if u:
        return u.to_public_dict()

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password",
        headers={"WWW-Authenticate": "Bearer"},
    )
|
||||
|
||||
|
||||
@user_router.post("/create", response_model=user_schema.User)
async def create_user_with_pass(ua: user_schema.UserAuth, db: Session = Depends(get_db)):
    """Create a user from ``ua`` and return its public dict.

    Raises:
        HTTPException: 409 when ``create_user`` raises, 405 when it returns a
            falsy value.
    """
    try:
        created = create_user(db, ua)
    except Exception as exc:
        logging.exception(exc)
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Failed to create. {exc}",
        )

    if created:
        return created.to_public_dict()

    raise HTTPException(
        status_code=status.HTTP_405_METHOD_NOT_ALLOWED,
        detail="Could not create user",
    )
|
||||
|
||||
|
||||
@user_router.put("/", response_model=user_schema.User)
async def self_update_user(
    new_data: user_schema.User,
    current: user_schema.User = Depends(get_current_active_user),
    db: Session = Depends(get_db),
):
    """Copy the editable profile fields from ``new_data`` onto the current user.

    Only ``email``, ``full_name`` and ``default_prompt`` are taken from the
    request body; the updated user is passed to ``update_user`` and returned as
    its public dict.
    """
    for field_name in ("email", "full_name", "default_prompt"):
        setattr(current, field_name, getattr(new_data, field_name))

    update_user(db, current)
    return current.to_public_dict()
|
||||
@ -73,35 +73,16 @@
|
||||
"Name": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"fp16",
|
||||
"iq1_M",
|
||||
"iq1_S",
|
||||
"iq2_M",
|
||||
"iq2_S",
|
||||
"iq2_XS",
|
||||
"iq2_XXS",
|
||||
"iq3_M",
|
||||
"iq3_S",
|
||||
"iq3_XS",
|
||||
"iq3_XXS",
|
||||
"iq4_NL",
|
||||
"iq4_XS",
|
||||
"q2_K",
|
||||
"q3_K_L",
|
||||
"q3_K_M",
|
||||
"q3_K_S",
|
||||
"q4_0",
|
||||
"q4_1",
|
||||
"q4_K_M",
|
||||
"q4_K_S",
|
||||
"q5_0",
|
||||
"q5_1",
|
||||
"q5_K_M",
|
||||
"q5_K_S",
|
||||
"q6_K",
|
||||
"q8_0",
|
||||
"q8_1",
|
||||
"q8_K"
|
||||
"q8_0"
|
||||
],
|
||||
"title": "Name"
|
||||
}
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
import uuid
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Base schema shared by the user models below: carries only the username.
class UserBase(BaseModel):
    username: str
|
||||
|
||||
|
||||
# A username together with one credential: the secret and an integer auth type.
class UserAuth(UserBase):
    secret: str
    auth_type: int
|
||||
|
||||
|
||||
# Links a chat id to the owner it belongs to.
class Chat(BaseModel):
    chat_id: str
    owner: str
|
||||
|
||||
|
||||
# Full user schema; ``orm_mode`` is enabled in ``Config``.
class User(UserBase):
    id: uuid.UUID
    is_active: bool = True
    email: str = ""
    full_name: str = ""
    theme_light: bool = False
    default_prompt: str = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    auth: list[UserAuth] = []
    chats: list[Chat] = []

    class Config:
        orm_mode = True

    def to_public_dict(self):
        """Return ``self.dict()`` with every auth secret replaced by asterisks."""
        public = self.dict()
        for credential in public["auth"]:
            credential["secret"] = "********"
        return public
|
||||
|
||||
|
||||
# Token payload: the access token string and its token type.
class Token(BaseModel):
    access_token: str
    token_type: str
|
||||
127
api/src/serge/utils/convert.py
Normal file
127
api/src/serge/utils/convert.py
Normal file
@ -0,0 +1,127 @@
|
||||
# code from @eiz
|
||||
# Thanks! See the relevant comment here:
|
||||
# https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818
|
||||
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from sentencepiece import SentencePieceProcessor
|
||||
|
||||
from serge.utils.migrate import migrate
|
||||
|
||||
HPARAMS = keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"]
|
||||
|
||||
|
||||
def parse_args():
    """Parse the two positional CLI arguments: model directory and tokenizer path."""
    cli = argparse.ArgumentParser(description="Upgrade old ggml model files to the current format")
    positionals = (
        ("dir_model", "directory containing ggml .bin files"),
        ("tokenizer_model", "path to LLaMA tokenizer.model file"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    return cli.parse_args()
|
||||
|
||||
|
||||
def read_header(f_in):
    """Read ``3 + len(HPARAMS)`` native ints from ``f_in`` and return them as a tuple."""
    layout = struct.Struct("i" * (3 + len(HPARAMS)))
    return layout.unpack(f_in.read(layout.size))
|
||||
|
||||
|
||||
def write_header(f_out, header):
    """Write the upgraded header for an old-style file to ``f_out``.

    ``header`` is the 8-tuple ``(magic, vocab_size, dim, multiple_of, n_heads,
    n_layers, rot, ftype)``. The output is nine native ints: the new magic, the
    file version 1, then the seven remaining input values unchanged.

    Raises:
        Exception: when the input magic is not 0x67676D6C.
    """
    magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype = header

    if magic != 0x67676D6C:
        raise Exception("Invalid file magic. Must be an old style ggml file.")

    f_out.write(
        struct.pack(
            "9i",
            0x67676D66,  # new magic (bytes 'g', 'g', 'm', 'f')
            1,  # file version
            vocab_size,
            dim,
            multiple_of,
            n_heads,
            n_layers,
            rot,
            ftype,
        )
    )
|
||||
|
||||
|
||||
def _token_payload(tokenizer, token_id):
    """Return the bytes stored for one vocabulary entry."""
    if tokenizer.is_unknown(token_id):
        return " \u2047 ".encode()
    if tokenizer.is_control(token_id):
        return b""
    if tokenizer.is_byte(token_id):
        piece = tokenizer.id_to_piece(token_id)
        # Byte pieces must be exactly six characters; the hex digits are piece[3:-1].
        if len(piece) != 6:
            print(f"Invalid token: {piece}")
            sys.exit(1)
        return struct.pack("B", int(piece[3:-1], 16))
    return tokenizer.id_to_piece(token_id).replace("\u2581", " ").encode("utf-8")


def write_tokens(fout, tokenizer):
    """Write every token of ``tokenizer`` to ``fout``.

    Each entry is written as: int length, the payload bytes, float score.
    Exits the process with status 1 on a malformed byte piece.
    """
    for token_id in range(tokenizer.vocab_size()):
        payload = _token_payload(tokenizer, token_id)
        fout.write(struct.pack("i", len(payload)))
        fout.write(payload)
        fout.write(struct.pack("f", tokenizer.get_score(token_id)))
|
||||
|
||||
|
||||
def read_tokens(f_in, tokenizer):
    """Skip over ``tokenizer.vocab_size()`` token entries in ``f_in``.

    Each entry read here is an int length followed by that many bytes; the
    bytes are read and discarded so the stream ends up after the token table.
    """
    for _ in range(tokenizer.vocab_size()):
        (length,) = struct.unpack("i", f_in.read(4))
        f_in.read(length)
|
||||
|
||||
|
||||
def copy_all_data(f_out, f_in):
    """Copy everything left in ``f_in`` to ``f_out`` in 1 MiB reads."""
    chunk_size = 1024 * 1024
    # A falsy (empty) read means the input is exhausted.
    while block := f_in.read(chunk_size):
        f_out.write(block)
|
||||
|
||||
|
||||
def convert_one_file(path_in, tokenizer):
    """Rewrite the old-style ggml file at ``path_in`` in place with a new header and tokens.

    The converted data is first written to ``<path_in>.tmp``. On success the
    original is kept as ``<path_in>.old`` and the temporary file takes its
    place. On failure the original is left untouched and the reason is printed.

    Args:
        path_in: Path of the model file to convert.
        tokenizer: Tokenizer object handed to ``read_tokens`` and ``write_tokens``.
    """
    path_tmp = f"{path_in}.tmp"

    print(f"converting {path_in}")
    try:
        with open(path_in, "rb") as f_in, open(path_tmp, "wb") as f_out:
            write_header(f_out, read_header(f_in))
            read_tokens(f_in, tokenizer)
            write_tokens(f_out, tokenizer)
            copy_all_data(f_out, f_in)
    except Exception as exc:
        # The usual cause is write_header rejecting a file whose magic is not
        # the old-style one, but I/O errors and truncated files land here too,
        # so report the actual reason instead of always claiming "already
        # converted".
        print(f"File {path_in} not converted (already converted or unreadable): {exc}")
    else:
        os.rename(path_in, path_in + ".old")
        os.rename(path_tmp, path_in)

    # Best-effort removal of a leftover partial temp file; after a successful
    # conversion it has already been renamed, so the error is expected.
    try:
        os.remove(path_tmp)
    except OSError:
        pass
|
||||
|
||||
|
||||
def convert_all(dir_model: str, tokenizer_model: str):
    """Convert and then migrate every ``*.bin`` file directly inside ``dir_model``.

    Args:
        dir_model: Directory that is globbed for ``*.bin`` files.
        tokenizer_model: Path passed to ``SentencePieceProcessor``.

    Any exception stops the run and is printed rather than raised.
    """
    # Glob once: the same pattern used to be appended twice, which made every
    # file go through convert_one_file and migrate two times.
    files = glob.glob(f"{dir_model}/*.bin")

    try:
        tokenizer = SentencePieceProcessor(tokenizer_model)
        for file in files:
            print("Converting file: ", file)
            convert_one_file(file, tokenizer)
            migrate(file)
    except Exception as e:
        print(f"Error: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
convert_all(args.dir_model, args.tokenizer_model)
|
||||
245
api/src/serge/utils/llm.py
Normal file
245
api/src/serge/utils/llm.py
Normal file
@ -0,0 +1,245 @@
|
||||
"""Wrapper around llama.cpp."""
|
||||
from typing import Any
|
||||
|
||||
from langchain.llms.base import LLM
|
||||
from pydantic import Extra, Field, root_validator
|
||||
|
||||
|
||||
class LlamaCpp(LLM):
    """Wrapper around the llama.cpp model.

    To use, you should have the llama-cpp-python library installed, and provide the
    name of the model as a named parameter to the constructor.
    Check out: https://github.com/abetlen/llama-cpp-python

    Example:
        .. code-block:: python

            from serge.utils.llm import LlamaCpp
            llm = LlamaCpp(model_path="gpt4all")
    """

    client: Any  #: :meta private:
    model_path: str
    """Model name; ``_call`` loads ``/usr/src/app/weights/<model_path>.bin``."""

    n_ctx: int = Field(2048, alias="n_ctx")
    """Token context window."""

    n_gpu_layers: int = Field(0, alias="n_gpu_layers")
    """The number of layers to put on the GPU. The rest will be on the CPU."""

    n_parts: int = Field(-1, alias="n_parts")
    """Number of parts to split the model into.
    If -1, the number of parts is automatically determined."""

    seed: int = Field(-1, alias="seed")
    """Seed. If -1, a random seed is used."""

    f16_kv: bool = Field(False, alias="f16_kv")
    """Use half-precision for key/value cache."""

    logits_all: bool = Field(False, alias="logits_all")
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = Field(False, alias="vocab_only")
    """Only load the vocabulary, no weights."""

    use_mlock: bool = Field(False, alias="use_mlock")
    """Force system to keep model in RAM."""

    n_threads: int | None = Field(None, alias="n_threads")
    """Number of threads to use.
    If None, the number of threads is automatically determined."""

    n_batch: int | None = Field(8, alias="n_batch")
    """Number of tokens to process in parallel.
    Should be a number between 1 and n_ctx."""

    max_tokens: int | None = 2048
    """The maximum number of tokens to generate."""

    temperature: float | None = 0.8
    """The temperature to use for sampling."""

    top_p: float | None = 0.95
    """The top-p value to use for sampling."""

    logprobs: int | None = Field(None)
    """The number of logprobs to return. If None, no logprobs are returned."""

    echo: bool | None = False
    """Whether to echo the prompt."""

    stop_sequences: list[str] | None = []
    """A list of strings to stop generation when encountered."""

    repeat_penalty: float | None = 1.1
    """The penalty to apply to repeated tokens."""

    top_k: int | None = 40
    """The top-k value to use for sampling."""

    last_n_tokens_size: int | None = 64
    """The number of tokens to look back when applying the repeat_penalty."""

    streaming: bool = False
    """Whether ``_call`` streams tokens through the callback manager."""

    class Config:
        extra = Extra.ignore

    @root_validator()
    def validate_environment(cls, values: dict) -> dict:
        """Pass the validated values through unchanged.

        The model is not loaded here: ``_call`` imports llama_cpp and builds
        the client itself. The previous body wrapped a bare ``pass`` in
        ImportError/Exception handlers that could never fire, and indexed
        ``values["model_path"]`` for a message that was never used.
        """
        return values

    @property
    def _default_params(self) -> dict[str, Any]:
        """Get the default parameters for calling llama_cpp."""
        return {
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "logprobs": self.logprobs,
            "echo": self.echo,
            "stop_sequences": self.stop_sequences,
            "repeat_penalty": self.repeat_penalty,
            "top_k": self.top_k,
            "n_ctx": self.n_ctx,
            "n_gpu_layers": self.n_gpu_layers,
            "n_parts": self.n_parts,
            "seed": self.seed,
            "f16_kv": self.f16_kv,
            "logits_all": self.logits_all,
            "vocab_only": self.vocab_only,
            "use_mlock": self.use_mlock,
            "n_batch": self.n_batch,
            "last_n_tokens_size": self.last_n_tokens_size,
            "streaming": self.streaming,
        }

    @property
    def _identifying_params(self) -> dict[str, Any]:
        """Get the identifying parameters."""
        return {**{"model_path": self.model_path}, **self._default_params}

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "llama.cpp"

    def _call(self, prompt: str, stop: list[str] | None = None) -> str:
        """Call the Llama model and return the output.

        A new ``Llama`` client is created on every call.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered. Used
                when the instance has no ``stop_sequences`` of its own.

        Returns:
            The generated text. In streaming mode this is the concatenation of
            all streamed tokens, each of which is also sent to
            ``callback_manager.on_llm_new_token``.

        Raises:
            ValueError: when both ``stop_sequences`` and ``stop`` are given.

        Example:
            .. code-block:: python

                from serge.utils.llm import LlamaCpp
                llm = LlamaCpp(model_path="gpt4all")
                llm("This is a prompt.")
        """
        from llama_cpp import Llama

        # Resolve the stop list before loading the model so a conflicting call
        # fails fast. The caller's `stop` used to be silently discarded when
        # the instance had no stop_sequences.
        if self.stop_sequences and stop is not None:
            raise ValueError("`stop_sequences` found in both the input and default params.")
        stop_sequences = self.stop_sequences or stop or []

        client = Llama(
            model_path="/usr/src/app/weights/" + self.model_path + ".bin",
            n_ctx=self.n_ctx,
            n_gpu_layers=self.n_gpu_layers,
            n_parts=self.n_parts,
            seed=self.seed,
            f16_kv=self.f16_kv,
            logits_all=self.logits_all,
            vocab_only=self.vocab_only,
            use_mlock=self.use_mlock,
            n_threads=self.n_threads,
            n_batch=self.n_batch,
            last_n_tokens_size=self.last_n_tokens_size,
        )

        # Sampling arguments shared by the streaming and blocking paths.
        sampling = {
            "prompt": prompt,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "logprobs": self.logprobs,
            "echo": self.echo,
            "stop": stop_sequences,
            "repeat_penalty": self.repeat_penalty,
            "top_k": self.top_k,
        }

        if not self.streaming:
            output = client(**sampling)
            return output["choices"][0]["text"]

        response = ""
        for stream_resp in client(**sampling, stream=True):
            try:
                token = stream_resp["choices"][0]["text"]
            except (KeyError, IndexError, TypeError):
                # A malformed chunk contributes no text. Catching BaseException
                # here would also swallow KeyboardInterrupt and SystemExit.
                token = ""

            response += token
            self.callback_manager.on_llm_new_token(token, verbose=self.verbose)
        return response
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from langchain.callbacks.base import CallbackManager
|
||||
|
||||
from serge.utils.stream import ChainRedisHandler
|
||||
|
||||
llm = LlamaCpp(
|
||||
streaming=True,
|
||||
model_path="gpt4all",
|
||||
callback_manager=CallbackManager([ChainRedisHandler("1")]),
|
||||
verbose=True,
|
||||
temperature=0.1,
|
||||
max_tokens=128,
|
||||
)
|
||||
|
||||
input()
|
||||
resp = llm("Write a paragraph about France please.")
|
||||
308
api/src/serge/utils/migrate.py
Normal file
308
api/src/serge/utils/migrate.py
Normal file
@ -0,0 +1,308 @@
|
||||
# Migrate ggml file(s) with ggmf magic to ggml file with ggjt magic
|
||||
#
|
||||
# We caused a breaking change to the file format on 2023-03-30 in:
|
||||
# https://github.com/ggerganov/llama.cpp/pull/613
|
||||
#
|
||||
# (1) If you still have the Meta LLaMA .pth files, then close this
|
||||
# file now; you can just run `convert-pth-to-ggml.py` again to
|
||||
# migrate to the new format. The tool is easier to use too. It
|
||||
# isn't necessary anymore to manage split output files because
|
||||
# the new format always combines things into a single file.
|
||||
#
|
||||
# (2) If you deleted the Meta LLaMA .pth files to save on disk
|
||||
# space, then this tool is intended to help you. Please check
|
||||
# out the instructions below.
|
||||
#
|
||||
# USAGE
|
||||
#
|
||||
# python migrate-ggml-2023-03-30-pr613.py INPUT OUTPUT
|
||||
#
|
||||
# PREREQUISITES
|
||||
#
|
||||
# pip install numpy
|
||||
# cd llama.cpp
|
||||
# make -j4
|
||||
#
|
||||
# EXAMPLE (7B MODEL)
|
||||
#
|
||||
# # you can replace all the 'f16' with 'q4_0' if you're using quantized weights
|
||||
# python migrate-ggml-2023-03-30-pr613.py models/7B/ggml-model-f16.bin models/7B/ggml-model-f16-ggjt.bin
|
||||
#
|
||||
# # check that it works
|
||||
# ./main -m models/7B/ggml-model-f16-ggjt.bin -p 'Question: Do you love me?'
|
||||
#
|
||||
# # you can delete the old files
|
||||
# rm -f models/7B/ggml-model-f16.bin
|
||||
# mv models/7B/ggml-model-f16-ggjt.bin models/7B/ggml-model-f16.bin
|
||||
#
|
||||
# EXAMPLE (13B MODEL)
|
||||
#
|
||||
# # you can replace all the 'f16' with 'q4_0' if you're using quantized weights
|
||||
# python migrate-ggml-2023-03-30-pr613.py models/13B/ggml-model-f16.bin models/13B/ggml-model-f16-ggjt.bin
|
||||
#
|
||||
# # check that it works
|
||||
# ./main -m models/13B/ggml-model-f16-ggjt.bin -p 'Question: Do you love me?'
|
||||
#
|
||||
# # you can delete the old files
|
||||
# rm -f models/13B/ggml-model-f16.bin*
|
||||
# mv models/13B/ggml-model-f16-ggjt.bin models/13B/ggml-model-f16.bin
|
||||
#
|
||||
|
||||
import os
|
||||
import struct
|
||||
|
||||
|
||||
QK = 32
|
||||
|
||||
GGML_TYPE_Q4_0 = 0
|
||||
GGML_TYPE_Q4_1 = 1
|
||||
GGML_TYPE_I8 = 2
|
||||
GGML_TYPE_I16 = 3
|
||||
GGML_TYPE_I32 = 4
|
||||
GGML_TYPE_F16 = 5
|
||||
GGML_TYPE_F32 = 6
|
||||
|
||||
WTYPE_NAMES = {
|
||||
0: "F32",
|
||||
1: "F16",
|
||||
2: "Q4_0",
|
||||
3: "Q4_1",
|
||||
}
|
||||
|
||||
WTYPES = {
|
||||
0: GGML_TYPE_F32,
|
||||
1: GGML_TYPE_F16,
|
||||
2: GGML_TYPE_Q4_0,
|
||||
3: GGML_TYPE_Q4_1,
|
||||
}
|
||||
|
||||
GGML_BLCK_SIZE = {
|
||||
GGML_TYPE_Q4_0: QK,
|
||||
GGML_TYPE_Q4_1: QK,
|
||||
GGML_TYPE_I8: 1,
|
||||
GGML_TYPE_I16: 1,
|
||||
GGML_TYPE_I32: 1,
|
||||
GGML_TYPE_F16: 1,
|
||||
GGML_TYPE_F32: 1,
|
||||
}
|
||||
|
||||
GGML_TYPE_SIZE = {
|
||||
GGML_TYPE_Q4_0: 4 + QK // 2,
|
||||
GGML_TYPE_Q4_1: 4 * 2 + QK // 2,
|
||||
GGML_TYPE_I8: 1,
|
||||
GGML_TYPE_I16: 2,
|
||||
GGML_TYPE_I32: 4,
|
||||
GGML_TYPE_F16: 2,
|
||||
GGML_TYPE_F32: 4,
|
||||
}
|
||||
|
||||
HPARAMS = [
|
||||
"magic", # int32
|
||||
"version", # int32
|
||||
"n_vocab", # int32
|
||||
"n_embd", # int32
|
||||
"n_mult", # int32
|
||||
"n_head", # int32
|
||||
"n_layer", # int32
|
||||
"n_rot", # int32
|
||||
"f16", # int32
|
||||
]
|
||||
|
||||
|
||||
def read_hparams(fin):
    """Read one native int per ``HPARAMS`` entry and return them as a name -> value dict."""
    layout = struct.Struct("i" * len(HPARAMS))
    values = layout.unpack(fin.read(layout.size))
    return dict(zip(HPARAMS, values))
|
||||
|
||||
|
||||
def write_hparams(fout, hparams):
    """Write the values of ``hparams`` to ``fout`` as native ints, in ``HPARAMS`` order.

    Raises:
        KeyError: when ``hparams`` lacks one of the ``HPARAMS`` names.
    """
    # The former struct.calcsize() call was dropped: its result was never used.
    ints = [hparams[name] for name in HPARAMS]
    fout.write(struct.pack("i" * len(HPARAMS), *ints))
|
||||
|
||||
|
||||
def read_tokens(fin, hparams):
    """Read ``hparams["n_vocab"]`` token entries and return them as ``(word, score)`` tuples.

    Each entry is an int length, that many bytes of text, then a float score.
    """

    def read_entry():
        (length,) = struct.unpack("i", fin.read(4))
        word = fin.read(length)
        (score,) = struct.unpack("f", fin.read(4))
        return word, score

    return [read_entry() for _ in range(hparams["n_vocab"])]
|
||||
|
||||
|
||||
def write_tokens(fout, tokens):
    """Write each ``(word, score)`` pair as int length, the word bytes, float score."""
    for text, weight in tokens:
        fout.write(struct.pack("i", len(text)))
        fout.write(text)
        fout.write(struct.pack("f", weight))
|
||||
|
||||
|
||||
def ggml_nelements(shape):
    """Return the product of all entries of ``shape`` (1 for an empty shape)."""
    total = 1
    for extent in shape:
        total = total * extent
    return total
|
||||
|
||||
|
||||
def ggml_nbytes(shape, ftype):
    """Return the byte size of a tensor with ``shape`` stored as file type ``ftype``.

    Computed as element count times the type size, floor-divided by the block
    size of the ggml type that ``WTYPES`` maps ``ftype`` to.
    """
    element_count = ggml_nelements(shape)
    ggml_type = WTYPES[ftype]
    return element_count * GGML_TYPE_SIZE[ggml_type] // GGML_BLCK_SIZE[ggml_type]
|
||||
|
||||
|
||||
def copy_tensors(fin, fout, part_id, n_parts):
    """Copy every tensor of one input part into its place in the unified output.

    For each tensor the header is rewritten with the full (all parts) shape,
    the data start is padded to a multiple of ``QK`` bytes, and this part's
    data is written at the offset it occupies inside the full tensor.

    Args:
        fin: Input part, positioned at its first tensor record.
        fout: Output file, positioned where the first tensor record belongs.
        part_id: Zero-based index of the part being read.
        n_parts: Total number of parts.
    """

    def read_int():
        return struct.unpack("i", fin.read(4))[0]

    while True:
        first = fin.read(4)
        if not first:
            # Clean end of input: no further tensor record.
            break
        (n_dims,) = struct.unpack("i", first)
        name_len = read_int()
        ftype = read_int()

        assert n_dims in (1, 2)

        # Dimensions are stored in the opposite order to the one used below.
        partshape = [read_int() for _ in range(n_dims)][::-1]

        name = fin.read(name_len)
        data = fin.read(ggml_nbytes(partshape, ftype))

        ggml_type = WTYPES[ftype]
        blck_size = GGML_BLCK_SIZE[ggml_type]
        type_size = GGML_TYPE_SIZE[ggml_type]

        print(f"Processing tensor {name} with shape: {partshape} and type: {WTYPE_NAMES[ftype]}")

        # Pick the dimension along which a multipart tensor is sharded.
        #   split_dim 0: output.*, and layers.* except the two named below
        #   split_dim 1: tok_embeddings.*, layers.*.attention.wo.weight,
        #                layers.*.feed_forward.w2.weight, and anything unmatched
        if n_dims > 1:
            if b"tok_embeddings" in name:
                split_dim = 1
            elif b"layers" in name:
                split_on_cols = b"attention.wo.weight" in name or b"feed_forward.w2.weight" in name
                split_dim = 1 if split_on_cols else 0
            elif b"output" in name:
                split_dim = 0
            else:
                split_dim = 1

        # Tensor header, carrying the shape of the reassembled tensor.
        fullshape = list(partshape)
        if n_dims > 1:
            fullshape[split_dim] *= n_parts
        fout.write(struct.pack("iii", n_dims, len(name), ftype))
        fout.write(struct.pack(f"{n_dims}i", *reversed(fullshape)))
        fout.write(name)

        # Zero-pad so the tensor data starts on a QK-byte boundary.
        tensor_data_offset = fout.tell()
        padding = -tensor_data_offset % QK
        if padding:
            fout.write(b"\x00" * padding)
            tensor_data_offset += padding

        if n_dims == 1 or n_parts == 1:
            # Unsharded tensor: only the first part writes it.
            if part_id == 0:
                fout.write(data)
        elif split_dim == 0:
            # This part holds a contiguous run of rows.
            full_row_bytes = fullshape[1] // blck_size * type_size
            first_row = part_id * partshape[0]
            fout.seek(tensor_data_offset + first_row * full_row_bytes)
            fout.write(data)
        elif split_dim == 1:
            # This part holds a slice of the columns of every row.
            part_row_bytes = partshape[1] // blck_size * type_size
            full_row_bytes = fullshape[1] // blck_size * type_size
            col_offset = (part_id * partshape[1]) // blck_size * type_size
            for row in range(partshape[0]):
                fout.seek(tensor_data_offset + row * full_row_bytes + col_offset)
                start = row * part_row_bytes
                fout.write(data[start : start + part_row_bytes])

        # Move to where the next tensor record starts.
        fout.seek(tensor_data_offset + ggml_nbytes(fullshape, ftype))
|
||||
|
||||
|
||||
def migrate(fin_path):
    """Rewrite a 'ggmf' model file (plus any numbered part files) as one 'ggjt' file.

    Part files are discovered by convention: ``<fin_path>.1``, ``<fin_path>.2``,
    ... are probed until the first missing index. The merged result is written to
    ``<fin_path>.migrated`` and then moved over ``fin_path``.

    Nothing is written when the input already carries the 'ggjt' magic or does
    not carry the expected 'ggmf' magic; a message is printed and the function
    returns. Numbered part files are left on disk.

    Args:
        fin_path: Path of the first (or only) model file. Must exist.
    """
    assert fin_path
    assert os.path.exists(fin_path)

    with open(fin_path, "rb") as fin:
        hparams = read_hparams(fin)
        tokens = read_tokens(fin, hparams)

    if hparams["magic"] == 0x67676A74:  # ggjt
        print(f"{fin_path}: input ggml has already been converted to 'ggjt' magic\n")
        return

    if hparams["magic"] != 0x67676D66:  # ggmf
        print(f"{fin_path}: input ggml file doesn't have expected 'ggmf' magic: {hparams['magic']:#x}\n")
        return

    hparams["magic"] = 0x67676A74  # ggjt

    # count number of multipart files by convention
    n_parts = 1
    while os.path.exists("%s.%d" % (fin_path, n_parts)):
        n_parts += 1

    # we output a single file for ggml
    migrated_path = fin_path + ".migrated"
    with open(migrated_path, "wb") as fout:
        write_hparams(fout, hparams)
        write_tokens(fout, tokens)
        offset_of_tensors = fout.tell()
        # the tensors we load could be split across multiple files
        for part_id in range(n_parts):
            # every part is laid out relative to the same tensor start offset
            fout.seek(offset_of_tensors)
            print(f"Processing part {part_id+1} of {n_parts}\n")
            # Derive each part path from the base path; never mutate fin_path,
            # otherwise suffixes pile up ("model.1.2") and the final replace
            # below would target a part file instead of the base file.
            part_path = fin_path if part_id == 0 else "%s.%d" % (fin_path, part_id)
            with open(part_path, "rb") as fin:
                # skip this part's own header and vocabulary; the results are discarded
                read_tokens(fin, read_hparams(fin))
                copy_tensors(fin, fout, part_id, n_parts)

    # Atomically move the merged file over the original base file.
    os.replace(migrated_path, fin_path)

    print(f"Done. Output file: {fin_path}\n")
|
||||
@ -1,56 +0,0 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import HTTPException, status
|
||||
from jose import JWTError, jwt
|
||||
from serge.models.settings import Settings
|
||||
|
||||
ALGORITHM = "HS256"
|
||||
settings = Settings()
|
||||
|
||||
credentials_exception = HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Could not validate credentials",
|
||||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
|
||||
|
||||
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check a plaintext password against a stored salted scrypt hash.

    Args:
        plain_password: The candidate password.
        hashed_password: Base64 text whose decoded bytes are a 16-byte salt
            followed by the scrypt digest.

    Returns:
        True when the digest derived from ``plain_password`` and the stored
        salt equals the stored digest, otherwise False.
    """
    import hmac  # local import: only needed for the constant-time comparison

    salt_and_hash = base64.b64decode(hashed_password.encode("utf-8"))
    salt = salt_and_hash[:16]
    stored_password = salt_and_hash[16:]
    new_hashed_password = hashlib.scrypt(plain_password.encode("utf-8"), salt=salt, n=8192, r=8, p=1, dklen=64)
    # compare_digest avoids leaking how many leading bytes matched via timing
    return hmac.compare_digest(new_hashed_password, stored_password)
|
||||
|
||||
|
||||
def get_password_hash(password: str) -> str:
    """Hash ``password`` with scrypt under a fresh random 16-byte salt.

    Returns:
        Base64 text encoding the salt immediately followed by the 64-byte digest.
    """
    fresh_salt = os.urandom(16)
    derived = hashlib.scrypt(
        password.encode("utf-8"),
        salt=fresh_salt,
        n=8192,
        r=8,
        p=1,
        dklen=64,
    )
    return base64.b64encode(fresh_salt + derived).decode("utf-8")
|
||||
|
||||
|
||||
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
    """Encode ``data`` plus an ``exp`` claim as a JWT signed with the configured secret.

    Args:
        data: Claims to embed; the dict itself is not modified.
        expires_delta: Token lifetime. When falsy (``None`` or a zero delta),
            ``settings.SERGE_SESSION_EXPIRY`` minutes are used instead.

    Returns:
        The value returned by ``jwt.encode``.
    """
    lifetime = expires_delta if expires_delta else timedelta(minutes=settings.SERGE_SESSION_EXPIRY)
    claims = {**data, "exp": datetime.utcnow() + lifetime}
    return jwt.encode(claims, settings.SERGE_JWT_SECRET, algorithm=ALGORITHM)
|
||||
|
||||
|
||||
def decode_access_token(token: str):
    """Decode ``token`` and return the value of its ``sub`` claim.

    Raises:
        HTTPException: ``credentials_exception`` when decoding raises
            ``JWTError`` or the payload has no ``sub`` entry.
    """
    try:
        claims = jwt.decode(token, settings.SERGE_JWT_SECRET, algorithms=[ALGORITHM])
    except JWTError:
        raise credentials_exception

    subject = claims.get("sub")
    if subject is None:
        raise credentials_exception
    return subject
|
||||
@ -1,8 +1,53 @@
|
||||
import re
|
||||
|
||||
from typing import Any
|
||||
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
from langchain.memory import RedisChatMessageHistory
|
||||
from langchain.schema import LLMResult
|
||||
from loguru import logger
|
||||
from redis import Redis
|
||||
|
||||
|
||||
# Not used yet. WIP
|
||||
class ChainRedisHandler(StreamingStdOutCallbackHandler):
    """Callback handler for streaming. Only works with LLMs that support streaming.

    In addition to the parent handler's behaviour, the generated text is
    mirrored into the Redis string stored under ``stream_key``.
    """

    def __init__(self, id: str):
        """Connect to Redis on localhost:6379 and remember the stream ``id``."""
        logger.debug(f"Setting up ChainRedisHandler with id {id}")
        super().__init__()
        self.id = id
        self.client = Redis(host="localhost", port=6379, decode_responses=False)
        logger.info(f"Connected to Redis? {self.client.ping()}")
        logger.info(f"Stream key : {self.stream_key}")

    @property
    def stream_key(self):
        """Redis key holding the streamed text for this handler's id."""
        return "stream:" + self.id

    def on_llm_start(self, serialized: dict[str, Any], prompts: list[str], **kwargs: Any) -> None:
        """Run when LLM starts running."""
        super().on_llm_start(serialized, prompts, **kwargs)
        logger.info("starting")
        # start from an empty value so tokens are appended to a clean string
        self.client.set(self.stream_key, "")

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Run on new LLM token. Only available when streaming is enabled."""
        # Forward to the matching parent hook; the previous code called
        # on_llm_start here, which expects (serialized, prompts).
        super().on_llm_new_token(token, **kwargs)
        logger.info(token)
        self.client.append(self.stream_key, token)

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Run when LLM ends running."""
        super().on_llm_end(response, **kwargs)
        self.client.set(self.stream_key, "")

    def on_llm_error(self, error: Exception | KeyboardInterrupt, **kwargs: Any) -> None:
        """Run when LLM errors."""
        super().on_llm_error(error, **kwargs)
        # replace the streamed text with the error message
        self.client.set(self.stream_key, str(error))
|
||||
|
||||
|
||||
def get_prompt(history: RedisChatMessageHistory, params):
|
||||
@ -51,7 +96,7 @@ def get_prompt(history: RedisChatMessageHistory, params):
|
||||
else:
|
||||
stop = True
|
||||
if len(next_prompt) > 0:
|
||||
prompts.append(f"{instruction + next_prompt}\n")
|
||||
prompts.append(instruction + next_prompt + "\n")
|
||||
if stop:
|
||||
break
|
||||
|
||||
@ -60,6 +105,6 @@ def get_prompt(history: RedisChatMessageHistory, params):
|
||||
for next_prompt in prompts:
|
||||
message_prompt += next_prompt
|
||||
|
||||
final_prompt = f"{params.init_prompt}\n{message_prompt[:params.n_ctx]}"
|
||||
final_prompt = params.init_prompt + "\n" + message_prompt[: params.n_ctx]
|
||||
logger.debug(final_prompt)
|
||||
return final_prompt
|
||||
|
||||
@ -1,36 +1,26 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from huggingface_hub import hf_hub_url
|
||||
|
||||
import huggingface_hub
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
|
||||
def load_model_data(file_path):
    """Open ``file_path`` for reading and return its content parsed as JSON."""
    with open(file_path, "r") as handle:
        parsed = json.load(handle)
    return parsed
|
||||
|
||||
|
||||
def flatten_model_data(families):
    """Yield one ``(repo, filename)`` tuple per file entry.

    Walks ``families`` -> ``family["models"]`` -> ``model["files"]`` in order
    and pairs each file's ``"filename"`` with its model's ``"repo"``.
    """
    yield from (
        (entry["repo"], item["filename"])
        for group in families
        for entry in group["models"]
        for item in entry["files"]
    )
|
||||
|
||||
|
||||
def check_model_availability(repo, filename, timeout=30):
    """Report whether a HEAD request for a model file on the Hub succeeds.

    Args:
        repo: Repository id passed to ``hf_hub_url``.
        filename: File name inside the repository.
        timeout: Seconds to wait for the server before ``requests`` raises,
            so an unresponsive host cannot hang the caller indefinitely.

    Returns:
        The ``ok`` flag of the HEAD response for the file's ``main`` revision URL.
    """
    url = hf_hub_url(repo, filename, repo_type="model", revision="main")
    response = requests.head(url, timeout=timeout)
    return bool(response.ok)
|
||||
|
||||
# this test file specifically doesn't start with test_* so it's not picked up by pytest
|
||||
|
||||
test_dir = Path(__file__).parent
|
||||
model_data = load_model_data(test_dir.parent / "src/serge/data/models.json")
|
||||
checks = list(flatten_model_data(model_data))
|
||||
with open(test_dir.parent / "src/serge/data/models.json", "r") as models_file:
|
||||
families = json.load(models_file)
|
||||
|
||||
# generate list of checks
|
||||
checks = []
|
||||
for family in families:
|
||||
for model in family["models"]:
|
||||
for file in model["files"]:
|
||||
checks.append((model["repo"], file["filename"]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("repo,filename", checks)
|
||||
def test_model_available(repo, filename):
|
||||
assert check_model_availability(repo, filename), f"Model {repo}/{filename} not available"
|
||||
url = huggingface_hub.hf_hub_url(repo, filename, repo_type="model", revision="main")
|
||||
r = requests.head(url)
|
||||
assert r.ok, f"Model {repo}/{filename} not available"
|
||||
|
||||
@ -9,7 +9,7 @@ image:
|
||||
image:
|
||||
pullPolicy: IfNotPresent
|
||||
# Overrides the image tag whose default is the chart appVersion.
|
||||
tag: "main"
|
||||
tag: ""
|
||||
|
||||
imagePullSecrets: []
|
||||
nameOverride: ""
|
||||
|
||||
@ -1,19 +1,19 @@
|
||||
services:
|
||||
serge:
|
||||
restart: on-failure
|
||||
build:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.dev
|
||||
volumes:
|
||||
- ./web:/usr/src/app/web/:z
|
||||
- ./api:/usr/src/app/api/:z
|
||||
- ./web:/usr/src/app/web/
|
||||
- ./api:/usr/src/app/api/
|
||||
- datadb:/data/db
|
||||
- weights:/usr/src/app/weights/
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
ports:
|
||||
- 8008:8008
|
||||
- 9124:9124
|
||||
- 5678:5678
|
||||
- "8008:8008"
|
||||
- "9124:9124"
|
||||
|
||||
volumes:
|
||||
datadb:
|
||||
weights:
|
||||
|
||||
42
scripts/deploy.sh
Executable file → Normal file
42
scripts/deploy.sh
Executable file → Normal file
@ -1,36 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -x
|
||||
source serge.env
|
||||
|
||||
# Get CPU Architecture
|
||||
cpu_arch=$(uname -m)
|
||||
|
||||
# Function to detect CPU features
|
||||
# Print which AVX level the lscpu output mentions: AVX512, AVX2, AVX,
# or "basic" when none of those substrings is present.
detect_cpu_features() {
    cpu_info=$(lscpu)
    # Patterns are tried top to bottom, so the most specific name wins.
    case "$cpu_info" in
        *avx512*) echo "AVX512" ;;
        *avx2*) echo "AVX2" ;;
        *avx*) echo "AVX" ;;
        *) echo "basic" ;;
    esac
}
|
||||
|
||||
# Check if the CPU architecture is aarch64/arm64
|
||||
if [ "$cpu_arch" = "aarch64" ] || [ "$cpu_arch" = "arm64" ]; then
|
||||
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
|
||||
else
|
||||
# Use @smartappli provided wheels
|
||||
#cpu_feature=$(detect_cpu_features)
|
||||
#pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu-$cpu_feature/"
|
||||
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
|
||||
fi
|
||||
|
||||
echo "Recommended install command for llama-cpp-python: $pip_command"
|
||||
|
||||
# Handle termination signals
|
||||
_term() {
|
||||
@ -40,7 +10,7 @@ _term() {
|
||||
}
|
||||
|
||||
# Install python bindings
|
||||
eval "$pip_command" || {
|
||||
pip install llama-cpp-python==0.1.78 || {
|
||||
echo 'Failed to install llama-cpp-python'
|
||||
exit 1
|
||||
}
|
||||
@ -51,18 +21,10 @@ redis_process=$!
|
||||
|
||||
# Start the API
|
||||
cd /usr/src/app/api || exit 1
|
||||
hypercorn_cmd="hypercorn src.serge.main:app --bind 0.0.0.0:8008"
|
||||
if [ "$SERGE_ENABLE_IPV6" = true ] && [ "$SERGE_ENABLE_IPV4" != true ]; then
|
||||
hypercorn_cmd="hypercorn src.serge.main:app --bind [::]:8008"
|
||||
elif [ "$SERGE_ENABLE_IPV4" = true ] && [ "$SERGE_ENABLE_IPV6" = true ]; then
|
||||
hypercorn_cmd="hypercorn src.serge.main:app --bind 0.0.0.0:8008 --bind [::]:8008"
|
||||
fi
|
||||
|
||||
$hypercorn_cmd || {
|
||||
uvicorn src.serge.main:app --host 0.0.0.0 --port 8008 || {
|
||||
echo 'Failed to start main app'
|
||||
exit 1
|
||||
} &
|
||||
|
||||
serge_process=$!
|
||||
|
||||
# Set up a signal trap and wait for processes to finish
|
||||
|
||||
49
scripts/dev.sh
Executable file → Normal file
49
scripts/dev.sh
Executable file → Normal file
@ -1,42 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -x
|
||||
source serge.env
|
||||
|
||||
# Get CPU Architecture
|
||||
cpu_arch=$(uname -m)
|
||||
|
||||
# Function to detect CPU features
|
||||
# Print which AVX level the lscpu output mentions: AVX512, AVX2, AVX,
# or "basic" when none of those substrings is present.
detect_cpu_features() {
    cpu_info=$(lscpu)
    # Patterns are tried top to bottom, so the most specific name wins.
    case "$cpu_info" in
        *avx512*) echo "AVX512" ;;
        *avx2*) echo "AVX2" ;;
        *avx*) echo "AVX" ;;
        *) echo "basic" ;;
    esac
}
|
||||
|
||||
# Check if the CPU architecture is aarch64/arm64
|
||||
if [ "$cpu_arch" = "aarch64" ] || [ "$cpu_arch" = "arm64" ]; then
|
||||
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
|
||||
else
|
||||
# Use @smartappli provided wheels
|
||||
#cpu_feature=$(detect_cpu_features)
|
||||
#pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu-$cpu_feature/"
|
||||
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
|
||||
fi
|
||||
|
||||
echo "Recommended install command for llama-cpp-python: $pip_command"
|
||||
|
||||
# Install python vendor dependencies
|
||||
pip install -r /usr/src/app/requirements.txt || {
|
||||
echo 'Failed to install python dependencies from requirements.txt'
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Install python dependencies
|
||||
pip install -e ./api || {
|
||||
@ -45,7 +9,7 @@ pip install -e ./api || {
|
||||
}
|
||||
|
||||
# Install python bindings
|
||||
eval "$pip_command" || {
|
||||
pip install llama-cpp-python==0.1.78 || {
|
||||
echo 'Failed to install llama-cpp-python'
|
||||
exit 1
|
||||
}
|
||||
@ -57,18 +21,9 @@ redis-server /etc/redis/redis.conf &
|
||||
cd /usr/src/app/web || exit 1
|
||||
npm run dev -- --host 0.0.0.0 --port 8008 &
|
||||
|
||||
python -m pip install debugpy -t /tmp
|
||||
|
||||
# Start the API
|
||||
cd /usr/src/app/api || exit 1
|
||||
hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind 0.0.0.0:9124"
|
||||
if [ "$SERGE_ENABLE_IPV6" = true ] && [ "$SERGE_ENABLE_IPV4" != true ]; then
|
||||
hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind [::]:9124"
|
||||
elif [ "$SERGE_ENABLE_IPV4" = true ] && [ "$SERGE_ENABLE_IPV6" = true ]; then
|
||||
hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind 0.0.0.0:9124 --bind [::]:9124"
|
||||
fi
|
||||
|
||||
$hypercorn_cmd || {
|
||||
uvicorn src.serge.main:api_app --reload --host 0.0.0.0 --port 9124 --root-path /api/ || {
|
||||
echo 'Failed to start main app'
|
||||
exit 1
|
||||
}
|
||||
|
||||
@ -1,3 +0,0 @@
|
||||
LLAMA_PYTHON_VERSION=0.2.87
|
||||
SERGE_ENABLE_IPV4=true
|
||||
SERGE_ENABLE_IPV6=false
|
||||
3
vendor/requirements.txt
vendored
3
vendor/requirements.txt
vendored
@ -1,3 +0,0 @@
|
||||
typing-extensions>=4.12.2
|
||||
numpy>=1.26.0,<2.0.0
|
||||
diskcache>=5.6.3
|
||||
2941
web/package-lock.json
generated
2941
web/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -12,41 +12,39 @@
|
||||
"format": "prettier --write ."
|
||||
},
|
||||
"devDependencies": {
|
||||
"@sveltejs/adapter-auto": "^3.2.2",
|
||||
"@sveltejs/adapter-node": "^5.2.0",
|
||||
"@sveltejs/adapter-static": "^3.0.2",
|
||||
"@sveltejs/kit": "^2.5.20",
|
||||
"@sveltejs/vite-plugin-svelte": "^3.1.1",
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@typescript-eslint/eslint-plugin": "^7.17.0",
|
||||
"@typescript-eslint/parser": "^7.18.0",
|
||||
"autoprefixer": "^10.4.20",
|
||||
"eslint": "^8.57.0",
|
||||
"eslint-config-prettier": "^9.1.0",
|
||||
"eslint-plugin-import": "^2.29.1",
|
||||
"eslint-plugin-prettier": "^5.2.1",
|
||||
"eslint-plugin-svelte": "^2.43.0",
|
||||
"eslint-plugin-vue": "^9.27.0",
|
||||
"postcss": "^8.4.40",
|
||||
"prettier": "3.3.3",
|
||||
"prettier-plugin-svelte": "^3.2.6",
|
||||
"svelte": "^4.2.18",
|
||||
"svelte-check": "^3.8.5",
|
||||
"tailwindcss": "^3.4.7",
|
||||
"tslib": "^2.6.3",
|
||||
"typescript": "^5.5.4",
|
||||
"vite": "^5.4.1"
|
||||
"@sveltejs/adapter-auto": "^2.1.0",
|
||||
"@sveltejs/adapter-node": "^1.3.1",
|
||||
"@sveltejs/adapter-static": "^2.0.3",
|
||||
"@sveltejs/kit": "^1.25.0",
|
||||
"@types/markdown-it": "^13.0.1",
|
||||
"@typescript-eslint/eslint-plugin": "^6.7.2",
|
||||
"@typescript-eslint/parser": "^6.7.2",
|
||||
"autoprefixer": "^10.4.15",
|
||||
"eslint": "^8.49.0",
|
||||
"eslint-config-prettier": "^9.0.0",
|
||||
"eslint-plugin-import": "^2.28.1",
|
||||
"eslint-plugin-prettier": "^5.0.0",
|
||||
"eslint-plugin-svelte": "^2.33.1",
|
||||
"eslint-plugin-vue": "^9.17.0",
|
||||
"postcss": "^8.4.30",
|
||||
"prettier": "3.0.3",
|
||||
"svelte": "^4.2.0",
|
||||
"svelte-check": "^3.5.1",
|
||||
"tailwindcss": "^3.3.3",
|
||||
"tslib": "^2.6.2",
|
||||
"typescript": "^5.2.2",
|
||||
"vite": "^4.4.9"
|
||||
},
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"@iconify/svelte": "^4.0.2",
|
||||
"@iconify/svelte": "^3.1.4",
|
||||
"@sveltestack/svelte-query": "^1.6.0",
|
||||
"clipboard": "^2.0.11",
|
||||
"daisyui": "^4.12.10",
|
||||
"highlight.js": "^11.10.0",
|
||||
"ioredis": "^5.4.1",
|
||||
"markdown-it": "^14.1.0",
|
||||
"markdown-it-highlightjs": "^4.1.0",
|
||||
"prettier-plugin-tailwindcss": "^0.6.5"
|
||||
"daisyui": "^3.7.5",
|
||||
"highlight.js": "^11.8.0",
|
||||
"markdown-it": "^13.0.1",
|
||||
"markdown-it-highlightjs": "^4.0.1",
|
||||
"prettier-plugin-svelte": "^3.0.3",
|
||||
"prettier-plugin-tailwindcss": "^0.5.4"
|
||||
}
|
||||
}
|
||||
|
||||
@ -18,7 +18,7 @@
|
||||
width: auto;
|
||||
}
|
||||
|
||||
markdown .hljs {
|
||||
markdown. .hljs {
|
||||
background: hsl(var(--b3)) !important;
|
||||
}
|
||||
|
||||
@ -85,93 +85,3 @@ markdown .hljs {
|
||||
.ie-edge-no-scrollbar {
|
||||
-ms-overflow-style: none;
|
||||
}
|
||||
|
||||
/* Models Grid Layout */
|
||||
.models-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
|
||||
gap: 25px;
|
||||
padding-left: 80px;
|
||||
padding-right: 40px;
|
||||
padding-top: 40px;
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
|
||||
/* Model Accordion Styles */
|
||||
.model-accordion {
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 5px 8px rgba(0, 0, 0, 0.1);
|
||||
overflow: hidden;
|
||||
background-color: "bg-base-200";
|
||||
}
|
||||
|
||||
.model-accordion button {
|
||||
width: 100%;
|
||||
padding: 15px;
|
||||
text-align: left;
|
||||
border: none;
|
||||
outline: none;
|
||||
transition: background-color 0.3s ease;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.model-details {
|
||||
padding: 10px;
|
||||
border-top: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.model-details p {
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
.top-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
margin-bottom: 20px;
|
||||
padding-top: 10px;
|
||||
}
|
||||
|
||||
.search-row {
|
||||
position: fixed;
|
||||
top: 5px;
|
||||
left: 0;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
width: 100%;
|
||||
padding-left: 80px;
|
||||
padding-right: 40px;
|
||||
padding-bottom: 0px;
|
||||
}
|
||||
|
||||
main {
|
||||
max-width: 600px;
|
||||
margin: 0 auto;
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
form {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
div {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
label {
|
||||
display: block;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
input {
|
||||
width: 100%;
|
||||
padding: 0.5rem;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
button {
|
||||
padding: 0.5rem 1rem;
|
||||
}
|
||||
|
||||
57
web/src/lib/components/models/RefreshModal.svelte
Normal file
57
web/src/lib/components/models/RefreshModal.svelte
Normal file
@ -0,0 +1,57 @@
|
||||
<script lang="ts">
|
||||
import { invalidate, invalidateAll } from "$app/navigation";
|
||||
|
||||
let dialogTag: HTMLDialogElement;
|
||||
let isLoading = false;
|
||||
|
||||
let link =
|
||||
"https://raw.githubusercontent.com/serge-chat/serge/main/api/src/serge/data/models.json";
|
||||
|
||||
// Submit the refresh form to the API. On success, invalidate the model list
// and close the dialog; on any failure, log it. The loading flag is always
// cleared so the submit button cannot stay disabled after a network error.
const handleRefresh = async (e: Event) => {
  isLoading = true;
  try {
    const r = await fetch("/api/model/refresh", {
      method: "POST",
      body: new FormData(e.target as HTMLFormElement),
    });

    if (r.ok) {
      await invalidate("/api/model/all");
      dialogTag.close();
    } else {
      console.error("Error refreshing models");
    }
  } catch (err) {
    // fetch rejects on network failure; report it instead of leaving an
    // unhandled rejection in the submit handler
    console.error("Error refreshing models", err);
  } finally {
    isLoading = false;
  }
};
|
||||
</script>
|
||||
|
||||
<button class="btn-outline btn" on:click={() => dialogTag.showModal()}
|
||||
>Refresh Models</button
|
||||
>
|
||||
<dialog bind:this={dialogTag} class="modal">
|
||||
<form method="dialog" class="modal-box">
|
||||
<button class="btn-ghost btn-sm btn-circle btn absolute right-2 top-2"
|
||||
>✕</button
|
||||
>
|
||||
<form on:submit|preventDefault={handleRefresh}>
|
||||
<h3 class="text-lg font-bold">Model refresh</h3>
|
||||
<p class="py-4">
|
||||
Enter the URL of the JSON file containing the models below
|
||||
</p>
|
||||
<input
|
||||
type="text"
|
||||
name="url"
|
||||
class="input-bordered input-primary input mb-4 w-full"
|
||||
bind:value={link}
|
||||
/>
|
||||
<div class="modal-action">
|
||||
<!-- if there is a button in form, it will close the modal -->
|
||||
<button type="submit" class="btn" disabled={isLoading}>
|
||||
{#if isLoading}
|
||||
<span class="loading loading-spinner" />
|
||||
{/if}
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
</form>
|
||||
</dialog>
|
||||
@ -2,6 +2,8 @@ import { writable, type Writable } from "svelte/store";
|
||||
|
||||
const themeStore = writable("dark");
|
||||
|
||||
const barVisible = writable(true);
|
||||
|
||||
const newChat: Writable<object | null> = writable(null);
|
||||
|
||||
export { newChat, themeStore };
|
||||
export { barVisible, newChat, themeStore };
|
||||
|
||||
@ -1,52 +1,30 @@
|
||||
<script lang="ts">
|
||||
import "../app.css";
|
||||
import type { PageData } from "./$types";
|
||||
import type { LayoutData } from "./$types";
|
||||
import { invalidate, goto } from "$app/navigation";
|
||||
import { onMount, onDestroy } from "svelte";
|
||||
import { page } from "$app/stores";
|
||||
import { newChat, themeStore } from "$lib/stores.js";
|
||||
import { barVisible, newChat, themeStore } from "$lib/stores.js";
|
||||
import { fly } from "svelte/transition";
|
||||
export let data: PageData;
|
||||
export let data: LayoutData;
|
||||
|
||||
export let isSidebarOpen: boolean = true;
|
||||
|
||||
let models;
|
||||
let modelAvailable: boolean;
|
||||
const isLoading = false;
|
||||
|
||||
let deleteConfirm = false;
|
||||
let deleteAllConfirm = false;
|
||||
let theme: string;
|
||||
let bar_visible: boolean;
|
||||
let dataCht: Response | any = null;
|
||||
const unsubscribe = newChat.subscribe((value) => (dataCht = value));
|
||||
|
||||
function toggleSidebar(): void {
|
||||
isSidebarOpen = !isSidebarOpen;
|
||||
}
|
||||
|
||||
function hideSidebar(): void {
|
||||
isSidebarOpen = false;
|
||||
}
|
||||
const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
|
||||
const unsubscribe1 = newChat.subscribe((value) => (dataCht = value));
|
||||
|
||||
onMount(() => {
|
||||
bar_visible = window.innerWidth > 768;
|
||||
barVisible.set(bar_visible);
|
||||
theme = localStorage.getItem("data-theme") || "dark";
|
||||
document.documentElement.setAttribute("data-theme", theme);
|
||||
});
|
||||
|
||||
$: if (data && data.models) {
|
||||
models = data.models.filter((el) => el.available);
|
||||
modelAvailable = models.length > 0;
|
||||
} else {
|
||||
models = [];
|
||||
modelAvailable = false;
|
||||
}
|
||||
|
||||
$: id = $page.params.id || "";
|
||||
|
||||
async function goToHome() {
|
||||
await goto("/");
|
||||
}
|
||||
|
||||
async function deleteChat(chatID: string) {
|
||||
const response = await fetch("/api/chat/" + chatID, { method: "DELETE" });
|
||||
if (response.status === 200) {
|
||||
@ -115,147 +93,159 @@
|
||||
localStorage.setItem("data-theme", $themeStore);
|
||||
}
|
||||
|
||||
function toggleBar() {
|
||||
bar_visible = !bar_visible;
|
||||
barVisible.set(bar_visible);
|
||||
}
|
||||
|
||||
// Create a new chat that reuses the parameters of the chat currently held in
// `dataCht`, then navigate to it. Does nothing when no chat data is available.
async function createSameSession() {
  if (!dataCht) {
    return;
  }

  const { params, history } = dataCht;
  const fields: [string, unknown][] = [
    ["model", params.model_path],
    ["temperature", params.temperature],
    ["top_k", params.top_k],
    ["top_p", params.top_p],
    ["max_length", params.max_tokens],
    ["context_window", params.n_ctx],
    ["repeat_last_n", params.last_n_tokens_size],
    ["repeat_penalty", params.repeat_penalty],
    ["init_prompt", history[0].data.content],
    ["gpu_layers", params.n_gpu_layers],
  ];

  // URLSearchParams percent-encodes every value, so a prompt containing
  // "&", "#", "+" or newlines no longer corrupts the query string.
  const query = new URLSearchParams();
  for (const [key, value] of fields) {
    query.set(key, String(value));
  }

  const response = await fetch(`/api/chat/?${query.toString()}`, {
    method: "POST",
    headers: {
      accept: "application/json",
    },
  });
  if (!response.ok) {
    // avoid navigating to "/chat/<error payload>" when creation failed
    console.error("Error creating chat", response.status);
    return;
  }

  const newData = await response.json();
  await invalidate("/api/chat/");
  await goto("/chat/" + newData);
}
|
||||
|
||||
onDestroy(() => {
|
||||
unsubscribe;
|
||||
unsubscribe1;
|
||||
});
|
||||
// onDestroy(unsubscribe1);
|
||||
</script>
|
||||
|
||||
<button
|
||||
on:click={toggleSidebar}
|
||||
class="border-base-content/[.2] btn btn-square z-10 my-1 mx-2 fixed border"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="none"
|
||||
viewBox="0 0 24 24"
|
||||
class="inline-block w-5 h-5 stroke-current"
|
||||
><path
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
stroke-width="2"
|
||||
d="M4 6h16M4 12h16M4 18h16"
|
||||
></path></svg
|
||||
>
|
||||
</button>
|
||||
|
||||
<aside
|
||||
class="border-base-content/[.2] fixed top-0 z-40 min-h-full border-r transition-all overflow-hidden aria-label=Sidebar"
|
||||
class:left-0={isSidebarOpen}
|
||||
class:-left-80={!isSidebarOpen}
|
||||
id="default-sidebar"
|
||||
class={"border-base-content/[.2] fixed left-0 top-0 z-40 h-screen w-80 -translate-x-full border-r transition-transform overflow-hidden" +
|
||||
(bar_visible ? " translate-x-0" : "")}
|
||||
aria-label="Sidebar"
|
||||
>
|
||||
<div
|
||||
class="bg-base-200 relative h-screen py-1 px-2 overflow-hidden flex flex-col items-center justify-between"
|
||||
>
|
||||
<div class="w-full flex items-center pb-1">
|
||||
<div
|
||||
class="w-full flex items-center justify-between border-b border-base-content/[.2] pb-1"
|
||||
>
|
||||
<button
|
||||
on:click={toggleSidebar}
|
||||
class="border-base-content/[.2] btn btn-square border"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="none"
|
||||
viewBox="0 0 24 24"
|
||||
class="inline-block w-5 h-5 stroke-current"
|
||||
><path
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
stroke-width="2"
|
||||
d="M4 6h16M4 12h16M4 18h16"
|
||||
></path></svg
|
||||
>
|
||||
</button>
|
||||
<button
|
||||
disabled={isLoading || !modelAvailable}
|
||||
class="btn btn-ghost flex-grow h-6 font-semibold text-left text-sm capitalize"
|
||||
disabled={isLoading}
|
||||
class="btn btn-ghost h-6 w-4/5 justify-between font-semibold text-left text-sm capitalize"
|
||||
class:loading={isLoading}
|
||||
on:click|preventDefault={() => goto("/")}
|
||||
style="justify-content: flex-start;"
|
||||
on:click|preventDefault={() => createSameSession()}
|
||||
>
|
||||
<span>New chat</span>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
fill="currentColor"
|
||||
class="w-4 h-4 mr-2"
|
||||
class="w-4 h-4"
|
||||
>
|
||||
<path
|
||||
d="M1 2.75C1 1.784 1.784 1 2.75 1h10.5c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 13.25 12H9.06l-2.573 2.573A1.458 1.458 0 0 1 4 13.543V12H2.75A1.75 1.75 0 0 1 1 10.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h4.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
<span>New Chat</span>
|
||||
</button>
|
||||
|
||||
<button class="btn btn-ghost flex-shrink-0" on:click={goToHome}>
|
||||
<button
|
||||
class="btn btn-ghost flex h-6 w-1/6 items-center justify-center font-semibold z-40"
|
||||
on:click={toggleBar}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
fill="currentColor"
|
||||
class="w-5 h-5"
|
||||
class="w-6 h-6"
|
||||
>
|
||||
<path d="M10 20v-6h4v6h5v-8h3L12 3 2 12h3v8z" />
|
||||
<path
|
||||
d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
|
||||
>
|
||||
</path>
|
||||
<path
|
||||
d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
<span class="sr-only">Home</span>
|
||||
</button>
|
||||
</div>
|
||||
<ul
|
||||
class="my-1 w-full flex-grow overflow-y-auto no-scrollbar firefox-no-scrollbar ie-edge-no-scrollbar"
|
||||
class="my-1 w-full h-[85%] overflow-y-auto no-scrollbar firefox-no-scrollbar ie-edge-no-scrollbar"
|
||||
>
|
||||
{#if data && Symbol.iterator in Object(data.chats)}
|
||||
{#each data.chats as chat (chat.id)}
|
||||
<li in:fly={{ x: -100, duration: 900 }}>
|
||||
<a
|
||||
href={"/chat/" + chat.id}
|
||||
class="group hover:from-base-100 hover:text-base-content flex items-center rounded-lg py-2 pl-2 text-base font-normal hover:bg-gradient-to-r hover:to-transparent"
|
||||
class:bg-base-300={id === chat.id}
|
||||
>
|
||||
<div
|
||||
class="flex w-full flex-col space-y-2 p-2 border-b border-gray-200 relative"
|
||||
>
|
||||
{#each data.chats as chat (chat.id)}
|
||||
<li in:fly={{ x: -100, duration: 900 }}>
|
||||
<a
|
||||
href={"/chat/" + chat.id}
|
||||
class="group hover:from-base-100 hover:text-base-content flex items-center rounded-lg py-2 pl-2 text-base font-normal hover:bg-gradient-to-r hover:to-transparent"
|
||||
class:bg-base-300={id === chat.id}
|
||||
>
|
||||
<div class="flex w-full flex-col">
|
||||
<div class="flex w-full flex-col items-start justify-start">
|
||||
<div
|
||||
class="flex w-full flex-col items-start justify-start space-y-1"
|
||||
class="relative flex w-full flex-row items-center justify-between"
|
||||
>
|
||||
<div
|
||||
class="flex w-full flex-row items-center justify-between"
|
||||
>
|
||||
<div class="flex flex-col space-y-1.5">
|
||||
<p class="text-sm font-light max-w-[25ch] break-words">
|
||||
{truncate(chat.subtitle, 100)}
|
||||
</p>
|
||||
<span
|
||||
class="text-xs font-semibold max-w-[25ch] break-words"
|
||||
>{chat.model}</span
|
||||
>
|
||||
<span class="text-xs"
|
||||
>{timeSince(chat.created) + " ago"}</span
|
||||
>
|
||||
</div>
|
||||
<div class="flex flex-col">
|
||||
<p class="text-sm font-light">
|
||||
{truncate(chat.subtitle, 42)}
|
||||
</p>
|
||||
<span class="text-xs font-semibold">{chat.model}</span>
|
||||
<span class="text-xs"
|
||||
>{timeSince(chat.created) + " ago"}</span
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
<div
|
||||
class="absolute bottom-1.5 right-2 opacity-0 group-hover:opacity-100 transition-opacity duration-300"
|
||||
>
|
||||
{#if deleteConfirm}
|
||||
<div class="flex flex-row items-center space-x-2">
|
||||
<button
|
||||
name="confirm-delete"
|
||||
class="btn btn-sm btn"
|
||||
on:click|preventDefault={() => deleteChat(chat.id)}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
<div
|
||||
class="absolute right-0 opacity-0 group-hover:opacity-100 transition"
|
||||
>
|
||||
<!-- {#if $page.params.id === chat.id} -->
|
||||
{#if deleteConfirm}
|
||||
<div class="flex flex-row items-center">
|
||||
<button
|
||||
name="confirm-delete"
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={() => deleteChat(chat.id)}
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
name="cancel-delete"
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={toggleDeleteConfirm}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
{:else}
|
||||
<button
|
||||
name="cancel-delete"
|
||||
class="btn btn-sm btn"
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={toggleDeleteConfirm}
|
||||
>
|
||||
<svg
|
||||
@ -266,189 +256,26 @@
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
|
||||
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
{:else}
|
||||
<button
|
||||
class="btn btn-sm btn"
|
||||
on:click|preventDefault={toggleDeleteConfirm}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
{/if}
|
||||
{/if}
|
||||
<!-- {/if} -->
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</li>
|
||||
{/each}
|
||||
{/if}
|
||||
</div>
|
||||
</a>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
<div class="w-full border-t border-base-content/[.2] pt-1">
|
||||
{#if data.userData?.username === "system"}
|
||||
{#if deleteAllConfirm}
|
||||
<button
|
||||
name="login-btn"
|
||||
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
|
||||
on:click={() => goto("/login")}
|
||||
class="btn btn-ghost w-full flex flex-row justify-between items-center p-2.5 text-left text-sm capitalize"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
class="mr-3"
|
||||
viewBox="0 0 16 16"
|
||||
>
|
||||
<path
|
||||
d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m1.679-4.493-1.335 2.226a.75.75 0 0 1-1.174.144l-.774-.773a.5.5 0 0 1 .708-.708l.547.548 1.17-1.951a.5.5 0 1 1 .858.514M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
|
||||
/>
|
||||
<path
|
||||
d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
|
||||
/>
|
||||
</svg>
|
||||
<span>Login</span>
|
||||
</button>
|
||||
<button
|
||||
name="create-btn"
|
||||
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
|
||||
on:click={() => goto("/signup")}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
class="mr-3"
|
||||
viewBox="0 0 16 16"
|
||||
>
|
||||
<path
|
||||
d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m.5-5v1h1a.5.5 0 0 1 0 1h-1v1a.5.5 0 0 1-1 0v-1h-1a.5.5 0 0 1 0-1h1v-1a.5.5 0 0 1 1 0m-2-6a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
|
||||
/>
|
||||
<path
|
||||
d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
|
||||
/>
|
||||
</svg>
|
||||
<span>Create Account</span>
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
name="logout-btn"
|
||||
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
|
||||
on:click={async () => {
|
||||
const response = await fetch("/api/auth/logout", {
|
||||
method: "POST",
|
||||
});
|
||||
data.userData = null;
|
||||
window.location.href = "/";
|
||||
}}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
class="mr-3"
|
||||
viewBox="0 0 16 16"
|
||||
>
|
||||
<path
|
||||
d="M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4m0 5.996V14H3s-1 0-1-1 1-4 6-4q.845.002 1.544.107a4.5 4.5 0 0 0-.803.918A11 11 0 0 0 8 10c-2.29 0-3.516.68-4.168 1.332-.678.678-.83 1.418-.832 1.664zM9 13a1 1 0 0 1 1-1v-1a2 2 0 1 1 4 0v1a1 1 0 0 1 1 1v2a1 1 0 0 1-1 1h-4a1 1 0 0 1-1-1zm3-3a1 1 0 0 0-1 1v1h2v-1a1 1 0 0 0-1-1"
|
||||
/>
|
||||
</svg>
|
||||
<span>Log Out</span>
|
||||
</button>
|
||||
<a
|
||||
href="/account"
|
||||
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
class="mr-3"
|
||||
>
|
||||
<path
|
||||
d="M8 0a8.2 8.2 0 0 1 .701.031C9.444.095 9.99.645 10.16 1.29l.288 1.107c.018.066.079.158.212.224.231.114.454.243.668.386.123.082.233.09.299.071l1.103-.303c.644-.176 1.392.021 1.82.63.27.385.506.792.704 1.218.315.675.111 1.422-.364 1.891l-.814.806c-.049.048-.098.147-.088.294.016.257.016.515 0 .772-.01.147.038.246.088.294l.814.806c.475.469.679 1.216.364 1.891a7.977 7.977 0 0 1-.704 1.217c-.428.61-1.176.807-1.82.63l-1.102-.302c-.067-.019-.177-.011-.3.071a5.909 5.909 0 0 1-.668.386c-.133.066-.194.158-.211.224l-.29 1.106c-.168.646-.715 1.196-1.458 1.26a8.006 8.006 0 0 1-1.402 0c-.743-.064-1.289-.614-1.458-1.26l-.289-1.106c-.018-.066-.079-.158-.212-.224a5.738 5.738 0 0 1-.668-.386c-.123-.082-.233-.09-.299-.071l-1.103.303c-.644.176-1.392-.021-1.82-.63a8.12 8.12 0 0 1-.704-1.218c-.315-.675-.111-1.422.363-1.891l.815-.806c.05-.048.098-.147.088-.294a6.214 6.214 0 0 1 0-.772c.01-.147-.038-.246-.088-.294l-.815-.806C.635 6.045.431 5.298.746 4.623a7.92 7.92 0 0 1 .704-1.217c.428-.61 1.176-.807 1.82-.63l1.102.302c.067.019.177.011.3-.071.214-.143.437-.272.668-.386.133-.066.194-.158.211-.224l.29-1.106C6.009.645 6.556.095 7.299.03 7.53.01 7.764 0 8 0Zm-.571 1.525c-.036.003-.108.036-.137.146l-.289 1.105c-.147.561-.549.967-.998 1.189-.173.086-.34.183-.5.29-.417.278-.97.423-1.529.27l-1.103-.303c-.109-.03-.175.016-.195.045-.22.312-.412.644-.573.99-.014.031-.021.11.059.19l.815.806c.411.406.562.957.53 1.456a4.709 4.709 0 0 0 0 .582c.032.499-.119 1.05-.53 1.456l-.815.806c-.081.08-.073.159-.059.19.162.346.353.677.573.989.02.03.085.076.195.046l1.102-.303c.56-.153 1.113-.008 1.53.27.161.107.328.204.501.29.447.222.85.629.997 1.189l.289 1.105c.029.109.101.143.137.146a6.6 6.6 0 0 0 1.142 0c.036-.003.108-.036.137-.146l.289-1.105c.147-.561.549-.967.998-1.189.173-.086.34-.183.5-.29.417-.278.97-.423 1.529-.27l1.103.303c.109.029.175-.016.195-.045.22-.313.411-.644.573-.99.014-.031.021-.11-.059-.19l-.815-.806c-.411-.406-.562-.957-.53-1.456a4.709 4.709 0 0 0 
0-.582c-.032-.499.119-1.05.53-1.456l.815-.806c.081-.08.073-.159.059-.19a6.464 6.464 0 0 0-.573-.989c-.02-.03-.085-.076-.195-.046l-1.102.303c-.56.153-1.113.008-1.53-.27a4.44 4.44 0 0 0-.501-.29c-.447-.222-.85-.629-.997-1.189l-.289-1.105c-.029-.11-.101-.143-.137-.146a6.6 6.6 0 0 0-1.142 0ZM11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0ZM9.5 8a1.5 1.5 0 1 0-3.001.001A1.5 1.5 0 0 0 9.5 8Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
<span>Settings</span>
|
||||
</a>
|
||||
{#if deleteAllConfirm}
|
||||
<button
|
||||
class="btn btn-ghost w-full flex flex-row justify-between items-center p-2.5 text-left text-sm capitalize"
|
||||
>
|
||||
<div class="h-6 flex flex-row items-center">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
class="mr-3"
|
||||
>
|
||||
<path
|
||||
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
<span>Clear Chats</span>
|
||||
</div>
|
||||
<div class="h-6 flex flex-row items-center">
|
||||
<button
|
||||
name="confirm-delete"
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={() => deleteAllChat()}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
name="cancel-delete"
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={toggleDeleteAllConfirm}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
on:click|preventDefault={toggleDeleteAllConfirm}
|
||||
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
|
||||
>
|
||||
<div class="h-6 flex flex-row items-center">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
@ -463,14 +290,71 @@
|
||||
</path>
|
||||
</svg>
|
||||
<span>Clear Chats</span>
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="h-6 flex flex-row items-center">
|
||||
<button
|
||||
name="confirm-delete"
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={() => deleteAllChat()}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
name="cancel-delete"
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={toggleDeleteAllConfirm}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
on:click|preventDefault={toggleDeleteAllConfirm}
|
||||
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
class="mr-3"
|
||||
>
|
||||
<path
|
||||
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
<span>Clear Chats</span>
|
||||
</button>
|
||||
{/if}
|
||||
<button
|
||||
on:click={toggleTheme}
|
||||
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
|
||||
>
|
||||
<label class="swap swap-rotate" for="theme-toggle">
|
||||
<label class="swap swap-rotate">
|
||||
<input type="checkbox" />
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
@ -500,12 +384,31 @@
|
||||
</label>
|
||||
<span>{theme == "dark" ? "Light" : "Dark"} theme</span>
|
||||
</button>
|
||||
<a
|
||||
href="/"
|
||||
class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
class="mr-3"
|
||||
>
|
||||
<path
|
||||
d="M8 0a8.2 8.2 0 0 1 .701.031C9.444.095 9.99.645 10.16 1.29l.288 1.107c.018.066.079.158.212.224.231.114.454.243.668.386.123.082.233.09.299.071l1.103-.303c.644-.176 1.392.021 1.82.63.27.385.506.792.704 1.218.315.675.111 1.422-.364 1.891l-.814.806c-.049.048-.098.147-.088.294.016.257.016.515 0 .772-.01.147.038.246.088.294l.814.806c.475.469.679 1.216.364 1.891a7.977 7.977 0 0 1-.704 1.217c-.428.61-1.176.807-1.82.63l-1.102-.302c-.067-.019-.177-.011-.3.071a5.909 5.909 0 0 1-.668.386c-.133.066-.194.158-.211.224l-.29 1.106c-.168.646-.715 1.196-1.458 1.26a8.006 8.006 0 0 1-1.402 0c-.743-.064-1.289-.614-1.458-1.26l-.289-1.106c-.018-.066-.079-.158-.212-.224a5.738 5.738 0 0 1-.668-.386c-.123-.082-.233-.09-.299-.071l-1.103.303c-.644.176-1.392-.021-1.82-.63a8.12 8.12 0 0 1-.704-1.218c-.315-.675-.111-1.422.363-1.891l.815-.806c.05-.048.098-.147.088-.294a6.214 6.214 0 0 1 0-.772c.01-.147-.038-.246-.088-.294l-.815-.806C.635 6.045.431 5.298.746 4.623a7.92 7.92 0 0 1 .704-1.217c.428-.61 1.176-.807 1.82-.63l1.102.302c.067.019.177.011.3-.071.214-.143.437-.272.668-.386.133-.066.194-.158.211-.224l.29-1.106C6.009.645 6.556.095 7.299.03 7.53.01 7.764 0 8 0Zm-.571 1.525c-.036.003-.108.036-.137.146l-.289 1.105c-.147.561-.549.967-.998 1.189-.173.086-.34.183-.5.29-.417.278-.97.423-1.529.27l-1.103-.303c-.109-.03-.175.016-.195.045-.22.312-.412.644-.573.99-.014.031-.021.11.059.19l.815.806c.411.406.562.957.53 1.456a4.709 4.709 0 0 0 0 .582c.032.499-.119 1.05-.53 1.456l-.815.806c-.081.08-.073.159-.059.19.162.346.353.677.573.989.02.03.085.076.195.046l1.102-.303c.56-.153 1.113-.008 1.53.27.161.107.328.204.501.29.447.222.85.629.997 1.189l.289 1.105c.029.109.101.143.137.146a6.6 6.6 0 0 0 1.142 0c.036-.003.108-.036.137-.146l.289-1.105c.147-.561.549-.967.998-1.189.173-.086.34-.183.5-.29.417-.278.97-.423 1.529-.27l1.103.303c.109.029.175-.016.195-.045.22-.313.411-.644.573-.99.014-.031.021-.11-.059-.19l-.815-.806c-.411-.406-.562-.957-.53-1.456a4.709 4.709 0 0 0 
0-.582c-.032-.499.119-1.05.53-1.456l.815-.806c.081-.08.073-.159.059-.19a6.464 6.464 0 0 0-.573-.989c-.02-.03-.085-.076-.195-.046l-1.102.303c-.56.153-1.113.008-1.53-.27a4.44 4.44 0 0 0-.501-.29c-.447-.222-.85-.629-.997-1.189l-.289-1.105c-.029-.11-.101-.143-.137-.146a6.6 6.6 0 0 0-1.142 0ZM11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0ZM9.5 8a1.5 1.5 0 1 0-3.001.001A1.5 1.5 0 0 0 9.5 8Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
<span>Settings</span>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</aside>
|
||||
|
||||
<button on:click={hideSidebar} type="button"></button>
|
||||
|
||||
<div id="main_content" class="h-full w-full">
|
||||
<div
|
||||
class={"relative h-full transition-all" + (bar_visible ? " md:ml-80" : "")}
|
||||
>
|
||||
<slot />
|
||||
</div>
|
||||
|
||||
@ -1,47 +1,16 @@
|
||||
import type { LayoutLoad } from "./$types";
|
||||
|
||||
interface ChatMetadata {
|
||||
interface t {
|
||||
id: string;
|
||||
created: string;
|
||||
model: string;
|
||||
subtitle: string;
|
||||
}
|
||||
|
||||
export const ssr = false; // off for now because ssr with auth is broken
|
||||
|
||||
/** Shape of one entry in the JSON array returned by `/api/model/all`, as cast in `load` below. */
export interface ModelStatus {
|
||||
name: string;
|
||||
size: number;
|
||||
available: boolean;
|
||||
// Optional field. NOTE(review): presumably download progress; unit/range not visible here — confirm against the API.
progress?: number;
|
||||
}
|
||||
|
||||
/** Shape of the JSON body returned by `/api/user/` when the request succeeds, as cast in `load` below. */
export interface User {
|
||||
id: string;
|
||||
username: string;
|
||||
email: string;
|
||||
// Restricted to the two literal theme names.
pref_theme: "light" | "dark";
|
||||
full_name: string;
|
||||
default_prompt: string;
|
||||
}
|
||||
|
||||
export const load: LayoutLoad = async ({ fetch }) => {
|
||||
let userData: User | null = null;
|
||||
|
||||
const api_chat = await fetch("/api/chat/");
|
||||
const chats = (await api_chat.json()) as ChatMetadata[];
|
||||
|
||||
const model_api = await fetch("/api/model/all");
|
||||
const models = (await model_api.json()) as ModelStatus[];
|
||||
|
||||
const userData_api = await fetch("/api/user/");
|
||||
if (userData_api.ok) {
|
||||
userData = (await userData_api.json()) as User;
|
||||
}
|
||||
|
||||
const r = await fetch("/api/chat/");
|
||||
const chats = (await r.json()) as t[];
|
||||
return {
|
||||
chats,
|
||||
models,
|
||||
userData,
|
||||
};
|
||||
};
|
||||
|
||||
@ -1,12 +1,16 @@
|
||||
<script lang="ts">
|
||||
import type { PageData } from "./$types";
|
||||
import { goto, invalidate } from "$app/navigation";
|
||||
import { barVisible } from "$lib/stores";
|
||||
import { onDestroy } from "svelte";
|
||||
export let data: PageData;
|
||||
|
||||
const models = data.models.filter((el) => el.available);
|
||||
|
||||
const modelAvailable = models.length > 0;
|
||||
const modelsLabels = models.map((el) => el.name);
|
||||
let bar_visible: boolean;
|
||||
const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
|
||||
|
||||
let temp = 0.1;
|
||||
let top_k = 50;
|
||||
@ -17,10 +21,8 @@
|
||||
let repeat_penalty = 1.3;
|
||||
|
||||
let init_prompt =
|
||||
data.userData?.default_prompt ??
|
||||
"Below is an instruction that describes a task. Write a response that appropriately completes the request.";
|
||||
|
||||
let n_threads = 4;
|
||||
let context_window = 2048;
|
||||
let gpu_layers = 0;
|
||||
|
||||
@ -45,21 +47,43 @@
|
||||
await invalidate("/api/chat/");
|
||||
}
|
||||
}
|
||||
/**
 * Invert the local `bar_visible` flag and push the new value into the
 * `barVisible` store.
 */
function toggleBar() {
  const nextVisible = !bar_visible;
  bar_visible = nextVisible;
  barVisible.set(nextVisible);
}
|
||||
onDestroy(unsubscribe);
|
||||
</script>
|
||||
|
||||
{#if !bar_visible}
|
||||
<button
|
||||
class="absolute p-0 top-1 left-2 md:left-16 h-10 w-10 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
|
||||
on:click={toggleBar}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
fill="currentColor"
|
||||
class="w-4 h-4"
|
||||
>
|
||||
<path
|
||||
d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
|
||||
>
|
||||
</path>
|
||||
<path
|
||||
d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
</button>
|
||||
{/if}
|
||||
<div class="flex flex-col items-center justify-center pt-5">
|
||||
<h1 class="pb-2 text-3xl font-bold">Say Hi to Serge</h1>
|
||||
</div>
|
||||
<h1 class="pb-5 pt-2 text-center text-xl font-light">
|
||||
An easy way to chat with LLaMA based models.
|
||||
An easy way to chat with Alpaca & other LLaMA based models.
|
||||
</h1>
|
||||
|
||||
<form
|
||||
on:submit|preventDefault={onCreateChat}
|
||||
id="form-create-chat"
|
||||
class="p-5"
|
||||
aria-label="Model Settings"
|
||||
>
|
||||
<form on:submit|preventDefault={onCreateChat} id="form-create-chat" class="p-5">
|
||||
<div class="w-full pb-20">
|
||||
<div class="mx-auto w-fit pt-5 flex flex-col lg:flex-row justify-center">
|
||||
<button
|
||||
@ -74,181 +98,161 @@
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex justify-center">
|
||||
<div class="grid grid-cols-3 gap-4 p-3 bg-base-200" id="model_settings">
|
||||
<div class="col-span-3 text-xl font-medium">Model settings</div>
|
||||
<div
|
||||
class="tooltip tooltip-bottom col-span-2"
|
||||
data-tip="Controls how random the generated text is. Higher temperatures lead to more random and creative text, while lower temperatures lead to more predictable and conservative text."
|
||||
>
|
||||
<label for="temperature" class="label-text"
|
||||
>Temperature - [{temp}]</label
|
||||
|
||||
<div tabindex="-1" class="collapse-arrow rounded-box collapse bg-base-200">
|
||||
<input type="checkbox" />
|
||||
<div class="collapse-title text-xl font-medium">Model settings</div>
|
||||
<div class="collapse-content">
|
||||
<div class="grid grid-cols-3 gap-4 p-3">
|
||||
<div
|
||||
class="tooltip tooltip-bottom col-span-2"
|
||||
data-tip="The higher the temperature, the more random the model output."
|
||||
>
|
||||
<input
|
||||
id="temperature"
|
||||
name="temperature"
|
||||
type="range"
|
||||
bind:value={temp}
|
||||
min="0.05"
|
||||
max="2"
|
||||
step="0.05"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip tooltip-bottom flex flex-col"
|
||||
data-tip="Controls the number of tokens that are considered when generating the next token. Higher values of top_k lead to more predictable text, while lower values of top_k lead to more creative text."
|
||||
>
|
||||
<label for="top_k" class="label-text pb-1">top_k</label>
|
||||
<input
|
||||
id="top_k"
|
||||
class="input-bordered input w-full"
|
||||
name="top_k"
|
||||
type="number"
|
||||
bind:value={top_k}
|
||||
min="0"
|
||||
max="100"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip tooltip-bottom col-span-2"
|
||||
data-tip="The maximum number of tokens that the model will generate. This parameter can be used to control the length of the generated text."
|
||||
>
|
||||
<label for="max_length" class="label-text"
|
||||
>Maximum generated tokens - [{max_length}]</label
|
||||
<label for="temperature" class="label-text"
|
||||
>Temperature - [{temp}]</label
|
||||
>
|
||||
<input
|
||||
name="temperature"
|
||||
type="range"
|
||||
bind:value={temp}
|
||||
min="0.05"
|
||||
max="2"
|
||||
step="0.05"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip tooltip-bottom flex flex-col"
|
||||
data-tip="The number of samples to consider for top_k sampling."
|
||||
>
|
||||
<input
|
||||
id="max_length"
|
||||
name="max_length"
|
||||
type="range"
|
||||
bind:value={max_length}
|
||||
min="32"
|
||||
max="32768"
|
||||
step="16"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip flex flex-col"
|
||||
data-tip="Controls the diversity of the generated text. Higher values of top_p lead to more diverse text, while lower values of top_p lead to less diverse text."
|
||||
>
|
||||
<label for="top_p" class="label-text pb-1">top_p</label>
|
||||
<input
|
||||
class="input-bordered input w-full"
|
||||
id="top_p"
|
||||
name="top_p"
|
||||
type="number"
|
||||
bind:value={top_p}
|
||||
min="0"
|
||||
max="1"
|
||||
step="0.025"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip col-span-2"
|
||||
data-tip="The number of previous tokens that are considered when generating the next token. A longer context length can help the model to generate more coherent and informative text."
|
||||
>
|
||||
<label for="context_window" class="label-text"
|
||||
>Context Length - [{context_window}]</label
|
||||
<label for="top_k" class="label-text pb-1">top_k</label>
|
||||
<input
|
||||
class="input-bordered input w-full max-w-xs"
|
||||
name="top_k"
|
||||
type="number"
|
||||
bind:value={top_k}
|
||||
min="0"
|
||||
max="100"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip tooltip-bottom col-span-2"
|
||||
data-tip="Max text generated token"
|
||||
>
|
||||
<input
|
||||
id="context_window"
|
||||
name="context_window"
|
||||
type="range"
|
||||
bind:value={context_window}
|
||||
min="16"
|
||||
max="2048"
|
||||
step="16"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip col-span-2"
|
||||
data-tip="Number of layers to put on the GPU. The rest will be on the CPU."
|
||||
>
|
||||
<label for="gpu_layers" class="label-text"
|
||||
>GPU Layers - [{gpu_layers}]</label
|
||||
<label for="max_length" class="label-text"
|
||||
>Maximum generated tokens - [{max_length}]</label
|
||||
>
|
||||
<input
|
||||
name="max_length"
|
||||
type="range"
|
||||
bind:value={max_length}
|
||||
min="32"
|
||||
max="32768"
|
||||
step="16"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip flex flex-col"
|
||||
data-tip="The cumulative probability of the tokens to keep for nucleus sampling."
|
||||
>
|
||||
<input
|
||||
id="gpu_layers"
|
||||
name="gpu_layers"
|
||||
type="range"
|
||||
bind:value={gpu_layers}
|
||||
min="0"
|
||||
max="100"
|
||||
step="1"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip flex flex-col"
|
||||
data-tip="Defines the penalty associated with repeating the last 'n' tokens in a generated text sequence."
|
||||
>
|
||||
<label for="repeat_last_n" class="label-text pb-1">repeat_last_n</label>
|
||||
<input
|
||||
id="repeat_last_n"
|
||||
class="input-bordered input w-full"
|
||||
name="repeat_last_n"
|
||||
type="number"
|
||||
bind:value={repeat_last_n}
|
||||
min="0"
|
||||
max="100"
|
||||
/>
|
||||
</div>
|
||||
<div class="flex flex-col">
|
||||
<label for="model" class="label-text pb-1"> Model choice</label>
|
||||
<select
|
||||
name="model"
|
||||
id="models"
|
||||
class="select-bordered select w-full"
|
||||
aria-haspopup="menu"
|
||||
<label for="top_p" class="label-text pb-1">top_p</label>
|
||||
<input
|
||||
class="input-bordered input w-full max-w-xs"
|
||||
name="top_p"
|
||||
type="number"
|
||||
bind:value={top_p}
|
||||
min="0"
|
||||
max="1"
|
||||
step="0.025"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip col-span-2"
|
||||
data-tip="Size of the prompt context. Will determine how far the model will read back. Increases memory consumption."
|
||||
>
|
||||
{#each modelsLabels as model}
|
||||
<option id={model} value={model}>{model}</option>
|
||||
{/each}
|
||||
</select>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip flex flex-col"
|
||||
data-tip="Number of threads to run LLaMA on."
|
||||
>
|
||||
<label for="n_threads" class="label-text pb-1">n_threads</label>
|
||||
<input
|
||||
id="n_threads"
|
||||
class="input-bordered input w-full"
|
||||
name="n_threads"
|
||||
type="number"
|
||||
bind:value={n_threads}
|
||||
min="0"
|
||||
max="64"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip flex flex-col"
|
||||
data-tip="Defines the penalty assigned to the model when it repeats certain tokens or patterns in the generated text."
|
||||
>
|
||||
<label for="repeat_penalty" class="label-text pb-1">
|
||||
repeat_penalty
|
||||
</label>
|
||||
<input
|
||||
id="repeat_penalty"
|
||||
class="input-bordered input w-full"
|
||||
name="repeat_penalty"
|
||||
type="number"
|
||||
bind:value={repeat_penalty}
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.05"
|
||||
/>
|
||||
</div>
|
||||
<div class="col-span-3 flex flex-col">
|
||||
<label for="init_prompt" class="label-text pb-1">Prompt Template</label>
|
||||
<textarea
|
||||
class="textarea-bordered textarea h-24 w-full"
|
||||
name="init_prompt"
|
||||
bind:value={init_prompt}
|
||||
placeholder="Enter your prompt here"
|
||||
/>
|
||||
<label for="context_window" class="label-text"
|
||||
>Context Length - [{context_window}]</label
|
||||
>
|
||||
<input
|
||||
name="context_window"
|
||||
type="range"
|
||||
bind:value={context_window}
|
||||
min="16"
|
||||
max="2048"
|
||||
step="16"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip col-span-2"
|
||||
data-tip="Number of layers to put on the GPU. The rest will be on the CPU."
|
||||
>
|
||||
<label for="gpu_layers" class="label-text"
|
||||
>GPU Layers - [{gpu_layers}]</label
|
||||
>
|
||||
<input
|
||||
name="gpu_layers"
|
||||
type="range"
|
||||
bind:value={gpu_layers}
|
||||
min="0"
|
||||
max="100"
|
||||
step="1"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip flex flex-col"
|
||||
data-tip="Number of tokens to look back on for deciding to apply the repeat penalty."
|
||||
>
|
||||
<label for="repeat_last_n" class="label-text pb-1"
|
||||
>repeat_last_n</label
|
||||
>
|
||||
<input
|
||||
class="input-bordered input w-full max-w-xs"
|
||||
name="repeat_last_n"
|
||||
type="number"
|
||||
bind:value={repeat_last_n}
|
||||
min="0"
|
||||
max="100"
|
||||
/>
|
||||
</div>
|
||||
<div class="flex flex-col">
|
||||
<label for="model" class="label-text pb-1"> Model choice</label>
|
||||
<select name="model" class="select-bordered select w-full max-w-xs">
|
||||
{#each modelsLabels as model}
|
||||
<option value={model}>{model}</option>
|
||||
{/each}
|
||||
</select>
|
||||
</div>
|
||||
<div
|
||||
class="tooltip flex flex-col"
|
||||
data-tip="The weight of the penalty to avoid repeating the last repeat_last_n tokens."
|
||||
>
|
||||
<label for="repeat_penalty" class="label-text pb-1">
|
||||
repeat_penalty
|
||||
</label>
|
||||
<input
|
||||
class="input-bordered input w-full max-w-xs"
|
||||
name="repeat_penalty"
|
||||
type="number"
|
||||
bind:value={repeat_penalty}
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.05"
|
||||
/>
|
||||
</div>
|
||||
<div class="col-span-3 flex flex-col">
|
||||
<label for="init_prompt" class="label-text pb-1"
|
||||
>Pre-Prompt for initializing a conversation.</label
|
||||
>
|
||||
<textarea
|
||||
class="textarea-bordered textarea h-24 w-full"
|
||||
name="init_prompt"
|
||||
bind:value={init_prompt}
|
||||
placeholder="Enter your prompt here"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import type { PageLoad } from "./$types";
|
||||
|
||||
export interface ModelStatus {
|
||||
interface ModelStatus {
|
||||
name: string;
|
||||
size: number;
|
||||
available: boolean;
|
||||
@ -8,8 +8,8 @@ export interface ModelStatus {
|
||||
}
|
||||
|
||||
export const load: PageLoad = async ({ fetch }) => {
|
||||
const api_model = await fetch("/api/model/all");
|
||||
const models = (await api_model.json()) as ModelStatus[];
|
||||
const r = await fetch("/api/model/all");
|
||||
const models = (await r.json()) as ModelStatus[];
|
||||
return {
|
||||
models,
|
||||
};
|
||||
|
||||
@ -1,106 +0,0 @@
|
||||
<script context="module" lang="ts">
|
||||
export { load } from "./+page";
|
||||
</script>
|
||||
|
||||
<script lang="ts">
|
||||
import { writable } from "svelte/store";
|
||||
import { goto } from "$app/navigation";
|
||||
export let data: {
|
||||
user: {
|
||||
id: string;
|
||||
username: string;
|
||||
email: string;
|
||||
full_name: string;
|
||||
pref_theme: "light" | "dark";
|
||||
default_prompt: string;
|
||||
} | null;
|
||||
};
|
||||
let user = data.user;
|
||||
let id: string = user?.id ?? "";
|
||||
let username: string = user?.username ?? "";
|
||||
let email: string = user?.email ?? "";
|
||||
let full_name: string = user?.full_name ?? "";
|
||||
let pref_theme: "light" | "dark" = user?.pref_theme ?? "light";
|
||||
let default_prompt: string = user?.default_prompt ?? "";
|
||||
let status = writable<string | null>(null);
|
||||
|
||||
/**
 * Submit handler for the preferences form.
 *
 * Sends the edited profile fields to `/api/user/` as a JSON `PUT`. On success
 * it records a confirmation in the `status` store and navigates to `/`; on
 * failure it records an error message in `status` and stays on the page.
 *
 * @param event - The form's submit event; its default action is suppressed.
 */
async function handleSubmit(event: Event) {
  event.preventDefault();
  try {
    const response = await fetch("/api/user/", {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id,
        username,
        email,
        full_name,
        pref_theme,
        default_prompt,
      }),
    });

    // fetch() only rejects on network failure; an HTTP error status still
    // resolves, so it has to be turned into a failure explicitly.
    if (!response.ok) {
      throw new Error(
        `Failed to update preferences (HTTP ${response.status})`,
      );
    }

    status.set("Preferences updated successfully");
    goto("/", { invalidateAll: true });
  } catch (error) {
    if (error instanceof Error) {
      status.set(error.message);
    } else {
      status.set("Failed to update preferences");
    }
  }
}
|
||||
</script>
|
||||
|
||||
<main>
|
||||
<div class="card-group">
|
||||
<div class="card">
|
||||
<div class="card-title p-3 text-3xl justify-center font-bold">
|
||||
User Preferences
|
||||
</div>
|
||||
<div class="card-body">
|
||||
{#if user}
|
||||
<form on:submit={handleSubmit}>
|
||||
<div class="input-group">
|
||||
<div class="input-group-prepend">
|
||||
<span class="input-group-text">Username</span>
|
||||
</div>
|
||||
<input type="text" bind:value={username} disabled />
|
||||
</div>
|
||||
<div class="input-group">
|
||||
<div class="input-group-prepend">
|
||||
<span class="input-group-text">Full Name</span>
|
||||
</div>
|
||||
<input id="full_name" type="text" bind:value={full_name} />
|
||||
</div>
|
||||
<div class="input-group">
|
||||
<div class="input-group-prepend">
|
||||
<span class="input-group-text">Email</span>
|
||||
</div>
|
||||
<input id="email" type="email" bind:value={email} />
|
||||
</div>
|
||||
<div class="input-group">
|
||||
<div class="input-group-prepend">
|
||||
<span class="input-group-text">Default Prompt</span>
|
||||
</div>
|
||||
<textarea
|
||||
id="default_prompt"
|
||||
bind:value={default_prompt}
|
||||
style="resize:both; width:100%;"
|
||||
/>
|
||||
</div>
|
||||
{#if $status}
|
||||
<p>{$status}</p>
|
||||
{/if}
|
||||
<button class="btn" type="submit">Save Preferences</button>
|
||||
</form>
|
||||
{:else}
|
||||
<p>Loading...</p>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
@ -1,27 +0,0 @@
|
||||
import type { Load } from "@sveltejs/kit";
|
||||
|
||||
/** Shape of the user record as parsed from `GET /api/user/`. */
interface User {
  id: string;
  username: string;
  email: string;
  pref_theme: "light" | "dark";
  full_name: string;
  default_prompt: string;
}

/**
 * Loads the current user for the preferences page.
 *
 * Requests `/api/user/` through the `fetch` supplied by the load event. When
 * the API answers 401, or the request/parse fails, the browser is sent to `/`
 * and `user` is `null`.
 *
 * NOTE(review): `window` is only defined in the browser; this assumes the
 * load never runs during server-side rendering - confirm.
 *
 * @returns `{ user }` where `user` is the parsed response body or `null`.
 */
export const load: Load = async ({ fetch }) => {
  try {
    const response = await fetch("/api/user/", {
      method: "GET",
    });

    if (response.status === 401) {
      window.location.href = "/";
      // Do not hand the 401 error body to the page as if it were a user.
      return { user: null };
    }

    const user = (await response.json()) as User;
    return { user };
  } catch (error) {
    console.log(error);
    window.location.href = "/";
    return { user: null };
  }
};
|
||||
@ -2,7 +2,7 @@
|
||||
import type { PageData } from "./$types";
|
||||
import { invalidate, goto } from "$app/navigation";
|
||||
import { page } from "$app/stores";
|
||||
import { newChat, themeStore } from "$lib/stores";
|
||||
import { barVisible, newChat, themeStore } from "$lib/stores";
|
||||
import { onMount, onDestroy } from "svelte";
|
||||
import ClipboardJS from "clipboard";
|
||||
import hljs from "highlight.js";
|
||||
@ -11,6 +11,7 @@
|
||||
import css from "highlight.js/lib/languages/css";
|
||||
import cpp from "highlight.js/lib/languages/cpp";
|
||||
import dockerfile from "highlight.js/lib/languages/dockerfile";
|
||||
import graphql from "highlight.js/lib/languages/graphql";
|
||||
import go from "highlight.js/lib/languages/go";
|
||||
import javascript from "highlight.js/lib/languages/javascript";
|
||||
import json from "highlight.js/lib/languages/json";
|
||||
@ -31,6 +32,7 @@
|
||||
hljs.registerLanguage("bash", bash);
|
||||
hljs.registerLanguage("css", css);
|
||||
hljs.registerLanguage("cpp", cpp);
|
||||
hljs.registerLanguage("graphql", graphql);
|
||||
hljs.registerLanguage("dockerfile", dockerfile);
|
||||
hljs.registerLanguage("go", go);
|
||||
hljs.registerLanguage("javascript", javascript);
|
||||
@ -59,6 +61,8 @@
|
||||
messageContainer.scrollBottom = messageContainer.scrollHeight;
|
||||
}
|
||||
let prompt = "";
|
||||
let bar_visible: boolean;
|
||||
const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
|
||||
|
||||
async function askQuestion() {
|
||||
const data = new URLSearchParams();
|
||||
@ -102,7 +106,10 @@
|
||||
});
|
||||
|
||||
eventSource.onerror = async (error) => {
|
||||
console.log("error", error);
|
||||
eventSource.close();
|
||||
//history[history.length - 1].data.content = "A server error occurred.";
|
||||
//await invalidate("/api/chat/" + $page.params.id);
|
||||
};
|
||||
}
|
||||
|
||||
@ -118,7 +125,7 @@
|
||||
`/api/chat/?model=${data.chat.params.model_path}&temperature=${data.chat.params.temperature}&top_k=${data.chat.params.top_k}` +
|
||||
`&top_p=${data.chat.params.top_p}&max_length=${data.chat.params.max_tokens}&context_window=${data.chat.params.n_ctx}` +
|
||||
`&repeat_last_n=${data.chat.params.last_n_tokens_size}&repeat_penalty=${data.chat.params.repeat_penalty}` +
|
||||
`&n_threads=${data.chat.params.n_threads}&init_prompt=${data.chat.history[0].data.content}` +
|
||||
`&init_prompt=${data.chat.history[0].data.content}` +
|
||||
`&gpu_layers=${data.chat.params.n_gpu_layers}`,
|
||||
|
||||
{
|
||||
@ -127,23 +134,17 @@
|
||||
accept: "application/json",
|
||||
},
|
||||
},
|
||||
)
|
||||
.then((response) => {
|
||||
if (response.status == 401) {
|
||||
console.log("Not authorized");
|
||||
window.location.href = "/";
|
||||
} else {
|
||||
return response.json();
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
window.location.href = "/";
|
||||
});
|
||||
).then((response) => response.json());
|
||||
await invalidate("/api/chat/");
|
||||
await goto("/chat/" + newData);
|
||||
}
|
||||
|
||||
// Alt+N opens a new chat that reuses the current chat's parameters.
// The listener is attached in onMount so `document` is only touched once the
// component is in the browser, and the returned function detaches it on
// destroy so repeated mounts do not stack up duplicate handlers.
onMount(() => {
  const onAltN = async (event: KeyboardEvent) => {
    if (event.key === "n" && event.altKey) {
      await createSameSession();
    }
  };

  document.addEventListener("keydown", onAltN);
  return () => document.removeEventListener("keydown", onAltN);
});
|
||||
|
||||
async function deletePrompt(chatID: string, idx: number) {
|
||||
const response = await fetch(
|
||||
`/api/chat/${chatID}/prompt?idx=${idx.toString()}`,
|
||||
@ -152,36 +153,11 @@
|
||||
|
||||
if (response.status === 200) {
|
||||
await invalidate("/api/chat/" + $page.params.id);
|
||||
} else if (response.status === 202) {
|
||||
showToast("Chat in progress!");
|
||||
} else if (response.status === 401) {
|
||||
window.location.href = "/";
|
||||
} else {
|
||||
showToast("An error occurred: " + response.statusText);
|
||||
console.error("Error " + response.status + ": " + response.statusText);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Shows a transient info toast.
 *
 * Builds an `alert alert-info` div carrying `message`, appends it to the
 * element with id `toast-container`, and removes it again after 3 seconds.
 * If the container is missing, an error is logged and nothing is shown.
 *
 * @param message - Text displayed inside the toast.
 */
function showToast(message: string) {
  const toastEl = Object.assign(document.createElement("div"), {
    className: "alert alert-info",
    textContent: message,
  });

  const host = document.getElementById("toast-container");
  if (!host) {
    console.error("Toast container not found?");
    return;
  }

  host.appendChild(toastEl);

  // Auto-dismiss after a short delay.
  setTimeout(() => toastEl.remove(), 3000);
}
|
||||
|
||||
const md: MarkdownIt = new MarkdownIt({
|
||||
html: true,
|
||||
linkify: true,
|
||||
@ -249,12 +225,6 @@
|
||||
themeStore.subscribe((newTheme) => {
|
||||
updateThemeStyle(newTheme);
|
||||
});
|
||||
|
||||
document.addEventListener("keydown", async (event) => {
|
||||
if (event.key === "n" && event.altKey) {
|
||||
await createSameSession();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
function updateThemeStyle(currentTheme: string) {
|
||||
@ -273,6 +243,10 @@
|
||||
const onMouseLeave = () => {
|
||||
sendBottomHovered = false;
|
||||
};
|
||||
// Flip the local visibility flag and publish the new value to the shared
// `barVisible` store.
const toggleBar = () => {
  const nextVisible = !bar_visible;
  bar_visible = nextVisible;
  barVisible.set(nextVisible);
};
|
||||
const scrollToBottom = (node: Element, history: any[]) => {
|
||||
const scroll = () =>
|
||||
node.scroll({
|
||||
@ -284,22 +258,41 @@
|
||||
return { update: scroll };
|
||||
};
|
||||
// Component teardown: stop listening to the `barVisible` store and remove the
// injected style element, if one exists.
onDestroy(() => {
  // `unsubscribe` is the function returned by barVisible.subscribe(); it has
  // to be invoked - a bare reference is a no-op and leaves the subscription
  // alive after the component is gone.
  unsubscribe();
  if (styleElement) {
    styleElement.remove();
  }
});
|
||||
</script>
|
||||
|
||||
<!-- svelte-ignore a11y-no-static-element-interactions -->
|
||||
<div
|
||||
class="relative h-full max-h-screen overflow-hidden"
|
||||
class="relative mx-auto h-full max-h-screen w-full overflow-hidden"
|
||||
on:keydown={handleKeyDown}
|
||||
>
|
||||
<div class="mx-20">
|
||||
<div class="h-8 justify-content border-b border-base-content/[.2]">
|
||||
<div class="h-full relative flex items-center justify-center">
|
||||
<div
|
||||
class="flex flex-row items-center justify-center color-base-300"
|
||||
title="Model"
|
||||
>
|
||||
<div class="w-full border-b border-base-content/[.2]">
|
||||
<div class="h-8 px-2 md:container md:mx-auto md:px-0">
|
||||
<div class="w-full h-full relative flex items-center justify-center">
|
||||
{#if !bar_visible}
|
||||
<button
|
||||
class="absolute p-0 top-0 bottom-0 left-0 w-10 h-8 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
|
||||
on:click={toggleBar}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
class="w-4 h-4 fill-base-content"
|
||||
>
|
||||
<path
|
||||
d="M7.22 14.47 9.69 12 7.22 9.53a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l3 3a.75.75 0 0 1 0 1.06l-3 3a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Z"
|
||||
>
|
||||
</path>
|
||||
<path
|
||||
d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
</button>
|
||||
{/if}
|
||||
<div class="flex flex-row items-center justify-center color-base-300">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 11.12744 16"
|
||||
@ -314,10 +307,7 @@
|
||||
{data.chat.params.model_path}
|
||||
</span>
|
||||
</div>
|
||||
<div
|
||||
class="pl-4 hidden sm:flex flex-row items-center justify-center"
|
||||
title="Temperature"
|
||||
>
|
||||
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 5.31286 16"
|
||||
@ -332,10 +322,7 @@
|
||||
{data.chat.params.temperature}
|
||||
</span>
|
||||
</div>
|
||||
<div
|
||||
class="pl-4 hidden sm:flex flex-row items-center justify-center"
|
||||
title="Context Length/Maximum Generated Tokens"
|
||||
>
|
||||
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
@ -350,35 +337,8 @@
|
||||
{data.chat.params.n_ctx}/{data.chat.params.max_tokens}
|
||||
</span>
|
||||
</div>
|
||||
{#if data.chat.params.n_threads > 0}
|
||||
<div
|
||||
class="pl-4 hidden sm:flex flex-row items-center justify-center"
|
||||
title="Threads"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="none"
|
||||
viewBox="0 0 24 24"
|
||||
stroke-width="1.5"
|
||||
stroke="currentColor"
|
||||
class="w-4 h-4"
|
||||
>
|
||||
<path
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
d="M8.25 3v1.5M4.5 8.25H3m18 0h-1.5M4.5 12H3m18 0h-1.5m-15 3.75H3m18 0h-1.5M8.25 19.5V21M12 3v1.5m0 15V21m3.75-18v1.5m0 15V21m-9-1.5h10.5a2.25 2.25 0 002.25-2.25V6.75a2.25 2.25 0 00-2.25-2.25H6.75A2.25 2.25 0 004.5 6.75v10.5a2.25 2.25 0 002.25 2.25zm.75-12h9v9h-9v-9z"
|
||||
/>
|
||||
</svg>
|
||||
<span class="ml-2 inline-block text-center text-sm font-semibold">
|
||||
{data.chat.params.n_threads}
|
||||
</span>
|
||||
</div>
|
||||
{/if}
|
||||
{#if data.chat.params.n_gpu_layers > 0}
|
||||
<div
|
||||
class="pl-4 hidden sm:flex flex-row items-center justify-center"
|
||||
title="GPU Layers"
|
||||
>
|
||||
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
@ -394,10 +354,7 @@
|
||||
</span>
|
||||
</div>
|
||||
{/if}
|
||||
<div
|
||||
class="pl-4 hidden sm:flex flex-row items-center justify-center"
|
||||
title="Repeat Penalty"
|
||||
>
|
||||
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
@ -416,10 +373,7 @@
|
||||
{data.chat.params.repeat_penalty}
|
||||
</span>
|
||||
</div>
|
||||
<div
|
||||
class="pl-4 hidden sm:flex flex-row items-center justify-center"
|
||||
title="Top_k-Top_p"
|
||||
>
|
||||
<div class="pl-4 hidden sm:flex flex-row items-center justify-center">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
@ -444,46 +398,49 @@
|
||||
<div class="h-max pb-4">
|
||||
{#each history as question, i}
|
||||
{#if question.type === "human"}
|
||||
<div class="w-10/12 mx-auto sm:w-10/12 chat chat-end py-4">
|
||||
<div class="chat-image self-start pl-1 pt-1">
|
||||
<div
|
||||
class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
|
||||
>
|
||||
<span class="text-xs text-neutral-content">I</span>
|
||||
</div>
|
||||
</div>
|
||||
<div
|
||||
class="chat-bubble whitespace-normal break-words bg-base-300 text-base font-light text-base-content"
|
||||
>
|
||||
<!-- {question.data.content} -->
|
||||
<div class="w-full overflow-hidden break-words">
|
||||
{@html renderMarkdown(question.data.content)}
|
||||
</div>
|
||||
</div>
|
||||
{#if i === history.length - 1 && !isLoading}
|
||||
<div style="width: 100%; text-align: right;">
|
||||
<button
|
||||
disabled={isLoading}
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={() => deletePrompt(data.chat.id, i)}
|
||||
<div class="w-full border-y border-base-content/[.2] bg-base-300">
|
||||
<div class="w-11/12 mx-auto sm:w-10/12 chat chat-start py-4">
|
||||
<div class="chat-image self-start pl-1 pt-1">
|
||||
<div
|
||||
class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
<span class="text-xs text-neutral-content">I</span>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
<div
|
||||
class="chat-bubble whitespace-normal break-words bg-base-300 text-base font-light text-base-content"
|
||||
>
|
||||
<!-- {question.data.content} -->
|
||||
<div class="w-full overflow-hidden break-words">
|
||||
{@html renderMarkdown(question.data.content)}
|
||||
</div>
|
||||
</div>
|
||||
{#if i === history.length - 1 && !isLoading}
|
||||
<div style="width: 100%; text-align: right;">
|
||||
<button
|
||||
disabled={isLoading}
|
||||
class="btn-ghost btn-sm btn"
|
||||
on:click|preventDefault={() =>
|
||||
deletePrompt(data.chat.id, i)}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="16"
|
||||
height="16"
|
||||
>
|
||||
<path
|
||||
class="fill-base-content"
|
||||
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
|
||||
/>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{:else if question.type === "ai"}
|
||||
<div class="w-10/12 mx-auto sm:w-10/12 chat chat-start py-4">
|
||||
<div class="w-11/12 mx-auto sm:w-10/12 chat chat-start py-4">
|
||||
<div class="chat-image self-start pl-1 pt-1">
|
||||
<div
|
||||
class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
|
||||
@ -524,7 +481,6 @@
|
||||
d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
|
||||
/>
|
||||
</svg>
|
||||
<span class="sr-only">Delete</span>
|
||||
</button>
|
||||
</div>
|
||||
{/if}
|
||||
@ -564,7 +520,7 @@
|
||||
class="btn btn-ghost h-10 w-14 rounded-l-none rounded-r-lg border-0 text-lg"
|
||||
class:loading={isLoading}
|
||||
on:click|preventDefault={askQuestion}
|
||||
><span class="sr-only">Send</span>
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
@ -580,7 +536,4 @@
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div id="toast-container" class="toast">
|
||||
<!-- Toast notifications will be added here -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -15,7 +15,6 @@ interface Params {
|
||||
model_path: string;
|
||||
n_ctx: number;
|
||||
n_gpu_layers: number;
|
||||
n_threads: number;
|
||||
last_n_tokens_size: number;
|
||||
max_tokens: number;
|
||||
temperature: number;
|
||||
@ -28,22 +27,12 @@ interface Response {
|
||||
id: string;
|
||||
created: string;
|
||||
params: Params;
|
||||
owner: string;
|
||||
history: Message[];
|
||||
}
|
||||
|
||||
export const load: PageLoad = async ({ fetch, params }) => {
|
||||
const data = await fetch("/api/chat/" + params.id)
|
||||
.then((response) => {
|
||||
if (response.status == 401) {
|
||||
window.location.href = "/";
|
||||
}
|
||||
return response.json();
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
window.location.href = "/";
|
||||
});
|
||||
const r = await fetch("/api/chat/" + params.id);
|
||||
const data = (await r.json()) as Response;
|
||||
|
||||
return {
|
||||
chat: data,
|
||||
|
||||
@ -1,69 +0,0 @@
|
||||
<script lang="ts">
|
||||
import { goto } from "$app/navigation";
|
||||
import { writable } from "svelte/store";
|
||||
|
||||
let username = "";
|
||||
let password = "";
|
||||
let error = writable<string | null>(null);
|
||||
|
||||
/**
 * Submit handler for the sign-in form.
 *
 * Posts the username and password, form-encoded, to `/api/auth/token`. A
 * successful response stores `access_token` under the `token` key in
 * localStorage and navigates to `/`. A rejected login writes the response's
 * `detail` (or "Login failed") to the `error` store; any thrown error writes
 * "An error occurred".
 *
 * @param event - The form's submit event; its default action is suppressed.
 */
async function handleSubmit(event: Event) {
  event.preventDefault();

  try {
    const credentials = new URLSearchParams({
      username,
      password,
    });
    const response = await fetch("/api/auth/token", {
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
      },
      body: credentials,
    });

    // Both outcomes carry a JSON body, so parse it once up front.
    const payload = await response.json();

    if (!response.ok) {
      error.set(payload.detail || "Login failed");
      return;
    }

    localStorage.setItem("token", payload.access_token);
    goto("/", { invalidateAll: true });
  } catch (err) {
    error.set("An error occurred");
  }
}
|
||||
</script>
|
||||
|
||||
<main>
|
||||
<div class="card-group">
|
||||
<div class="card">
|
||||
<div class="card-title p-3 text-3xl justify-center font-bold">
|
||||
Sign In
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<form on:submit={handleSubmit}>
|
||||
<div class="form-control">
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Username"
|
||||
bind:value={username}
|
||||
required
|
||||
/>
|
||||
</div>
|
||||
<div class="form-control">
|
||||
<input
|
||||
type="password"
|
||||
placeholder="Password"
|
||||
bind:value={password}
|
||||
required
|
||||
/>
|
||||
</div>
|
||||
{#if $error}
|
||||
<p style="color: red;">{$error}</p>
|
||||
{/if}
|
||||
<button class="btn" type="submit">Authenticate</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
@ -1,359 +1,166 @@
|
||||
<script lang="ts">
|
||||
import { invalidate } from "$app/navigation";
|
||||
import type { ModelStatus } from "../+page";
|
||||
import type { PageData } from "./$types";
|
||||
import Icon from "@iconify/svelte";
|
||||
import { onMount } from "svelte";
|
||||
import RefreshModal from "../../lib/components/models/RefreshModal.svelte";
|
||||
import { barVisible } from "$lib/stores";
|
||||
import { onDestroy } from "svelte";
|
||||
|
||||
export let data: PageData;
|
||||
let searchQuery = "";
|
||||
let selectedVariant: Record<string, string> = {};
|
||||
|
||||
// Add a reactive statement to keep track of downloading models
|
||||
$: downloadingModels = new Set(
|
||||
data.models
|
||||
.filter(
|
||||
(model) =>
|
||||
(model.progress > 0 && model.progress < 100) || !model.available,
|
||||
)
|
||||
.map((model) => model.name),
|
||||
);
|
||||
|
||||
/**
 * Resumes tracking of downloads recorded in localStorage.
 *
 * Reads the JSON array stored under "downloadingModels" (empty when absent),
 * adds each name to `downloadingModels`, and starts progress polling for it.
 */
function onComponentMount() {
  const persisted = localStorage.getItem("downloadingModels") || "[]";
  const modelNames = JSON.parse(persisted);

  modelNames.forEach((name: string) => {
    downloadingModels.add(name);
    checkDownloadProgress(name);
  });
}
|
||||
|
||||
onMount(() => {
|
||||
onComponentMount();
|
||||
});
|
||||
|
||||
/**
 * Fetches the status of an active download and stores it on the matching
 * entry of `data.models`.
 * @param modelName - The model name.
 * @returns The reported progress, or 0 when the request fails or the
 * response body is not a number.
 */
async function fetchDownloadProgress(modelName: string) {
  const response = await fetch(`/api/model/${modelName}/download/status`);
  if (!response.ok) {
    return 0;
  }

  const progressNumber = parseFloat(await response.text());
  // An unparsable body yields NaN. Storing it would corrupt the model entry,
  // and returning it would make a `progress < 100` check read as "finished",
  // so report "no progress" instead.
  if (Number.isNaN(progressNumber)) {
    return 0;
  }

  const modelIndex = data.models.findIndex((m) => m.name === modelName);
  if (modelIndex !== -1) {
    data.models[modelIndex].progress = progressNumber;
    data.models = [...data.models]; // reassign to enable reactivity
  }
  return progressNumber;
}
|
||||
|
||||
/**
 * Begins tracking a model download.
 *
 * Adds the name to the JSON array stored under "downloadingModels" in
 * localStorage when it is not listed yet, adds it to `downloadingModels`,
 * and starts progress polling.
 *
 * @param modelName - The model name.
 */
function startDownload(modelName: string) {
  const storageKey = "downloadingModels";
  const tracked = JSON.parse(localStorage.getItem(storageKey) || "[]");
  const alreadyTracked = tracked.includes(modelName);

  if (!alreadyTracked) {
    localStorage.setItem(storageKey, JSON.stringify([...tracked, modelName]));
  }

  downloadingModels.add(modelName);
  checkDownloadProgress(modelName);
}
|
||||
|
||||
/**
 * Wraps a function so that it only runs once calls have stopped for `wait`
 * milliseconds; every new call restarts the countdown and only the arguments
 * of the latest call are used.
 * @param func - The function to be debounced.
 * @param wait - The time to wait in milliseconds.
 * @returns A debounced version of the given function.
 */
function debounce(func: (...args: any[]) => void, wait: number) {
  let pending: ReturnType<typeof setTimeout>;

  return (...args: any[]) => {
    // Cancel whatever was scheduled by the previous call.
    clearTimeout(pending);
    pending = setTimeout(() => {
      clearTimeout(pending);
      func(...args);
    }, wait);
  };
}
|
||||
|
||||
// Update search query with debounce to improve performance
|
||||
const updateSearch = debounce((query: string) => {
|
||||
searchQuery = query;
|
||||
}, 300);
|
||||
|
||||
/**
|
||||
* Wrapper function for fetch to include invalidate call on successful response.
|
||||
* @param url - The URL to fetch.
|
||||
* @param options - Fetch request options.
|
||||
* @returns The fetch response.
|
||||
*/
|
||||
async function fetchWithInvalidate(url: string, options: any) {
|
||||
const response = await fetch(url, options);
|
||||
if (response.ok) {
|
||||
let downloading = false;
|
||||
let bar_visible: boolean;
|
||||
const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
|
||||
console.log(data);
|
||||
setInterval(async () => {
|
||||
if (downloading) {
|
||||
await invalidate("/api/model/all");
|
||||
}
|
||||
return response;
|
||||
}
|
||||
}, 2500);
|
||||
|
||||
/**
 * Truncates a string so the result never exceeds `maxLength` characters,
 * ending it with "..." when it had to be shortened.
 * @param str - The string to truncate.
 * @param maxLength - The maximum length of the truncated string.
 * @returns `str` unchanged when it already fits; otherwise a prefix of `str`
 * followed by "...", `maxLength` characters long in total. When `maxLength`
 * leaves no room for the ellipsis, a plain prefix of `str` is returned.
 */
function truncateString(str: string, maxLength: number): string {
  if (str.length <= maxLength) {
    return str;
  }

  const ellipsis = "...";
  if (maxLength <= ellipsis.length) {
    // No room for any text plus the ellipsis; keep as much of the string
    // as the limit allows.
    return str.substring(0, Math.max(0, maxLength));
  }

  // Reserve space for the ellipsis so the total stays within maxLength.
  return str.substring(0, maxLength - ellipsis.length) + ellipsis;
}
|
||||
|
||||
/**
|
||||
* Handles the action (download/delete) on a model.
|
||||
* @param model - The model name.
|
||||
* @param isAvailable - Boolean indicating if the model is available.
|
||||
*/
|
||||
async function handleModelAction(
|
||||
model: string,
|
||||
isAvailable: boolean,
|
||||
isDownloading: boolean = false,
|
||||
) {
|
||||
if (isDownloading) {
|
||||
await cancelDownload(model);
|
||||
async function onClick(model: string) {
|
||||
if (downloading) {
|
||||
return;
|
||||
}
|
||||
const url = `/api/model/${model}${isAvailable ? "" : "/download"}`;
|
||||
const method = isAvailable ? "DELETE" : "POST";
|
||||
|
||||
console.log("Before fetch invalidate");
|
||||
fetchWithInvalidate(url, { method }).then((response) => {
|
||||
console.log(`After fetch for ${url}`);
|
||||
downloading = true;
|
||||
const r = await fetch(`/api/model/${model}/download`, {
|
||||
method: "POST",
|
||||
});
|
||||
|
||||
if (method === "POST") {
|
||||
// Start tracking download progress for the model
|
||||
console.log(`Calling startDownload() for ${model}`);
|
||||
startDownload(model);
|
||||
if (r.ok) {
|
||||
await invalidate("/api/model/all");
|
||||
}
|
||||
downloading = false;
|
||||
}
|
||||
|
||||
/**
 * Deletes a model through the API and, when the request succeeds,
 * invalidates `/api/model/all`.
 *
 * @param model - The model name.
 */
async function deleteModel(model: string) {
  const response = await fetch(`/api/model/${model}`, { method: "DELETE" });
  if (!response.ok) {
    return;
  }

  await invalidate("/api/model/all");
}
|
||||
|
||||
/**
 * Polls the download progress of a model.
 *
 * While the reported progress is below 100 it reschedules itself every
 * 1500 ms. Otherwise it stops tracking: the name is removed from the
 * "downloadingModels" list in localStorage and from `downloadingModels`.
 *
 * @param modelName - The model name.
 */
async function checkDownloadProgress(modelName: string) {
  const progress = await fetchDownloadProgress(modelName);
  console.log(`Download status for ${modelName} ${progress}/100.0%`);

  if (progress < 100) {
    // Not finished yet: look again shortly.
    setTimeout(() => checkDownloadProgress(modelName), 1500);
    return;
  }

  console.log(`Stopping tracker for ${modelName}`);
  const storageKey = "downloadingModels";
  const stillDownloading = JSON.parse(
    localStorage.getItem(storageKey) || "[]",
  ).filter((name: string) => name !== modelName);
  localStorage.setItem(storageKey, JSON.stringify(stillDownloading));
  downloadingModels.delete(modelName);
}
|
||||
|
||||
/**
 * Buckets models by the part of their name that precedes the first "-".
 * @param models - Array of ModelStatus objects.
 * @returns An object mapping each prefix to its models, in input order.
 */
function groupModelsByPrefix(
  models: ModelStatus[],
): Record<string, ModelStatus[]> {
  const groups = {} as Record<string, ModelStatus[]>;

  for (const model of models) {
    const [prefix] = model.name.split("-");
    if (!groups[prefix]) {
      groups[prefix] = [];
    }
    groups[prefix].push(model);
  }

  return groups;
}
|
||||
|
||||
/**
 * Records the variant chosen for a model group in `selectedVariant`.
 * @param modelPrefix - The prefix of the model.
 * @param event - The change event; its target's `value` is stored.
 */
function handleVariantChange(modelPrefix: string, event: Event) {
  const { value } = event.target as HTMLSelectElement;
  selectedVariant[modelPrefix] = value;
}
|
||||
|
||||
/**
 * Picks the model to display for a group: the one whose name matches the
 * variant stored in `selectedVariant` for `prefix`, falling back to the first
 * model of the group.
 * @param models - Array of ModelStatus objects.
 * @param prefix - The prefix of the model group.
 * @returns The selected or default ModelStatus object.
 */
function getModelDetails(models: ModelStatus[], prefix: string): ModelStatus {
  const wantedName = selectedVariant[prefix];
  const chosen = models.find(({ name }) => name === wantedName);
  return chosen || models[0];
}
|
||||
|
||||
// Reactive statements to filter and group models based on search query
|
||||
$: filteredModels = data.models
|
||||
.filter(
|
||||
(model) =>
|
||||
!downloadedOrDownloadingModels.includes(model) &&
|
||||
model.name.toLowerCase().includes(searchQuery.toLowerCase()),
|
||||
)
|
||||
.sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
// Reactive statement with models grouped by prefix
|
||||
$: groupedModels = groupModelsByPrefix(filteredModels);
|
||||
|
||||
// Reactive statement to filter models that are downloaded or downloading
|
||||
$: downloadedOrDownloadingModels = data.models
|
||||
.filter((model) => model.progress > 0 || model.available)
|
||||
.sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
async function cancelDownload(modelName: string) {
|
||||
try {
|
||||
const response = await fetch(`/api/model/${modelName}/download/cancel`, {
|
||||
method: "POST",
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
console.log(`Download for ${modelName} cancelled successfully.`);
|
||||
// Update UI based on successful cancellation
|
||||
const modelIndex = data.models.findIndex((m) => m.name === modelName);
|
||||
if (modelIndex !== -1) {
|
||||
data.models[modelIndex].progress = 0;
|
||||
data.models[modelIndex].available = false;
|
||||
data.models = [...data.models]; // trigger reactivity
|
||||
}
|
||||
|
||||
// Remove model from tracking and local storage
|
||||
downloadingModels.delete(modelName);
|
||||
const currentDownloads = JSON.parse(
|
||||
localStorage.getItem("downloadingModels") || "[]",
|
||||
);
|
||||
const updatedDownloads = currentDownloads.filter(
|
||||
(model: string) => model !== modelName,
|
||||
);
|
||||
localStorage.setItem(
|
||||
"downloadingModels",
|
||||
JSON.stringify(updatedDownloads),
|
||||
);
|
||||
} else {
|
||||
console.error(`Failed to cancel download for ${modelName}`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error cancelling download for ${modelName}:`, error);
|
||||
}
|
||||
function toggleBar() {
|
||||
bar_visible = !bar_visible;
|
||||
barVisible.set(bar_visible);
|
||||
}
|
||||
onDestroy(unsubscribe);
|
||||
</script>
|
||||
|
||||
<div class="ml-12 pt-1">
|
||||
<div class="search-row">
|
||||
<input
|
||||
type="text"
|
||||
bind:value={searchQuery}
|
||||
class="input input-bordered flex-grow"
|
||||
placeholder="Search models..."
|
||||
on:input={(e) => {
|
||||
const target = e.target;
|
||||
if (target instanceof HTMLInputElement) {
|
||||
updateSearch(target.value);
|
||||
}
|
||||
}}
|
||||
{#if !bar_visible}
|
||||
<button
|
||||
class="absolute p-0 top-1 left-2 md:left-16 h-10 w-10 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
|
||||
on:click={toggleBar}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
fill="currentColor"
|
||||
class="w-4 h-4"
|
||||
>
|
||||
<path
|
||||
d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
|
||||
>
|
||||
</path>
|
||||
<path
|
||||
d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
|
||||
>
|
||||
</path>
|
||||
</svg>
|
||||
</button>
|
||||
{/if}
|
||||
<div class="flex flex-row items-center justify-center pt-5">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="24"
|
||||
height="24"
|
||||
>
|
||||
<path
|
||||
class="fill-warning"
|
||||
d="M9.504.43a1.516 1.516 0 0 1 2.437 1.713L10.415 5.5h2.123c1.57 0 2.346 1.909 1.22 3.004l-7.34 7.142a1.249 1.249 0 0 1-.871.354h-.302a1.25 1.25 0 0 1-1.157-1.723L5.633 10.5H3.462c-1.57 0-2.346-1.909-1.22-3.004L9.503.429Zm1.047 1.074L3.286 8.571A.25.25 0 0 0 3.462 9H6.75a.75.75 0 0 1 .694 1.034l-1.713 4.188 6.982-6.793A.25.25 0 0 0 12.538 7H9.25a.75.75 0 0 1-.683-1.06l2.008-4.418.003-.006a.036.036 0 0 0-.004-.009l-.006-.006-.008-.001c-.003 0-.006.002-.009.004Z"
|
||||
/>
|
||||
</svg>
|
||||
<h1 class="px-2 text-center text-3xl font-bold">Download a model</h1>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="24"
|
||||
height="24"
|
||||
>
|
||||
<path
|
||||
class="fill-warning"
|
||||
d="M9.504.43a1.516 1.516 0 0 1 2.437 1.713L10.415 5.5h2.123c1.57 0 2.346 1.909 1.22 3.004l-7.34 7.142a1.249 1.249 0 0 1-.871.354h-.302a1.25 1.25 0 0 1-1.157-1.723L5.633 10.5H3.462c-1.57 0-2.346-1.909-1.22-3.004L9.503.429Zm1.047 1.074L3.286 8.571A.25.25 0 0 0 3.462 9H6.75a.75.75 0 0 1 .694 1.034l-1.713 4.188 6.982-6.793A.25.25 0 0 0 12.538 7H9.25a.75.75 0 0 1-.683-1.06l2.008-4.418.003-.006a.036.036 0 0 0-.004-.009l-.006-.006-.008-.001c-.003 0-.006.002-.009.004Z"
|
||||
/>
|
||||
</svg>
|
||||
</div>
|
||||
|
||||
<h1 class="pb-5 pt-2 text-center text-xl font-light">
|
||||
Make sure you have enough disk space and available RAM to run them.<br />
|
||||
7B requires about 4.5GB of free RAM, 13B requires about 12GB free, 30B requires
|
||||
about 20GB free
|
||||
</h1>
|
||||
|
||||
<div class="mx-auto w-fit">
|
||||
<RefreshModal />
|
||||
</div>
|
||||
|
||||
<div class="mt-30 mx-auto flex flex-col">
|
||||
<div class="mx-auto w-full max-w-4xl">
|
||||
<div class="divider" />
|
||||
{#each data.models as model}
|
||||
<div class="my-5 flex flex-col content-around">
|
||||
<div
|
||||
class="mx-auto flex flex-row items-center justify-center text-3xl font-semibold"
|
||||
>
|
||||
<span class="mr-2">{model.name}</span>
|
||||
{#if model.available}
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 16 16"
|
||||
width="24"
|
||||
height="24"
|
||||
>
|
||||
<path
|
||||
class="fill-info"
|
||||
d="m9.585.52.929.68c.153.112.331.186.518.215l1.138.175a2.678 2.678 0 0 1 2.24 2.24l.174 1.139c.029.187.103.365.215.518l.68.928a2.677 2.677 0 0 1 0 3.17l-.68.928a1.174 1.174 0 0 0-.215.518l-.175 1.138a2.678 2.678 0 0 1-2.241 2.241l-1.138.175a1.17 1.17 0 0 0-.518.215l-.928.68a2.677 2.677 0 0 1-3.17 0l-.928-.68a1.174 1.174 0 0 0-.518-.215L3.83 14.41a2.678 2.678 0 0 1-2.24-2.24l-.175-1.138a1.17 1.17 0 0 0-.215-.518l-.68-.928a2.677 2.677 0 0 1 0-3.17l.68-.928c.112-.153.186-.331.215-.518l.175-1.14a2.678 2.678 0 0 1 2.24-2.24l1.139-.175c.187-.029.365-.103.518-.215l.928-.68a2.677 2.677 0 0 1 3.17 0ZM7.303 1.728l-.927.68a2.67 2.67 0 0 1-1.18.489l-1.137.174a1.179 1.179 0 0 0-.987.987l-.174 1.136a2.677 2.677 0 0 1-.489 1.18l-.68.928a1.18 1.18 0 0 0 0 1.394l.68.927c.256.348.424.753.489 1.18l.174 1.137c.078.509.478.909.987.987l1.136.174a2.67 2.67 0 0 1 1.18.489l.928.68c.414.305.979.305 1.394 0l.927-.68a2.67 2.67 0 0 1 1.18-.489l1.137-.174a1.18 1.18 0 0 0 .987-.987l.174-1.136a2.67 2.67 0 0 1 .489-1.18l.68-.928a1.176 1.176 0 0 0 0-1.394l-.68-.927a2.686 2.686 0 0 1-.489-1.18l-.174-1.137a1.179 1.179 0 0 0-.987-.987l-1.136-.174a2.677 2.677 0 0 1-1.18-.489l-.928-.68a1.176 1.176 0 0 0-1.394 0ZM11.28 6.78l-3.75 3.75a.75.75 0 0 1-1.06 0L4.72 8.78a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018L7 8.94l3.22-3.22a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
|
||||
/>
|
||||
</svg>
|
||||
{/if}
|
||||
</div>
|
||||
<p class="mx-auto pb-2 text-xl font-light">
|
||||
({model.size / 1e9}GB)
|
||||
</p>
|
||||
{#if model.progress}
|
||||
<div class="mx-auto my-5 w-56 justify-center">
|
||||
<p class="w-full text-center font-light">{model.progress}%</p>
|
||||
<progress
|
||||
class="progress-primary progress mx-auto h-5 w-56"
|
||||
value={model.progress}
|
||||
max="100"
|
||||
/>
|
||||
</div>
|
||||
{/if}
|
||||
{#if model.available}
|
||||
<button
|
||||
on:click={() => deleteModel(model.name)}
|
||||
class="btn-warning btn-outline btn mx-auto">Delete</button
|
||||
>
|
||||
{:else}
|
||||
<button
|
||||
on:click={() => onClick(model.name)}
|
||||
class="btn-primary btn mx-auto"
|
||||
class:model.available={() => "btn-outline"}
|
||||
disabled={model.available ||
|
||||
!!(model.progress && model.progress > 0)}
|
||||
>
|
||||
Download
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="divider" />
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="models-grid grid">
|
||||
{#each downloadedOrDownloadingModels as model}
|
||||
<div class="model card card-bordered">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title">{truncateString(model.name, 24)}</h2>
|
||||
<div class="model-details">
|
||||
{#if model.progress < 100}
|
||||
<div class="progress-bar">
|
||||
<progress value={model.progress} max="100"></progress> / {model.progress}%
|
||||
</div>
|
||||
{/if}
|
||||
{#if model.progress >= 100}
|
||||
<p>Size: {(model.size / 1e9).toFixed(2)} GB</p>
|
||||
<button
|
||||
on:click={() => handleModelAction(model.name, model.available)}
|
||||
class="btn btn-error mt-2"
|
||||
>
|
||||
<Icon icon="mdi:trash" width="32" height="32" />
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
on:click={() =>
|
||||
handleModelAction(
|
||||
model.name,
|
||||
model.available,
|
||||
model.progress > 0 && model.progress < 100,
|
||||
)}
|
||||
class="btn btn-error mt-2"
|
||||
>
|
||||
<Icon icon="mdi:cancel" width="32" height="32" />
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
|
||||
<div class="models-grid grid">
|
||||
{#each Object.entries(groupedModels) as [prefix, models]}
|
||||
<div class="model-group card card-bordered">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title">{truncateString(prefix, 24)}</h2>
|
||||
<div class="model-details">
|
||||
{#if models.length > 1}
|
||||
<select
|
||||
class="select-bordered select w-full"
|
||||
bind:value={selectedVariant[prefix]}
|
||||
on:change={(event) => handleVariantChange(prefix, event)}
|
||||
>
|
||||
{#each models as model}
|
||||
<option value={model.name}
|
||||
>{truncateString(model.name, 32)}</option
|
||||
>
|
||||
{/each}
|
||||
</select>
|
||||
{/if}
|
||||
|
||||
{#if models.length === 1 || selectedVariant[prefix]}
|
||||
{@const model = getModelDetails(models, prefix)}
|
||||
{#if models.length === 1}
|
||||
<h3>{truncateString(model.name, 24)}</h3>
|
||||
{/if}
|
||||
<p>Size: {(model.size / 1e9).toFixed(2)} GB</p>
|
||||
<button
|
||||
on:click={() => handleModelAction(model.name, model.available)}
|
||||
class="btn btn-primary mt-2"
|
||||
>
|
||||
<Icon icon="ic:baseline-download" width="32" height="32" />
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
|
||||
@ -4,7 +4,7 @@ interface ModelStatus {
|
||||
name: string;
|
||||
size: number;
|
||||
available: boolean;
|
||||
progress: number;
|
||||
progress?: number;
|
||||
}
|
||||
|
||||
export const load: PageLoad = async ({ fetch }) => {
|
||||
|
||||
@ -1,165 +0,0 @@
|
||||
<script lang="ts">
|
||||
import { onMount } from "svelte";
|
||||
import { goto } from "$app/navigation";
|
||||
let username = "";
|
||||
let secret = "";
|
||||
let full_name = "";
|
||||
let email = "";
|
||||
let auth_type = 1;
|
||||
let error = "";
|
||||
let success = "";
|
||||
|
||||
/**
 * Submit handler for the registration form.
 *
 * Clears any previous status messages, POSTs the form fields to
 * `/api/user/create`, and on success signs the new user in via
 * `authAfterCreate` before navigating to `/account`. On failure the
 * `error` message is set from the response's `detail` field when present.
 *
 * @param event - The form submit event; its default action is suppressed.
 */
async function handleSubmit(event: Event) {
  event.preventDefault();
  error = "";
  success = "";

  try {
    const response = await fetch("/api/user/create", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        username,
        secret,
        full_name,
        email,
        auth_type,
      }),
    });

    if (response.ok) {
      success = "User created successfully!";
      await authAfterCreate(event);
      goto("/account");
      return;
    }

    // An error response is not guaranteed to carry a JSON body; fall back to
    // the generic message instead of throwing while parsing it.
    const data = await response.json().catch(() => null);
    error = data?.detail || "An error occurred";
  } catch (err) {
    // Network-level failures reject the fetch promise; surface them in the
    // form the same way authAfterCreate does rather than leaving them unhandled.
    error = err instanceof Error ? err.message : "An unknown error occurred";
  }
}
|
||||
|
||||
/**
 * Sign in with the username and secret currently held by the form.
 *
 * Sends the credentials form-encoded to `/api/auth/token`. A successful
 * response navigates to `/` with `invalidateAll`; otherwise `error` is set
 * from the response's `detail` field or from the thrown error's message.
 *
 * @param event - The originating event; its default action is suppressed.
 */
async function authAfterCreate(event: Event) {
  event.preventDefault();

  try {
    const credentials = new URLSearchParams({
      username,
      password: secret,
    });
    const response = await fetch("/api/auth/token", {
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
      },
      body: credentials,
    });

    if (!response.ok) {
      const failure = await response.json();
      error = failure.detail || "Login failed";
      return;
    }

    goto("/", { invalidateAll: true });
  } catch (err) {
    if (err instanceof Error) {
      error = err.message;
    } else {
      error = "An unknown error occurred";
    }
  }
}
|
||||
</script>
|
||||
|
||||
<main>
|
||||
<div class="card-group">
|
||||
<div class="card">
|
||||
<div class="card-title p-3 text-3xl justify-center font-bold">
|
||||
Register a new user
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<form on:submit={handleSubmit}>
|
||||
<div class="form-control">
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Username"
|
||||
bind:value={username}
|
||||
required
|
||||
/>
|
||||
</div>
|
||||
<div class="form-control">
|
||||
<input
|
||||
type="password"
|
||||
placeholder="Password"
|
||||
bind:value={secret}
|
||||
required
|
||||
/>
|
||||
</div>
|
||||
|
||||
{#if error}
|
||||
<p class="error-message">{error}</p>
|
||||
{/if}
|
||||
{#if success}
|
||||
<p class="success-message">{success}</p>
|
||||
{/if}
|
||||
<button class="btn" type="submit">Submit</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card">
|
||||
<div class="card-title p-3 text-3xl justify-center font-bold">
|
||||
Or link an account (comming soon)
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<button name="google-btn" class="btn" disabled={true}>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
viewBox="0 0 16 16"
|
||||
>
|
||||
<path
|
||||
d="M15.545 6.558a9.4 9.4 0 0 1 .139 1.626c0 2.434-.87 4.492-2.384 5.885h.002C11.978 15.292 10.158 16 8 16A8 8 0 1 1 8 0a7.7 7.7 0 0 1 5.352 2.082l-2.284 2.284A4.35 4.35 0 0 0 8 3.166c-2.087 0-3.86 1.408-4.492 3.304a4.8 4.8 0 0 0 0 3.063h.003c.635 1.893 2.405 3.301 4.492 3.301 1.078 0 2.004-.276 2.722-.764h-.003a3.7 3.7 0 0 0 1.599-2.431H8v-3.08z"
|
||||
/>
|
||||
</svg>
|
||||
<span>Link Google Account</span>
|
||||
</button>
|
||||
<button name="reddit-btn" class="btn" disabled={true}>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
viewBox="0 0 16 16"
|
||||
>
|
||||
<path
|
||||
d="M6.167 8a.83.83 0 0 0-.83.83c0 .459.372.84.83.831a.831.831 0 0 0 0-1.661m1.843 3.647c.315 0 1.403-.038 1.976-.611a.23.23 0 0 0 0-.306.213.213 0 0 0-.306 0c-.353.363-1.126.487-1.67.487-.545 0-1.308-.124-1.671-.487a.213.213 0 0 0-.306 0 .213.213 0 0 0 0 .306c.564.563 1.652.61 1.977.61zm.992-2.807c0 .458.373.83.831.83s.83-.381.83-.83a.831.831 0 0 0-1.66 0z"
|
||||
/>
|
||||
<path
|
||||
d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0m-3.828-1.165c-.315 0-.602.124-.812.325-.801-.573-1.9-.945-3.121-.993l.534-2.501 1.738.372a.83.83 0 1 0 .83-.869.83.83 0 0 0-.744.468l-1.938-.41a.2.2 0 0 0-.153.028.2.2 0 0 0-.086.134l-.592 2.788c-1.24.038-2.358.41-3.17.992-.21-.2-.496-.324-.81-.324a1.163 1.163 0 0 0-.478 2.224q-.03.17-.029.353c0 1.795 2.091 3.256 4.669 3.256s4.668-1.451 4.668-3.256c0-.114-.01-.238-.029-.353.401-.181.688-.592.688-1.069 0-.65-.525-1.165-1.165-1.165"
|
||||
/>
|
||||
</svg>
|
||||
<span>Link Reddit Account</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card">
|
||||
<div class="card-title pt-3 text-3xl justify-center font-bold">
|
||||
Already have an account?
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<button name="login-btn" class="btn" on:click={() => goto("/login")}>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="18"
|
||||
height="18"
|
||||
fill="currentColor"
|
||||
class="mr-3"
|
||||
viewBox="0 0 16 16"
|
||||
>
|
||||
<path
|
||||
d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m1.679-4.493-1.335 2.226a.75.75 0 0 1-1.174.144l-.774-.773a.5.5 0 0 1 .708-.708l.547.548 1.17-1.951a.5.5 0 1 1 .858.514M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
|
||||
/>
|
||||
<path
|
||||
d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
|
||||
/>
|
||||
</svg>
|
||||
<span>Login Instead</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
@ -1,5 +1,5 @@
|
||||
import adapter from "@sveltejs/adapter-static";
|
||||
import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
|
||||
import { vitePreprocess } from "@sveltejs/kit/vite";
|
||||
|
||||
/** @type {import('@sveltejs/kit').Config} */
|
||||
const config = {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user