Add support for dynamic threads

2023-09-19 23:06:35 -04:00
61 changed files with 4950 additions and 6740 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -12,6 +12,7 @@ CONTRIBUTING.md
 Dockerfile
 docker-compose.yml
 docker-compose.dev.yml
+/vendor
 .vscode/

 **/node_modules/
--- a/.github/ISSUE_TEMPLATE/sweep-template.yml
+++ b/.github/ISSUE_TEMPLATE/sweep-template.yml
@ -1,15 +0,0 @@
-name: Sweep Issue
-title: 'Sweep: '
-description: For small bugs, features, refactors, and tests to be handled by Sweep, an AI-powered junior developer.
-labels: sweep
-body:
-  - type: textarea
-    id: description
-    attributes:
-      label: Details
-      description: Tell Sweep where and what to edit and provide enough context for a new developer to the codebase
-      placeholder: |
-        Unit Tests: Write unit tests for <FILE>. Test each function in the file. Make sure to test edge cases.
-        Bugs: The bug might be in <FILE>. Here are the logs: ...
-        Features: the new endpoint should use the ... class from <FILE> because it contains ... logic.
-        Refactors: We are migrating this function to ... version because ...
--- a/.github/release-drafter.yml
+++ b/.github/release-drafter.yml
@ -13,17 +13,12 @@ categories:
  - title: '📚 Documentation:'
    labels:
      - '📒 Documentation'
-  - title: '🧠 Models'
-    labels:
-      - '🧠 Models'
  - title: '🧹 Updates:'
    labels:
      - '🧹 Updates'
-  - title: '🤖 Dependencies:'
-    labels:
      - '🤖 Dependencies'
 change-template: '- $TITLE (#$NUMBER)'
-change-title-escapes: '\<*_&'
+change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
 exclude-contributors:
  - dependabot
  - dependabot[bot]
@ -43,7 +38,6 @@ version-resolver:
      - '☢️ Bug'
      - '🤖 Dependencies'
      - '🧹 Updates'
-      - '🧠 Models'
  default: patch
 template: |
    $CHANGES
@ -62,18 +56,15 @@ autolabeler:
    - '*.md'
  title:
    - '/(docs|doc:|\[doc\]|typos|comment|documentation)/i'
- label: '🧠 Models'
-  files:
-    - 'api/src/serge/data/*.json'
 - label: '☢️ Bug'
  title:
-    - '/(fix|bug|missing|correct)/i'
+    - '/(fix|race|bug|missing|correct)/i'
 - label: '🧹 Updates'
  title: 
-    - '/(improve|update|migrate|refactor|deprecated|remove|unused|test)/i'
+    - '/(improve|update|update|refactor|deprecated|remove|unused|test)/i'
 - label: '🤖 Dependencies'
  title:
    - '/(bump|dependencies)/i'
 - label: '✏️ Feature'
  title:
-    - '/(feature|feat|create|implement)/i'
+    - '/(feature|feat|create|implement|add)/i'
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -1,4 +1,4 @@
-name: CI Checks
+name: CI/CD Process

 on:
  push:
@ -49,10 +49,9 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"
-          cache: 'pip' # caching pip dependencies
      - name: Install dependencies with poetry
        working-directory: ./api
        run: |
@ -61,15 +60,14 @@ jobs:
      - name: Run unit tests
        working-directory: ./api
        run: |
-          poetry run python -m pytest -v --color=yes
+          poetry run python -m pytest
  check-sh-files:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
-      - uses: luizm/action-sh-checker@v0.8.0
+      - uses: luizm/action-sh-checker@v0.7.0
        env:
          SHFMT_OPTS: "-s"
-          SHELLCHECK_OPTS: "-P scripts/ -e SC1091"
        with:
          sh_checker_only_diff: false
          sh_checker_comment: false
@ -77,25 +75,24 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"
-          cache: 'pip' # caching pip dependencies
      - name: Run ruff check
        uses: chartboost/ruff-action@v1
        with:
          src: "./api"
-          args: "check --verbose"
-      - name: Run ruff format check
-        uses: chartboost/ruff-action@v1
+          args: "--verbose"
+      - name: Run black check
+        uses: psf/black@stable
        with:
+          options: "--check --diff --verbose"
          src: "./api"
-          args: "format --check --verbose"
  check-web-code:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@v3
        with:
          node-version: '20'
      - name: Install Web
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -1,4 +1,4 @@
-name: Docker
+name: CI/CD Docker Build/Publish

 on:
  push:
@ -58,7 +58,7 @@ jobs:
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and Publish Docker Image
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ github.event_name != 'pull_request' }}
--- a/.github/workflows/helm-test.yml
+++ b/.github/workflows/helm-test.yml
@ -1,4 +1,4 @@
-name: Helm
+name: Lint and Test Helm Chart

 on:
  push:
@ -37,17 +37,17 @@ jobs:
          fetch-depth: 0

      - name: Set up Helm
-        uses: azure/setup-helm@v4
+        uses: azure/setup-helm@v3
        with:
          version: v3.12.3

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
          check-latest: true

      - name: Set up chart-testing
-        uses: helm/chart-testing-action@v2.6.1
+        uses: helm/chart-testing-action@v2.4.0

      - name: Run chart-testing (list-changed)
        id: list-changed
@ -63,7 +63,7 @@ jobs:

      - name: Create kind cluster
        if: steps.list-changed.outputs.changed == 'true'
-        uses: helm/kind-action@v1.10.0
+        uses: helm/kind-action@v1.8.0

      - name: Run chart-testing (install)
        if: steps.list-changed.outputs.changed == 'true'
--- a/.github/workflows/model-check.yml
+++ b/.github/workflows/model-check.yml
@ -1,4 +1,4 @@
-name: LLM Healthcheck
+name: LLM Models Healthcheck

 on:
  push:
@ -34,7 +34,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Install dependencies with poetry
@ -45,4 +45,4 @@ jobs:
      - name: Run model health check
        working-directory: ./api
        run: |
-          poetry run python -m pytest -v --color=yes test/healthcheck_models.py
+          poetry run python -m pytest test/healthcheck_models.py
--- a/.github/workflows/release-drafter.yml
+++ b/.github/workflows/release-drafter.yml
@ -17,6 +17,6 @@ jobs:
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
-      - uses: release-drafter/release-drafter@v6
+      - uses: release-drafter/release-drafter@v5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@ -12,4 +12,3 @@ api/static/*
 **/node_modules/
 **/dist
 **/.mypy_cache/
-.vscode
--- a/Dockerfile
+++ b/Dockerfile
@ -29,14 +29,11 @@ COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
 COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
 COPY ./api /usr/src/app/api
 COPY scripts/deploy.sh /usr/src/app/deploy.sh
-COPY scripts/serge.env /usr/src/app/serge.env
-COPY vendor/requirements.txt /usr/src/app/requirements.txt

 # Install api dependencies
 RUN apt-get update \
-    && apt-get install -y --no-install-recommends dumb-init libgomp1 musl-dev \
+    && apt-get install -y --no-install-recommends cmake build-essential dumb-init curl \
    && pip install --no-cache-dir ./api \
-    && pip install -r /usr/src/app/requirements.txt \
    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* \
    && chmod 755 /usr/src/app/deploy.sh \
    && chmod 755 /usr/local/bin/redis-server \
@ -45,8 +42,7 @@ RUN apt-get update \
    && mkdir -p /data/db \
    && mkdir -p /usr/src/app/weights \
    && echo "appendonly yes" >> /etc/redis/redis.conf \
-    && echo "dir /data/db/" >> /etc/redis/redis.conf \ 
-    && ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
+    && echo "dir /data/db/" >> /etc/redis/redis.conf

 EXPOSE 8008
 ENTRYPOINT ["/usr/bin/dumb-init", "--"]
--- a/Dockerfile.dev
+++ b/Dockerfile.dev
@ -17,15 +17,13 @@ ENV NODE_ENV='development'

 # Install dependencies
 RUN apt-get update \
-    && apt-get install -y --no-install-recommends dumb-init musl-dev
+    && apt-get install -y --no-install-recommends cmake build-essential dumb-init curl

 # Copy database, source code, and scripts
 COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
 COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
 COPY --from=node_base /usr/local /usr/local
 COPY scripts/dev.sh /usr/src/app/dev.sh
-COPY scripts/serge.env /usr/src/app/serge.env
-COPY vendor/requirements.txt /usr/src/app/requirements.txt
 COPY ./web/package.json ./web/package-lock.json ./

 RUN npm ci \
@ -36,8 +34,7 @@ RUN npm ci \
    && mkdir -p /data/db \
    && mkdir -p /usr/src/app/weights \
    && echo "appendonly yes" >> /etc/redis/redis.conf \
-    && echo "dir /data/db/" >> /etc/redis/redis.conf \
-    && ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
+    && echo "dir /data/db/" >> /etc/redis/redis.conf

 EXPOSE 8008
 EXPOSE 9124
--- a/4
+++ b/4
@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2023-present Nathan Sarrazin and Contributors
+Copyright (c) 2023 Nathan Sarrazin and contributors

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
--- a/LICENSE-APACHE
+++ b/LICENSE-APACHE
@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "{}"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright 2023 Nathan Sarrazin and contributors
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--- a/README.md
+++ b/README.md
@ -3,7 +3,7 @@
 ![License](https://img.shields.io/github/license/serge-chat/serge)
 [![Discord](https://img.shields.io/discord/1088427963801948201?label=Discord)](https://discord.gg/62Hc6FEYQH)

-Serge is a chat interface crafted with [llama.cpp](https://github.com/ggerganov/llama.cpp) for running GGUF models. No API keys, entirely self-hosted!
+Serge is a chat interface crafted with [llama.cpp](https://github.com/ggerganov/llama.cpp) for running Alpaca models. No API keys, entirely self-hosted!

 - 🌐 **SvelteKit** frontend
 - 💾 **[Redis](https://github.com/redis/redis)** for storing chat history & parameters
@ -43,24 +43,13 @@ volumes:
  datadb:
 ```

-Then, just visit http://localhost:8008, You can find the API documentation at http://localhost:8008/api/docs
+Then, just visit http://localhost:8008/. You can find the API documentation at http://localhost:8008/api/docs

-### 🌍 Environment Variables
+## 🖥️ Windows Setup

-The following Environment Variables are available:
+Ensure you have Docker Desktop installed, WSL2 configured, and enough free RAM to run models. 

-| Variable Name         | Description                                             | Default Value                        |
-|-----------------------|---------------------------------------------------------|--------------------------------------|
-| `SERGE_DATABASE_URL`  | Database connection string                              | `sqlite:////data/db/sql_app.db`      |
-| `SERGE_JWT_SECRET`    | Key for auth token encryption. Use a random string      | `uF7FGN5uzfGdFiPzR`                   |
-| `SERGE_SESSION_EXPIRY`| Duration in minutes before a user must reauthenticate   | `60`                                 |
-| `NODE_ENV`            | Node.js running environment                   | `production`                        |
-
-## 🖥️ Windows
-
-Ensure you have Docker Desktop installed, WSL2 configured, and enough free RAM to run models.
-
-## ☁️ Kubernetes
+## ☁️ Kubernetes & Docker Compose Setup

 Instructions for setting up Serge on Kubernetes can be found in the [wiki](https://github.com/serge-chat/serge/wiki/Integrating-Serge-in-your-orchestration#kubernetes-example).

@ -68,57 +57,79 @@ Instructions for setting up Serge on Kubernetes can be found in the [wiki](https

 | Category      | Models |
 |:-------------:|:-------|
-| **Alfred** | 40B-1023 |
-| **BioMistral** | 7B |
-| **Code** | 13B, 33B |
-| **CodeLLaMA** | 7B, 7B-Instruct, 7B-Python, 13B, 13B-Instruct, 13B-Python, 34B, 34B-Instruct, 34B-Python |
-| **Codestral** | 22B v0.1 |
-| **Gemma** | 2B, 1.1-2B-Instruct, 7B, 1.1-7B-Instruct |
-| **Gorilla** | Falcon-7B-HF-v0, 7B-HF-v1, Openfunctions-v1, Openfunctions-v2 |
-| **Falcon** | 7B, 7B-Instruct, 40B, 40B-Instruct |
-| **LLaMA 2** | 7B, 7B-Chat, 7B-Coder, 13B, 13B-Chat, 70B, 70B-Chat, 70B-OASST |
-| **LLaMA 3** | 11B-Instruct, 13B-Instruct, 16B-Instruct |
-| **LLaMA Pro** | 8B, 8B-Instruct |
-| **Med42** | 70B |
-| **Medalpaca** | 13B |
-| **Medicine** | Chat, LLM |
-| **Meditron** | 7B, 7B-Chat, 70B |
-| **Meta-LlaMA-3** | 8B, 8B-Instruct, 70B, 70B-Instruct |
-| **Mistral** | 7B-V0.1, 7B-Instruct-v0.2, 7B-OpenOrca |
-| **MistralLite** | 7B |
-| **Mixtral** | 8x7B-v0.1, 8x7B-Dolphin-2.7, 8x7B-Instruct-v0.1 |
-| **Neural-Chat** | 7B-v3.3 | 
-| **Notus** | 7B-v1 |
-| **Notux** | 8x7b-v1 |
-| **Nous-Hermes 2** | Mistral-7B-DPO, Mixtral-8x7B-DPO, Mistral-8x7B-SFT |
-| **OpenChat** | 7B-v3.5-1210 |
-| **OpenCodeInterpreter** | DS-6.7B, DS-33B, CL-7B, CL-13B, CL-70B |
-| **OpenLLaMA** | 3B-v2, 7B-v2, 13B-v2 |
-| **Orca 2** | 7B, 13B |
-| **Phi 2** | 2.7B |
-| **Phi 3** | mini-4k-instruct, medium-4k-instruct, medium-128k-instruct |
-| **Python Code** | 13B, 33B |
-| **PsyMedRP** | 13B-v1, 20B-v1 |
-| **Starling LM** | 7B-Alpha |
-| **SOLAR** | 10.7B-v1.0, 10.7B-instruct-v1.0 |
-| **TinyLlama** | 1.1B |
-| **Vicuna** | 7B-v1.5, 13B-v1.5, 33B-v1.3, 33B-Coder |
-| **WizardLM** | 2-7B, 13B-v1.2, 70B-v1.0 |
-| **Zephyr** | 3B, 7B-Alpha, 7B-Beta |
+| **Alpaca 🦙** | Alpaca-LoRA-65B, GPT4-Alpaca-LoRA-30B |
+| **Chronos 🌑**| Chronos-13B, Chronos-33B, Chronos-Hermes-13B |
+| **GPT4All 🌍**| GPT4All-13B |
+| **Koala 🐨**  | Koala-7B, Koala-13B |
+| **LLaMA 🦙**  | FinLLaMA-33B, LLaMA-Supercot-30B, LLaMA2 7B, LLaMA2 13B, LLaMA2 70B |
+| **Lazarus 💀**| Lazarus-30B |
+| **Nous 🧠**   | Nous-Hermes-13B |
+| **OpenAssistant 🎙️** | OpenAssistant-30B |
+| **Orca 🐬**   | Orca-Mini-v2-7B, Orca-Mini-v2-13B, OpenOrca-Preview1-13B |
+| **Samantha 👩**| Samantha-7B, Samantha-13B, Samantha-33B |
+| **Vicuna 🦙** | Stable-Vicuna-13B, Vicuna-CoT-7B, Vicuna-CoT-13B, Vicuna-v1.1-7B, Vicuna-v1.1-13B, VicUnlocked-30B, VicUnlocked-65B |
+| **Wizard 🧙** | Wizard-Mega-13B, WizardLM-Uncensored-7B, WizardLM-Uncensored-13B, WizardLM-Uncensored-30B, WizardCoder-Python-13B-V1.0 |

-Additional models can be requested by opening a GitHub issue. Other models are also available at [Serge Models](https://github.com/Smartappli/serge-models).
+Additional weights can be added to the `serge_weights` volume using `docker cp`:
+
+```bash
+docker cp ./my_weight.bin serge:/usr/src/app/weights/
+```

 ## ⚠️ Memory Usage

-LLaMA will crash if you don't have enough available memory for the model
+LLaMA will crash if you don't have enough available memory for the model:
+
+| Model       | Max RAM Required |
+|-------------|------------------|
+| 7B          | 4.5GB            |
+| 7B-q2_K     | 5.37GB           |
+| 7B-q3_K_L   | 6.10GB           |
+| 7B-q4_1     | 6.71GB           |
+| 7B-q4_K_M   | 6.58GB           |
+| 7B-q5_1     | 7.56GB           |
+| 7B-q5_K_M   | 7.28GB           |
+| 7B-q6_K     | 8.03GB           |
+| 7B-q8_0     | 9.66GB           |
+| 13B         | 12GB             |
+| 13B-q2_K    | 8.01GB           |
+| 13B-q3_K_L  | 9.43GB           |
+| 13B-q4_1    | 10.64GB          |
+| 13B-q4_K_M  | 10.37GB          |
+| 13B-q5_1    | 12.26GB          |
+| 13B-q5_K_M  | 11.73GB          |
+| 13B-q6_K    | 13.18GB          |
+| 13B-q8_0    | 16.33GB          |
+| 33B         | 20GB             |
+| 33B-q2_K    | 16.21GB          |
+| 33B-q3_K_L  | 19.78GB          |
+| 33B-q4_1    | 22.83GB          |
+| 33B-q4_K_M  | 22.12GB          |
+| 33B-q5_1    | 26.90GB          |
+| 33B-q5_K_M  | 25.55GB          |
+| 33B-q6_K    | 29.19GB          |
+| 33B-q8_0    | 37.06GB          |
+| 65B         | 50GB             |
+| 65B-q2_K    | 29.95GB          |
+| 65B-q3_K_L  | 37.15GB          |
+| 65B-q4_1    | 43.31GB          |
+| 65B-q4_K_M  | 41.85GB          |
+| 65B-q5_1    | 51.47GB          |
+| 65B-q5_K_M  | 48.74GB          |
+| 65B-q6_K    | 56.06GB          |
+| 65B-q8_0    | 71.87GB          |

 ## 💬 Support

 Need help? Join our [Discord](https://discord.gg/62Hc6FEYQH)

+## ⭐️ Stargazers
+
+<img src="https://starchart.cc/serge-chat/serge.svg" alt="Stargazers over time" style="max-width: 100%">
+
 ## 🧾 License

-[Nathan Sarrazin](https://github.com/nsarrazin) and [Contributors](https://github.com/serge-chat/serge/graphs/contributors). `Serge` is free and open-source software licensed under the [MIT License](https://github.com/serge-chat/serge/blob/main/LICENSE-MIT) and [Apache-2.0](https://github.com/serge-chat/serge/blob/main/LICENSE-APACHE).
+[Nathan Sarrazin](https://github.com/nsarrazin) and [Contributors](https://github.com/serge-chat/serge/graphs/contributors). `Serge` is free and open-source software licensed under the [MIT License](https://github.com/serge-chat/serge/blob/master/LICENSE).

 ## 🤝 Contributing

@ -127,32 +138,5 @@ If you discover a bug or have a feature idea, feel free to open an issue or PR.
 To run Serge in development mode:
 ```bash
 git clone https://github.com/serge-chat/serge.git
-cd serge/
-docker compose -f docker-compose.dev.yml up --build
-```
-
-The solution will accept a python debugger session on port 5678. Example launch.json for VSCode:
-
-```json
-{
-    "version": "0.2.0",
-    "configurations": [
-        {
-            "name": "Remote Debug",
-            "type": "python",
-            "request": "attach",
-            "connect": {
-                "host": "localhost",
-                "port": 5678
-            },
-            "pathMappings": [
-                {
-                    "localRoot": "${workspaceFolder}/api",
-                    "remoteRoot": "/usr/src/app/api/"
-                }
-            ],
-            "justMyCode": false
-        }
-    ]
-}
-```
+docker compose -f docker-compose.dev.yml up -d --build
+```
--- a/api/.dockerignore
+++ b/api/.dockerignore
@ -1,2 +1 @@
-./weights/*.bin**
-./weights/*.gguf**
+./weights/*.bin**
--- a/api/.gitignore
+++ b/api/.gitignore
@ -157,6 +157,4 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-
-*.db
+#.idea/
--- a/api/poetry.lock
+++ b/api/poetry.lock
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@ -21,29 +21,53 @@ requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry.dependencies]
-python=">=3.10,<4.0"
+python=">=3.9,<4.0"
 asyncio = "^3.4.3"
-packaging = "^24.1"
-pydantic = "^1.10.17"
-sse-starlette = "^1.8.2"
+packaging = "^23.1"
+pydantic = "^1.10.12"
+python-dotenv = "^1.0.0"
+python-multipart = "^0.0.6"
+pyyaml = "^6.0"
+rfc3986 = "^2.0.0"
+sentencepiece = "^0.1.99"
+sniffio = "^1.3.0"
+sse-starlette = "^1.6.5"
 starlette = "^0.26.1"
-typing-extensions = "^4.12.2"
-urllib3 = "^2.2.2"
+toml = "^0.10.2"
+tqdm = "^4.66.1"
+typing-extensions = "^4.8.0"
+ujson = "^5.8.0"
+urllib3 = "^2.0.4"
+uvicorn = "^0.23.2"
+uvloop = "^0.17.0"
+watchfiles = "^0.20.0"
+websockets = "^11.0"
+anyio = "^4.0.0"
+certifi = "^2023.7.22"
+charset-normalizer = "^3.2.0"
+click = "^8.1.7"
+email-validator = "^2.0.0"
 fastapi = "^0.95.1"
-huggingface-hub = "^0.24.5"
-requests = "^2.32.3"
+filelock = "^3.12.4"
+h11 = "^0.14.0"
+httpcore = "^0.18.0"
+httptools = "^0.6.0"
+huggingface-hub = "^0.16.4"
+idna = "^3.4"
+itsdangerous = "^2.1.2"
+jinja2 = "^3.1.2"
+markupsafe = "^2.1.3"
+motor = "^3.3.1"
+orjson = "^3.9.7"
+dnspython = "^2.4.2"
+lazy-model = "^0.2.0"
+requests = "^2.31.0"
+numpy = "^1.25.2"
 langchain = "^0.0.180"
 loguru = "^0.7.2"
-redis = {extras = ["hiredis"], version = "^5.0.8"}
-pytest = "^8.3.2"
-hypercorn = {extras = ["trio"], version = "^0.17.3"}
+redis = {extras = ["hiredis"], version = "^5.0.0"}
+pytest = "^7.4.2"

-pyjwt = "^2.9.0"
-python-jose = {extras = ["cryptography"], version = "^3.3.0"}
-aiofiles = "^24.1.0"
-python-multipart = "^0.0.9"
-debugpy = "^1.8.5"
-sqlalchemy = "^2.0.32"
 [tool.ruff]
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
 select = ["E", "F"]
@ -94,3 +118,6 @@ target-version = "py311"
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 10

+[tool.black]
+line-length = 150
+target-version = ['py311']
--- a/api/src/serge/crud.py
+++ b/api/src/serge/crud.py
@ -1,110 +0,0 @@
-import logging
-import uuid
-from typing import List, Optional
-
-from serge.schema import user as user_schema
-from serge.utils.security import get_password_hash
-from sqlalchemy.orm import Session
-
-from serge.models import user as user_model
-
-
-def get_user(db: Session, username: str) -> Optional[user_schema.User]:
-    return Mappers.user_db_to_view(
-        db.query(user_model.User).filter(user_model.User.username == username).first(),
-        include_auth=True,
-    )
-
-
-def get_user_by_email(db: Session, email: str) -> Optional[user_schema.User]:
-    return Mappers.user_db_to_view(db.query(user_model.User).filter(user_model.User.email == email).first())
-
-
-def get_users(db: Session, skip: int = 0, limit: int = 100) -> List[user_schema.User]:
-    return [Mappers.user_db_to_view(u) for u in db.query(user_model.User).offset(skip).limit(limit).all()]
-
-
-def create_user(db: Session, ua: user_schema.UserAuth) -> Optional[user_schema.User]:
-    # Check already exists
-    if get_user(db, ua.username):
-        logging.error(f"Tried to create new user, but already exists: {ua.username}")
-        return None
-
-    match ua.auth_type:
-        case 1:
-            ua.secret = get_password_hash(ua.secret)
-        case _:  # Todo: More auth types
-            return None
-
-    db_user, db_user_auth = Mappers.user_view_to_db(None, ua)
-    db.add(db_user_auth)
-    db.add(db_user)
-    db.commit()
-    return Mappers.user_db_to_view(db_user)
-
-
-def update_user(db: Session, u: user_schema.User) -> Optional[user_schema.User]:
-    user = db.query(user_model.User).filter(user_model.User.username == u.username).first()
-    if not user:
-        return None
-    for k, v in u.dict().items():
-        if k in ["auth", "chats"]:
-            continue
-        setattr(user, k, v)
-    db.commit()
-    return user
-
-
-def create_chat(db: Session, chat: user_schema.Chat):
-    c = user_model.Chat(owner=chat.owner, chat_id=chat.chat_id)
-    db.add(c)
-    db.commit()
-
-
-def remove_chat(db: Session, chat: user_schema.Chat):
-    c = db.query(user_model.Chat).filter(user_model.Chat.chat_id == chat.chat_id).one()
-    db.delete(c)
-    db.commit()
-
-
-class Mappers:
-    @staticmethod
-    def user_db_to_view(u: user_model.User, include_auth=False) -> user_schema.User:
-        if not u:
-            return None
-        auths = chats = []
-        if include_auth:
-            auths = u.auth
-        # u.auth = []
-        chats = u.chats
-        # u.chats = []
-        app_user = user_schema.User(**{k: v for k, v in u.__dict__.items() if not k.startswith("_") and k not in ["chats", "auth"]})
-
-        app_user.auth = [user_schema.UserAuth(username=u.username, secret=x.secret, auth_type=x.auth_type) for x in auths]
-
-        app_user.chats = [user_schema.Chat(chat_id=x.chat_id, owner=x.owner) for x in chats]
-
-        return app_user
-
-    @staticmethod
-    def user_view_to_db(
-        u: Optional[user_schema.User] = None, ua: Optional[user_schema.UserAuth] = None
-    ) -> (user_model.User, Optional[user_model.UserAuth]):
-        assert u or ua, "One of User or UserAuth must be passed"
-        if not u:  # Creating a new user
-            u = user_schema.User(id=uuid.uuid4(), username=ua.username)
-        auth = []
-        if ua:
-            auth = Mappers.user_auth_view_to_db(ua, u.id)
-        user = user_model.User(**u.dict())
-        if auth:
-            user.auth.append(auth)
-        for chat in u.chats:
-            user.chats.append(user_model.Chat(chat_id=chat.chat_id))
-        return (user, auth)
-
-    @staticmethod
-    def user_auth_view_to_db(ua: user_schema.UserAuth, user_id: uuid.UUID) -> user_model.UserAuth:
-        if not ua:
-            return None
-        return user_model.UserAuth(secret=ua.secret, auth_type=ua.auth_type, user_id=user_id)
--- a/api/src/serge/data/models.json
+++ b/api/src/serge/data/models.json
--- a/api/src/serge/database.py
+++ b/api/src/serge/database.py
@ -1,31 +0,0 @@
-import logging
-import uuid
-
-from serge.models.settings import Settings
-from serge.models.user import User, UserAuth
-from sqlalchemy import create_engine
-from sqlalchemy.orm import Session, sessionmaker
-
-settings = Settings()
-
-engine = create_engine(settings.SERGE_DATABASE_URL, connect_args={"check_same_thread": False})
-SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-
-
-def seed_db(db: Session):
-    sys_u = db.query(User).filter(User.username == "system").first()
-    if sys_u:
-        return
-    system_user = User(
-        id=uuid.uuid4(),
-        username="system",
-        email="",
-        full_name="Default User",
-        theme_light=False,
-        default_prompt="Below is an instruction that describes a task. Write a response that appropriately completes the request.",
-        is_active=True,
-        auth=[UserAuth(secret="", auth_type=0)],
-    )
-    db.add(system_user)
-    db.commit()
-    logging.info("System user created")
--- a/api/src/serge/main.py
+++ b/api/src/serge/main.py
@ -5,21 +5,18 @@ from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from loguru import logger
-from serge.database import SessionLocal, engine, seed_db
+from starlette.responses import FileResponse
+
 from serge.models.settings import Settings
-from serge.routers.auth import auth_router
 from serge.routers.chat import chat_router
 from serge.routers.model import model_router
 from serge.routers.ping import ping_router
-from serge.routers.user import user_router
-from starlette.responses import FileResponse
-
-from serge.models import user as user_models
+from serge.utils.convert import convert_all

 # Configure logging settings

 # Define a logger for the current module
-logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
+logger.add(sys.stderr, format="{time} {level} {message}", level="DEBUG")

 settings = Settings()

@ -45,17 +42,12 @@ origins = [
    "http://localhost:9124",
 ]

-# Seed the database
-user_models.Base.metadata.create_all(bind=engine)
-
 app = FastAPI(title="Serge", version="0.0.1", description=description, tags_metadata=tags_metadata)

 api_app = FastAPI(title="Serge API")
 api_app.include_router(chat_router)
 api_app.include_router(ping_router)
 api_app.include_router(model_router)
-api_app.include_router(auth_router)
-api_app.include_router(user_router)
 app.mount("/api", api_app)

 # handle serving the frontend as static files in production
@ -92,8 +84,8 @@ async def start_database():
    for file in files:
        os.remove(WEIGHTS + file)

-    db = SessionLocal()
-    seed_db(db)
+    logger.info("initializing models")
+    convert_all("/usr/src/app/weights/", "/usr/src/app/weights/tokenizer.model")


 app.add_middleware(
--- a/api/src/serge/models/chat.py
+++ b/api/src/serge/models/chat.py
@ -14,7 +14,6 @@ class ChatParameters(BaseModel):
    # logits_all: bool
    # vocab_only: bool
    # use_mlock: bool
-    n_threads: int
    # n_batch: int
    last_n_tokens_size: int
    max_tokens: int
@ -32,5 +31,5 @@ class ChatParameters(BaseModel):
 class Chat(BaseModel):
    id: str = Field(default_factory=lambda: str(uuid4()))
    created: datetime = Field(default_factory=datetime.now)
-    owner: str = Field("system")
+
    params: ChatParameters
--- a/api/src/serge/models/settings.py
+++ b/api/src/serge/models/settings.py
@ -1,13 +1,8 @@
-from os import getenv
-
 from pydantic import BaseSettings


 class Settings(BaseSettings):
-    SERGE_DATABASE_URL: str = getenv("SERGE_DATABASE_URL", "sqlite:////data/db/sql_app.db")
    NODE_ENV: str = "development"
-    SERGE_JWT_SECRET: str = getenv("SERGE_JWT_SECRET", "uF7FGN5uzfGdFiPzR")
-    SERGE_SESSION_EXPIRY: int = getenv("SERGE_SESSION_EXPIRY", 60)

    class Config:
        orm_mode = True
--- a/api/src/serge/models/user.py
+++ b/api/src/serge/models/user.py
@ -1,40 +0,0 @@
-from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, Uuid
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
-
-Base = declarative_base()
-
-
-class User(Base):
-    __tablename__ = "users"
-
-    id = Column(Uuid, primary_key=True)
-    username = Column(String, unique=True, index=True)
-    email = Column(String)
-    full_name = Column(String)
-    theme_light = Column(Boolean)
-    default_prompt = Column(String)
-    is_active = Column(Boolean, default=True)
-
-    auth = relationship("UserAuth", back_populates="user", lazy="joined")
-    chats = relationship("Chat", back_populates="user", lazy="joined")
-
-
-class Chat(Base):
-    __tablename__ = "chats"
-
-    id = Column(Integer, primary_key=True)
-    chat_id = Column(String, index=True)
-    owner = Column(String, ForeignKey("users.username"))
-    user = relationship("User", back_populates="chats")
-
-
-class UserAuth(Base):
-    __tablename__ = "auth"
-
-    id = Column(Integer, primary_key=True)
-    secret = Column(String)
-    auth_type = Column(Integer)
-    user_id = Column(Uuid, ForeignKey("users.id"))
-
-    user = relationship("User", back_populates="auth")
--- a/api/src/serge/routers/auth.py
+++ b/api/src/serge/routers/auth.py
@ -1,108 +0,0 @@
-import logging
-from datetime import timedelta
-from typing import Optional
-
-from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
-from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
-from jose import JWTError
-from serge.crud import get_user
-from serge.database import SessionLocal
-from serge.schema.user import Token, User
-from serge.models.settings import Settings
-from serge.utils.security import create_access_token, decode_access_token, verify_password
-from sqlalchemy.orm import Session
-
-oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
-settings = Settings()
-
-auth_router = APIRouter(
-    prefix="/auth",
-    tags=["auth"],
-)
-
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-
-
-def authenticate_user(username: str, password: str, db: Session) -> Optional[User]:
-    user = get_user(db, username)
-    if not user:
-        return None
-    # Users may have multipe ways to authenticate
-    auths = [a.auth_type for a in user.auth]
-    if 0 in auths:  # Default user, passwordless
-        return user
-    if 1 in auths:  # Password auth
-        secret = [x for x in user.auth if x.auth_type == 1][0].secret
-        if verify_password(password, secret):
-            return user
-    if 2 in auths:  # todo future auths
-        pass
-    return False
-
-
-@auth_router.post("/token", response_model=Token)
-async def login_for_access_token(
-    response: Response,
-    form_data: OAuth2PasswordRequestForm = Depends(),
-    db: Session = Depends(get_db),
-):
-    user = authenticate_user(form_data.username, form_data.password, db)
-    if not user:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Incorrect username or password",
-            headers={"WWW-Authenticate": "Bearer"},
-        )
-    access_token_expires = timedelta(minutes=settings.SERGE_SESSION_EXPIRY)
-    access_token = create_access_token(data={"sub": user.username}, expires_delta=access_token_expires)
-    response.set_cookie(key="token", value=access_token, httponly=True, secure=True, samesite="strict")
-    return {"access_token": access_token, "token_type": "bearer"}
-
-
-@auth_router.post("/logout")
-async def logout(response: Response):
-    # Clear the token cookie by setting it to expire immediately
-    response.delete_cookie(key="token")
-    return {"message": "Logged out successfully"}
-
-
-async def get_current_user(token: str = Depends(oauth2_scheme), db: Session = Depends(get_db)) -> User:
-    credentials_exception = HTTPException(
-        status_code=status.HTTP_401_UNAUTHORIZED,
-        detail="Could not validate credentials",
-        headers={"WWW-Authenticate": "Bearer"},
-    )
-    try:
-        username = decode_access_token(token)
-        if username is None:
-            raise credentials_exception
-    except JWTError as e:
-        logging.exception(e)
-        raise credentials_exception
-
-    user = get_user(db, username)
-
-    if user is None:
-        raise credentials_exception
-    return user
-
-
-async def get_current_active_user(request: Request, response: Response, db: Session = Depends(get_db)) -> User:
-    token = request.cookies.get("token")
-
-    if not token:
-        return get_user(db, "system")
-
-    u = None
-    try:
-        u = await get_current_user(token, db)
-    except HTTPException:
-        await logout(response)
-        u = get_user(db, "system")
-    return u
--- a/api/src/serge/routers/chat.py
+++ b/api/src/serge/routers/chat.py
@ -1,60 +1,25 @@
 import os
-from typing import Optional

-from fastapi import APIRouter, Depends, HTTPException, status
+from typing import Optional
+from fastapi import APIRouter
 from langchain.memory import RedisChatMessageHistory
-from langchain.schema import AIMessage, HumanMessage, SystemMessage, messages_to_dict
+from langchain.schema import SystemMessage, messages_to_dict, AIMessage, HumanMessage
 from llama_cpp import Llama
 from loguru import logger
 from redis import Redis
-from serge.crud import create_chat, remove_chat, update_user
-from serge.database import SessionLocal
-from serge.models.chat import Chat, ChatParameters
-from serge.routers.auth import get_current_active_user
-from serge.schema.user import Chat as UserChat
-from serge.schema.user import User
-from serge.utils.stream import get_prompt
-from sqlalchemy.orm import Session
 from sse_starlette.sse import EventSourceResponse

+from serge.models.chat import Chat, ChatParameters
+from serge.utils.stream import get_prompt
+
 chat_router = APIRouter(
    prefix="/chat",
    tags=["chat"],
 )

-unauth_error = HTTPException(
-    status_code=status.HTTP_401_UNAUTHORIZED,
-    detail="Unauthorized",
-    headers={"WWW-Authenticate": "Bearer"},
-)
-
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-
-
-def _try_get_chat(client, chat_id):
-    if not client.sismember("chats", chat_id):
-        raise ValueError("Chat does not exist")
-
-    chat_raw = client.get(f"chat:{chat_id}")
-    chat = Chat.parse_raw(chat_raw)
-
-    # backwards compat
-    if not hasattr(chat, "owner"):
-        chat.owner = "system"
-
-    return chat
-

@chat_router.post("/")
 async def create_new_chat(
-    u: User = Depends(get_current_active_user),
-    db: Session = Depends(get_db),
    model: str = "7B",
    temperature: float = 0.1,
    top_k: int = 50,
@ -65,12 +30,17 @@ async def create_new_chat(
    repeat_last_n: int = 64,
    repeat_penalty: float = 1.3,
    init_prompt: str = "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
-    n_threads: int = 4,
 ):
-    if not os.path.exists(f"/usr/src/app/weights/{model}.bin"):
-        raise ValueError(f"Model can't be found: /usr/src/app/weights/{model}.bin")
+    try:
+        client = Llama(
+            model_path="/usr/src/app/weights/" + model + ".bin",
+        )
+        del client
+    except Exception as exc:
+        raise ValueError(f"Model can't be found: {exc}")

    client = Redis(host="localhost", port=6379, decode_responses=False)
+    logger.info(f"Connected to Redis? {client.ping()}")

    params = ChatParameters(
        model_path=model,
@ -82,20 +52,15 @@ async def create_new_chat(
        n_gpu_layers=gpu_layers,
        last_n_tokens_size=repeat_last_n,
        repeat_penalty=repeat_penalty,
-        n_threads=n_threads,
+        n_threads=len(os.sched_getaffinity(0)),
        init_prompt=init_prompt,
    )
    # create the chat
-    chat = Chat(owner=u.username, params=params)
+    chat = Chat(params=params)

    # store the parameters
    client.set(f"chat:{chat.id}", chat.json())

-    uc = UserChat(chat_id=chat.id, owner=u.username)
-    create_chat(db, uc)
-    u.chats.append(uc)
-    update_user(db, u)
-
    # create the message history
    history = RedisChatMessageHistory(chat.id)
    history.append(SystemMessage(content=init_prompt))
@ -107,11 +72,15 @@ async def create_new_chat(


@chat_router.get("/")
-async def get_all_chats(u: User = Depends(get_current_active_user)):
+async def get_all_chats():
    res = []
+    client = Redis(host="localhost", port=6379, decode_responses=False)
+    logger.info(f"Connected to Redis? {client.ping()}")
+
+    ids = client.smembers("chats")

    chats = sorted(
-        [await get_specific_chat(x.chat_id, u) for x in u.chats],
+        [await get_specific_chat(id.decode()) for id in ids],
        key=lambda x: x["created"],
        reverse=True,
    )
@ -134,39 +103,47 @@ async def get_all_chats(u: User = Depends(get_current_active_user)):


@chat_router.get("/{chat_id}")
-async def get_specific_chat(chat_id: str, u: User = Depends(get_current_active_user)):
+async def get_specific_chat(chat_id: str):
    client = Redis(host="localhost", port=6379, decode_responses=False)
+    logger.info(f"Connected to Redis? {client.ping()}")

-    if chat_id not in [x.chat_id for x in u.chats]:
-        raise unauth_error
+    if not client.sismember("chats", chat_id):
+        raise ValueError("Chat does not exist")

-    chat = _try_get_chat(client, chat_id)
+    chat_raw = client.get(f"chat:{chat_id}")
+    chat = Chat.parse_raw(chat_raw)

    history = RedisChatMessageHistory(chat.id)
+
    chat_dict = chat.dict()
    chat_dict["history"] = messages_to_dict(history.messages)
    return chat_dict


@chat_router.get("/{chat_id}/history")
-async def get_chat_history(chat_id: str, u: User = Depends(get_current_active_user)):
-    if chat_id not in [x.chat_id for x in u.chats]:
-        raise unauth_error
+async def get_chat_history(chat_id: str):
+    client = Redis(host="localhost", port=6379, decode_responses=False)
+    logger.info(f"Connected to Redis? {client.ping()}")
+
+    if not client.sismember("chats", chat_id):
+        raise ValueError("Chat does not exist")

    history = RedisChatMessageHistory(chat_id)
    return messages_to_dict(history.messages)


@chat_router.delete("/{chat_id}/prompt")
-async def delete_prompt(chat_id: str, idx: int, u: User = Depends(get_current_active_user)):
-    if chat_id not in [x.chat_id for x in u.chats]:
-        raise unauth_error
+async def delete_prompt(chat_id: str, idx: int):
+    client = Redis(host="localhost", port=6379, decode_responses=False)
+    logger.info(f"Connected to Redis? {client.ping()}")
+
+    if not client.sismember("chats", chat_id):
+        raise ValueError("Chat does not exist")

    history = RedisChatMessageHistory(chat_id)

    if idx >= len(history.messages):
-        logger.error("Unable to delete message, chat in progress")
-        raise HTTPException(status_code=202, detail="Unable to delete message, chat in progress")
+        raise ValueError("Index out of range")

    messages = history.messages.copy()[:idx]
    history.clear()
@ -178,17 +155,13 @@ async def delete_prompt(chat_id: str, idx: int, u: User = Depends(get_current_ac


@chat_router.delete("/{chat_id}")
-async def delete_chat(chat_id: str, u: User = Depends(get_current_active_user), db: Session = Depends(get_db)):
+async def delete_chat(chat_id: str):
    client = Redis(host="localhost", port=6379, decode_responses=False)
-    if chat_id not in [x.chat_id for x in u.chats]:
-        raise unauth_error
+    logger.info(f"Connected to Redis? {client.ping()}")

    if not client.sismember("chats", chat_id):
        raise ValueError("Chat does not exist")

-    if cid := next((x for x in u.chats if x.chat_id == chat_id), None):
-        remove_chat(db, cid)
-
    RedisChatMessageHistory(chat_id).clear()

    client.delete(f"chat:{chat_id}")
@ -198,25 +171,28 @@ async def delete_chat(chat_id: str, u: User = Depends(get_current_active_user),


@chat_router.delete("/delete/all")
-async def delete_all_chats(u: User = Depends(get_current_active_user), db: Session = Depends(get_db)):
-    [delete_chat(x.chat_id, u, db) for x in u.chats]
+async def delete_all_chats():
+    client = Redis(host="localhost", port=6379, decode_responses=False)
+    logger.info(f"Connected to Redis? {client.ping()}")
+
+    client.flushdb()
+    client.flushall()
+
    return True


@chat_router.get("/{chat_id}/question")
-async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_current_active_user)):
-    if chat_id not in [x.chat_id for x in u.chats]:
-        raise unauth_error
-
+def stream_ask_a_question(chat_id: str, prompt: str):
    logger.info("Starting redis client")
-
    client = Redis(host="localhost", port=6379, decode_responses=False)
+    logger.info(f"Connected to Redis? {client.ping()}")

    if not client.sismember("chats", chat_id):
        raise ValueError("Chat does not exist")

    logger.debug("creating chat")
-    chat = _try_get_chat(client, chat_id)
+    chat_raw = client.get(f"chat:{chat_id}")
+    chat = Chat.parse_raw(chat_raw)

    logger.debug(chat.params)
    logger.debug("creating history")
@ -232,7 +208,7 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get
    logger.debug("creating Llama client")
    try:
        client = Llama(
-            model_path=f"/usr/src/app/weights/{chat.params.model_path}.bin",
+            model_path="/usr/src/app/weights/" + chat.params.model_path + ".bin",
            n_ctx=len(chat.params.init_prompt) + chat.params.n_ctx,
            n_gpu_layers=chat.params.n_gpu_layers,
            n_threads=chat.params.n_threads,
@ -262,7 +238,7 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get
                yield {"event": "message", "data": txt}

        except Exception as e:
-            if type(e) is UnicodeDecodeError:
+            if type(e) == UnicodeDecodeError:
                pass
            else:
                error = e.__str__()
@ -281,16 +257,16 @@ async def stream_ask_a_question(chat_id: str, prompt: str, u: User = Depends(get


@chat_router.post("/{chat_id}/question")
-async def ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_current_active_user)):
-    if chat_id not in [x.chat_id for x in u.chats]:
-        raise unauth_error
-
+async def ask_a_question(chat_id: str, prompt: str):
    client = Redis(host="localhost", port=6379, decode_responses=False)
+    logger.info(f"Connected to Redis? {client.ping()}")

    if not client.sismember("chats", chat_id):
        raise ValueError("Chat does not exist")

-    chat = _try_get_chat(client, chat_id)
+    chat_raw = client.get(f"chat:{chat_id}")
+    chat = Chat.parse_raw(chat_raw)
+
    history = RedisChatMessageHistory(chat.id)

    if len(prompt) > 0:
@ -302,7 +278,7 @@ async def ask_a_question(chat_id: str, prompt: str, u: User = Depends(get_curren

    try:
        client = Llama(
-            model_path=f"/usr/src/app/weights/{chat.params.model_path}.bin",
+            model_path="/usr/src/app/weights/" + chat.params.model_path + ".bin",
            n_ctx=len(chat.params.init_prompt) + chat.params.n_ctx,
            n_threads=chat.params.n_threads,
            n_gpu_layers=chat.params.n_gpu_layers,
--- a/api/src/serge/routers/model.py
+++ b/api/src/serge/routers/model.py
@ -1,12 +1,14 @@
-import asyncio
 import os
-import shutil
+import urllib.request
+import requests
+import huggingface_hub

-import aiohttp
+from typing import Annotated

-from fastapi import APIRouter, HTTPException
-from huggingface_hub import hf_hub_url
+from fastapi import APIRouter, HTTPException, Form
 from serge.models.models import Families
+from serge.utils.convert import convert_one_file
+from serge.utils.migrate import migrate

 from pathlib import Path

@ -15,8 +17,6 @@ model_router = APIRouter(
    tags=["model"],
 )

-active_downloads = {}
-
 WEIGHTS = "/usr/src/app/weights/"

 models_file_path = Path(__file__).parent.parent / "data" / "models.json"
@ -34,212 +34,144 @@ for family in families.__root__:
            )


-# Helper functions
-async def is_model_installed(model_name: str) -> bool:
-    installed_models = await list_of_installed_models()
-    return any(file_name == f"{model_name}.bin" and not file_name.startswith(".") for file_name in installed_models)
+@model_router.post("/refresh")
+async def refresh_models(url: Annotated[str, Form()]):
+    """
+    Refreshes the list of models available for download.
+
+    Fetches a JSON document from ``url``, parses it as ``Families`` and
+    replaces the module-level ``models_info`` mapping with one entry per
+    model name: ``(repo, filename, disk_space)``.
+
+    Args:
+        url: Form field holding the location of the models JSON document.
+
+    Raises:
+        HTTPException: 500 when the URL cannot be fetched, answers with a
+            non-success status, or does not contain a valid model list.
+    """
+    global models_info
+
+    # Bound the request so an unresponsive host cannot hang the handler forever,
+    # and turn transport failures into the same 500 the non-OK branch returns.
+    try:
+        r = requests.get(url, timeout=30)
+    except requests.RequestException as exc:
+        raise HTTPException(status_code=500, detail="Could not refresh models using the link provided.") from exc
+
+    if not r.ok:
+        raise HTTPException(status_code=500, detail="Could not refresh models using the link provided.")
+
+    # Both JSON decoding errors and pydantic validation errors derive from ValueError.
+    try:
+        families = Families.parse_obj(r.json())
+    except ValueError as exc:
+        raise HTTPException(status_code=500, detail="The link provided did not return a valid model list.") from exc
+
+    # Build the new mapping locally and publish it in one assignment, so other
+    # handlers never see a half-filled ``models_info``.
+    refreshed = {}
+    for family in families.__root__:
+        for model in family.models:
+            for file in model.files:
+                # A later file of the same model overwrites the earlier one.
+                refreshed[model.name] = (
+                    model.repo,
+                    file.filename,
+                    file.disk_space,
+                )
+    models_info = refreshed
+
+    return


-async def get_file_size(file_path: str) -> int:
-    return os.stat(file_path).st_size
-
-
-async def cleanup_model_resources(model_name: str):
-    model_repo, _, _ = models_info.get(model_name, (None, None, None))
-    if not model_repo:
-        print(f"No model repo found for {model_name}, cleanup may be incomplete.")
-        return
-
-    temp_model_path = os.path.join(WEIGHTS, f".{model_name}.bin")
-    lock_dir = os.path.join(WEIGHTS, ".locks", f"models--{model_repo.replace('/', '--')}")
-    cache_dir = os.path.join(WEIGHTS, f"models--{model_repo.replace('/', '--')}")
-
-    # Try to cleanup temporary file if it exists
-    if os.path.exists(temp_model_path):
-        try:
-            os.remove(temp_model_path)
-        except OSError as e:
-            print(f"Error removing temporary file for {model_name}: {e}")
-
-    # Remove lock file if it exists
-    if os.path.exists(lock_dir):
-        try:
-            shutil.rmtree(lock_dir)
-        except OSError as e:
-            print(f"Error removing lock directory for {model_name}: {e}")
-
-    # Remove cache directory if it exists
-    if os.path.exists(cache_dir):
-        try:
-            shutil.rmtree(cache_dir)
-        except OSError as e:
-            print(f"Error removing cache directory for {model_name}: {e}")
-
-
-async def download_file(session: aiohttp.ClientSession, url: str, path: str) -> None:
-    async with session.get(url) as response:
-        if response.status != 200:
-            raise HTTPException(status_code=500, detail="Error downloading model")
-
-        # Write response content to file asynchronously
-        with open(path, "wb") as f:
-            while True:
-                chunk = await response.content.read(1024)
-                if not chunk:
-                    break
-                f.write(chunk)
-
-
-# Handlers
@model_router.get("/all")
 async def list_of_all_models():
+    res = []
    installed_models = await list_of_installed_models()
-    resp = []
-
    for model in models_info.keys():
-        if await is_model_installed(model):
+        progress = await download_status(model)
+        if f"{model}.bin" in installed_models:
            available = True
+            # if model exists in WEIGHTS directory remove it from the list
            installed_models.remove(f"{model}.bin")
        else:
            available = False
-        resp.append(
+        res.append(
            {
                "name": model,
                "size": models_info[model][2],
                "available": available,
-                "progress": await download_status(model),
+                "progress": progress,
            }
        )
+    # append the rest of the models
    for model in installed_models:
-        resp.append(
+        # .bin is removed for compatibility with generate.py
+        res.append(
            {
                "name": model.replace(".bin", "").lstrip("/"),
-                "size": await get_file_size(WEIGHTS + model),
+                "size": os.stat(WEIGHTS + model).st_size,
                "available": True,
-                "progress": 100.0,
+                "progress": None,
            }
        )
-    return resp
+
+    return res
+
+
+@model_router.get("/downloadable")
+async def list_of_downloadable_models():
+    """
+    Lists the known models that are not yet present in the WEIGHTS directory.
+
+    Returns:
+        The names from ``models_info`` that have no matching ``<name>.bin``
+        file directly inside ``WEIGHTS``.
+    """
+    suffix = ".bin"
+
+    # Remove the suffix by slicing: str.rstrip(".bin") strips any trailing
+    # '.', 'b', 'i' or 'n' characters and would turn "alpaca-7b.bin" into
+    # "alpaca-7", so installed models were still reported as downloadable.
+    installed_models = {name[: -len(suffix)] for name in os.listdir(WEIGHTS) if name.endswith(suffix)}
+
+    return [name for name in models_info.keys() if name not in installed_models]


@model_router.get("/installed")
 async def list_of_installed_models():
-    # Iterate through the WEIGHTS directory and return filenames that end with .bin and do not start with a dot
+    # after iterating through the WEIGHTS directory, return location and filename
    files = [
-        os.path.join(model_location.replace(WEIGHTS, "").lstrip("/"), bin_file)
-        for model_location, _, filenames in os.walk(WEIGHTS)
+        model_location.replace(WEIGHTS, "") + "/" + bin_file
+        for model_location, directory, filenames in os.walk(WEIGHTS)
        for bin_file in filenames
-        if bin_file.endswith(".bin") and not bin_file.startswith(".")
+        if os.path.splitext(bin_file)[1] == ".bin"
    ]
+    files = [i.lstrip("/") for i in files]
    return files


@model_router.post("/{model_name}/download")
-async def download_model(model_name: str):
-    if model_name not in models_info:
+def download_model(model_name: str):
+    models = list(models_info.keys())
+    if model_name not in models:
        raise HTTPException(status_code=404, detail="Model not found")

-    try:
-        model_repo, filename, _ = models_info[model_name]
-        model_url = hf_hub_url(repo_id=model_repo, filename=filename)
-        temp_model_path = os.path.join(WEIGHTS, f".{model_name}.bin")
-        model_path = os.path.join(WEIGHTS, f"{model_name}.bin")
+    if not os.path.exists(WEIGHTS + "tokenizer.model"):
+        print("Downloading tokenizer...")
+        url = huggingface_hub.hf_hub_url(
+            "nsarrazin/alpaca",
+            "alpaca-7B-ggml/tokenizer.model",
+            repo_type="model",
+            revision="main",
+        )
+        urllib.request.urlretrieve(url, WEIGHTS + "tokenizer.model")

-        # Create an aiohttp session with timeout settings
-        timeout = aiohttp.ClientTimeout(total=None, connect=300, sock_read=300)
-        async with aiohttp.ClientSession(timeout=timeout) as session:
-            # Start the download and add to active_downloads
-            download_task = asyncio.create_task(download_file(session, model_url, temp_model_path))
-            active_downloads[model_name] = download_task
-            await download_task
+    repo_id, filename, _ = models_info[model_name]

-        # Rename the dotfile to its final name
-        os.rename(temp_model_path, model_path)
+    print(f"Downloading {model_name} model from {repo_id}...")
+    url = huggingface_hub.hf_hub_url(repo_id, filename, repo_type="model", revision="main")
+    urllib.request.urlretrieve(url, WEIGHTS + f"{model_name}.bin.tmp")

-        # Remove the entry from active_downloads after successful download
-        active_downloads.pop(model_name, None)
+    os.rename(WEIGHTS + f"{model_name}.bin.tmp", WEIGHTS + f"{model_name}.bin")
+    convert_one_file(WEIGHTS + f"{model_name}.bin", WEIGHTS + "tokenizer.model")
+    migrate(WEIGHTS + f"{model_name}.bin")

-        return {"message": f"Model {model_name} downloaded"}
-    except asyncio.CancelledError:
-        await cleanup_model_resources(model_name)
-        raise HTTPException(status_code=200, detail="Download cancelled")
-    except Exception as exc:
-        await cleanup_model_resources(model_name)
-        raise HTTPException(status_code=500, detail=f"Error downloading model: {exc}")
-
-
-@model_router.post("/{model_name}/download/cancel")
-async def cancel_download(model_name: str):
-    try:
-        task = active_downloads.get(model_name)
-        if not task:
-            raise HTTPException(status_code=404, detail="No active download for this model")
-
-        # Remove the entry from active downloads after cancellation
-        task.cancel()
-
-        # Remove entry from active downloads
-        active_downloads.pop(model_name, None)
-
-        # Wait for the task to be cancelled
-        try:
-            # Wait for the task to respond to cancellation
-            print(f"Waiting for download for {model_name} to be cancelled")
-            await task
-        except asyncio.CancelledError:
-            # Handle the expected cancellation exception
-            pass
-
-        # Cleanup resources
-        await cleanup_model_resources(model_name)
-
-        print(f"Download for {model_name} cancelled")
-        return {"message": f"Download for {model_name} cancelled"}
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error cancelling model download: {str(e)}")
+    return {"message": f"Model {model_name} downloaded"}


@model_router.get("/{model_name}/download/status")
 async def download_status(model_name: str):
-    if model_name not in models_info:
+    models = list(models_info.keys())
+
+    if model_name not in models:
        raise HTTPException(status_code=404, detail="Model not found")

    filesize = models_info[model_name][2]
-    model_repo, _, _ = models_info[model_name]

-    # Construct the path to the blobs directory
-    temp_model_path = os.path.join(WEIGHTS, f".{model_name}.bin")
-    model_path = os.path.join(WEIGHTS, f"{model_name}.bin")
+    bin_path = WEIGHTS + f"{model_name}.bin.tmp"

-    # Check if the model is currently being downloaded
-    task = active_downloads.get(model_name)
-
-    if os.path.exists(model_path):
-        currentsize = os.path.getsize(model_path)
-        progress = min(round(currentsize / filesize * 100, 1), 100)
-        return progress
-    elif task and not task.done():
-        # If the task is still running, check for incomplete files
-        if os.path.exists(temp_model_path):
-            currentsize = os.path.getsize(temp_model_path)
-            return min(round(currentsize / filesize * 100, 1), 100)
-        # If temp_model_path doesn't exist, the download is likely just starting, progress is 0
-        return 0
-    else:
-        # No active download and the file does not exist
-        return None
+    if os.path.exists(bin_path):
+        currentsize = os.path.getsize(bin_path)
+        return min(round(currentsize / filesize * 100, 1), 100)
+    return None


@model_router.delete("/{model_name}")
 async def delete_model(model_name: str):
-    if f"{model_name}.bin" not in await list_of_installed_models():
+    if model_name + ".bin" not in await list_of_installed_models():
        raise HTTPException(status_code=404, detail="Model not found")

-    try:
-        os.remove(os.path.join(WEIGHTS, f"{model_name}.bin"))
-    except OSError as e:
-        print(f"Error removing model file: {e}")
+    if os.path.exists(WEIGHTS + f"{model_name}.bin"):
+        os.remove(WEIGHTS + f"{model_name}.bin")
+        return {"message": f"Model {model_name} deleted"}

-    await cleanup_model_resources(model_name)
-
-    return {"message": f"Model {model_name} deleted"}
+    raise HTTPException(status_code=404, detail="Model file not found")
--- a/api/src/serge/routers/user.py
+++ b/api/src/serge/routers/user.py
@ -1,63 +0,0 @@
-import logging
-
-from fastapi import APIRouter, Depends, HTTPException, status
-from serge.crud import create_user, update_user
-from serge.database import SessionLocal
-from serge.routers.auth import get_current_active_user
-from serge.schema import user as user_schema
-from sqlalchemy.orm import Session
-
-user_router = APIRouter(
-    prefix="/user",
-    tags=["user"],
-)
-
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-
-
-@user_router.get("/", response_model=user_schema.User)
-async def get_user(u: user_schema.User = Depends(get_current_active_user)):
-    if not u:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Incorrect username or password",
-            headers={"WWW-Authenticate": "Bearer"},
-        )
-    return u.to_public_dict()
-
-
-@user_router.post("/create", response_model=user_schema.User)
-async def create_user_with_pass(ua: user_schema.UserAuth, db: Session = Depends(get_db)):
-    try:
-        u = create_user(db, ua)
-    except Exception as e:
-        logging.exception(e)
-        raise HTTPException(
-            status_code=status.HTTP_409_CONFLICT,
-            detail=f"Failed to create. {e}",
-        )
-    if not u:
-        raise HTTPException(
-            status_code=status.HTTP_405_METHOD_NOT_ALLOWED,
-            detail="Could not create user",
-        )
-    return u.to_public_dict()
-
-
-@user_router.put("/", response_model=user_schema.User)
-async def self_update_user(
-    new_data: user_schema.User,
-    current: user_schema.User = Depends(get_current_active_user),
-    db: Session = Depends(get_db),
-):
-    current.email = new_data.email
-    current.full_name = new_data.full_name
-    current.default_prompt = new_data.default_prompt
-    update_user(db, current)
-    return current.to_public_dict()
--- a/api/src/serge/schema/schema.json
+++ b/api/src/serge/schema/schema.json
@ -73,35 +73,16 @@
        "Name": {
            "type": "string",
            "enum": [
-                "fp16",
-                "iq1_M",
-                "iq1_S",
-                "iq2_M",
-                "iq2_S",
-                "iq2_XS",
-                "iq2_XXS",
-                "iq3_M",
-                "iq3_S",
-                "iq3_XS",
-                "iq3_XXS",
-                "iq4_NL",
-                "iq4_XS",
                "q2_K",
                "q3_K_L",
-                "q3_K_M",
-                "q3_K_S",
                "q4_0",
                "q4_1",
                "q4_K_M",
-                "q4_K_S",
                "q5_0",
                "q5_1",
                "q5_K_M",
-                "q5_K_S",
                "q6_K",
-                "q8_0",
-                "q8_1",
-                "q8_K"
+                "q8_0"
            ],
            "title": "Name"
        }
--- a/api/src/serge/schema/user.py
+++ b/api/src/serge/schema/user.py
@ -1,42 +0,0 @@
-import uuid
-
-from pydantic import BaseModel
-
-
-class UserBase(BaseModel):
-    username: str
-
-
-class UserAuth(UserBase):
-    secret: str
-    auth_type: int
-
-
-class Chat(BaseModel):
-    chat_id: str
-    owner: str
-
-
-class User(UserBase):
-    id: uuid.UUID
-    is_active: bool = True
-    email: str = ""
-    full_name: str = ""
-    theme_light: bool = False
-    default_prompt: str = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
-    auth: list[UserAuth] = []
-    chats: list[Chat] = []
-
-    class Config:
-        orm_mode = True
-
-    def to_public_dict(self):
-        user_dict = self.dict()
-        for auth in user_dict["auth"]:
-            auth["secret"] = "********"
-        return user_dict
-
-
-class Token(BaseModel):
-    access_token: str
-    token_type: str
--- a/api/src/serge/utils/convert.py
+++ b/api/src/serge/utils/convert.py
@ -0,0 +1,127 @@
+# code from @eiz
+# Thanks! See the relevant comment here:
+# https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818
+
+#!/usr/bin/env python3
+import argparse
+import glob
+import os
+import struct
+import sys
+
+from sentencepiece import SentencePieceProcessor
+
+from serge.utils.migrate import migrate
+
+HPARAMS = keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"]
+
+
+def parse_args():
+    """Parse the command-line arguments of the conversion script.
+
+    Returns:
+        The parsed namespace with the positional values ``dir_model`` and
+        ``tokenizer_model``.
+    """
+    cli = argparse.ArgumentParser(description="Upgrade old ggml model files to the current format")
+    positionals = (
+        ("dir_model", "directory containing ggml .bin files"),
+        ("tokenizer_model", "path to LLaMA tokenizer.model file"),
+    )
+    for arg_name, arg_help in positionals:
+        cli.add_argument(arg_name, help=arg_help)
+    return cli.parse_args()
+
+
+def read_header(f_in):
+    """Read the header of an old-style ggml file as a tuple of native ints.
+
+    The header holds three fields in addition to the ones named in HPARAMS.
+
+    Args:
+        f_in: Binary file object positioned at the start of the file.
+
+    Returns:
+        Tuple with ``3 + len(HPARAMS)`` integers.
+    """
+    field_count = 3 + len(HPARAMS)
+    fmt = "i" * field_count
+    raw = f_in.read(struct.calcsize(fmt))
+    return struct.unpack(fmt, raw)
+
+
+def write_header(f_out, header):
+    """Write a versioned header to ``f_out`` built from an old-style header.
+
+    Args:
+        f_out: Binary file object opened for writing.
+        header: 8-tuple of ints: magic, vocab_size, dim, multiple_of,
+            n_heads, n_layers, rot and ftype.
+
+    Raises:
+        Exception: If the input magic is not 0x67676D6C.
+    """
+    (magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype) = header
+
+    # 0x67676D6C is ASCII "ggml", the magic expected for old-style input files.
+    if magic != 0x67676D6C:
+        raise Exception("Invalid file magic. Must be an old style ggml file.")
+
+    values = [
+        0x67676D66,  # magic: "ggmf" in hex
+        1,  # file version
+        vocab_size,
+        dim,
+        multiple_of,
+        n_heads,
+        n_layers,
+        rot,
+        ftype,
+    ]
+    f_out.write(struct.pack("i" * len(values), *values))
+
+
+def write_tokens(fout, tokenizer):
+    """Write the tokenizer vocabulary to ``fout``, one record per token id.
+
+    Each record is the byte length of the token text (native int), the text
+    itself, and the token score (native float).
+
+    Args:
+        fout: Binary file object opened for writing.
+        tokenizer: Object offering ``vocab_size``, ``is_unknown``,
+            ``is_control``, ``is_byte``, ``id_to_piece`` and ``get_score``.
+    """
+    for token_id in range(tokenizer.vocab_size()):
+        if tokenizer.is_unknown(token_id):
+            payload = " \u2047 ".encode()
+        elif tokenizer.is_control(token_id):
+            payload = b""
+        elif tokenizer.is_byte(token_id):
+            piece = tokenizer.id_to_piece(token_id)
+            if len(piece) != 6:
+                print(f"Invalid token: {piece}")
+                sys.exit(1)
+            # The two hex digits sit between the 3-character prefix and the
+            # final character of the 6-character piece.
+            payload = struct.pack("B", int(piece[3:-1], 16))
+        else:
+            piece = tokenizer.id_to_piece(token_id)
+            payload = piece.replace("\u2581", " ").encode("utf-8")
+
+        fout.write(struct.pack("i", len(payload)))
+        fout.write(payload)
+        fout.write(struct.pack("f", tokenizer.get_score(token_id)))
+
+
+def read_tokens(f_in, tokenizer):
+    """Skip the vocabulary section of the input file.
+
+    Reads ``tokenizer.vocab_size()`` records, each a native-int length followed
+    by that many bytes, and discards them.
+
+    Args:
+        f_in: Binary file object positioned at the first token record.
+        tokenizer: Object whose ``vocab_size()`` gives the number of records.
+    """
+    remaining = tokenizer.vocab_size()
+    while remaining > 0:
+        (n_bytes,) = struct.unpack("i", f_in.read(4))
+        f_in.read(n_bytes)
+        remaining -= 1
+
+
+def copy_all_data(f_out, f_in):
+    """Copy everything left in ``f_in`` to ``f_out`` in 1 MiB chunks.
+
+    Args:
+        f_out: File object to write to.
+        f_in: File object to read from until it returns an empty chunk.
+    """
+    while chunk := f_in.read(1024 * 1024):
+        f_out.write(chunk)
+
+
+def convert_one_file(path_in, tokenizer):
+    """Upgrade one old-style ggml file in place to the versioned layout.
+
+    The converted data is first written to ``<path_in>.tmp``. On success the
+    original is kept as ``<path_in>.old`` and the converted file takes its
+    place. On any failure the input file is left untouched and the reason is
+    printed; the temporary file is removed in every case.
+
+    Args:
+        path_in: Path of the ggml ``.bin`` file to convert.
+        tokenizer: A loaded ``SentencePieceProcessor``, or the path of a
+            ``tokenizer.model`` file, which is then loaded here.
+    """
+    path_tmp = f"{path_in}.tmp"
+
+    print(f"converting {path_in}")
+    try:
+        # Accept a tokenizer path as well as a loaded tokenizer. Without this
+        # a path string fails on the vocabulary calls below and the file is
+        # skipped for the wrong reason.
+        if isinstance(tokenizer, (str, os.PathLike)):
+            tokenizer = SentencePieceProcessor(os.fspath(tokenizer))
+
+        with open(path_in, "rb") as f_in, open(path_tmp, "wb") as f_out:
+            write_header(f_out, read_header(f_in))
+            read_tokens(f_in, tokenizer)
+            write_tokens(f_out, tokenizer)
+            copy_all_data(f_out, f_in)
+    except Exception as exc:
+        # Conversion is best-effort (files that are not in the old format are
+        # expected), but report the actual reason instead of assuming the file
+        # was already converted.
+        print(f"File {path_in} not converted: {exc}")
+    else:
+        os.rename(path_in, path_in + ".old")
+        os.rename(path_tmp, path_in)
+    finally:
+        # After a successful rename the temp file no longer exists, so a
+        # failure to remove it is expected and ignored.
+        try:
+            os.remove(path_tmp)
+        except OSError:
+            pass
+
+
+def convert_all(dir_model: str, tokenizer_model: str):
+    """Convert and migrate every ``*.bin`` file found directly in a directory.
+
+    Args:
+        dir_model: Directory containing ggml ``.bin`` files.
+        tokenizer_model: Path to the LLaMA ``tokenizer.model`` file.
+
+    Any exception is caught and printed, which ends the run at the file that
+    raised it.
+    """
+    # Glob once: listing the same pattern twice made every file go through
+    # conversion and migration two times. Sorting keeps the order stable.
+    files = sorted(glob.glob(f"{dir_model}/*.bin"))
+
+    try:
+        tokenizer = SentencePieceProcessor(tokenizer_model)
+        for file in files:
+            print("Converting file: ", file)
+            convert_one_file(file, tokenizer)
+            migrate(file)
+    except Exception as e:
+        print(f"Error: {e}")
+
+
+# Command-line entry point: convert and migrate the .bin files of a directory.
+if __name__ == "__main__":
+    args = parse_args()
+    convert_all(args.dir_model, args.tokenizer_model)
--- a/api/src/serge/utils/llm.py
+++ b/api/src/serge/utils/llm.py
@ -0,0 +1,245 @@
+"""Wrapper around llama.cpp."""
+from typing import Any
+
+from langchain.llms.base import LLM
+from pydantic import Extra, Field, root_validator
+
+
+class LlamaCpp(LLM):
+    """Wrapper around the llama.cpp model.
+
+    To use, you should have the llama-cpp-python library installed, and provide the
+    name of the model as a named parameter to the constructor.
+    Check out: https://github.com/abetlen/llama-cpp-python
+
+    Example:
+        .. code-block:: python
+
+            from serge.utils.llm import LlamaCpp
+            llm = LlamaCpp(model_path="gpt4all")
+    """
+
+    client: Any  #: :meta private:
+    model_path: str
+    """Name of the model; `_call` loads "/usr/src/app/weights/<model_path>.bin"."""
+
+    n_ctx: int = Field(2048, alias="n_ctx")
+    """Token context window."""
+
+    n_gpu_layers: int = Field(0, alias="n_gpu_layers")
+    """The number of layers to put on the GPU. The rest will be on the CPU."""
+
+    n_parts: int = Field(-1, alias="n_parts")
+    """Number of parts to split the model into.
+    If -1, the number of parts is automatically determined."""
+
+    seed: int = Field(-1, alias="seed")
+    """Seed. If -1, a random seed is used."""
+
+    f16_kv: bool = Field(False, alias="f16_kv")
+    """Use half-precision for key/value cache."""
+
+    logits_all: bool = Field(False, alias="logits_all")
+    """Return logits for all tokens, not just the last token."""
+
+    vocab_only: bool = Field(False, alias="vocab_only")
+    """Only load the vocabulary, no weights."""
+
+    use_mlock: bool = Field(False, alias="use_mlock")
+    """Force system to keep model in RAM."""
+
+    n_threads: int | None = Field(None, alias="n_threads")
+    """Number of threads to use.
+    If None, the number of threads is automatically determined."""
+
+    n_batch: int | None = Field(8, alias="n_batch")
+    """Number of tokens to process in parallel.
+    Should be a number between 1 and n_ctx."""
+
+    max_tokens: int | None = 2048
+    """The maximum number of tokens to generate."""
+
+    temperature: float | None = 0.8
+    """The temperature to use for sampling."""
+
+    top_p: float | None = 0.95
+    """The top-p value to use for sampling."""
+
+    logprobs: int | None = Field(None)
+    """The number of logprobs to return. If None, no logprobs are returned."""
+
+    echo: bool | None = False
+    """Whether to echo the prompt."""
+
+    stop_sequences: list[str] | None = []
+    """A list of strings to stop generation when encountered."""
+
+    repeat_penalty: float | None = 1.1
+    """The penalty to apply to repeated tokens."""
+
+    top_k: int | None = 40
+    """The top-k value to use for sampling."""
+
+    last_n_tokens_size: int | None = 64
+    """The number of tokens to look back when applying the repeat_penalty."""
+
+    streaming: bool = False
+    """Whether `_call` streams tokens to the callback manager as they arrive."""
+
+    class Config:
+        extra = Extra.ignore
+
+    @root_validator()
+    def validate_environment(cls, values: dict) -> dict:
+        """Return the field values unchanged.
+
+        Nothing is imported or loaded at construction time: `_call` imports
+        llama-cpp-python and builds the client on every call, so a missing
+        library or model file surfaces there.
+        """
+        return values
+
+    @property
+    def _default_params(self) -> dict[str, Any]:
+        """Get the default parameters for calling llama_cpp."""
+        return {
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "logprobs": self.logprobs,
+            "echo": self.echo,
+            "stop_sequences": self.stop_sequences,
+            "repeat_penalty": self.repeat_penalty,
+            "top_k": self.top_k,
+            "n_ctx": self.n_ctx,
+            "n_gpu_layers": self.n_gpu_layers,
+            "n_parts": self.n_parts,
+            "seed": self.seed,
+            "f16_kv": self.f16_kv,
+            "logits_all": self.logits_all,
+            "vocab_only": self.vocab_only,
+            "use_mlock": self.use_mlock,
+            "n_batch": self.n_batch,
+            "last_n_tokens_size": self.last_n_tokens_size,
+            "streaming": self.streaming,
+        }
+
+    @property
+    def _identifying_params(self) -> dict[str, Any]:
+        """Get the identifying parameters."""
+        return {**{"model_path": self.model_path}, **self._default_params}
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "llama.cpp"
+
+    def _call(self, prompt: str, stop: list[str] | None = None) -> str:
+        """Call the Llama model and return the output.
+
+        Args:
+            prompt: The prompt to use for generation.
+            stop: A list of strings to stop generation when encountered. Used
+                only when the instance has no `stop_sequences` of its own.
+
+        Returns:
+            The generated text.
+
+        Raises:
+            ValueError: If `stop_sequences` is set on the instance and `stop`
+                is passed as well.
+
+        Example:
+            .. code-block:: python
+
+                from serge.utils.llm import LlamaCpp
+                llm = LlamaCpp(model_path="gpt4all")
+                llm("This is a prompt.")
+        """
+        from llama_cpp import Llama
+
+        # Reject ambiguous stop settings before the model is loaded.
+        if self.stop_sequences and stop is not None:
+            raise ValueError("`stop_sequences` found in both the input and default params.")
+
+        params = self._identifying_params
+        # Instance-level stop sequences win; otherwise honour the caller's
+        # `stop` list instead of silently dropping it.
+        params["stop_sequences"] = self.stop_sequences or stop or []
+
+        client = Llama(
+            model_path="/usr/src/app/weights/" + self.model_path + ".bin",
+            n_ctx=self.n_ctx,
+            n_gpu_layers=self.n_gpu_layers,
+            n_parts=self.n_parts,
+            seed=self.seed,
+            f16_kv=self.f16_kv,
+            logits_all=self.logits_all,
+            vocab_only=self.vocab_only,
+            use_mlock=self.use_mlock,
+            n_threads=self.n_threads,
+            n_batch=self.n_batch,
+            last_n_tokens_size=self.last_n_tokens_size,
+        )
+
+        # Sampling arguments shared by the streaming and non-streaming calls.
+        call_kwargs = {
+            "prompt": prompt,
+            "max_tokens": params["max_tokens"],
+            "temperature": params["temperature"],
+            "top_p": params["top_p"],
+            "logprobs": params["logprobs"],
+            "echo": params["echo"],
+            "stop": params["stop_sequences"],
+            "repeat_penalty": params["repeat_penalty"],
+            "top_k": params["top_k"],
+        }
+
+        if not self.streaming:
+            output = client(**call_kwargs)
+            return output["choices"][0]["text"]
+
+        response = ""
+        for stream_resp in client(**call_kwargs, stream=True):
+            try:
+                token = stream_resp["choices"][0]["text"]
+            except (KeyError, IndexError, TypeError):
+                # A chunk without text contributes nothing; interrupts and
+                # other unrelated errors are no longer swallowed here.
+                token = ""
+
+            response += token
+
+            self.callback_manager.on_llm_new_token(token, verbose=self.verbose)
+        return response
+
+
+# Manual smoke test: run one streaming prompt with the Redis callback handler attached.
+if __name__ == "__main__":
+    from langchain.callbacks.base import CallbackManager
+
+    from serge.utils.stream import ChainRedisHandler
+
+    llm = LlamaCpp(
+        streaming=True,
+        model_path="gpt4all",
+        callback_manager=CallbackManager([ChainRedisHandler("1")]),
+        verbose=True,
+        temperature=0.1,
+        max_tokens=128,
+    )
+
+    # Wait for Enter on stdin before running the prompt.
+    input()
+    # The returned text is not used further in this script.
+    resp = llm("Write a paragraph about France please.")
--- a/api/src/serge/utils/migrate.py
+++ b/api/src/serge/utils/migrate.py
@ -0,0 +1,308 @@
+# Migrate ggml file(s) with ggmf magic to ggml file with ggjt magic
+#
+# We caused a breaking change to the file format on 2023-03-30 in:
+#     https://github.com/ggerganov/llama.cpp/pull/613
+#
+# (1) If you still have the Meta LLaMA .pth files, then close this
+#     file now; you can just run `convert-pth-to-ggml.py` again to
+#     migrate to the new format. The tool is easier to use too. It
+#     isn't necessary anymore to manage split output files because
+#     the new format always combines things into a single file.
+#
+# (2) If you deleted the Meta LLaMA .pth files to save disk
+#     space, then this tool is intended to help you.  Please check
+#     out the instructions below.
+#
+# USAGE
+#
+#     python migrate-ggml-2023-03-30-pr613.py INPUT OUTPUT
+#
+# PREREQUISITES
+#
+#     pip install numpy
+#     cd llama.cpp
+#     make -j4
+#
+# EXAMPLE (7B MODEL)
+#
+#     # you can replace all the 'f16' with 'q4_0' if you're using quantized weights
+#     python migrate-ggml-2023-03-30-pr613.py models/7B/ggml-model-f16.bin models/7B/ggml-model-f16-ggjt.bin
+#
+#     # check that it works
+#     ./main -m models/7B/ggml-model-f16-ggjt.bin -p 'Question: Do you love me?'
+#
+#     # you can delete the old files
+#     rm -f models/7B/ggml-model-f16.bin
+#     mv models/7B/ggml-model-f16-ggjt.bin models/7B/ggml-model-f16.bin
+#
+# EXAMPLE (13B MODEL)
+#
+#     # you can replace all the 'f16' with 'q4_0' if you're using quantized weights
+#     python migrate-ggml-2023-03-30-pr613.py models/13B/ggml-model-f16.bin models/13B/ggml-model-f16-ggjt.bin
+#
+#     # check that it works
+#     ./main -m models/13B/ggml-model-f16-ggjt.bin -p 'Question: Do you love me?'
+#
+#     # you can delete the old files
+#     rm -f models/13B/ggml-model-f16.bin*
+#     mv models/13B/ggml-model-f16-ggjt.bin models/13B/ggml-model-f16.bin
+#
+
+import os
+import struct
+
+
+QK = 32
+
+GGML_TYPE_Q4_0 = 0
+GGML_TYPE_Q4_1 = 1
+GGML_TYPE_I8 = 2
+GGML_TYPE_I16 = 3
+GGML_TYPE_I32 = 4
+GGML_TYPE_F16 = 5
+GGML_TYPE_F32 = 6
+
+WTYPE_NAMES = {
+    0: "F32",
+    1: "F16",
+    2: "Q4_0",
+    3: "Q4_1",
+}
+
+WTYPES = {
+    0: GGML_TYPE_F32,
+    1: GGML_TYPE_F16,
+    2: GGML_TYPE_Q4_0,
+    3: GGML_TYPE_Q4_1,
+}
+
+GGML_BLCK_SIZE = {
+    GGML_TYPE_Q4_0: QK,
+    GGML_TYPE_Q4_1: QK,
+    GGML_TYPE_I8: 1,
+    GGML_TYPE_I16: 1,
+    GGML_TYPE_I32: 1,
+    GGML_TYPE_F16: 1,
+    GGML_TYPE_F32: 1,
+}
+
+GGML_TYPE_SIZE = {
+    GGML_TYPE_Q4_0: 4 + QK // 2,
+    GGML_TYPE_Q4_1: 4 * 2 + QK // 2,
+    GGML_TYPE_I8: 1,
+    GGML_TYPE_I16: 2,
+    GGML_TYPE_I32: 4,
+    GGML_TYPE_F16: 2,
+    GGML_TYPE_F32: 4,
+}
+
+HPARAMS = [
+    "magic",  # int32
+    "version",  # int32
+    "n_vocab",  # int32
+    "n_embd",  # int32
+    "n_mult",  # int32
+    "n_head",  # int32
+    "n_layer",  # int32
+    "n_rot",  # int32
+    "f16",  # int32
+]
+
+
+def read_hparams(fin):
+    """Read the file header and return it as a dict keyed by the HPARAMS names.
+
+    Args:
+        fin: Binary file object positioned at the start of the header.
+
+    Returns:
+        Dict mapping each name in HPARAMS to the native int read for it.
+    """
+    fmt = "i" * len(HPARAMS)
+    raw = fin.read(struct.calcsize(fmt))
+    return dict(zip(HPARAMS, struct.unpack(fmt, raw)))
+
+
+def write_hparams(fout, hparams):
+    """Write the header fields to ``fout`` in HPARAMS order as native ints.
+
+    Args:
+        fout: Binary file object opened for writing.
+        hparams: Dict holding an int for every name in HPARAMS.
+
+    Raises:
+        KeyError: If ``hparams`` lacks one of the HPARAMS names.
+    """
+    # The size of the packed header is not needed for writing, so the
+    # discarded struct.calcsize() call was dropped.
+    struct_fmt = "i" * len(HPARAMS)
+    ints = [hparams[h] for h in HPARAMS]
+    fout.write(struct.pack(struct_fmt, *ints))
+
+
+def read_tokens(fin, hparams):
+    """Read the vocabulary section and return it as (word, score) pairs.
+
+    Args:
+        fin: Binary file object positioned at the first token record.
+        hparams: Header dict; ``hparams["n_vocab"]`` records are read.
+
+    Returns:
+        List of ``(word_bytes, score_float)`` tuples in file order.
+    """
+
+    def _next_token():
+        # One record: native-int length, the text bytes, native-float score.
+        (n_bytes,) = struct.unpack("i", fin.read(4))
+        word = fin.read(n_bytes)
+        (score,) = struct.unpack("f", fin.read(4))
+        return (word, score)
+
+    return [_next_token() for _ in range(hparams["n_vocab"])]
+
+
+def write_tokens(fout, tokens):
+    """Write (word, score) pairs to ``fout`` as vocabulary records.
+
+    Args:
+        fout: Binary file object opened for writing.
+        tokens: Iterable of ``(word_bytes, score_float)`` pairs.
+    """
+    for entry in tokens:
+        word, score = entry
+        length_field = struct.pack("i", len(word))
+        score_field = struct.pack("f", score)
+        fout.write(length_field)
+        fout.write(word)
+        fout.write(score_field)
+
+
+def ggml_nelements(shape):
+    """Return the product of all extents in ``shape`` (1 for an empty shape)."""
+    total = 1
+    for extent in shape:
+        total = total * extent
+    return total
+
+
+def ggml_nbytes(shape, ftype):
+    """Return the number of data bytes of a tensor.
+
+    Args:
+        shape: Sequence of tensor extents.
+        ftype: Weight-type code from the tensor header, a key of WTYPES.
+
+    Returns:
+        Element count times the type size, floor-divided by the block size.
+    """
+    n_elements = ggml_nelements(shape)
+    ggml_type = WTYPES[ftype]
+    return n_elements * GGML_TYPE_SIZE[ggml_type] // GGML_BLCK_SIZE[ggml_type]
+
+
+def copy_tensors(fin, fout, part_id, n_parts):
+    """Copy the tensors of one input part file into the single output file.
+
+    Reads tensor records from ``fin`` until end of file. For each record the
+    tensor header is rewritten with the full shape (the split dimension
+    multiplied by ``n_parts``), the output is padded with zero bytes to a
+    multiple of ``QK``, and this part's data is written at the position it
+    occupies inside the full tensor.
+
+    Args:
+        fin: Binary input file positioned at the first tensor record.
+        fout: Seekable binary output file positioned where the tensors start.
+        part_id: Zero-based index of the part file being read.
+        n_parts: Total number of part files.
+
+    Raises:
+        AssertionError: If a tensor has a number of dimensions other than 1 or 2.
+    """
+    while True:
+        # A record starts with three ints: dimension count, name length, type.
+        b = fin.read(4)
+        if not b:
+            break
+        (n_dims,) = struct.unpack("i", b)
+        b = fin.read(4)
+        (length,) = struct.unpack("i", b)
+        b = fin.read(4)
+        (ftype,) = struct.unpack("i", b)
+
+        assert n_dims in (1, 2)
+
+        # Extents are reversed after reading and reversed again when written.
+        partshape = list(range(n_dims))
+        for i in range(n_dims):
+            b = fin.read(4)
+            partshape[i] = struct.unpack("i", b)[0]
+        partshape = list(reversed(partshape))
+
+        name = fin.read(length)
+        data = fin.read(ggml_nbytes(partshape, ftype))
+
+        blck_size = GGML_BLCK_SIZE[WTYPES[ftype]]
+        type_size = GGML_TYPE_SIZE[WTYPES[ftype]]
+
+        print(f"Processing tensor {name} with shape: {partshape} and type: {WTYPE_NAMES[ftype]}")
+
+        # determine dimension along which multipart tensor is sharded
+        #
+        # split_dim 0 regex:
+        #   - output.*
+        #   - layers.*.attention.wq.weight
+        #   - layers.*.attention.wk.weight
+        #   - layers.*.attention.wv.weight
+        #   - layers.*.feed_forward.w1.weight
+        #   - layers.*.feed_forward.w3.weight
+        #
+        # split_dim 1 regex:
+        #   - tok_embeddings.*
+        #   - layers.*.attention.wo.weight
+        #   - layers.*.feed_forward.w2.weight
+        #
+        # split_dim is only assigned for 2-D tensors; every later use is
+        # reached only when n_dims > 1.
+        if n_dims > 1:
+            split_dim = 1
+            if b"tok_embeddings" in name:
+                split_dim = 1
+            elif b"layers" in name:
+                if b"attention.wo.weight" in name:
+                    split_dim = 1
+                elif b"feed_forward.w2.weight" in name:
+                    split_dim = 1
+                else:
+                    split_dim = 0
+            elif b"output" in name:
+                split_dim = 0
+
+        # output tensor header
+        fullshape = list(partshape)
+        if n_dims > 1:
+            fullshape[split_dim] *= n_parts
+        fout.write(struct.pack("iii", n_dims, len(name), ftype))
+        for dim in reversed(fullshape):
+            fout.write(struct.pack("i", dim))
+        fout.write(name)
+
+        # ensure tensor data is aligned
+        tensor_data_offset = fout.tell()
+        while tensor_data_offset % QK != 0:
+            fout.write(struct.pack("B", 0))
+            tensor_data_offset += 1
+
+        # output unified mappable tensor data
+        if n_dims == 1 or n_parts == 1:
+            # copy tensor which we thankfully received in one piece
+            # (only the first part writes it)
+            if part_id == 0:
+                fout.write(data)
+        elif split_dim == 0:
+            # reassemble multifile tensor containing some of the rows
+            rows_per_chunk = partshape[0]
+            current_row = part_id * rows_per_chunk
+            bytes_per_row = fullshape[1] // blck_size * type_size
+            offset = current_row * bytes_per_row
+            fout.seek(tensor_data_offset + offset)
+            fout.write(data)
+        elif split_dim == 1:
+            # reassemble multifile tensor containing some of the cols
+            cols_per_chunk = partshape[1]
+            current_col = part_id * cols_per_chunk
+            # bpr: bytes per row of this part; bytes_per_row: of the full tensor
+            bpr = partshape[1] // blck_size * type_size
+            bytes_per_row = fullshape[1] // blck_size * type_size
+            offset_current_col = current_col // blck_size * type_size
+            for row in range(partshape[0]):
+                offset_row = row * bytes_per_row
+                offset = offset_row + offset_current_col
+                fout.seek(tensor_data_offset + offset)
+                fout.write(data[row * bpr : row * bpr + bpr])
+
+        # advance file position to next tensor
+        fout.seek(tensor_data_offset + ggml_nbytes(fullshape, ftype))
+
+
+def migrate(fin_path):
+    """Migrate a ggml file with 'ggmf' magic to 'ggjt' magic, in place.
+
+    Files that already carry the 'ggjt' magic, or that do not carry the
+    'ggmf' magic, are reported and left untouched. Otherwise the header,
+    tokens and tensors of ``fin_path`` and of any numbered part files
+    (``<fin_path>.1``, ``<fin_path>.2``, ...) are combined into
+    ``<fin_path>.migrated``, which then replaces ``fin_path``. The numbered
+    part files themselves are not removed.
+
+    Args:
+        fin_path: Path of the model file to migrate.
+
+    Raises:
+        AssertionError: If ``fin_path`` is empty or does not exist.
+    """
+    assert fin_path
+    assert os.path.exists(fin_path)
+
+    with open(fin_path, "rb") as fin:
+        hparams = read_hparams(fin)
+
+        # Check the magic before parsing tokens: for a file in another format
+        # the vocabulary size read from the header is meaningless.
+        if hparams["magic"] == 0x67676A74:  # ggjt
+            print(f"{fin_path}: input ggml has already been converted to 'ggjt' magic\n")
+            return
+
+        if hparams["magic"] != 0x67676D66:  # ggmf
+            print(f"{fin_path}: input ggml file doesn't have expected 'ggmf' magic: {hparams['magic']:#x}\n")
+            return
+
+        tokens = read_tokens(fin, hparams)
+
+    hparams["magic"] = 0x67676A74  # ggjt
+
+    # count number of multipart files by convention
+    n_parts = 1
+    while os.path.exists("%s.%d" % (fin_path, n_parts)):
+        n_parts += 1
+
+    migrated_path = fin_path + ".migrated"
+
+    # we output a single file for ggml
+    with open(migrated_path, "wb") as fout:
+        write_hparams(fout, hparams)
+        write_tokens(fout, tokens)
+        offset_of_tensors = fout.tell()
+        # the tensors we load could be split across multiple files
+        for part_id in range(n_parts):
+            fout.seek(offset_of_tensors)
+            print(f"Processing part {part_id+1} of {n_parts}\n")
+            # Derive each part path from the unmodified base path. Appending
+            # to fin_path itself accumulated suffixes ("x.1.2") and made the
+            # final replace target the wrong file.
+            part_path = fin_path if part_id == 0 else "%s.%d" % (fin_path, part_id)
+            with open(part_path, "rb") as fin:
+                read_tokens(fin, read_hparams(fin))
+                copy_tensors(fin, fout, part_id, n_parts)
+
+    # Swap the migrated file into place in one step.
+    os.replace(migrated_path, fin_path)
+
+    print(f"Done. Output file: {fin_path}\n")
--- a/api/src/serge/utils/security.py
+++ b/api/src/serge/utils/security.py
@ -1,56 +0,0 @@
-import base64
-import hashlib
-import os
-
-from datetime import datetime, timedelta
-from typing import Optional
-
-from fastapi import HTTPException, status
-from jose import JWTError, jwt
-from serge.models.settings import Settings
-
-ALGORITHM = "HS256"
-settings = Settings()
-
-credentials_exception = HTTPException(
-    status_code=status.HTTP_401_UNAUTHORIZED,
-    detail="Could not validate credentials",
-    headers={"WWW-Authenticate": "Bearer"},
-)
-
-
-def verify_password(plain_password: str, hashed_password: str) -> bool:
-    salt_and_hash = base64.b64decode(hashed_password.encode("utf-8"))
-    salt = salt_and_hash[:16]
-    stored_password = salt_and_hash[16:]
-    new_hashed_password = hashlib.scrypt(plain_password.encode("utf-8"), salt=salt, n=8192, r=8, p=1, dklen=64)
-    return new_hashed_password == stored_password
-
-
-def get_password_hash(password: str) -> str:
-    salt = os.urandom(16)
-    hashed_password = hashlib.scrypt(password.encode("utf-8"), salt=salt, n=8192, r=8, p=1, dklen=64)
-    salt_and_hash = salt + hashed_password
-    return base64.b64encode(salt_and_hash).decode("utf-8")
-
-
-def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
-    to_encode = data.copy()
-    if expires_delta:
-        expire = datetime.utcnow() + expires_delta
-    else:
-        expire = datetime.utcnow() + timedelta(minutes=settings.SERGE_SESSION_EXPIRY)
-    to_encode.update({"exp": expire})
-    encoded_jwt = jwt.encode(to_encode, settings.SERGE_JWT_SECRET, algorithm=ALGORITHM)
-    return encoded_jwt
-
-
-def decode_access_token(token: str):
-    try:
-        payload = jwt.decode(token, settings.SERGE_JWT_SECRET, algorithms=[ALGORITHM])
-        username: str = payload.get("sub")
-        if username is None:
-            raise credentials_exception
-        return username
-    except JWTError:
-        raise credentials_exception
--- a/api/src/serge/utils/stream.py
+++ b/api/src/serge/utils/stream.py
@ -1,8 +1,53 @@
 import re

+from typing import Any

+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.memory import RedisChatMessageHistory
+from langchain.schema import LLMResult
 from loguru import logger
+from redis import Redis
+
+
+# Not used yet. WIP
+class ChainRedisHandler(StreamingStdOutCallbackHandler):
+    """Callback handler for streaming. Only works with LLMs that support streaming.
+
+    Mirrors generation progress into the Redis key ``stream:<id>``: the key is
+    set to an empty string when the LLM starts and when it ends, every new
+    token is appended to it, and on error it is set to the error text.
+    """
+
+    def __init__(self, id: str):
+        """Connect to Redis on localhost:6379 and remember the stream id.
+
+        Args:
+            id: Identifier used to build the Redis stream key.
+        """
+        logger.debug(f"Setting up ChainRedisHandler with id {id}")
+        super().__init__()
+        self.id = id
+        self.client = Redis(host="localhost", port=6379, decode_responses=False)
+        logger.info(f"Connected to Redis? {self.client.ping()}")
+        logger.info(f"Stream key : {self.stream_key}")
+
+    @property
+    def stream_key(self):
+        """Redis key under which the streamed text is stored."""
+        return "stream:" + self.id
+
+    def on_llm_start(self, serialized: dict[str, Any], prompts: list[str], **kwargs: Any) -> None:
+        """Run when LLM starts running."""
+        super().on_llm_start(serialized, prompts, **kwargs)
+        logger.info("starting")
+        self.client.set(self.stream_key, "")
+
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Run on new LLM token. Only available when streaming is enabled."""
+        # Forward to the parent's token hook. The parent's on_llm_start was
+        # called here before, which is the wrong hook and also requires a
+        # `prompts` argument that is not available at this point.
+        super().on_llm_new_token(token, **kwargs)
+        logger.info(token)
+        self.client.append(self.stream_key, token)
+
+    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
+        """Run when LLM ends running."""
+        super().on_llm_end(response, **kwargs)
+        self.client.set(self.stream_key, "")
+
+    def on_llm_error(self, error: Exception | KeyboardInterrupt, **kwargs: Any) -> None:
+        """Run when LLM errors."""
+        super().on_llm_error(error, **kwargs)
+        self.client.set(self.stream_key, str(error))


 def get_prompt(history: RedisChatMessageHistory, params):
@ -51,7 +96,7 @@ def get_prompt(history: RedisChatMessageHistory, params):
            else:
                stop = True
        if len(next_prompt) > 0:
-            prompts.append(f"{instruction + next_prompt}\n")
+            prompts.append(instruction + next_prompt + "\n")
        if stop:
            break

@ -60,6 +105,6 @@ def get_prompt(history: RedisChatMessageHistory, params):
    for next_prompt in prompts:
        message_prompt += next_prompt

-    final_prompt = f"{params.init_prompt}\n{message_prompt[:params.n_ctx]}"
+    final_prompt = params.init_prompt + "\n" + message_prompt[: params.n_ctx]
    logger.debug(final_prompt)
    return final_prompt
--- a/api/test/healthcheck_models.py
+++ b/api/test/healthcheck_models.py
@ -1,36 +1,26 @@
 import json
 from pathlib import Path
-import requests
-from huggingface_hub import hf_hub_url
+
+import huggingface_hub
 import pytest
+import requests

-
-def load_model_data(file_path):
-    with open(file_path, "r") as models_file:
-        return json.load(models_file)
-
-
-def flatten_model_data(families):
-    for family in families:
-        for model in family["models"]:
-            for file in model["files"]:
-                yield model["repo"], file["filename"]
-
-
-def check_model_availability(repo, filename):
-    url = hf_hub_url(repo, filename, repo_type="model", revision="main")
-    response = requests.head(url)
-    if response.ok:
-        return True
-    else:
-        return False
-
+# this test file specifically doesn't start with test_* so it's not picked up by pytest

 test_dir = Path(__file__).parent
-model_data = load_model_data(test_dir.parent / "src/serge/data/models.json")
-checks = list(flatten_model_data(model_data))
+with open(test_dir.parent / "src/serge/data/models.json", "r") as models_file:
+    families = json.load(models_file)
+
+# generate list of checks
+checks = []
+for family in families:
+    for model in family["models"]:
+        for file in model["files"]:
+            checks.append((model["repo"], file["filename"]))


@pytest.mark.parametrize("repo,filename", checks)
 def test_model_available(repo, filename):
-    assert check_model_availability(repo, filename), f"Model {repo}/{filename} not available"
+    url = huggingface_hub.hf_hub_url(repo, filename, repo_type="model", revision="main")
+    r = requests.head(url)
+    assert r.ok, f"Model {repo}/{filename} not available"
--- a/charts/serge/values.yaml
+++ b/charts/serge/values.yaml
@ -9,7 +9,7 @@ image:
  image:
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
-  tag: "main"
+  tag: ""

 imagePullSecrets: []
 nameOverride: ""
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@ -1,19 +1,19 @@
 services:
  serge:
    restart: on-failure
-    build:
+    build: 
      context: .
      dockerfile: Dockerfile.dev
    volumes:
-      - ./web:/usr/src/app/web/:z
-      - ./api:/usr/src/app/api/:z
+      - ./web:/usr/src/app/web/
+      - ./api:/usr/src/app/api/
      - datadb:/data/db
      - weights:/usr/src/app/weights/
      - /etc/localtime:/etc/localtime:ro
    ports:
-      - 8008:8008
-      - 9124:9124
-      - 5678:5678
+      - "8008:8008"
+      - "9124:9124"
+
 volumes:
  datadb:
  weights:
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@ -1,36 +1,6 @@
 #!/bin/bash

 set -x
-source serge.env
-
-# Get CPU Architecture
-cpu_arch=$(uname -m)
-
-# Function to detect CPU features
-detect_cpu_features() {
-	cpu_info=$(lscpu)
-	if echo "$cpu_info" | grep -q "avx512"; then
-		echo "AVX512"
-	elif echo "$cpu_info" | grep -q "avx2"; then
-		echo "AVX2"
-	elif echo "$cpu_info" | grep -q "avx"; then
-		echo "AVX"
-	else
-		echo "basic"
-	fi
-}
-
-# Check if the CPU architecture is aarch64/arm64
-if [ "$cpu_arch" = "aarch64" ] || [ "$cpu_arch" = "arm64" ]; then
-	pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
-else
-	# Use @smartappli provided wheels
-	#cpu_feature=$(detect_cpu_features)
-	#pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu-$cpu_feature/"
-	pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
-fi
-
-echo "Recommended install command for llama-cpp-python: $pip_command"

 # Handle termination signals
 _term() {
@ -40,7 +10,7 @@ _term() {
 }

 # Install python bindings
-eval "$pip_command" || {
+pip install llama-cpp-python==0.1.78 || {
 	echo 'Failed to install llama-cpp-python'
 	exit 1
 }
@ -51,18 +21,10 @@ redis_process=$!

 # Start the API
 cd /usr/src/app/api || exit 1
-hypercorn_cmd="hypercorn src.serge.main:app --bind 0.0.0.0:8008"
-if [ "$SERGE_ENABLE_IPV6" = true ] && [ "$SERGE_ENABLE_IPV4" != true ]; then
-	hypercorn_cmd="hypercorn src.serge.main:app --bind [::]:8008"
-elif [ "$SERGE_ENABLE_IPV4" = true ] && [ "$SERGE_ENABLE_IPV6" = true ]; then
-	hypercorn_cmd="hypercorn src.serge.main:app --bind 0.0.0.0:8008 --bind [::]:8008"
-fi
-
-$hypercorn_cmd || {
+uvicorn src.serge.main:app --host 0.0.0.0 --port 8008 || {
 	echo 'Failed to start main app'
 	exit 1
 } &
-
 serge_process=$!

 # Set up a signal trap and wait for processes to finish
--- a/scripts/dev.sh
+++ b/scripts/dev.sh
@ -1,42 +1,6 @@
 #!/bin/bash

 set -x
-source serge.env
-
-# Get CPU Architecture
-cpu_arch=$(uname -m)
-
-# Function to detect CPU features
-detect_cpu_features() {
-	cpu_info=$(lscpu)
-	if echo "$cpu_info" | grep -q "avx512"; then
-		echo "AVX512"
-	elif echo "$cpu_info" | grep -q "avx2"; then
-		echo "AVX2"
-	elif echo "$cpu_info" | grep -q "avx"; then
-		echo "AVX"
-	else
-		echo "basic"
-	fi
-}
-
-# Check if the CPU architecture is aarch64/arm64
-if [ "$cpu_arch" = "aarch64" ] || [ "$cpu_arch" = "arm64" ]; then
-	pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
-else
-	# Use @smartappli provided wheels
-	#cpu_feature=$(detect_cpu_features)
-	#pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu-$cpu_feature/"
-	pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://abetlen.github.io/llama-cpp-python/whl/cpu/"
-fi
-
-echo "Recommended install command for llama-cpp-python: $pip_command"
-
-# Install python vendor dependencies
-pip install -r /usr/src/app/requirements.txt || {
-	echo 'Failed to install python dependencies from requirements.txt'
-	exit 1
-}

 # Install python dependencies
 pip install -e ./api || {
@ -45,7 +9,7 @@ pip install -e ./api || {
 }

 # Install python bindings
-eval "$pip_command" || {
+pip install llama-cpp-python==0.1.78 || {
 	echo 'Failed to install llama-cpp-python'
 	exit 1
 }
@ -57,18 +21,9 @@ redis-server /etc/redis/redis.conf &
 cd /usr/src/app/web || exit 1
 npm run dev -- --host 0.0.0.0 --port 8008 &

-python -m pip install debugpy -t /tmp
-
 # Start the API
 cd /usr/src/app/api || exit 1
-hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind 0.0.0.0:9124"
-if [ "$SERGE_ENABLE_IPV6" = true ] && [ "$SERGE_ENABLE_IPV4" != true ]; then
-	hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind [::]:9124"
-elif [ "$SERGE_ENABLE_IPV4" = true ] && [ "$SERGE_ENABLE_IPV6" = true ]; then
-	hypercorn_cmd="python /tmp/debugpy --listen 0.0.0.0:5678 -m hypercorn src.serge.main:api_app --reload --bind 0.0.0.0:9124 --bind [::]:9124"
-fi
-
-$hypercorn_cmd || {
+uvicorn src.serge.main:api_app --reload --host 0.0.0.0 --port 9124 --root-path /api/ || {
 	echo 'Failed to start main app'
 	exit 1
 }
--- a/scripts/serge.env
+++ b/scripts/serge.env
@ -1,3 +0,0 @@
-LLAMA_PYTHON_VERSION=0.2.87
-SERGE_ENABLE_IPV4=true
-SERGE_ENABLE_IPV6=false
--- a/vendor/requirements.txt
+++ b/vendor/requirements.txt
@ -1,3 +0,0 @@
-typing-extensions>=4.12.2
-numpy>=1.26.0,<2.0.0
-diskcache>=5.6.3
--- a/web/package-lock.json
+++ b/web/package-lock.json
--- a/web/package.json
+++ b/web/package.json
@ -12,41 +12,39 @@
    "format": "prettier --write ."
  },
  "devDependencies": {
-    "@sveltejs/adapter-auto": "^3.2.2",
-    "@sveltejs/adapter-node": "^5.2.0",
-    "@sveltejs/adapter-static": "^3.0.2",
-    "@sveltejs/kit": "^2.5.20",
-    "@sveltejs/vite-plugin-svelte": "^3.1.1",
-    "@types/markdown-it": "^14.1.2",
-    "@typescript-eslint/eslint-plugin": "^7.17.0",
-    "@typescript-eslint/parser": "^7.18.0",
-    "autoprefixer": "^10.4.20",
-    "eslint": "^8.57.0",
-    "eslint-config-prettier": "^9.1.0",
-    "eslint-plugin-import": "^2.29.1",
-    "eslint-plugin-prettier": "^5.2.1",
-    "eslint-plugin-svelte": "^2.43.0",
-    "eslint-plugin-vue": "^9.27.0",
-    "postcss": "^8.4.40",
-    "prettier": "3.3.3",
-    "prettier-plugin-svelte": "^3.2.6",
-    "svelte": "^4.2.18",
-    "svelte-check": "^3.8.5",
-    "tailwindcss": "^3.4.7",
-    "tslib": "^2.6.3",
-    "typescript": "^5.5.4",
-    "vite": "^5.4.1"
+    "@sveltejs/adapter-auto": "^2.1.0",
+    "@sveltejs/adapter-node": "^1.3.1",
+    "@sveltejs/adapter-static": "^2.0.3",
+    "@sveltejs/kit": "^1.25.0",
+    "@types/markdown-it": "^13.0.1",
+    "@typescript-eslint/eslint-plugin": "^6.7.2",
+    "@typescript-eslint/parser": "^6.7.2",
+    "autoprefixer": "^10.4.15",
+    "eslint": "^8.49.0",
+    "eslint-config-prettier": "^9.0.0",
+    "eslint-plugin-import": "^2.28.1",
+    "eslint-plugin-prettier": "^5.0.0",
+    "eslint-plugin-svelte": "^2.33.1",
+    "eslint-plugin-vue": "^9.17.0",
+    "postcss": "^8.4.30",
+    "prettier": "3.0.3",
+    "svelte": "^4.2.0",
+    "svelte-check": "^3.5.1",
+    "tailwindcss": "^3.3.3",
+    "tslib": "^2.6.2",
+    "typescript": "^5.2.2",
+    "vite": "^4.4.9"
  },
  "type": "module",
  "dependencies": {
-    "@iconify/svelte": "^4.0.2",
+    "@iconify/svelte": "^3.1.4",
    "@sveltestack/svelte-query": "^1.6.0",
    "clipboard": "^2.0.11",
-    "daisyui": "^4.12.10",
-    "highlight.js": "^11.10.0",
-    "ioredis": "^5.4.1",
-    "markdown-it": "^14.1.0",
-    "markdown-it-highlightjs": "^4.1.0",
-    "prettier-plugin-tailwindcss": "^0.6.5"
+    "daisyui": "^3.7.5",
+    "highlight.js": "^11.8.0",
+    "markdown-it": "^13.0.1",
+    "markdown-it-highlightjs": "^4.0.1",
+    "prettier-plugin-svelte": "^3.0.3",
+    "prettier-plugin-tailwindcss": "^0.5.4"
  }
 }
--- a/web/src/app.css
+++ b/web/src/app.css
@ -18,7 +18,7 @@
  width: auto;
 }

-markdown .hljs {
+/* Override highlight.js' own background for code rendered inside <markdown>. */
+markdown .hljs {
  background: hsl(var(--b3)) !important;
 }

@ -85,93 +85,3 @@ markdown .hljs {
 .ie-edge-no-scrollbar {
  -ms-overflow-style: none;
 }
-
-/* Models Grid Layout */
-.models-grid {
-  display: grid;
-  grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
-  gap: 25px;
-  padding-left: 80px;
-  padding-right: 40px;
-  padding-top: 40px;
-  padding-bottom: 10px;
-}
-
-/* Model Accordion Styles */
-.model-accordion {
-  border-radius: 8px;
-  box-shadow: 0 5px 8px rgba(0, 0, 0, 0.1);
-  overflow: hidden;
-  background-color: "bg-base-200";
-}
-
-.model-accordion button {
-  width: 100%;
-  padding: 15px;
-  text-align: left;
-  border: none;
-  outline: none;
-  transition: background-color 0.3s ease;
-  cursor: pointer;
-}
-
-.model-details {
-  padding: 10px;
-  border-top: 1px solid #ddd;
-}
-
-.model-details p {
-  margin: 10px 0;
-}
-
-.top-section {
-  display: flex;
-  flex-direction: column;
-  align-items: center;
-  justify-content: center;
-  margin-bottom: 20px;
-  padding-top: 10px;
-}
-
-.search-row {
-  position: fixed;
-  top: 5px;
-  left: 0;
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  width: 100%;
-  padding-left: 80px;
-  padding-right: 40px;
-  padding-bottom: 0px;
-}
-
-main {
-  max-width: 600px;
-  margin: 0 auto;
-  padding: 1rem;
-}
-
-form {
-  display: flex;
-  flex-direction: column;
-}
-
-div {
-  margin-bottom: 1rem;
-}
-
-label {
-  display: block;
-  margin-bottom: 0.5rem;
-}
-
-input {
-  width: 100%;
-  padding: 0.5rem;
-  box-sizing: border-box;
-}
-
-button {
-  padding: 0.5rem 1rem;
-}
--- a/web/src/lib/components/models/RefreshModal.svelte
+++ b/web/src/lib/components/models/RefreshModal.svelte
@ -0,0 +1,57 @@
+<script lang="ts">
+  import { invalidate, invalidateAll } from "$app/navigation";
+
+  let dialogTag: HTMLDialogElement;
+  let isLoading = false;
+
+  let link =
+    "https://raw.githubusercontent.com/serge-chat/serge/main/api/src/serge/data/models.json";
+
+  // Submit handler for the refresh form: POSTs the form fields (the `url`
+  // input) to /api/model/refresh. On success it invalidates /api/model/all so
+  // dependent loads re-run, then closes the dialog. Failures are only logged.
+  const handleRefresh = async (e: Event) => {
+    isLoading = true;
+    try {
+      const r = await fetch("/api/model/refresh", {
+        method: "POST",
+        body: new FormData(e.target as HTMLFormElement),
+      });

+      if (r.ok) {
+        await invalidate("/api/model/all");
+        dialogTag.close();
+      } else {
+        console.error("Error refreshing models");
+      }
+    } catch (err) {
+      // fetch() rejects on network-level failures; keep the dialog open.
+      console.error("Error refreshing models", err);
+    } finally {
+      // Always re-enable the submit button, even when the request threw.
+      isLoading = false;
+    }
+  };
+</script>
+
+<button class="btn-outline btn" on:click={() => dialogTag.showModal()}
+  >Refresh Models</button
+>
+<dialog bind:this={dialogTag} class="modal">
+  <form method="dialog" class="modal-box">
+    <button class="btn-ghost btn-sm btn-circle btn absolute right-2 top-2"
+      >✕</button
+    >
+    <form on:submit|preventDefault={handleRefresh}>
+      <h3 class="text-lg font-bold">Model refresh</h3>
+      <p class="py-4">
+        Enter the URL of the JSON file containing the models below
+      </p>
+      <input
+        type="text"
+        name="url"
+        class="input-bordered input-primary input mb-4 w-full"
+        bind:value={link}
+      />
+      <div class="modal-action">
+        <!-- if there is a button in form, it will close the modal -->
+        <button type="submit" class="btn" disabled={isLoading}>
+          {#if isLoading}
+            <span class="loading loading-spinner" />
+          {/if}
+          Refresh
+        </button>
+      </div>
+    </form>
+  </form>
+</dialog>
--- a/web/src/lib/stores.ts
+++ b/web/src/lib/stores.ts
@ -2,6 +2,8 @@ import { writable, type Writable } from "svelte/store";

 const themeStore = writable("dark");

+const barVisible = writable(true);
+
 const newChat: Writable<object | null> = writable(null);

-export { newChat, themeStore };
+export { barVisible, newChat, themeStore };
--- a/web/src/routes/+layout.svelte
+++ b/web/src/routes/+layout.svelte
@ -1,52 +1,30 @@
 <script lang="ts">
  import "../app.css";
-  import type { PageData } from "./$types";
+  import type { LayoutData } from "./$types";
  import { invalidate, goto } from "$app/navigation";
  import { onMount, onDestroy } from "svelte";
  import { page } from "$app/stores";
-  import { newChat, themeStore } from "$lib/stores.js";
+  import { barVisible, newChat, themeStore } from "$lib/stores.js";
  import { fly } from "svelte/transition";
-  export let data: PageData;
+  export let data: LayoutData;

-  export let isSidebarOpen: boolean = true;
-
-  let models;
-  let modelAvailable: boolean;
  const isLoading = false;
-
  let deleteConfirm = false;
  let deleteAllConfirm = false;
  let theme: string;
+  let bar_visible: boolean;
  let dataCht: Response | any = null;
-  const unsubscribe = newChat.subscribe((value) => (dataCht = value));
-
-  function toggleSidebar(): void {
-    isSidebarOpen = !isSidebarOpen;
-  }
-
-  function hideSidebar(): void {
-    isSidebarOpen = false;
-  }
+  const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
+  const unsubscribe1 = newChat.subscribe((value) => (dataCht = value));

  onMount(() => {
+    bar_visible = window.innerWidth > 768;
+    barVisible.set(bar_visible);
    theme = localStorage.getItem("data-theme") || "dark";
    document.documentElement.setAttribute("data-theme", theme);
  });

-  $: if (data && data.models) {
-    models = data.models.filter((el) => el.available);
-    modelAvailable = models.length > 0;
-  } else {
-    models = [];
-    modelAvailable = false;
-  }
-
  $: id = $page.params.id || "";
-
-  async function goToHome() {
-    await goto("/");
-  }
-
  async function deleteChat(chatID: string) {
    const response = await fetch("/api/chat/" + chatID, { method: "DELETE" });
    if (response.status === 200) {
@ -115,147 +93,159 @@
    localStorage.setItem("data-theme", $themeStore);
  }

+  // Flip the sidebar's visibility flag and publish the new value through the
+  // barVisible store.
+  function toggleBar() {
+    bar_visible = !bar_visible;
+    barVisible.set(bar_visible);
+  }
+
+  // Create a new chat that reuses the model and generation parameters of the
+  // chat currently held in `dataCht`, then navigate to it. Does nothing when
+  // no chat is loaded.
+  async function createSameSession() {
+    if (dataCht) {
+      // URLSearchParams percent-encodes every value, so prompts or model paths
+      // containing characters such as "&", "#", "+" or newlines no longer
+      // corrupt the query string.
+      const query = new URLSearchParams({
+        model: dataCht.params.model_path,
+        temperature: dataCht.params.temperature,
+        top_k: dataCht.params.top_k,
+        top_p: dataCht.params.top_p,
+        max_length: dataCht.params.max_tokens,
+        context_window: dataCht.params.n_ctx,
+        repeat_last_n: dataCht.params.last_n_tokens_size,
+        repeat_penalty: dataCht.params.repeat_penalty,
+        // The first history entry supplies the initial prompt.
+        init_prompt: dataCht.history[0].data.content,
+        gpu_layers: dataCht.params.n_gpu_layers,
+      });
+      const newData = await fetch("/api/chat/?" + query.toString(), {
+        method: "POST",
+        headers: {
+          accept: "application/json",
+        },
+      }).then((response) => response.json());
+      // Refresh the chat list before jumping to the new chat.
+      await invalidate("/api/chat/");
+      await goto("/chat/" + newData);
+    }
+  }
+
  onDestroy(() => {
-    unsubscribe;
+    // subscribe() returns a stop function; it must be called. A bare
+    // reference is a no-op and leaves the store subscription alive.
+    unsubscribe();
+    unsubscribe1();
  });
 </script>

-<button
-  on:click={toggleSidebar}
-  class="border-base-content/[.2] btn btn-square z-10 my-1 mx-2 fixed border"
->
-  <svg
-    xmlns="http://www.w3.org/2000/svg"
-    fill="none"
-    viewBox="0 0 24 24"
-    class="inline-block w-5 h-5 stroke-current"
-    ><path
-      stroke-linecap="round"
-      stroke-linejoin="round"
-      stroke-width="2"
-      d="M4 6h16M4 12h16M4 18h16"
-    ></path></svg
-  >
-</button>
-
 <aside
-  class="border-base-content/[.2] fixed top-0 z-40 min-h-full border-r transition-all overflow-hidden aria-label=Sidebar"
-  class:left-0={isSidebarOpen}
-  class:-left-80={!isSidebarOpen}
+  id="default-sidebar"
+  class={"border-base-content/[.2] fixed left-0 top-0 z-40 h-screen w-80 -translate-x-full border-r transition-transform overflow-hidden" +
+    (bar_visible ? " translate-x-0" : "")}
+  aria-label="Sidebar"
 >
  <div
    class="bg-base-200 relative h-screen py-1 px-2 overflow-hidden flex flex-col items-center justify-between"
  >
-    <div class="w-full flex items-center pb-1">
+    <div
+      class="w-full flex items-center justify-between border-b border-base-content/[.2] pb-1"
+    >
      <button
-        on:click={toggleSidebar}
-        class="border-base-content/[.2] btn btn-square border"
-      >
-        <svg
-          xmlns="http://www.w3.org/2000/svg"
-          fill="none"
-          viewBox="0 0 24 24"
-          class="inline-block w-5 h-5 stroke-current"
-          ><path
-            stroke-linecap="round"
-            stroke-linejoin="round"
-            stroke-width="2"
-            d="M4 6h16M4 12h16M4 18h16"
-          ></path></svg
-        >
-      </button>
-      <button
-        disabled={isLoading || !modelAvailable}
-        class="btn btn-ghost flex-grow h-6 font-semibold text-left text-sm capitalize"
+        disabled={isLoading}
+        class="btn btn-ghost h-6 w-4/5 justify-between font-semibold text-left text-sm capitalize"
        class:loading={isLoading}
-        on:click|preventDefault={() => goto("/")}
-        style="justify-content: flex-start;"
+        on:click|preventDefault={() => createSameSession()}
      >
+        <span>New chat</span>
        <svg
          xmlns="http://www.w3.org/2000/svg"
          viewBox="0 0 16 16"
          fill="currentColor"
-          class="w-4 h-4 mr-2"
+          class="w-4 h-4"
        >
          <path
            d="M1 2.75C1 1.784 1.784 1 2.75 1h10.5c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 13.25 12H9.06l-2.573 2.573A1.458 1.458 0 0 1 4 13.543V12H2.75A1.75 1.75 0 0 1 1 10.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h4.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"
          >
          </path>
        </svg>
-        <span>New Chat</span>
      </button>
-
-      <button class="btn btn-ghost flex-shrink-0" on:click={goToHome}>
+      <button
+        class="btn btn-ghost flex h-6 w-1/6 items-center justify-center font-semibold z-40"
+        on:click={toggleBar}
+      >
        <svg
          xmlns="http://www.w3.org/2000/svg"
          viewBox="0 0 24 24"
          fill="currentColor"
-          class="w-5 h-5"
+          class="w-6 h-6"
        >
-          <path d="M10 20v-6h4v6h5v-8h3L12 3 2 12h3v8z" />
+          <path
+            d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
+          >
+          </path>
+          <path
+            d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
+          >
+          </path>
        </svg>
-        <span class="sr-only">Home</span>
      </button>
    </div>
    <ul
-      class="my-1 w-full flex-grow overflow-y-auto no-scrollbar firefox-no-scrollbar ie-edge-no-scrollbar"
+      class="my-1 w-full h-[85%] overflow-y-auto no-scrollbar firefox-no-scrollbar ie-edge-no-scrollbar"
    >
-      {#if data && Symbol.iterator in Object(data.chats)}
-        {#each data.chats as chat (chat.id)}
-          <li in:fly={{ x: -100, duration: 900 }}>
-            <a
-              href={"/chat/" + chat.id}
-              class="group hover:from-base-100 hover:text-base-content flex items-center rounded-lg py-2 pl-2 text-base font-normal hover:bg-gradient-to-r hover:to-transparent"
-              class:bg-base-300={id === chat.id}
-            >
-              <div
-                class="flex w-full flex-col space-y-2 p-2 border-b border-gray-200 relative"
-              >
+      {#each data.chats as chat (chat.id)}
+        <li in:fly={{ x: -100, duration: 900 }}>
+          <a
+            href={"/chat/" + chat.id}
+            class="group hover:from-base-100 hover:text-base-content flex items-center rounded-lg py-2 pl-2 text-base font-normal hover:bg-gradient-to-r hover:to-transparent"
+            class:bg-base-300={id === chat.id}
+          >
+            <div class="flex w-full flex-col">
+              <div class="flex w-full flex-col items-start justify-start">
                <div
-                  class="flex w-full flex-col items-start justify-start space-y-1"
+                  class="relative flex w-full flex-row items-center justify-between"
                >
-                  <div
-                    class="flex w-full flex-row items-center justify-between"
-                  >
-                    <div class="flex flex-col space-y-1.5">
-                      <p class="text-sm font-light max-w-[25ch] break-words">
-                        {truncate(chat.subtitle, 100)}
-                      </p>
-                      <span
-                        class="text-xs font-semibold max-w-[25ch] break-words"
-                        >{chat.model}</span
-                      >
-                      <span class="text-xs"
-                        >{timeSince(chat.created) + " ago"}</span
-                      >
-                    </div>
+                  <div class="flex flex-col">
+                    <p class="text-sm font-light">
+                      {truncate(chat.subtitle, 42)}
+                    </p>
+                    <span class="text-xs font-semibold">{chat.model}</span>
+                    <span class="text-xs"
+                      >{timeSince(chat.created) + " ago"}</span
+                    >
                  </div>
-                </div>
-                <div
-                  class="absolute bottom-1.5 right-2 opacity-0 group-hover:opacity-100 transition-opacity duration-300"
-                >
-                  {#if deleteConfirm}
-                    <div class="flex flex-row items-center space-x-2">
-                      <button
-                        name="confirm-delete"
-                        class="btn btn-sm btn"
-                        on:click|preventDefault={() => deleteChat(chat.id)}
-                      >
-                        <svg
-                          xmlns="http://www.w3.org/2000/svg"
-                          viewBox="0 0 16 16"
-                          width="16"
-                          height="16"
+                  <div
+                    class="absolute right-0 opacity-0 group-hover:opacity-100 transition"
+                  >
+                    <!-- {#if $page.params.id === chat.id} -->
+                    {#if deleteConfirm}
+                      <div class="flex flex-row items-center">
+                        <button
+                          name="confirm-delete"
+                          class="btn-ghost btn-sm btn"
+                          on:click|preventDefault={() => deleteChat(chat.id)}
                        >
-                          <path
-                            class="fill-base-content"
-                            d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
-                          />
-                        </svg>
-                      </button>
+                          <svg
+                            xmlns="http://www.w3.org/2000/svg"
+                            viewBox="0 0 16 16"
+                            width="16"
+                            height="16"
+                          >
+                            <path
+                              class="fill-base-content"
+                              d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
+                            />
+                          </svg>
+                        </button>
+                        <button
+                          name="cancel-delete"
+                          class="btn-ghost btn-sm btn"
+                          on:click|preventDefault={toggleDeleteConfirm}
+                        >
+                          <svg
+                            xmlns="http://www.w3.org/2000/svg"
+                            viewBox="0 0 16 16"
+                            width="16"
+                            height="16"
+                          >
+                            <path
+                              class="fill-base-content"
+                              d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
+                            />
+                          </svg>
+                        </button>
+                      </div>
+                    {:else}
                      <button
-                        name="cancel-delete"
-                        class="btn btn-sm btn"
+                        class="btn-ghost btn-sm btn"
                        on:click|preventDefault={toggleDeleteConfirm}
                      >
                        <svg
@ -266,189 +256,26 @@
                        >
                          <path
                            class="fill-base-content"
-                            d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
+                            d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
                          />
                        </svg>
                      </button>
-                    </div>
-                  {:else}
-                    <button
-                      class="btn btn-sm btn"
-                      on:click|preventDefault={toggleDeleteConfirm}
-                    >
-                      <svg
-                        xmlns="http://www.w3.org/2000/svg"
-                        viewBox="0 0 16 16"
-                        width="16"
-                        height="16"
-                      >
-                        <path
-                          class="fill-base-content"
-                          d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
-                        />
-                      </svg>
-                    </button>
-                  {/if}
+                    {/if}
+                    <!-- {/if} -->
+                  </div>
                </div>
              </div>
-            </a>
-          </li>
-        {/each}
-      {/if}
+            </div>
+          </a>
+        </li>
+      {/each}
    </ul>
    <div class="w-full border-t border-base-content/[.2] pt-1">
-      {#if data.userData?.username === "system"}
+      {#if deleteAllConfirm}
        <button
-          name="login-btn"
-          class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
-          on:click={() => goto("/login")}
+          class="btn btn-ghost w-full flex flex-row justify-between items-center p-2.5 text-left text-sm capitalize"
        >
-          <svg
-            xmlns="http://www.w3.org/2000/svg"
-            width="18"
-            height="18"
-            fill="currentColor"
-            class="mr-3"
-            viewBox="0 0 16 16"
-          >
-            <path
-              d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m1.679-4.493-1.335 2.226a.75.75 0 0 1-1.174.144l-.774-.773a.5.5 0 0 1 .708-.708l.547.548 1.17-1.951a.5.5 0 1 1 .858.514M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
-            />
-            <path
-              d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
-            />
-          </svg>
-          <span>Login</span>
-        </button>
-        <button
-          name="create-btn"
-          class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
-          on:click={() => goto("/signup")}
-        >
-          <svg
-            xmlns="http://www.w3.org/2000/svg"
-            width="18"
-            height="18"
-            fill="currentColor"
-            class="mr-3"
-            viewBox="0 0 16 16"
-          >
-            <path
-              d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m.5-5v1h1a.5.5 0 0 1 0 1h-1v1a.5.5 0 0 1-1 0v-1h-1a.5.5 0 0 1 0-1h1v-1a.5.5 0 0 1 1 0m-2-6a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
-            />
-            <path
-              d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
-            />
-          </svg>
-          <span>Create Account</span>
-        </button>
-      {:else}
-        <button
-          name="logout-btn"
-          class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
-          on:click={async () => {
-            const response = await fetch("/api/auth/logout", {
-              method: "POST",
-            });
-            data.userData = null;
-            window.location.href = "/";
-          }}
-        >
-          <svg
-            xmlns="http://www.w3.org/2000/svg"
-            width="18"
-            height="18"
-            fill="currentColor"
-            class="mr-3"
-            viewBox="0 0 16 16"
-          >
-            <path
-              d="M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4m0 5.996V14H3s-1 0-1-1 1-4 6-4q.845.002 1.544.107a4.5 4.5 0 0 0-.803.918A11 11 0 0 0 8 10c-2.29 0-3.516.68-4.168 1.332-.678.678-.83 1.418-.832 1.664zM9 13a1 1 0 0 1 1-1v-1a2 2 0 1 1 4 0v1a1 1 0 0 1 1 1v2a1 1 0 0 1-1 1h-4a1 1 0 0 1-1-1zm3-3a1 1 0 0 0-1 1v1h2v-1a1 1 0 0 0-1-1"
-            />
-          </svg>
-          <span>Log Out</span>
-        </button>
-        <a
-          href="/account"
-          class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
-        >
-          <svg
-            xmlns="http://www.w3.org/2000/svg"
-            viewBox="0 0 16 16"
-            width="18"
-            height="18"
-            fill="currentColor"
-            class="mr-3"
-          >
-            <path
-              d="M8 0a8.2 8.2 0 0 1 .701.031C9.444.095 9.99.645 10.16 1.29l.288 1.107c.018.066.079.158.212.224.231.114.454.243.668.386.123.082.233.09.299.071l1.103-.303c.644-.176 1.392.021 1.82.63.27.385.506.792.704 1.218.315.675.111 1.422-.364 1.891l-.814.806c-.049.048-.098.147-.088.294.016.257.016.515 0 .772-.01.147.038.246.088.294l.814.806c.475.469.679 1.216.364 1.891a7.977 7.977 0 0 1-.704 1.217c-.428.61-1.176.807-1.82.63l-1.102-.302c-.067-.019-.177-.011-.3.071a5.909 5.909 0 0 1-.668.386c-.133.066-.194.158-.211.224l-.29 1.106c-.168.646-.715 1.196-1.458 1.26a8.006 8.006 0 0 1-1.402 0c-.743-.064-1.289-.614-1.458-1.26l-.289-1.106c-.018-.066-.079-.158-.212-.224a5.738 5.738 0 0 1-.668-.386c-.123-.082-.233-.09-.299-.071l-1.103.303c-.644.176-1.392-.021-1.82-.63a8.12 8.12 0 0 1-.704-1.218c-.315-.675-.111-1.422.363-1.891l.815-.806c.05-.048.098-.147.088-.294a6.214 6.214 0 0 1 0-.772c.01-.147-.038-.246-.088-.294l-.815-.806C.635 6.045.431 5.298.746 4.623a7.92 7.92 0 0 1 .704-1.217c.428-.61 1.176-.807 1.82-.63l1.102.302c.067.019.177.011.3-.071.214-.143.437-.272.668-.386.133-.066.194-.158.211-.224l.29-1.106C6.009.645 6.556.095 7.299.03 7.53.01 7.764 0 8 0Zm-.571 1.525c-.036.003-.108.036-.137.146l-.289 1.105c-.147.561-.549.967-.998 1.189-.173.086-.34.183-.5.29-.417.278-.97.423-1.529.27l-1.103-.303c-.109-.03-.175.016-.195.045-.22.312-.412.644-.573.99-.014.031-.021.11.059.19l.815.806c.411.406.562.957.53 1.456a4.709 4.709 0 0 0 0 .582c.032.499-.119 1.05-.53 1.456l-.815.806c-.081.08-.073.159-.059.19.162.346.353.677.573.989.02.03.085.076.195.046l1.102-.303c.56-.153 1.113-.008 1.53.27.161.107.328.204.501.29.447.222.85.629.997 1.189l.289 1.105c.029.109.101.143.137.146a6.6 6.6 0 0 0 1.142 0c.036-.003.108-.036.137-.146l.289-1.105c.147-.561.549-.967.998-1.189.173-.086.34-.183.5-.29.417-.278.97-.423 1.529-.27l1.103.303c.109.029.175-.016.195-.045.22-.313.411-.644.573-.99.014-.031.021-.11-.059-.19l-.815-.806c-.411-.406-.562-.957-.53-1.456a4.709 4.709 0 0 0 
0-.582c-.032-.499.119-1.05.53-1.456l.815-.806c.081-.08.073-.159.059-.19a6.464 6.464 0 0 0-.573-.989c-.02-.03-.085-.076-.195-.046l-1.102.303c-.56.153-1.113.008-1.53-.27a4.44 4.44 0 0 0-.501-.29c-.447-.222-.85-.629-.997-1.189l-.289-1.105c-.029-.11-.101-.143-.137-.146a6.6 6.6 0 0 0-1.142 0ZM11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0ZM9.5 8a1.5 1.5 0 1 0-3.001.001A1.5 1.5 0 0 0 9.5 8Z"
-            >
-            </path>
-          </svg>
-          <span>Settings</span>
-        </a>
-        {#if deleteAllConfirm}
-          <button
-            class="btn btn-ghost w-full flex flex-row justify-between items-center p-2.5 text-left text-sm capitalize"
-          >
-            <div class="h-6 flex flex-row items-center">
-              <svg
-                xmlns="http://www.w3.org/2000/svg"
-                viewBox="0 0 16 16"
-                width="18"
-                height="18"
-                fill="currentColor"
-                class="mr-3"
-              >
-                <path
-                  d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
-                >
-                </path>
-              </svg>
-              <span>Clear Chats</span>
-            </div>
-            <div class="h-6 flex flex-row items-center">
-              <button
-                name="confirm-delete"
-                class="btn-ghost btn-sm btn"
-                on:click|preventDefault={() => deleteAllChat()}
-              >
-                <svg
-                  xmlns="http://www.w3.org/2000/svg"
-                  viewBox="0 0 16 16"
-                  width="16"
-                  height="16"
-                >
-                  <path
-                    class="fill-base-content"
-                    d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
-                  />
-                </svg>
-              </button>
-              <button
-                name="cancel-delete"
-                class="btn-ghost btn-sm btn"
-                on:click|preventDefault={toggleDeleteAllConfirm}
-              >
-                <svg
-                  xmlns="http://www.w3.org/2000/svg"
-                  viewBox="0 0 16 16"
-                  width="16"
-                  height="16"
-                >
-                  <path
-                    class="fill-base-content"
-                    d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
-                  />
-                </svg>
-              </button>
-            </div>
-          </button>
-        {:else}
-          <button
-            on:click|preventDefault={toggleDeleteAllConfirm}
-            class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
-          >
+          <div class="h-6 flex flex-row items-center">
            <svg
              xmlns="http://www.w3.org/2000/svg"
              viewBox="0 0 16 16"
@ -463,14 +290,71 @@
              </path>
            </svg>
            <span>Clear Chats</span>
-          </button>
-        {/if}
+          </div>
+          <div class="h-6 flex flex-row items-center">
+            <button
+              name="confirm-delete"
+              class="btn-ghost btn-sm btn"
+              on:click|preventDefault={() => deleteAllChat()}
+            >
+              <svg
+                xmlns="http://www.w3.org/2000/svg"
+                viewBox="0 0 16 16"
+                width="16"
+                height="16"
+              >
+                <path
+                  class="fill-base-content"
+                  d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8Zm1.5 0a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm10.28-1.72-4.5 4.5a.75.75 0 0 1-1.06 0l-2-2a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018l1.47 1.47 3.97-3.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
+                />
+              </svg>
+            </button>
+            <button
+              name="cancel-delete"
+              class="btn-ghost btn-sm btn"
+              on:click|preventDefault={toggleDeleteAllConfirm}
+            >
+              <svg
+                xmlns="http://www.w3.org/2000/svg"
+                viewBox="0 0 16 16"
+                width="16"
+                height="16"
+              >
+                <path
+                  class="fill-base-content"
+                  d="M2.344 2.343h-.001a8 8 0 0 1 11.314 11.314A8.002 8.002 0 0 1 .234 10.089a8 8 0 0 1 2.11-7.746Zm1.06 10.253a6.5 6.5 0 1 0 9.108-9.275 6.5 6.5 0 0 0-9.108 9.275ZM6.03 4.97 8 6.94l1.97-1.97a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l1.97 1.97a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-1.97 1.97a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 4.97 6.03a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018Z"
+                />
+              </svg>
+            </button>
+          </div>
+        </button>
+      {:else}
+        <button
+          on:click|preventDefault={toggleDeleteAllConfirm}
+          class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
+        >
+          <svg
+            xmlns="http://www.w3.org/2000/svg"
+            viewBox="0 0 16 16"
+            width="18"
+            height="18"
+            fill="currentColor"
+            class="mr-3"
+          >
+            <path
+              d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
+            >
+            </path>
+          </svg>
+          <span>Clear Chats</span>
+        </button>
      {/if}
      <button
        on:click={toggleTheme}
        class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
      >
-        <label class="swap swap-rotate" for="theme-toggle">
+        <label class="swap swap-rotate">
+          <input type="checkbox" />
          <svg
            xmlns="http://www.w3.org/2000/svg"
            viewBox="0 0 16 16"
@ -500,12 +384,31 @@
        </label>
        <span>{theme == "dark" ? "Light" : "Dark"} theme</span>
      </button>
+      <a
+        href="/"
+        class="btn btn-ghost w-full flex justify-start items-center p-2.5 text-left text-sm capitalize"
+      >
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 16 16"
+          width="18"
+          height="18"
+          fill="currentColor"
+          class="mr-3"
+        >
+          <path
+            d="M8 0a8.2 8.2 0 0 1 .701.031C9.444.095 9.99.645 10.16 1.29l.288 1.107c.018.066.079.158.212.224.231.114.454.243.668.386.123.082.233.09.299.071l1.103-.303c.644-.176 1.392.021 1.82.63.27.385.506.792.704 1.218.315.675.111 1.422-.364 1.891l-.814.806c-.049.048-.098.147-.088.294.016.257.016.515 0 .772-.01.147.038.246.088.294l.814.806c.475.469.679 1.216.364 1.891a7.977 7.977 0 0 1-.704 1.217c-.428.61-1.176.807-1.82.63l-1.102-.302c-.067-.019-.177-.011-.3.071a5.909 5.909 0 0 1-.668.386c-.133.066-.194.158-.211.224l-.29 1.106c-.168.646-.715 1.196-1.458 1.26a8.006 8.006 0 0 1-1.402 0c-.743-.064-1.289-.614-1.458-1.26l-.289-1.106c-.018-.066-.079-.158-.212-.224a5.738 5.738 0 0 1-.668-.386c-.123-.082-.233-.09-.299-.071l-1.103.303c-.644.176-1.392-.021-1.82-.63a8.12 8.12 0 0 1-.704-1.218c-.315-.675-.111-1.422.363-1.891l.815-.806c.05-.048.098-.147.088-.294a6.214 6.214 0 0 1 0-.772c.01-.147-.038-.246-.088-.294l-.815-.806C.635 6.045.431 5.298.746 4.623a7.92 7.92 0 0 1 .704-1.217c.428-.61 1.176-.807 1.82-.63l1.102.302c.067.019.177.011.3-.071.214-.143.437-.272.668-.386.133-.066.194-.158.211-.224l.29-1.106C6.009.645 6.556.095 7.299.03 7.53.01 7.764 0 8 0Zm-.571 1.525c-.036.003-.108.036-.137.146l-.289 1.105c-.147.561-.549.967-.998 1.189-.173.086-.34.183-.5.29-.417.278-.97.423-1.529.27l-1.103-.303c-.109-.03-.175.016-.195.045-.22.312-.412.644-.573.99-.014.031-.021.11.059.19l.815.806c.411.406.562.957.53 1.456a4.709 4.709 0 0 0 0 .582c.032.499-.119 1.05-.53 1.456l-.815.806c-.081.08-.073.159-.059.19.162.346.353.677.573.989.02.03.085.076.195.046l1.102-.303c.56-.153 1.113-.008 1.53.27.161.107.328.204.501.29.447.222.85.629.997 1.189l.289 1.105c.029.109.101.143.137.146a6.6 6.6 0 0 0 1.142 0c.036-.003.108-.036.137-.146l.289-1.105c.147-.561.549-.967.998-1.189.173-.086.34-.183.5-.29.417-.278.97-.423 1.529-.27l1.103.303c.109.029.175-.016.195-.045.22-.313.411-.644.573-.99.014-.031.021-.11-.059-.19l-.815-.806c-.411-.406-.562-.957-.53-1.456a4.709 4.709 0 0 0 
0-.582c-.032-.499.119-1.05.53-1.456l.815-.806c.081-.08.073-.159.059-.19a6.464 6.464 0 0 0-.573-.989c-.02-.03-.085-.076-.195-.046l-1.102.303c-.56.153-1.113.008-1.53-.27a4.44 4.44 0 0 0-.501-.29c-.447-.222-.85-.629-.997-1.189l-.289-1.105c-.029-.11-.101-.143-.137-.146a6.6 6.6 0 0 0-1.142 0ZM11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0ZM9.5 8a1.5 1.5 0 1 0-3.001.001A1.5 1.5 0 0 0 9.5 8Z"
+          >
+          </path>
+        </svg>
+        <span>Settings</span>
+      </a>
    </div>
  </div>
 </aside>

-<button on:click={hideSidebar} type="button"></button>
-
-<div id="main_content" class="h-full w-full">
+<div
+  class={"relative h-full transition-all" + (bar_visible ? " md:ml-80" : "")}
+>
  <slot />
 </div>
--- a/web/src/routes/+layout.ts
+++ b/web/src/routes/+layout.ts
@ -1,47 +1,16 @@
 import type { LayoutLoad } from "./$types";

-interface ChatMetadata {
+interface t {
+  // Element type that `load` casts the parsed /api/chat/ JSON response to (as t[]).
  id: string;
  created: string;
  model: string;
  subtitle: string;
 }

-export const ssr = false; // off for now because ssr with auth is broken
-
-export interface ModelStatus {
-  name: string;
-  size: number;
-  available: boolean;
-  progress?: number;
-}
-
-export interface User {
-  id: string;
-  username: string;
-  email: string;
-  pref_theme: "light" | "dark";
-  full_name: string;
-  default_prompt: string;
-}
-
 export const load: LayoutLoad = async ({ fetch }) => {
-  let userData: User | null = null;
-
-  const api_chat = await fetch("/api/chat/");
-  const chats = (await api_chat.json()) as ChatMetadata[];
-
-  const model_api = await fetch("/api/model/all");
-  const models = (await model_api.json()) as ModelStatus[];
-
-  const userData_api = await fetch("/api/user/");
-  if (userData_api.ok) {
-    userData = (await userData_api.json()) as User;
-  }
-
+  // Layout load: request /api/chat/ and return the parsed JSON body to the
+  // layout as `chats`.
+  // NOTE(review): the response status is not checked before json() is called.
+  const r = await fetch("/api/chat/");
+  const chats = (await r.json()) as t[];
  return {
    chats,
-    models,
-    userData,
  };
 };
--- a/web/src/routes/+page.svelte
+++ b/web/src/routes/+page.svelte
@ -1,12 +1,16 @@
 <script lang="ts">
  import type { PageData } from "./$types";
  import { goto, invalidate } from "$app/navigation";
+  import { barVisible } from "$lib/stores";
+  import { onDestroy } from "svelte";
  export let data: PageData;

  const models = data.models.filter((el) => el.available);

  const modelAvailable = models.length > 0;
  const modelsLabels = models.map((el) => el.name);
+  let bar_visible: boolean;
+  const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));

  let temp = 0.1;
  let top_k = 50;
@ -17,10 +21,8 @@
  let repeat_penalty = 1.3;

  let init_prompt =
-    data.userData?.default_prompt ??
    "Below is an instruction that describes a task. Write a response that appropriately completes the request.";

-  let n_threads = 4;
  let context_window = 2048;
  let gpu_layers = 0;

@ -45,21 +47,43 @@
      await invalidate("/api/chat/");
    }
  }
+  function toggleBar() {
+    // Flip the local flag, then write the new value to the barVisible store.
+    bar_visible = !bar_visible;
+    barVisible.set(bar_visible);
+  }
+  onDestroy(unsubscribe);
 </script>

+{#if !bar_visible}
+  <!-- Rendered only while bar_visible is false; clicking the button calls toggleBar. -->
+  <button
+    class="absolute p-0 top-1 left-2 md:left-16 h-10 w-10 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
+    on:click={toggleBar}
+  >
+    <svg
+      xmlns="http://www.w3.org/2000/svg"
+      viewBox="0 0 24 24"
+      fill="currentColor"
+      class="w-4 h-4"
+    >
+      <path
+        d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
+      >
+      </path>
+      <path
+        d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
+      >
+      </path>
+    </svg>
+  </button>
+{/if}
 <div class="flex flex-col items-center justify-center pt-5">
  <h1 class="pb-2 text-3xl font-bold">Say Hi to Serge</h1>
 </div>
 <h1 class="pb-5 pt-2 text-center text-xl font-light">
-  An easy way to chat with LLaMA based models.
+  An easy way to chat with Alpaca & other LLaMA based models.
 </h1>

-<form
-  on:submit|preventDefault={onCreateChat}
-  id="form-create-chat"
-  class="p-5"
-  aria-label="Model Settings"
->
+<form on:submit|preventDefault={onCreateChat} id="form-create-chat" class="p-5">
  <div class="w-full pb-20">
    <div class="mx-auto w-fit pt-5 flex flex-col lg:flex-row justify-center">
      <button
@ -74,181 +98,161 @@
      >
    </div>
  </div>
-  <div class="flex justify-center">
-    <div class="grid grid-cols-3 gap-4 p-3 bg-base-200" id="model_settings">
-      <div class="col-span-3 text-xl font-medium">Model settings</div>
-      <div
-        class="tooltip tooltip-bottom col-span-2"
-        data-tip="Controls how random the generated text is. Higher temperatures lead to more random and creative text, while lower temperatures lead to more predictable and conservative text."
-      >
-        <label for="temperature" class="label-text"
-          >Temperature - [{temp}]</label
+
+  <div tabindex="-1" class="collapse-arrow rounded-box collapse bg-base-200">
+    <input type="checkbox" />
+    <div class="collapse-title text-xl font-medium">Model settings</div>
+    <div class="collapse-content">
+      <div class="grid grid-cols-3 gap-4 p-3">
+        <div
+          class="tooltip tooltip-bottom col-span-2"
+          data-tip="The higher the temperature, the more random the model output."
        >
-        <input
-          id="temperature"
-          name="temperature"
-          type="range"
-          bind:value={temp}
-          min="0.05"
-          max="2"
-          step="0.05"
-          class="range range-sm mt-auto"
-        />
-      </div>
-      <div
-        class="tooltip tooltip-bottom flex flex-col"
-        data-tip="Controls the number of tokens that are considered when generating the next token. Higher values of top_k lead to more predictable text, while lower values of top_k lead to more creative text."
-      >
-        <label for="top_k" class="label-text pb-1">top_k</label>
-        <input
-          id="top_k"
-          class="input-bordered input w-full"
-          name="top_k"
-          type="number"
-          bind:value={top_k}
-          min="0"
-          max="100"
-        />
-      </div>
-      <div
-        class="tooltip tooltip-bottom col-span-2"
-        data-tip="The maximum number of tokens that the model will generate. This parameter can be used to control the length of the generated text."
-      >
-        <label for="max_length" class="label-text"
-          >Maximum generated tokens - [{max_length}]</label
+          <label for="temperature" class="label-text"
+            >Temperature - [{temp}]</label
+          >
+          <input
+            name="temperature"
+            type="range"
+            bind:value={temp}
+            min="0.05"
+            max="2"
+            step="0.05"
+            class="range range-sm mt-auto"
+          />
+        </div>
+        <div
+          class="tooltip tooltip-bottom flex flex-col"
+          data-tip="The number of samples to consider for top_k sampling."
        >
-        <input
-          id="max_length"
-          name="max_length"
-          type="range"
-          bind:value={max_length}
-          min="32"
-          max="32768"
-          step="16"
-          class="range range-sm mt-auto"
-        />
-      </div>
-      <div
-        class="tooltip flex flex-col"
-        data-tip="Controls the diversity of the generated text. Higher values of top_p lead to more diverse text, while lower values of top_p lead to less diverse text."
-      >
-        <label for="top_p" class="label-text pb-1">top_p</label>
-        <input
-          class="input-bordered input w-full"
-          id="top_p"
-          name="top_p"
-          type="number"
-          bind:value={top_p}
-          min="0"
-          max="1"
-          step="0.025"
-        />
-      </div>
-      <div
-        class="tooltip col-span-2"
-        data-tip="The number of previous tokens that are considered when generating the next token. A longer context length can help the model to generate more coherent and informative text."
-      >
-        <label for="context_window" class="label-text"
-          >Context Length - [{context_window}]</label
+          <label for="top_k" class="label-text pb-1">top_k</label>
+          <input
+            class="input-bordered input w-full max-w-xs"
+            name="top_k"
+            type="number"
+            bind:value={top_k}
+            min="0"
+            max="100"
+          />
+        </div>
+        <div
+          class="tooltip tooltip-bottom col-span-2"
+          data-tip="Max text generated token"
        >
-        <input
-          id="context_window"
-          name="context_window"
-          type="range"
-          bind:value={context_window}
-          min="16"
-          max="2048"
-          step="16"
-          class="range range-sm mt-auto"
-        />
-      </div>
-      <div
-        class="tooltip col-span-2"
-        data-tip="Number of layers to put on the GPU. The rest will be on the CPU."
-      >
-        <label for="gpu_layers" class="label-text"
-          >GPU Layers - [{gpu_layers}]</label
+          <label for="max_length" class="label-text"
+            >Maximum generated tokens - [{max_length}]</label
+          >
+          <input
+            name="max_length"
+            type="range"
+            bind:value={max_length}
+            min="32"
+            max="32768"
+            step="16"
+            class="range range-sm mt-auto"
+          />
+        </div>
+        <div
+          class="tooltip flex flex-col"
+          data-tip="The cumulative probability of the tokens to keep for nucleus sampling."
        >
-        <input
-          id="gpu_layers"
-          name="gpu_layers"
-          type="range"
-          bind:value={gpu_layers}
-          min="0"
-          max="100"
-          step="1"
-          class="range range-sm mt-auto"
-        />
-      </div>
-      <div
-        class="tooltip flex flex-col"
-        data-tip="Defines the penalty associated with repeating the last 'n' tokens in a generated text sequence."
-      >
-        <label for="repeat_last_n" class="label-text pb-1">repeat_last_n</label>
-        <input
-          id="repeat_last_n"
-          class="input-bordered input w-full"
-          name="repeat_last_n"
-          type="number"
-          bind:value={repeat_last_n}
-          min="0"
-          max="100"
-        />
-      </div>
-      <div class="flex flex-col">
-        <label for="model" class="label-text pb-1"> Model choice</label>
-        <select
-          name="model"
-          id="models"
-          class="select-bordered select w-full"
-          aria-haspopup="menu"
+          <label for="top_p" class="label-text pb-1">top_p</label>
+          <input
+            class="input-bordered input w-full max-w-xs"
+            name="top_p"
+            type="number"
+            bind:value={top_p}
+            min="0"
+            max="1"
+            step="0.025"
+          />
+        </div>
+        <div
+          class="tooltip col-span-2"
+          data-tip="Size of the prompt context. Will determine how far the model will read back. Increases memory consumption."
        >
-          {#each modelsLabels as model}
-            <option id={model} value={model}>{model}</option>
-          {/each}
-        </select>
-      </div>
-      <div
-        class="tooltip flex flex-col"
-        data-tip="Number of threads to run LLaMA on."
-      >
-        <label for="n_threads" class="label-text pb-1">n_threads</label>
-        <input
-          id="n_threads"
-          class="input-bordered input w-full"
-          name="n_threads"
-          type="number"
-          bind:value={n_threads}
-          min="0"
-          max="64"
-        />
-      </div>
-      <div
-        class="tooltip flex flex-col"
-        data-tip="Defines the penalty assigned to the model when it repeats certain tokens or patterns in the generated text."
-      >
-        <label for="repeat_penalty" class="label-text pb-1">
-          repeat_penalty
-        </label>
-        <input
-          id="repeat_penalty"
-          class="input-bordered input w-full"
-          name="repeat_penalty"
-          type="number"
-          bind:value={repeat_penalty}
-          min="0"
-          max="2"
-          step="0.05"
-        />
-      </div>
-      <div class="col-span-3 flex flex-col">
-        <label for="init_prompt" class="label-text pb-1">Prompt Template</label>
-        <textarea
-          class="textarea-bordered textarea h-24 w-full"
-          name="init_prompt"
-          bind:value={init_prompt}
-          placeholder="Enter your prompt here"
-        />
+          <label for="context_window" class="label-text"
+            >Context Length - [{context_window}]</label
+          >
+          <input
+            name="context_window"
+            type="range"
+            bind:value={context_window}
+            min="16"
+            max="2048"
+            step="16"
+            class="range range-sm mt-auto"
+          />
+        </div>
+        <div
+          class="tooltip col-span-2"
+          data-tip="Number of layers to put on the GPU. The rest will be on the CPU."
+        >
+          <label for="gpu_layers" class="label-text"
+            >GPU Layers - [{gpu_layers}]</label
+          >
+          <input
+            name="gpu_layers"
+            type="range"
+            bind:value={gpu_layers}
+            min="0"
+            max="100"
+            step="1"
+            class="range range-sm mt-auto"
+          />
+        </div>
+        <div
+          class="tooltip flex flex-col"
+          data-tip="Number of tokens to look back on for deciding to apply the repeat penalty."
+        >
+          <label for="repeat_last_n" class="label-text pb-1"
+            >repeat_last_n</label
+          >
+          <input
+            class="input-bordered input w-full max-w-xs"
+            name="repeat_last_n"
+            type="number"
+            bind:value={repeat_last_n}
+            min="0"
+            max="100"
+          />
+        </div>
+        <div class="flex flex-col">
+          <label for="model" class="label-text pb-1"> Model choice</label>
+          <select name="model" class="select-bordered select w-full max-w-xs">
+            {#each modelsLabels as model}
+              <option value={model}>{model}</option>
+            {/each}
+          </select>
+        </div>
+        <div
+          class="tooltip flex flex-col"
+          data-tip="The weight of the penalty to avoid repeating the last repeat_last_n tokens."
+        >
+          <label for="repeat_penalty" class="label-text pb-1">
+            repeat_penalty
+          </label>
+          <input
+            class="input-bordered input w-full max-w-xs"
+            name="repeat_penalty"
+            type="number"
+            bind:value={repeat_penalty}
+            min="0"
+            max="2"
+            step="0.05"
+          />
+        </div>
+        <div class="col-span-3 flex flex-col">
+          <label for="init_prompt" class="label-text pb-1"
+            >Pre-Prompt for initializing a conversation.</label
+          >
+          <textarea
+            class="textarea-bordered textarea h-24 w-full"
+            name="init_prompt"
+            bind:value={init_prompt}
+            placeholder="Enter your prompt here"
+          />
+        </div>
      </div>
    </div>
  </div>
--- a/web/src/routes/+page.ts
+++ b/web/src/routes/+page.ts
@ -1,6 +1,6 @@
 import type { PageLoad } from "./$types";

-export interface ModelStatus {
+interface ModelStatus {
  name: string;
  size: number;
  available: boolean;
@ -8,8 +8,8 @@ export interface ModelStatus {
 }

 export const load: PageLoad = async ({ fetch }) => {
-  const api_model = await fetch("/api/model/all");
-  const models = (await api_model.json()) as ModelStatus[];
+  const r = await fetch("/api/model/all");
+  const models = (await r.json()) as ModelStatus[];
  return {
    models,
  };
--- a/web/src/routes/account/+page.svelte
+++ b/web/src/routes/account/+page.svelte
@ -1,106 +0,0 @@
-<script context="module" lang="ts">
-  export { load } from "./+page";
-</script>
-
-<script lang="ts">
-  import { writable } from "svelte/store";
-  import { goto } from "$app/navigation";
-  export let data: {
-    user: {
-      id: string;
-      username: string;
-      email: string;
-      full_name: string;
-      pref_theme: "light" | "dark";
-      default_prompt: string;
-    } | null;
-  };
-  let user = data.user;
-  let id: string = user?.id ?? "";
-  let username: string = user?.username ?? "";
-  let email: string = user?.email ?? "";
-  let full_name: string = user?.full_name ?? "";
-  let pref_theme: "light" | "dark" = user?.pref_theme ?? "light";
-  let default_prompt: string = user?.default_prompt ?? "";
-  let status = writable<string | null>(null);
-
-  async function handleSubmit(event: Event) {
-    event.preventDefault();
-    // Implement the update logic here, e.g., sending a PUT request to update user preferences
-    try {
-      await fetch("/api/user/", {
-        method: "PUT",
-        headers: {
-          "Content-Type": "application/json",
-        },
-        body: JSON.stringify({
-          id,
-          username,
-          email,
-          full_name,
-          pref_theme,
-          default_prompt,
-        }),
-      });
-
-      status.set("Preferences updated successfully");
-      goto("/", { invalidateAll: true });
-    } catch (error) {
-      if (error instanceof Error) {
-        status.set(error.message);
-      } else {
-        status.set("Failed to update preferences");
-      }
-    }
-  }
-</script>
-
-<main>
-  <div class="card-group">
-    <div class="card">
-      <div class="card-title p-3 text-3xl justify-center font-bold">
-        User Preferences
-      </div>
-      <div class="card-body">
-        {#if user}
-          <form on:submit={handleSubmit}>
-            <div class="input-group">
-              <div class="input-group-prepend">
-                <span class="input-group-text">Username</span>
-              </div>
-              <input type="text" bind:value={username} disabled />
-            </div>
-            <div class="input-group">
-              <div class="input-group-prepend">
-                <span class="input-group-text">Full Name</span>
-              </div>
-              <input id="full_name" type="text" bind:value={full_name} />
-            </div>
-            <div class="input-group">
-              <div class="input-group-prepend">
-                <span class="input-group-text">Email</span>
-              </div>
-              <input id="email" type="email" bind:value={email} />
-            </div>
-            <div class="input-group">
-              <div class="input-group-prepend">
-                <span class="input-group-text">Default Prompt</span>
-              </div>
-              <textarea
-                id="default_prompt"
-                bind:value={default_prompt}
-                style="resize:both; width:100%;"
-              />
-            </div>
-            {#if $status}
-              <p>{$status}</p>
-            {/if}
-            <button class="btn" type="submit">Save Preferences</button>
-          </form>
-        {:else}
-          <p>Loading...</p>
-        {/if}
-      </div>
-    </div>
-  </div>
-</main>
--- a/web/src/routes/account/+page.ts
+++ b/web/src/routes/account/+page.ts
@ -1,27 +0,0 @@
-import type { Load } from "@sveltejs/kit";
-
-interface User {
-  id: string;
-  username: string;
-  email: string;
-  pref_theme: "light" | "dark";
-  full_name: string;
-  default_prompt: string;
-}
-
-export const load: Load = async () => {
-  const user = await fetch("/api/user/", {
-    method: "GET",
-  })
-    .then((response) => {
-      if (response.status == 401) {
-        window.location.href = "/";
-      }
-      return response.json();
-    })
-    .catch((error) => {
-      console.log(error);
-      window.location.href = "/";
-    });
-  return { user };
-};
--- a/web/src/routes/chat/[id]/+page.svelte
+++ b/web/src/routes/chat/[id]/+page.svelte
@ -2,7 +2,7 @@
  import type { PageData } from "./$types";
  import { invalidate, goto } from "$app/navigation";
  import { page } from "$app/stores";
-  import { newChat, themeStore } from "$lib/stores";
+  import { barVisible, newChat, themeStore } from "$lib/stores";
  import { onMount, onDestroy } from "svelte";
  import ClipboardJS from "clipboard";
  import hljs from "highlight.js";
@ -11,6 +11,7 @@
  import css from "highlight.js/lib/languages/css";
  import cpp from "highlight.js/lib/languages/cpp";
  import dockerfile from "highlight.js/lib/languages/dockerfile";
+  import graphql from "highlight.js/lib/languages/graphql";
  import go from "highlight.js/lib/languages/go";
  import javascript from "highlight.js/lib/languages/javascript";
  import json from "highlight.js/lib/languages/json";
@ -31,6 +32,7 @@
  hljs.registerLanguage("bash", bash);
  hljs.registerLanguage("css", css);
  hljs.registerLanguage("cpp", cpp);
+  hljs.registerLanguage("graphql", graphql);
  hljs.registerLanguage("dockerfile", dockerfile);
  hljs.registerLanguage("go", go);
  hljs.registerLanguage("javascript", javascript);
@ -59,6 +61,8 @@
    messageContainer.scrollBottom = messageContainer.scrollHeight;
  }
  let prompt = "";
+  let bar_visible: boolean;
+  const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));

  async function askQuestion() {
    const data = new URLSearchParams();
@ -102,7 +106,10 @@
    });

    eventSource.onerror = async (error) => {
+      console.log("error", error);
      eventSource.close();
+      //history[history.length - 1].data.content = "A server error occurred.";
+      //await invalidate("/api/chat/" + $page.params.id);
    };
  }

@ -118,7 +125,7 @@
      `/api/chat/?model=${data.chat.params.model_path}&temperature=${data.chat.params.temperature}&top_k=${data.chat.params.top_k}` +
        `&top_p=${data.chat.params.top_p}&max_length=${data.chat.params.max_tokens}&context_window=${data.chat.params.n_ctx}` +
        `&repeat_last_n=${data.chat.params.last_n_tokens_size}&repeat_penalty=${data.chat.params.repeat_penalty}` +
-        `&n_threads=${data.chat.params.n_threads}&init_prompt=${data.chat.history[0].data.content}` +
+        // Percent-encode the prompt: it is free text and may contain "&", "#" or "+".
+        `&init_prompt=${encodeURIComponent(data.chat.history[0].data.content)}` +
        `&gpu_layers=${data.chat.params.n_gpu_layers}`,

      {
@ -127,23 +134,17 @@
          accept: "application/json",
        },
      },
-    )
-      .then((response) => {
-        if (response.status == 401) {
-          console.log("Not authorized");
-          window.location.href = "/";
-        } else {
-          return response.json();
-        }
-      })
-      .catch((error) => {
-        console.log(error);
-        window.location.href = "/";
-      });
+    ).then((response) => response.json());
    await invalidate("/api/chat/");
    await goto("/chat/" + newData);
  }

+  // Alt+N starts a new chat with the current chat's parameters. Register the
+  // shortcut on mount and remove it on unmount so repeated visits to this page
+  // do not stack duplicate document-level listeners.
+  onMount(() => {
+    const onShortcut = async (event: KeyboardEvent) => {
+      if (event.key === "n" && event.altKey) {
+        await createSameSession();
+      }
+    };
+    document.addEventListener("keydown", onShortcut);
+    // The function returned from onMount runs when the component is unmounted.
+    return () => document.removeEventListener("keydown", onShortcut);
+  });
+
  async function deletePrompt(chatID: string, idx: number) {
    const response = await fetch(
      `/api/chat/${chatID}/prompt?idx=${idx.toString()}`,
@ -152,36 +153,11 @@

    if (response.status === 200) {
      await invalidate("/api/chat/" + $page.params.id);
-    } else if (response.status === 202) {
-      showToast("Chat in progress!");
-    } else if (response.status === 401) {
-      window.location.href = "/";
    } else {
-      showToast("An error occurred: " + response.statusText);
+      console.error("Error " + response.status + ": " + response.statusText);
    }
  }

-  function showToast(message: string) {
-    // Create the toast element
-    const toast = document.createElement("div");
-    toast.className = `alert alert-info`;
-    toast.textContent = message;
-    const toastContainer = document.getElementById("toast-container");
-
-    // Append the toast to the toast container if it exists
-    if (toastContainer) {
-      toastContainer.appendChild(toast);
-    } else {
-      console.error("Toast container not found?");
-      return;
-    }
-
-    // Automatically remove the toast after a delay
-    setTimeout(() => {
-      toast.remove();
-    }, 3000);
-  }
-
  const md: MarkdownIt = new MarkdownIt({
    html: true,
    linkify: true,
@ -249,12 +225,6 @@
    themeStore.subscribe((newTheme) => {
      updateThemeStyle(newTheme);
    });
-
-    document.addEventListener("keydown", async (event) => {
-      if (event.key === "n" && event.altKey) {
-        await createSameSession();
-      }
-    });
  });

  function updateThemeStyle(currentTheme: string) {
@ -273,6 +243,10 @@
  const onMouseLeave = () => {
    sendBottomHovered = false;
  };
+  const toggleBar = () => {
+    bar_visible = !bar_visible;
+    barVisible.set(bar_visible);
+  };
  const scrollToBottom = (node: Element, history: any[]) => {
    const scroll = () =>
      node.scroll({
@ -284,22 +258,41 @@
    return { update: scroll };
  };
  onDestroy(() => {
+    unsubscribe(); // call the unsubscriber; a bare `unsubscribe;` is a no-op and leaks the barVisible subscription
    styleElement && styleElement.remove();
  });
 </script>

 <!-- svelte-ignore a11y-no-static-element-interactions -->
 <div
-  class="relative h-full max-h-screen overflow-hidden"
+  class="relative mx-auto h-full max-h-screen w-full overflow-hidden"
  on:keydown={handleKeyDown}
 >
-  <div class="mx-20">
-    <div class="h-8 justify-content border-b border-base-content/[.2]">
-      <div class="h-full relative flex items-center justify-center">
-        <div
-          class="flex flex-row items-center justify-center color-base-300"
-          title="Model"
-        >
+  <div class="w-full border-b border-base-content/[.2]">
+    <div class="h-8 px-2 md:container md:mx-auto md:px-0">
+      <div class="w-full h-full relative flex items-center justify-center">
+        {#if !bar_visible}
+          <button
+            class="absolute p-0 top-0 bottom-0 left-0 w-10 h-8 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
+            on:click={toggleBar}
+          >
+            <svg
+              xmlns="http://www.w3.org/2000/svg"
+              viewBox="0 0 24 24"
+              class="w-4 h-4 fill-base-content"
+            >
+              <path
+                d="M7.22 14.47 9.69 12 7.22 9.53a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l3 3a.75.75 0 0 1 0 1.06l-3 3a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Z"
+              >
+              </path>
+              <path
+                d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
+              >
+              </path>
+            </svg>
+          </button>
+        {/if}
+        <div class="flex flex-row items-center justify-center color-base-300">
          <svg
            xmlns="http://www.w3.org/2000/svg"
            viewBox="0 0 11.12744 16"
@ -314,10 +307,7 @@
            {data.chat.params.model_path}
          </span>
        </div>
-        <div
-          class="pl-4 hidden sm:flex flex-row items-center justify-center"
-          title="Temperature"
-        >
+        <div class="pl-4 hidden sm:flex flex-row items-center justify-center">
          <svg
            xmlns="http://www.w3.org/2000/svg"
            viewBox="0 0 5.31286 16"
@ -332,10 +322,7 @@
            {data.chat.params.temperature}
          </span>
        </div>
-        <div
-          class="pl-4 hidden sm:flex flex-row items-center justify-center"
-          title="Context Length/Maximum Generated Tokens"
-        >
+        <div class="pl-4 hidden sm:flex flex-row items-center justify-center">
          <svg
            xmlns="http://www.w3.org/2000/svg"
            viewBox="0 0 16 16"
@ -350,35 +337,8 @@
            {data.chat.params.n_ctx}/{data.chat.params.max_tokens}
          </span>
        </div>
-        {#if data.chat.params.n_threads > 0}
-          <div
-            class="pl-4 hidden sm:flex flex-row items-center justify-center"
-            title="Threads"
-          >
-            <svg
-              xmlns="http://www.w3.org/2000/svg"
-              fill="none"
-              viewBox="0 0 24 24"
-              stroke-width="1.5"
-              stroke="currentColor"
-              class="w-4 h-4"
-            >
-              <path
-                stroke-linecap="round"
-                stroke-linejoin="round"
-                d="M8.25 3v1.5M4.5 8.25H3m18 0h-1.5M4.5 12H3m18 0h-1.5m-15 3.75H3m18 0h-1.5M8.25 19.5V21M12 3v1.5m0 15V21m3.75-18v1.5m0 15V21m-9-1.5h10.5a2.25 2.25 0 002.25-2.25V6.75a2.25 2.25 0 00-2.25-2.25H6.75A2.25 2.25 0 004.5 6.75v10.5a2.25 2.25 0 002.25 2.25zm.75-12h9v9h-9v-9z"
-              />
-            </svg>
-            <span class="ml-2 inline-block text-center text-sm font-semibold">
-              {data.chat.params.n_threads}
-            </span>
-          </div>
-        {/if}
        {#if data.chat.params.n_gpu_layers > 0}
-          <div
-            class="pl-4 hidden sm:flex flex-row items-center justify-center"
-            title="GPU Layers"
-          >
+          <div class="pl-4 hidden sm:flex flex-row items-center justify-center">
            <svg
              xmlns="http://www.w3.org/2000/svg"
              viewBox="0 0 16 16"
@ -394,10 +354,7 @@
            </span>
          </div>
        {/if}
-        <div
-          class="pl-4 hidden sm:flex flex-row items-center justify-center"
-          title="Repeat Penalty"
-        >
+        <div class="pl-4 hidden sm:flex flex-row items-center justify-center">
          <svg
            xmlns="http://www.w3.org/2000/svg"
            viewBox="0 0 16 16"
@ -416,10 +373,7 @@
            {data.chat.params.repeat_penalty}
          </span>
        </div>
-        <div
-          class="pl-4 hidden sm:flex flex-row items-center justify-center"
-          title="Top_k-Top_p"
-        >
+        <div class="pl-4 hidden sm:flex flex-row items-center justify-center">
          <svg
            xmlns="http://www.w3.org/2000/svg"
            viewBox="0 0 16 16"
@ -444,46 +398,49 @@
    <div class="h-max pb-4">
      {#each history as question, i}
        {#if question.type === "human"}
-          <div class="w-10/12 mx-auto sm:w-10/12 chat chat-end py-4">
-            <div class="chat-image self-start pl-1 pt-1">
-              <div
-                class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
-              >
-                <span class="text-xs text-neutral-content">I</span>
-              </div>
-            </div>
-            <div
-              class="chat-bubble whitespace-normal break-words bg-base-300 text-base font-light text-base-content"
-            >
-              <!-- {question.data.content} -->
-              <div class="w-full overflow-hidden break-words">
-                {@html renderMarkdown(question.data.content)}
-              </div>
-            </div>
-            {#if i === history.length - 1 && !isLoading}
-              <div style="width: 100%; text-align: right;">
-                <button
-                  disabled={isLoading}
-                  class="btn-ghost btn-sm btn"
-                  on:click|preventDefault={() => deletePrompt(data.chat.id, i)}
+          <div class="w-full border-y border-base-content/[.2] bg-base-300">
+            <div class="w-11/12 mx-auto sm:w-10/12 chat chat-start py-4">
+              <div class="chat-image self-start pl-1 pt-1">
+                <div
+                  class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
                >
-                  <svg
-                    xmlns="http://www.w3.org/2000/svg"
-                    viewBox="0 0 16 16"
-                    width="16"
-                    height="16"
-                  >
-                    <path
-                      class="fill-base-content"
-                      d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
-                    />
-                  </svg>
-                </button>
+                  <span class="text-xs text-neutral-content">I</span>
+                </div>
              </div>
-            {/if}
+              <div
+                class="chat-bubble whitespace-normal break-words bg-base-300 text-base font-light text-base-content"
+              >
+                <!-- {question.data.content} -->
+                <div class="w-full overflow-hidden break-words">
+                  {@html renderMarkdown(question.data.content)}
+                </div>
+              </div>
+              {#if i === history.length - 1 && !isLoading}
+                <div style="width: 100%; text-align: right;">
+                  <button
+                    disabled={isLoading}
+                    class="btn-ghost btn-sm btn"
+                    on:click|preventDefault={() =>
+                      deletePrompt(data.chat.id, i)}
+                  >
+                    <svg
+                      xmlns="http://www.w3.org/2000/svg"
+                      viewBox="0 0 16 16"
+                      width="16"
+                      height="16"
+                    >
+                      <path
+                        class="fill-base-content"
+                        d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
+                      />
+                    </svg>
+                  </button>
+                </div>
+              {/if}
+            </div>
          </div>
        {:else if question.type === "ai"}
-          <div class="w-10/12 mx-auto sm:w-10/12 chat chat-start py-4">
+          <div class="w-11/12 mx-auto sm:w-10/12 chat chat-start py-4">
            <div class="chat-image self-start pl-1 pt-1">
              <div
                class="mask mask-squircle online flex aspect-square w-8 items-center justify-center overflow-hidden bg-gradient-to-b from-primary to-primary-focus"
@ -524,7 +481,6 @@
                      d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"
                    />
                  </svg>
-                  <span class="sr-only">Delete</span>
                </button>
              </div>
            {/if}
@ -564,7 +520,7 @@
        class="btn btn-ghost h-10 w-14 rounded-l-none rounded-r-lg border-0 text-lg"
        class:loading={isLoading}
        on:click|preventDefault={askQuestion}
-        ><span class="sr-only">Send</span>
+      >
        <svg
          xmlns="http://www.w3.org/2000/svg"
          viewBox="0 0 16 16"
@ -580,7 +536,4 @@
      </button>
    </div>
  </div>
-  <div id="toast-container" class="toast">
-    <!-- Toast notifications will be added here -->
-  </div>
 </div>
--- a/web/src/routes/chat/[id]/+page.ts
+++ b/web/src/routes/chat/[id]/+page.ts
@ -15,7 +15,6 @@ interface Params {
  model_path: string;
  n_ctx: number;
  n_gpu_layers: number;
-  n_threads: number;
  last_n_tokens_size: number;
  max_tokens: number;
  temperature: number;
@ -28,22 +27,12 @@ interface Response {
  id: string;
  created: string;
  params: Params;
-  owner: string;
  history: Message[];
 }

 export const load: PageLoad = async ({ fetch, params }) => {
-  const data = await fetch("/api/chat/" + params.id)
-    .then((response) => {
-      if (response.status == 401) {
-        window.location.href = "/";
-      }
-      return response.json();
-    })
-    .catch((error) => {
-      console.log(error);
-      window.location.href = "/";
-    });
+  const r = await fetch("/api/chat/" + params.id);
+  const data = (await r.json()) as Response;

  return {
    chat: data,
--- a/web/src/routes/login/+page.svelte
+++ b/web/src/routes/login/+page.svelte
@ -1,69 +0,0 @@
-<script lang="ts">
-  import { goto } from "$app/navigation";
-  import { writable } from "svelte/store";
-
-  let username = "";
-  let password = "";
-  let error = writable<string | null>(null);
-
-  async function handleSubmit(event: Event) {
-    event.preventDefault();
-    try {
-      const response = await fetch("/api/auth/token", {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/x-www-form-urlencoded",
-        },
-        body: new URLSearchParams({
-          username,
-          password,
-        }),
-      });
-
-      if (response.ok) {
-        const data = await response.json();
-        localStorage.setItem("token", data.access_token);
-        goto("/", { invalidateAll: true });
-      } else {
-        const errorData = await response.json();
-        error.set(errorData.detail || "Login failed");
-      }
-    } catch (err) {
-      error.set("An error occurred");
-    }
-  }
-</script>
-
-<main>
-  <div class="card-group">
-    <div class="card">
-      <div class="card-title p-3 text-3xl justify-center font-bold">
-        Sign In
-      </div>
-      <div class="card-body">
-        <form on:submit={handleSubmit}>
-          <div class="form-control">
-            <input
-              type="text"
-              placeholder="Username"
-              bind:value={username}
-              required
-            />
-          </div>
-          <div class="form-control">
-            <input
-              type="password"
-              placeholder="Password"
-              bind:value={password}
-              required
-            />
-          </div>
-          {#if $error}
-            <p style="color: red;">{$error}</p>
-          {/if}
-          <button class="btn" type="submit">Authenticate</button>
-        </form>
-      </div>
-    </div>
-  </div>
-</main>
--- a/web/src/routes/models/+page.svelte
+++ b/web/src/routes/models/+page.svelte
@ -1,359 +1,166 @@
 <script lang="ts">
  import { invalidate } from "$app/navigation";
-  import type { ModelStatus } from "../+page";
  import type { PageData } from "./$types";
-  import Icon from "@iconify/svelte";
-  import { onMount } from "svelte";
+  import RefreshModal from "../../lib/components/models/RefreshModal.svelte";
+  import { barVisible } from "$lib/stores";
+  import { onDestroy } from "svelte";

  export let data: PageData;
-  let searchQuery = "";
-  let selectedVariant: Record<string, string> = {};

-  // Add a reactive statement to keep track of downloading models
-  $: downloadingModels = new Set(
-    data.models
-      .filter(
-        (model) =>
-          (model.progress > 0 && model.progress < 100) || !model.available,
-      )
-      .map((model) => model.name),
-  );
-
-  function onComponentMount() {
-    const downloadingModelsArray = JSON.parse(
-      localStorage.getItem("downloadingModels") || "[]",
-    );
-    downloadingModelsArray.forEach((model: string) => {
-      downloadingModels.add(model);
-      checkDownloadProgress(model);
-    });
-  }
-
-  onMount(() => {
-    onComponentMount();
-  });
-
-  /**
-   * Handles the fetching the status of an active download
-   * @param modelName - The model name.
-   */
-  async function fetchDownloadProgress(modelName: string) {
-    const response = await fetch(`/api/model/${modelName}/download/status`);
-    if (response.ok) {
-      const progress = await response.text();
-      const progressNumber = parseFloat(progress);
-      const modelIndex = data.models.findIndex((m) => m.name === modelName);
-
-      if (modelIndex !== -1) {
-        data.models[modelIndex].progress = progressNumber;
-        data.models = [...data.models]; // enable reactivity
-      }
-      return progressNumber;
-    }
-    return 0;
-  }
-
-  function startDownload(modelName: string) {
-    const currentDownloads = JSON.parse(
-      localStorage.getItem("downloadingModels") || "[]",
-    );
-    if (!currentDownloads.includes(modelName)) {
-      currentDownloads.push(modelName);
-      localStorage.setItem(
-        "downloadingModels",
-        JSON.stringify(currentDownloads),
-      );
-    }
-    downloadingModels.add(modelName);
-    checkDownloadProgress(modelName);
-  }
-
-  /**
-   * Debounce function to limit how often a function can be called.
-   * @param func - The function to be debounced.
-   * @param wait - The time to wait in milliseconds.
-   * @returns A debounced version of the given function.
-   */
-  function debounce(func: (...args: any[]) => void, wait: number) {
-    let timeout: ReturnType<typeof setTimeout>;
-    return function (...args: any[]) {
-      const later = () => {
-        clearTimeout(timeout);
-        func(...args);
-      };
-      clearTimeout(timeout);
-      timeout = setTimeout(later, wait);
-    };
-  }
-
-  // Update search query with debounce to improve performance
-  const updateSearch = debounce((query: string) => {
-    searchQuery = query;
-  }, 300);
-
-  /**
-   * Wrapper function for fetch to include invalidate call on successful response.
-   * @param url - The URL to fetch.
-   * @param options - Fetch request options.
-   * @returns The fetch response.
-   */
-  async function fetchWithInvalidate(url: string, options: any) {
-    const response = await fetch(url, options);
-    if (response.ok) {
+  let downloading = false;
+  let bar_visible: boolean;
+  const unsubscribe = barVisible.subscribe((value) => (bar_visible = value));
+  console.log(data);
+  // Re-fetch the model list every 2.5 s while a download is in flight. Keep the
+  // timer id so it can be cleared on destroy; otherwise every visit to this
+  // page leaves another interval running.
+  const refreshTimer = setInterval(async () => {
+    if (downloading) {
      await invalidate("/api/model/all");
    }
-    return response;
-  }
+  }, 2500);
+  onDestroy(() => clearInterval(refreshTimer));

-  /**
-   * Truncates a string to the specified length and appends an ellipsis.
-   * @param str - The string to truncate.
-   * @param maxLength - The maximum length of the truncated string.
-   * @returns The truncated string with an ellipsis if needed.
-   */
-  function truncateString(str: string, maxLength: number): string {
-    return str.length > maxLength
-      ? str.substring(0, maxLength - 1) + "..."
-      : str;
-  }
-
-  /**
-   * Handles the action (download/delete) on a model.
-   * @param model - The model name.
-   * @param isAvailable - Boolean indicating if the model is available.
-   */
-  async function handleModelAction(
-    model: string,
-    isAvailable: boolean,
-    isDownloading: boolean = false,
-  ) {
-    if (isDownloading) {
-      await cancelDownload(model);
+  async function onClick(model: string) {
+    if (downloading) {
      return;
    }
-    const url = `/api/model/${model}${isAvailable ? "" : "/download"}`;
-    const method = isAvailable ? "DELETE" : "POST";

-    console.log("Before fetch invalidate");
-    fetchWithInvalidate(url, { method }).then((response) => {
-      console.log(`After fetch for ${url}`);
+    downloading = true;
+    const r = await fetch(`/api/model/${model}/download`, {
+      method: "POST",
    });

-    if (method === "POST") {
-      // Start tracking download progress for the model
-      console.log(`Calling startDownload() for ${model}`);
-      startDownload(model);
+    if (r.ok) {
+      await invalidate("/api/model/all");
+    }
+    downloading = false;
+  }
+
+  async function deleteModel(model: string) {
+    const r = await fetch(`/api/model/${model}`, {
+      method: "DELETE",
+    });
+
+    if (r.ok) {
+      await invalidate("/api/model/all");
    }
  }
-
-  // Function to periodically check download progress for a model
-  async function checkDownloadProgress(modelName: string) {
-    let progress = await fetchDownloadProgress(modelName);
-    console.log(`Download status for ${modelName} ${progress}/100.0%`);
-
-    // Continue checking until progress reaches 100
-    if (progress < 100) {
-      setTimeout(() => checkDownloadProgress(modelName), 1500);
-    } else {
-      // Stop tracking the model once download is complete
-      console.log(`Stopping tracker for ${modelName}`);
-      const currentDownloads = JSON.parse(
-        localStorage.getItem("downloadingModels") || "[]",
-      );
-      const updatedDownloads = currentDownloads.filter(
-        (model: string) => model !== modelName,
-      );
-      localStorage.setItem(
-        "downloadingModels",
-        JSON.stringify(updatedDownloads),
-      );
-      downloadingModels.delete(modelName);
-    }
-  }
-
-  /**
-   * Groups models by their prefix.
-   * @param models - Array of ModelStatus objects.
-   * @returns An object grouping models by their prefix.
-   */
-  function groupModelsByPrefix(
-    models: ModelStatus[],
-  ): Record<string, ModelStatus[]> {
-    return models.reduce(
-      (acc, model) => {
-        const prefix = model.name.split("-")[0];
-        acc[prefix] = acc[prefix] || [];
-        acc[prefix].push(model);
-        return acc;
-      },
-      {} as Record<string, ModelStatus[]>,
-    );
-  }
-
-  /**
-   * Handles change in variant selection for a model.
-   * @param modelPrefix - The prefix of the model.
-   * @param event - The change event.
-   */
-  function handleVariantChange(modelPrefix: string, event: Event) {
-    const target = event.target as HTMLSelectElement;
-    selectedVariant[modelPrefix] = target.value;
-  }
-
-  /**
-   * Retrieves model details based on the selected variant or default.
-   * @param models - Array of ModelStatus objects.
-   * @param prefix - The prefix of the model group.
-   * @returns The selected or default ModelStatus object.
-   */
-  function getModelDetails(models: ModelStatus[], prefix: string): ModelStatus {
-    return models.find((m) => m.name === selectedVariant[prefix]) || models[0];
-  }
-
-  // Reactive statements to filter and group models based on search query
-  $: filteredModels = data.models
-    .filter(
-      (model) =>
-        !downloadedOrDownloadingModels.includes(model) &&
-        model.name.toLowerCase().includes(searchQuery.toLowerCase()),
-    )
-    .sort((a, b) => a.name.localeCompare(b.name));
-
-  // Reactive statement with models grouped by prefix
-  $: groupedModels = groupModelsByPrefix(filteredModels);
-
-  // Reactive statement to filter models that are downloaded or downloading
-  $: downloadedOrDownloadingModels = data.models
-    .filter((model) => model.progress > 0 || model.available)
-    .sort((a, b) => a.name.localeCompare(b.name));
-
-  async function cancelDownload(modelName: string) {
-    try {
-      const response = await fetch(`/api/model/${modelName}/download/cancel`, {
-        method: "POST",
-      });
-
-      if (response.ok) {
-        console.log(`Download for ${modelName} cancelled successfully.`);
-        // Update UI based on successful cancellation
-        const modelIndex = data.models.findIndex((m) => m.name === modelName);
-        if (modelIndex !== -1) {
-          data.models[modelIndex].progress = 0;
-          data.models[modelIndex].available = false;
-          data.models = [...data.models]; // trigger reactivity
-        }
-
-        // Remove model from tracking and local storage
-        downloadingModels.delete(modelName);
-        const currentDownloads = JSON.parse(
-          localStorage.getItem("downloadingModels") || "[]",
-        );
-        const updatedDownloads = currentDownloads.filter(
-          (model: string) => model !== modelName,
-        );
-        localStorage.setItem(
-          "downloadingModels",
-          JSON.stringify(updatedDownloads),
-        );
-      } else {
-        console.error(`Failed to cancel download for ${modelName}`);
-      }
-    } catch (error) {
-      console.error(`Error cancelling download for ${modelName}:`, error);
-    }
+  function toggleBar() {
+    bar_visible = !bar_visible;
+    barVisible.set(bar_visible);
  }
+  onDestroy(unsubscribe);
 </script>

-<div class="ml-12 pt-1">
-  <div class="search-row">
-    <input
-      type="text"
-      bind:value={searchQuery}
-      class="input input-bordered flex-grow"
-      placeholder="Search models..."
-      on:input={(e) => {
-        const target = e.target;
-        if (target instanceof HTMLInputElement) {
-          updateSearch(target.value);
-        }
-      }}
+{#if !bar_visible}
+  <button
+    class="absolute p-0 top-1 left-2 md:left-16 h-10 w-10 min-h-0 btn btn-ghost flex items-center justify-center font-semibold z-40"
+    on:click={toggleBar}
+  >
+    <svg
+      xmlns="http://www.w3.org/2000/svg"
+      viewBox="0 0 24 24"
+      fill="currentColor"
+      class="w-4 h-4"
+    >
+      <path
+        d="M11.28 9.53 8.81 12l2.47 2.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-3-3a.75.75 0 0 1 0-1.06l3-3a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734Z"
+      >
+      </path>
+      <path
+        d="M3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25V3.75C2 2.784 2.784 2 3.75 2ZM3.5 3.75v16.5c0 .138.112.25.25.25H15v-17H3.75a.25.25 0 0 0-.25.25Zm13 16.75h3.75a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25H16.5Z"
+      >
+      </path>
+    </svg>
+  </button>
+{/if}
+<div class="flex flex-row items-center justify-center pt-5">
+  <svg
+    xmlns="http://www.w3.org/2000/svg"
+    viewBox="0 0 16 16"
+    width="24"
+    height="24"
+  >
+    <path
+      class="fill-warning"
+      d="M9.504.43a1.516 1.516 0 0 1 2.437 1.713L10.415 5.5h2.123c1.57 0 2.346 1.909 1.22 3.004l-7.34 7.142a1.249 1.249 0 0 1-.871.354h-.302a1.25 1.25 0 0 1-1.157-1.723L5.633 10.5H3.462c-1.57 0-2.346-1.909-1.22-3.004L9.503.429Zm1.047 1.074L3.286 8.571A.25.25 0 0 0 3.462 9H6.75a.75.75 0 0 1 .694 1.034l-1.713 4.188 6.982-6.793A.25.25 0 0 0 12.538 7H9.25a.75.75 0 0 1-.683-1.06l2.008-4.418.003-.006a.036.036 0 0 0-.004-.009l-.006-.006-.008-.001c-.003 0-.006.002-.009.004Z"
    />
+  </svg>
+  <h1 class="px-2 text-center text-3xl font-bold">Download a model</h1>
+  <svg
+    xmlns="http://www.w3.org/2000/svg"
+    viewBox="0 0 16 16"
+    width="24"
+    height="24"
+  >
+    <path
+      class="fill-warning"
+      d="M9.504.43a1.516 1.516 0 0 1 2.437 1.713L10.415 5.5h2.123c1.57 0 2.346 1.909 1.22 3.004l-7.34 7.142a1.249 1.249 0 0 1-.871.354h-.302a1.25 1.25 0 0 1-1.157-1.723L5.633 10.5H3.462c-1.57 0-2.346-1.909-1.22-3.004L9.503.429Zm1.047 1.074L3.286 8.571A.25.25 0 0 0 3.462 9H6.75a.75.75 0 0 1 .694 1.034l-1.713 4.188 6.982-6.793A.25.25 0 0 0 12.538 7H9.25a.75.75 0 0 1-.683-1.06l2.008-4.418.003-.006a.036.036 0 0 0-.004-.009l-.006-.006-.008-.001c-.003 0-.006.002-.009.004Z"
+    />
+  </svg>
+</div>
+
+<!-- Explanatory note, not a heading: kept as a paragraph so the page has a single <h1>. -->
+<p class="pb-5 pt-2 text-center text-xl font-light">
+  Make sure you have enough disk space and available RAM to run them.<br />
+  7B requires about 4.5GB of free RAM, 13B requires about 12GB free, 30B requires
+  about 20GB free
+</p>
+
+<div class="mx-auto w-fit">
+  <RefreshModal />
+</div>
+
+<div class="mt-30 mx-auto flex flex-col">
+  <div class="mx-auto w-full max-w-4xl">
+    <div class="divider" />
+    {#each data.models as model}
+      <div class="my-5 flex flex-col content-around">
+        <div
+          class="mx-auto flex flex-row items-center justify-center text-3xl font-semibold"
+        >
+          <span class="mr-2">{model.name}</span>
+          {#if model.available}
+            <svg
+              xmlns="http://www.w3.org/2000/svg"
+              viewBox="0 0 16 16"
+              width="24"
+              height="24"
+            >
+              <path
+                class="fill-info"
+                d="m9.585.52.929.68c.153.112.331.186.518.215l1.138.175a2.678 2.678 0 0 1 2.24 2.24l.174 1.139c.029.187.103.365.215.518l.68.928a2.677 2.677 0 0 1 0 3.17l-.68.928a1.174 1.174 0 0 0-.215.518l-.175 1.138a2.678 2.678 0 0 1-2.241 2.241l-1.138.175a1.17 1.17 0 0 0-.518.215l-.928.68a2.677 2.677 0 0 1-3.17 0l-.928-.68a1.174 1.174 0 0 0-.518-.215L3.83 14.41a2.678 2.678 0 0 1-2.24-2.24l-.175-1.138a1.17 1.17 0 0 0-.215-.518l-.68-.928a2.677 2.677 0 0 1 0-3.17l.68-.928c.112-.153.186-.331.215-.518l.175-1.14a2.678 2.678 0 0 1 2.24-2.24l1.139-.175c.187-.029.365-.103.518-.215l.928-.68a2.677 2.677 0 0 1 3.17 0ZM7.303 1.728l-.927.68a2.67 2.67 0 0 1-1.18.489l-1.137.174a1.179 1.179 0 0 0-.987.987l-.174 1.136a2.677 2.677 0 0 1-.489 1.18l-.68.928a1.18 1.18 0 0 0 0 1.394l.68.927c.256.348.424.753.489 1.18l.174 1.137c.078.509.478.909.987.987l1.136.174a2.67 2.67 0 0 1 1.18.489l.928.68c.414.305.979.305 1.394 0l.927-.68a2.67 2.67 0 0 1 1.18-.489l1.137-.174a1.18 1.18 0 0 0 .987-.987l.174-1.136a2.67 2.67 0 0 1 .489-1.18l.68-.928a1.176 1.176 0 0 0 0-1.394l-.68-.927a2.686 2.686 0 0 1-.489-1.18l-.174-1.137a1.179 1.179 0 0 0-.987-.987l-1.136-.174a2.677 2.677 0 0 1-1.18-.489l-.928-.68a1.176 1.176 0 0 0-1.394 0ZM11.28 6.78l-3.75 3.75a.75.75 0 0 1-1.06 0L4.72 8.78a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018L7 8.94l3.22-3.22a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"
+              />
+            </svg>
+          {/if}
+        </div>
+        <!-- model.size is divided by 1e9 for display as GB; round to two decimals so the label stays short. -->
+        <p class="mx-auto pb-2 text-xl font-light">
+          ({(model.size / 1e9).toFixed(2)}GB)
+        </p>
+        {#if model.progress}
+          <div class="mx-auto my-5 w-56 justify-center">
+            <p class="w-full text-center font-light">{model.progress}%</p>
+            <progress
+              class="progress-primary progress mx-auto h-5 w-56"
+              value={model.progress}
+              max="100"
+            />
+          </div>
+        {/if}
+        <!-- Available models get a Delete button; every other model gets a Download button. -->
+        {#if model.available}
+          <button
+            on:click={() => deleteModel(model.name)}
+            class="btn-warning btn-outline btn mx-auto">Delete</button
+          >
+        {:else}
+          <!-- The Download button is disabled while the model reports non-zero progress. -->
+          <button
+            on:click={() => onClick(model.name)}
+            class="btn-primary btn mx-auto"
+            disabled={model.available ||
+              !!(model.progress && model.progress > 0)}
+          >
+            Download
+          </button>
+        {/if}
+      </div>
+      <div class="divider" />
+    {/each}
  </div>
 </div>
-
-<div class="models-grid grid">
-  {#each downloadedOrDownloadingModels as model}
-    <div class="model card card-bordered">
-      <div class="card-body">
-        <h2 class="card-title">{truncateString(model.name, 24)}</h2>
-        <div class="model-details">
-          {#if model.progress < 100}
-            <div class="progress-bar">
-              <progress value={model.progress} max="100"></progress> / {model.progress}%
-            </div>
-          {/if}
-          {#if model.progress >= 100}
-            <p>Size: {(model.size / 1e9).toFixed(2)} GB</p>
-            <button
-              on:click={() => handleModelAction(model.name, model.available)}
-              class="btn btn-error mt-2"
-            >
-              <Icon icon="mdi:trash" width="32" height="32" />
-            </button>
-          {:else}
-            <button
-              on:click={() =>
-                handleModelAction(
-                  model.name,
-                  model.available,
-                  model.progress > 0 && model.progress < 100,
-                )}
-              class="btn btn-error mt-2"
-            >
-              <Icon icon="mdi:cancel" width="32" height="32" />
-            </button>
-          {/if}
-        </div>
-      </div>
-    </div>
-  {/each}
-</div>
-
-<div class="models-grid grid">
-  {#each Object.entries(groupedModels) as [prefix, models]}
-    <div class="model-group card card-bordered">
-      <div class="card-body">
-        <h2 class="card-title">{truncateString(prefix, 24)}</h2>
-        <div class="model-details">
-          {#if models.length > 1}
-            <select
-              class="select-bordered select w-full"
-              bind:value={selectedVariant[prefix]}
-              on:change={(event) => handleVariantChange(prefix, event)}
-            >
-              {#each models as model}
-                <option value={model.name}
-                  >{truncateString(model.name, 32)}</option
-                >
-              {/each}
-            </select>
-          {/if}
-
-          {#if models.length === 1 || selectedVariant[prefix]}
-            {@const model = getModelDetails(models, prefix)}
-            {#if models.length === 1}
-              <h3>{truncateString(model.name, 24)}</h3>
-            {/if}
-            <p>Size: {(model.size / 1e9).toFixed(2)} GB</p>
-            <button
-              on:click={() => handleModelAction(model.name, model.available)}
-              class="btn btn-primary mt-2"
-            >
-              <Icon icon="ic:baseline-download" width="32" height="32" />
-            </button>
-          {/if}
-        </div>
-      </div>
-    </div>
-  {/each}
-</div>
--- a/web/src/routes/models/+page.ts
+++ b/web/src/routes/models/+page.ts
@ -4,7 +4,7 @@ interface ModelStatus {
  name: string;
  size: number;
  available: boolean;
-  progress: number;
+  progress?: number;
 }

 export const load: PageLoad = async ({ fetch }) => {
--- a/web/src/routes/signup/+page.svelte
+++ b/web/src/routes/signup/+page.svelte
@ -1,165 +0,0 @@
-<script lang="ts">
-  import { onMount } from "svelte";
-  import { goto } from "$app/navigation";
-  let username = "";
-  let secret = "";
-  let full_name = "";
-  let email = "";
-  let auth_type = 1;
-  let error = "";
-  let success = "";
-
-  async function handleSubmit(event: Event) {
-    event.preventDefault();
-    error = "";
-    success = "";
-    const response = await fetch("/api/user/create", {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-      },
-      body: JSON.stringify({
-        username,
-        secret,
-        full_name,
-        email,
-        auth_type,
-      }),
-    });
-
-    if (response.ok) {
-      success = "User created successfully!";
-      await authAfterCreate(event);
-      goto("/account");
-    } else {
-      const data = await response.json();
-      error = data.detail || "An error occurred";
-    }
-  }
-
-  async function authAfterCreate(event: Event) {
-    event.preventDefault();
-    try {
-      const response = await fetch("/api/auth/token", {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/x-www-form-urlencoded",
-        },
-        body: new URLSearchParams({
-          username: username,
-          password: secret,
-        }),
-      });
-
-      if (response.ok) {
-        goto("/", { invalidateAll: true });
-      } else {
-        const errorData = await response.json();
-        error = errorData.detail || "Login failed";
-      }
-    } catch (err) {
-      error = err instanceof Error ? err.message : "An unknown error occurred";
-    }
-  }
-</script>
-
-<main>
-  <div class="card-group">
-    <div class="card">
-      <div class="card-title p-3 text-3xl justify-center font-bold">
-        Register a new user
-      </div>
-      <div class="card-body">
-        <form on:submit={handleSubmit}>
-          <div class="form-control">
-            <input
-              type="text"
-              placeholder="Username"
-              bind:value={username}
-              required
-            />
-          </div>
-          <div class="form-control">
-            <input
-              type="password"
-              placeholder="Password"
-              bind:value={secret}
-              required
-            />
-          </div>
-
-          {#if error}
-            <p class="error-message">{error}</p>
-          {/if}
-          {#if success}
-            <p class="success-message">{success}</p>
-          {/if}
-          <button class="btn" type="submit">Submit</button>
-        </form>
-      </div>
-    </div>
-    <div class="card">
-      <div class="card-title p-3 text-3xl justify-center font-bold">
-        Or link an account (comming soon)
-      </div>
-      <div class="card-body">
-        <button name="google-btn" class="btn" disabled={true}>
-          <svg
-            xmlns="http://www.w3.org/2000/svg"
-            width="18"
-            height="18"
-            fill="currentColor"
-            viewBox="0 0 16 16"
-          >
-            <path
-              d="M15.545 6.558a9.4 9.4 0 0 1 .139 1.626c0 2.434-.87 4.492-2.384 5.885h.002C11.978 15.292 10.158 16 8 16A8 8 0 1 1 8 0a7.7 7.7 0 0 1 5.352 2.082l-2.284 2.284A4.35 4.35 0 0 0 8 3.166c-2.087 0-3.86 1.408-4.492 3.304a4.8 4.8 0 0 0 0 3.063h.003c.635 1.893 2.405 3.301 4.492 3.301 1.078 0 2.004-.276 2.722-.764h-.003a3.7 3.7 0 0 0 1.599-2.431H8v-3.08z"
-            />
-          </svg>
-          <span>Link Google Account</span>
-        </button>
-        <button name="reddit-btn" class="btn" disabled={true}>
-          <svg
-            xmlns="http://www.w3.org/2000/svg"
-            width="18"
-            height="18"
-            fill="currentColor"
-            viewBox="0 0 16 16"
-          >
-            <path
-              d="M6.167 8a.83.83 0 0 0-.83.83c0 .459.372.84.83.831a.831.831 0 0 0 0-1.661m1.843 3.647c.315 0 1.403-.038 1.976-.611a.23.23 0 0 0 0-.306.213.213 0 0 0-.306 0c-.353.363-1.126.487-1.67.487-.545 0-1.308-.124-1.671-.487a.213.213 0 0 0-.306 0 .213.213 0 0 0 0 .306c.564.563 1.652.61 1.977.61zm.992-2.807c0 .458.373.83.831.83s.83-.381.83-.83a.831.831 0 0 0-1.66 0z"
-            />
-            <path
-              d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0m-3.828-1.165c-.315 0-.602.124-.812.325-.801-.573-1.9-.945-3.121-.993l.534-2.501 1.738.372a.83.83 0 1 0 .83-.869.83.83 0 0 0-.744.468l-1.938-.41a.2.2 0 0 0-.153.028.2.2 0 0 0-.086.134l-.592 2.788c-1.24.038-2.358.41-3.17.992-.21-.2-.496-.324-.81-.324a1.163 1.163 0 0 0-.478 2.224q-.03.17-.029.353c0 1.795 2.091 3.256 4.669 3.256s4.668-1.451 4.668-3.256c0-.114-.01-.238-.029-.353.401-.181.688-.592.688-1.069 0-.65-.525-1.165-1.165-1.165"
-            />
-          </svg>
-          <span>Link Reddit Account</span>
-        </button>
-      </div>
-    </div>
-    <div class="card">
-      <div class="card-title pt-3 text-3xl justify-center font-bold">
-        Already have an account?
-      </div>
-      <div class="card-body">
-        <button name="login-btn" class="btn" on:click={() => goto("/login")}>
-          <svg
-            xmlns="http://www.w3.org/2000/svg"
-            width="18"
-            height="18"
-            fill="currentColor"
-            class="mr-3"
-            viewBox="0 0 16 16"
-          >
-            <path
-              d="M12.5 16a3.5 3.5 0 1 0 0-7 3.5 3.5 0 0 0 0 7m1.679-4.493-1.335 2.226a.75.75 0 0 1-1.174.144l-.774-.773a.5.5 0 0 1 .708-.708l.547.548 1.17-1.951a.5.5 0 1 1 .858.514M11 5a3 3 0 1 1-6 0 3 3 0 0 1 6 0M8 7a2 2 0 1 0 0-4 2 2 0 0 0 0 4"
-            />
-            <path
-              d="M8.256 14a4.5 4.5 0 0 1-.229-1.004H3c.001-.246.154-.986.832-1.664C4.484 10.68 5.711 10 8 10q.39 0 .74.025c.226-.341.496-.65.804-.918Q8.844 9.002 8 9c-5 0-6 3-6 4s1 1 1 1z"
-            />
-          </svg>
-          <span>Login Instead</span>
-        </button>
-      </div>
-    </div>
-  </div>
-</main>
--- a/web/svelte.config.js
+++ b/web/svelte.config.js
@ -1,5 +1,5 @@
 import adapter from "@sveltejs/adapter-static";
-import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
+import { vitePreprocess } from "@sveltejs/kit/vite";

 /** @type {import('@sveltejs/kit').Config} */
 const config = {