From a13eeadaed2befad02656dfb8710a12eb079c041 Mon Sep 17 00:00:00 2001 From: Nathan Sarrazin Date: Tue, 21 Mar 2023 07:11:00 +0100 Subject: [PATCH] Latest changes: - Added nginx, api & web app on the same port now. - Allowed CSR, through sveltekit, with a hook for redirecting server side api requests. - Implemented menu to pass model parameters on start page. - Added a loading indicator while the model is computing --- .env.sample | 3 +- README.md | 18 ++- docker-compose.yml | 14 +- nginx/Dockerfile.nginx | 3 + nginx/nginx.conf | 32 +++++ package-lock.json | 6 + web/src/app.html | 4 +- web/src/hooks.server.ts | 10 ++ web/src/routes/+layout.svelte | 4 +- web/src/routes/+layout.ts | 4 +- web/src/routes/+page.server.ts | 12 +- web/src/routes/+page.svelte | 159 +++++++++++++++++++++-- web/src/routes/+page.ts | 4 +- web/src/routes/chat/[id]/+page.server.ts | 5 +- web/src/routes/chat/[id]/+page.svelte | 62 ++++++--- web/src/routes/chat/[id]/+page.ts | 4 +- web/svelte.config.js | 5 +- 17 files changed, 284 insertions(+), 65 deletions(-) create mode 100644 nginx/Dockerfile.nginx create mode 100644 nginx/nginx.conf create mode 100644 package-lock.json create mode 100644 web/src/hooks.server.ts diff --git a/.env.sample b/.env.sample index eaf8ec9..64365b0 100644 --- a/.env.sample +++ b/.env.sample @@ -1,4 +1,3 @@ DATABASE_URL=mongodb://mongodb:27017/lms secret_key=youshouldchangethis -API_PORT = 9124 -WEB_PORT = 9123 \ No newline at end of file +PORT = 8008 \ No newline at end of file diff --git a/README.md b/README.md index 98cc55d..9ade5a7 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,15 @@ ![License](https://img.shields.io/github/license/nsarrazin/serge) -![Serge](https://i.imgur.com/JtWV72d.png) +| Home page | Chat | +| :----------------------------------: | :----------------------------------------------: | +| ![](https://i.imgur.com/CRXj9KD.png) | ![Serge - chat](https://i.imgur.com/bnqZyaC.png) | + A chat interface based on `llama.cpp` for running alpaca models. 
-* **SvelteKit** frontend -* **MongoDB** for storing chat history & parameters -* **FastAPI + beanie** for the API, wrapping calls to `llama.cpp` +- **SvelteKit** frontend +- **MongoDB** for storing chat history & parameters +- **FastAPI + beanie** for the API, wrapping calls to `llama.cpp` ## Getting started @@ -24,6 +27,8 @@ Then put your weights in the `models` folder. If you don't have them you can dow They are currently the only two models supported. I'm working on expanding support to all the models supported by `llama.cpp`. +Note: `llama.cpp` [recently underwent some changes](https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818) that requires model weights to be converted to a new format. Serge picks this up automatically on startup, and will convert your weights to the new format if needed. The old weights will be renamed to `*.bin.old` and the new weights will be named `*.bin`. + Then, you can start the project by running: ``` @@ -31,9 +36,9 @@ cp .env.sample .env docker compose up -d ``` -The front-end lives at http://localhost:9123/. +The front-end lives at http://localhost:8008/ by default, but you can change the port in the `.env` file. -To get an interactive API documentation go to http://localhost:9124/docs. +The interactive API docs are available at http://localhost:8008/api/docs. ## What's next @@ -44,5 +49,4 @@ To get an interactive API documentation go to http://localhost:9124/docs. - [ ] LangChain integration with a custom LLM - [ ] Support for other llama models, quantization, etc. - And a lot more! 
diff --git a/docker-compose.yml b/docker-compose.yml index be9ca99..b65b320 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,12 +8,12 @@ services: - ./api:/usr/src/app/ - /etc/localtime:/etc/localtime:ro ports: - - ${API_PORT}:${API_PORT} + - 9124:9124 depends_on: - mongodb env_file: - .env - command: uvicorn main:app --reload --host 0.0.0.0 --port ${API_PORT} + command: uvicorn main:app --reload --host 0.0.0.0 --port 9124 --root-path /api/ mongodb: image: bitnami/mongodb:latest @@ -28,12 +28,18 @@ services: context: ./web dockerfile: ./Dockerfile.web ports: - - ${WEB_PORT}:${WEB_PORT} + - 9123:9123 - 24678:24678 volumes: - ./web:/usr/src/app/ - /usr/src/app/node_modules - command: npm run dev -- --host 0.0.0.0 --port ${WEB_PORT} + command: npm run dev -- --host 0.0.0.0 --port 9123 + nginx: + build: + context: ./nginx + dockerfile: ./Dockerfile.nginx + ports: + - "${PORT}:80" volumes: data: diff --git a/nginx/Dockerfile.nginx b/nginx/Dockerfile.nginx new file mode 100644 index 0000000..233b925 --- /dev/null +++ b/nginx/Dockerfile.nginx @@ -0,0 +1,3 @@ +FROM nginx:alpine as deploy + +COPY nginx.conf /etc/nginx/conf.d/default.conf \ No newline at end of file diff --git a/nginx/nginx.conf b/nginx/nginx.conf new file mode 100644 index 0000000..8f68aac --- /dev/null +++ b/nginx/nginx.conf @@ -0,0 +1,32 @@ +server { + listen 80; # Adjust the port number if needed + server_name localhost; + # Proxy requests for the root URL to Service A + location / { + proxy_pass http://web:9123; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_read_timeout 300s; + proxy_connect_timeout 300s; + proxy_send_timeout 300s; + + } + + # Proxy requests for /api to Service B + location /api/ { + proxy_pass http://api:9124; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For 
$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + rewrite ^/api/(.*) /$1 break; + + proxy_read_timeout 300s; + proxy_connect_timeout 300s; + proxy_send_timeout 300s; + + } +} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..3d0930e --- /dev/null +++ b/package-lock.json @@ -0,0 +1,6 @@ +{ + "name": "serge", + "lockfileVersion": 3, + "requires": true, + "packages": {} +} diff --git a/web/src/app.html b/web/src/app.html index 04a48a4..699f824 100644 --- a/web/src/app.html +++ b/web/src/app.html @@ -1,5 +1,5 @@ - + @@ -7,7 +7,7 @@ Serge - Powered by LLaMa %sveltekit.head% - +
%sveltekit.body%
diff --git a/web/src/hooks.server.ts b/web/src/hooks.server.ts new file mode 100644 index 0000000..50a921d --- /dev/null +++ b/web/src/hooks.server.ts @@ -0,0 +1,10 @@ +import type { HandleFetch } from "@sveltejs/kit"; + +export const handleFetch = (({ request, fetch }) => { + request = new Request( + request.url.replace("http://localhost/api/", "http://api:9124/"), + request + ); + + return fetch(request); +}) satisfies HandleFetch; diff --git a/web/src/routes/+layout.svelte b/web/src/routes/+layout.svelte index 7271ade..88990c7 100644 --- a/web/src/routes/+layout.svelte +++ b/web/src/routes/+layout.svelte @@ -37,7 +37,7 @@ -
+
diff --git a/web/src/routes/+layout.ts b/web/src/routes/+layout.ts index 82a1d6a..16c6a45 100644 --- a/web/src/routes/+layout.ts +++ b/web/src/routes/+layout.ts @@ -7,11 +7,9 @@ type t = { }; export const load: LayoutLoad = async ({ fetch }) => { - const r = await fetch("http://api:9124/chats"); + const r = await fetch("/api/chats"); const chats = (await r.json()) as t[]; return { chats: chats, }; }; - -export const csr = false; diff --git a/web/src/routes/+page.server.ts b/web/src/routes/+page.server.ts index c24f423..8aae6f7 100644 --- a/web/src/routes/+page.server.ts +++ b/web/src/routes/+page.server.ts @@ -3,14 +3,14 @@ import type { Actions } from "./$types"; export const actions = { default: async ({ fetch, request }) => { const formData = await request.formData(); - const model = formData.get("model"); - let data = new URLSearchParams(); - if (model) { - data.append("model", model.toString()); - } + const convertedFormEntries = Array.from(formData, ([key, value]) => [ + key, + typeof value === "string" ? value : value.name, + ]); + const searchParams = new URLSearchParams(convertedFormEntries); - const response = await fetch("http://api:9124/chat?" + data.toString(), { + const response = await fetch("/api/chat?" + searchParams.toString(), { method: "POST", }); diff --git a/web/src/routes/+page.svelte b/web/src/routes/+page.svelte index 2db7751..334227a 100644 --- a/web/src/routes/+page.svelte +++ b/web/src/routes/+page.svelte @@ -2,18 +2,155 @@ import type { PageData } from "./$types"; export let data: PageData; + + const modelAvailable = data.models.length > 0; + + let temp = 0.1; + let top_k: number = 50; + let top_p: number = 0.95; + + let max_length: number = 256; + let repeat_last_n: number = 64; + let repeat_penalty: number = 1.3; + + let preprompt: string = + "Below is an instruction that describes a task. Write a response that appropriately completes the request. The response must be accurate, concise and evidence-based whenever possible. 
A complete answer is always ended by [end of text]."; -

Say Hi to Serge !

+

Say Hi to Serge!

+

+ An easy way to chat with Alpaca & other LLaMa based models. +

-
- - - + +
+
+ +
+
+ +
+ +
Model settings
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+ + +
+
+ +