Latest changes:

- Added nginx, api & web app on the same port now.
- Enabled CSR through SvelteKit, with a hook for redirecting server-side API requests.
- Implemented menu to pass model parameters on the start page.
- Added a loading indicator while the model is computing
This commit is contained in:
Nathan Sarrazin 2023-03-21 07:11:00 +01:00
parent 4dfa4c92c0
commit a13eeadaed
17 changed files with 284 additions and 65 deletions

View File

@ -1,4 +1,3 @@
DATABASE_URL=mongodb://mongodb:27017/lms
secret_key=youshouldchangethis
API_PORT = 9124
WEB_PORT = 9123
PORT = 8008

View File

@ -2,12 +2,15 @@
![License](https://img.shields.io/github/license/nsarrazin/serge)
![Serge](https://i.imgur.com/JtWV72d.png)
| Home page | Chat |
| :----------------------------------: | :----------------------------------------------: |
| ![](https://i.imgur.com/CRXj9KD.png) | ![Serge - chat](https://i.imgur.com/bnqZyaC.png) |
A chat interface based on `llama.cpp` for running alpaca models.
* **SvelteKit** frontend
* **MongoDB** for storing chat history & parameters
* **FastAPI + beanie** for the API, wrapping calls to `llama.cpp`
- **SvelteKit** frontend
- **MongoDB** for storing chat history & parameters
- **FastAPI + beanie** for the API, wrapping calls to `llama.cpp`
## Getting started
@ -24,6 +27,8 @@ Then put your weights in the `models` folder. If you don't have them you can dow
They are currently the only two models supported. I'm working on expanding support to all the models supported by `llama.cpp`.
Note: `llama.cpp` [recently underwent some change](https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818) that requires model weights to be converted to a new format. Serge picks this up automatically on startup, and will convert your weights to the new format if needed. The old weights will be renamed to `*.bin.old` and the new weights will be named `*.bin`.
Then, you can start the project by running:
```
@ -31,9 +36,9 @@ cp .env.sample .env
docker compose up -d
```
The front-end lives at http://localhost:9123/.
The front-end lives at http://localhost:8008/ by default but you can change the port in the `.env` file.
To get an interactive API documentation go to http://localhost:9124/docs.
The interactive API docs are available at http://localhost:8008/api/docs.
## What's next
@ -44,5 +49,4 @@ To get an interactive API documentation go to http://localhost:9124/docs.
- [ ] LangChain integration with a custom LLM
- [ ] Support for other llama models, quantization, etc.
And a lot more!

View File

@ -8,12 +8,12 @@ services:
- ./api:/usr/src/app/
- /etc/localtime:/etc/localtime:ro
ports:
- ${API_PORT}:${API_PORT}
- 9124:9124
depends_on:
- mongodb
env_file:
- .env
command: uvicorn main:app --reload --host 0.0.0.0 --port ${API_PORT}
command: uvicorn main:app --reload --host 0.0.0.0 --port 9124 --root-path /api/
mongodb:
image: bitnami/mongodb:latest
@ -28,12 +28,18 @@ services:
context: ./web
dockerfile: ./Dockerfile.web
ports:
- ${WEB_PORT}:${WEB_PORT}
- 9123:9123
- 24678:24678
volumes:
- ./web:/usr/src/app/
- /usr/src/app/node_modules
command: npm run dev -- --host 0.0.0.0 --port ${WEB_PORT}
command: npm run dev -- --host 0.0.0.0 --port 9123
nginx:
build:
context: ./nginx
dockerfile: ./Dockerfile.nginx
ports:
- "${PORT}:80"
volumes:
data:

3
nginx/Dockerfile.nginx Normal file
View File

@ -0,0 +1,3 @@
# Minimal reverse-proxy image: stock nginx:alpine with our routing config baked in.
# Fixed: use uppercase `AS` for the build-stage keyword (consistent-casing lint).
FROM nginx:alpine AS deploy

# Replace the default server config with the web/api proxy rules.
COPY nginx.conf /etc/nginx/conf.d/default.conf

32
nginx/nginx.conf Normal file
View File

@ -0,0 +1,32 @@
# Single public entry point: nginx listens on container port 80 (docker-compose
# maps ${PORT} onto it) and routes traffic to the SvelteKit frontend and the
# API backend so both are reachable through one host:port.
server {
listen 80; # Adjust the port number if needed
server_name localhost;
# Proxy requests for the root URL to Service A
# (the `web` container, the SvelteKit server on port 9123).
location / {
proxy_pass http://web:9123;
# Forward original host and client address info to the upstream app.
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Long timeouts — NOTE(review): presumably so slow model computations are
# not cut off mid-response; confirm 300s suffices for the largest models.
proxy_read_timeout 300s;
proxy_connect_timeout 300s;
proxy_send_timeout 300s;
}
# Proxy requests for /api to Service B
# (the `api` container, uvicorn on port 9124).
location /api/ {
proxy_pass http://api:9124;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Strip the /api prefix before forwarding; the backend serves its routes at
# the root (uvicorn is started with --root-path /api/ in docker-compose).
rewrite ^/api/(.*) /$1 break;
# Same long timeouts as above for slow model responses.
proxy_read_timeout 300s;
proxy_connect_timeout 300s;
proxy_send_timeout 300s;
}
}

6
package-lock.json generated Normal file
View File

@ -0,0 +1,6 @@
{
"name": "serge",
"lockfileVersion": 3,
"requires": true,
"packages": {}
}

View File

@ -1,5 +1,5 @@
<!DOCTYPE html>
<html lang="en" data-theme="dark">
<html lang="en" data-theme="dark" style="height: 100%; margin: 0;">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%sveltekit.assets%/favicon.png" />
@ -7,7 +7,7 @@
<title>Serge - Powered by LLaMa</title>
%sveltekit.head%
</head>
<body data-sveltekit-preload-data="hover">
<body data-sveltekit-preload-data="hover" style="height: 100%;">
<div style="display: contents">%sveltekit.body%</div>
</body>
</html>

10
web/src/hooks.server.ts Normal file
View File

@ -0,0 +1,10 @@
import type { HandleFetch } from "@sveltejs/kit";

/**
 * Server-side fetch hook: requests that the SvelteKit server makes to the
 * public `/api` prefix are rewritten to target the `api` container directly,
 * skipping the nginx proxy hop.
 *
 * NOTE(review): only the literal prefix `http://localhost/api/` is matched —
 * assumes the app is reached via localhost; verify for other hostnames.
 */
export const handleFetch: HandleFetch = ({ request, fetch }) => {
  const internalUrl = request.url.replace(
    "http://localhost/api/",
    "http://api:9124/"
  );
  return fetch(new Request(internalUrl, request));
};

View File

@ -37,7 +37,7 @@
<aside
id="default-sidebar"
class="fixed top-0 left-0 z-40 w-96 h-screen transition-transform -translate-x-full sm:translate-x-0"
class="fixed top-0 left-0 z-40 w-80 h-screen transition-transform -translate-x-full sm:translate-x-0"
aria-label="Sidebar"
>
<div class="h-full px-3 py-4 overflow-y-auto bg-gray-600">
@ -60,6 +60,6 @@
</div>
</aside>
<div class="p-4 sm:ml-96">
<div class="p-4 sm:ml-80 h-full">
<slot />
</div>

View File

@ -7,11 +7,9 @@ type t = {
};
export const load: LayoutLoad = async ({ fetch }) => {
const r = await fetch("http://api:9124/chats");
const r = await fetch("/api/chats");
const chats = (await r.json()) as t[];
return {
chats: chats,
};
};
export const csr = false;

View File

@ -3,14 +3,14 @@ import type { Actions } from "./$types";
export const actions = {
default: async ({ fetch, request }) => {
const formData = await request.formData();
const model = formData.get("model");
let data = new URLSearchParams();
if (model) {
data.append("model", model.toString());
}
const convertedFormEntries = Array.from(formData, ([key, value]) => [
key,
typeof value === "string" ? value : value.name,
]);
const searchParams = new URLSearchParams(convertedFormEntries);
const response = await fetch("http://api:9124/chat?" + data.toString(), {
const response = await fetch("/api/chat?" + searchParams.toString(), {
method: "POST",
});

View File

@ -2,18 +2,155 @@
import type { PageData } from "./$types";
export let data: PageData;
const modelAvailable = data.models.length > 0;
let temp = 0.1;
let top_k: number = 50;
let top_p: number = 0.95;
let max_length: number = 256;
let repeat_last_n: number = 64;
let repeat_penalty: number = 1.3;
let preprompt: string =
"Below is an instruction that describes a task. Write a response that appropriately completes the request. The response must be accurate, concise and evidence-based whenever possible. A complete answer is always ended by [end of text].";
</script>
<h1 class="text-3xl font-bold text-center pt-5">Say Hi to Serge !</h1>
<h1 class="text-3xl font-bold text-center pt-5">Say Hi to Serge!</h1>
<h1 class="text-xl text-center pt-2 pb-5">
An easy way to chat with Alpaca & other LLaMa based models.
</h1>
<form method="POST" class="p-10">
<label for="model" class="label">
<span class="label-text">Model</span>
</label>
<select name="model" class="select select-bordered w-full max-w-xs">
{#each data.models as model}
<option value={model}>{model}</option>
{/each}
</select>
<button class="btn ml-5">Start a new chat</button>
<form method="POST" class="p-5">
<div class="w-full pb-20">
<div class="mx-auto w-fit pt-5">
<button class=" mx-auto btn btn-primary ml-5" disabled={!modelAvailable}
>Start a new chat</button
>
</div>
</div>
<div
tabindex="-1"
class="collapse collapse-arrow border-2 rounded-box border-gray-600 bg-base-100"
>
<input type="checkbox" />
<div class="collapse-title text-xl font-medium">Model settings</div>
<div class="collapse-content">
<div class="grid grid-cols-3 gap-4 p-3 ">
<div
class="tooltip col-span-2"
data-tip="The higher the temperature, the more random the model output."
>
<label for="temp" class="label-text">Temperature - [{temp}]</label>
<input
name="temp"
type="range"
bind:value={temp}
min="0.05"
max="2"
step="0.05"
class="range range-sm mt-auto"
/>
</div>
<div
class="flex flex-col tooltip"
data-tip="The number of samples to consider for top_k sampling. "
>
<label for="top_k" class="label-text pb-1">top_k</label>
<input
class="input input-bordered w-full max-w-xs"
name="top_k"
type="number"
bind:value={top_k}
min="0"
max="100"
/>
</div>
<div class="col-span-2">
<label for="max_length" class="label-text"
>Maximum generated text length in tokens - [{max_length}]</label
>
<input
name="max_length"
type="range"
bind:value={max_length}
min="16"
max="512"
step="16"
class="range range-sm mt-auto"
/>
</div>
<div
class="flex flex-col tooltip"
data-tip="The cumulative probability of the tokens to keep for nucleus sampling. "
>
<label for="top_p" class="label-text pb-1">top_p</label>
<input
class="input input-bordered w-full max-w-xs"
name="top_p"
type="number"
bind:value={top_p}
min="0"
max="1"
step="0.025"
/>
</div>
<div class="flex flex-col">
<label for="model" class="label-text pb-1"> Model choice </label>
<select name="model" class="select select-bordered w-full max-w-xs">
{#each data.models as model}
<option value={model}>{model}</option>
{/each}
</select>
</div>
<div
class="flex flex-col tooltip"
data-tip="Number of tokens to look back on for deciding to apply the repeat penalty."
>
<label for="repeat_last_n" class="label-text pb-1"
>repeat_last_n</label
>
<input
class="input input-bordered w-full max-w-xs"
name="repeat_last_n"
type="number"
bind:value={repeat_last_n}
min="0"
max="100"
/>
</div>
<div
class="flex flex-col tooltip"
data-tip="The weight of the penalty to avoid repeating the last repeat_last_n tokens. "
>
<label for="repeat_penalty" class="label-text pb-1"
>repeat_penalty</label
>
<input
class="input input-bordered w-full max-w-xs"
name="repeat_penalty"
type="number"
bind:value={repeat_penalty}
min="0"
max="2"
step="0.05"
/>
</div>
<div class="col-span-3 flex flex-col">
<label for="preprompt" class="label-text pb-1"
>Pre-Prompt for initializing a conversation.</label
>
<textarea
class="textarea h-24 textarea-bordered w-full"
name="preprompt"
bind:value={preprompt}
placeholder="Enter your prompt here"
/>
</div>
</div>
</div>
</div>
</form>

View File

@ -1,11 +1,9 @@
import type { PageLoad } from "./$types";

/**
 * Loads the list of available model names for the start page.
 *
 * Fixed: the fetch URL was relative (`"api/models"`), which resolves against
 * the current route path instead of the site root; every sibling loader uses
 * the absolute `"/api/..."` prefix that nginx proxies to the backend.
 */
export const load: PageLoad = async ({ fetch }) => {
  const r = await fetch("/api/models");
  const models = (await r.json()) as string[];
  return {
    models,
  };
};

View File

@ -10,14 +10,15 @@ export const actions = {
data.append("prompt", question.toString());
const response = await fetch(
"http://api:9124/chat/" + params.id + "/question?" + data.toString(),
"/api/chat/" + params.id + "/question?" + data.toString(),
{
method: "POST",
}
);
if (response.ok) {
return { success: true };
const question = await response.json();
return question;
} else {
console.log(response.statusText);
}

View File

@ -1,41 +1,65 @@
<script lang="ts">
import { navigating } from "$app/stores";
import type { PageData } from "./$types";
import { enhance } from "$app/forms";
export let data: PageData;
const questions = data.props.questions ?? [];
const startDate = new Date(data.props.created);
$: isLoading = false;
$: questions = data.props.questions ?? [];
$: startDate = new Date(data.props.created);
</script>
<div class="max-w-4xl mx-auto">
<div class="max-w-4xl mx-auto h-full max-h-screen relative">
<h1 class="text-4xl font-bold">Chat with {data.props.parameters.model}</h1>
<h4 class="text-xl font-semibold mb-10">
Started on {startDate.toLocaleString("en-US")}
</h4>
{#each questions as question}
<div class="chat chat-end">
<div class="chat-bubble chat-bubble-secondary whitespace-pre-line">
{question.question}
</div>
</div>
<div class="chat chat-start">
<div class="chat-bubble chat-bubble-primary whitespace-pre-line">
{question.answer}
</div>
</div>
{/each}
<form method="POST" class="form-control items-center mt-3">
<div class="overflow-y-auto h-[calc(100vh-10rem)] px-10">
<div class="h-max pb-32">
{#each questions as question}
<div class="chat chat-end my-2">
<div
class="chat-bubble chat-bubble-secondary whitespace-pre-line text-lg"
>
{question.question}
</div>
</div>
<div class="chat chat-start my-2">
<div
class="chat-bubble chat-bubble-primary whitespace-pre-line text-lg"
>
{question.answer}
</div>
</div>
{/each}
</div>
</div>
<form
method="POST"
class="form-control items-center absolute bottom-0 w-full px-5 left-0 h-32 flex flex-row bg-base-100"
use:enhance={() => {
isLoading = true;
return async ({ update }) => {
isLoading = false;
update();
};
}}
>
<textarea
name="question"
class="textarea textarea-bordered h-24 w-full"
class="textarea textarea-bordered h-24 w-full text-lg"
placeholder="Why is the sky blue?"
disabled={isLoading}
/>
<button
type="submit"
class={"btn btn-primary max-w-lg m-3" + ($navigating ? "loading" : "")}
disabled={isLoading}
class={"btn btn-primary max-w-lg m-3 h-24 w-24 text-lg"}
class:loading={isLoading}
>
Send
</button>

View File

@ -25,11 +25,9 @@ type t = {
};
export const load: PageLoad = async ({ fetch, params }) => {
const r = await fetch("http://api:9124/chat/" + params.id);
const r = await fetch("/api/chat/" + params.id);
const data = (await r.json()) as t;
return {
props: data,
};
};
export const csr = false;

View File

@ -1,4 +1,4 @@
import adapter from '@sveltejs/adapter-auto';
import adapter from '@sveltejs/adapter-node';
import { vitePreprocess } from '@sveltejs/kit/vite';
/** @type {import('@sveltejs/kit').Config} */
@ -12,6 +12,9 @@ const config = {
// If your environment is not supported or you settled on a specific environment, switch out the adapter.
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
adapter: adapter(),
csrf: {
checkOrigin: false,
}
},
preprocess: vitePreprocess()
};