Latest changes:
- Added nginx so that the API & web app are now served on the same port. - Enabled CSR through SvelteKit, with a hook that redirects server-side API requests. - Implemented a menu to pass model parameters on the start page. - Added a loading indicator while the model is computing.
This commit is contained in:
parent
4dfa4c92c0
commit
a13eeadaed
@ -1,4 +1,3 @@
|
||||
DATABASE_URL=mongodb://mongodb:27017/lms
|
||||
secret_key=youshouldchangethis
|
||||
API_PORT = 9124
|
||||
WEB_PORT = 9123
|
||||
PORT = 8008
|
||||
18
README.md
18
README.md
@ -2,12 +2,15 @@
|
||||
|
||||

|
||||
|
||||

|
||||
| Home page | Chat |
|
||||
| :----------------------------------: | :----------------------------------------------: |
|
||||
|  |  |
|
||||
|
||||
A chat interface based on `llama.cpp` for running Alpaca models.
|
||||
|
||||
* **SvelteKit** frontend
|
||||
* **MongoDB** for storing chat history & parameters
|
||||
* **FastAPI + beanie** for the API, wrapping calls to `llama.cpp`
|
||||
- **SvelteKit** frontend
|
||||
- **MongoDB** for storing chat history & parameters
|
||||
- **FastAPI + beanie** for the API, wrapping calls to `llama.cpp`
|
||||
|
||||
## Getting started
|
||||
|
||||
@ -24,6 +27,8 @@ Then put your weights in the `models` folder. If you don't have them you can dow
|
||||
|
||||
They are currently the only two models supported. I'm working on expanding support to all the models supported by `llama.cpp`.
|
||||
|
||||
Note: `llama.cpp` [recently underwent some changes](https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818) that require model weights to be converted to a new format. Serge picks this up automatically on startup, and will convert your weights to the new format if needed. The old weights will be renamed to `*.bin.old` and the new weights will be named `*.bin`.
|
||||
|
||||
Then, you can start the project by running:
|
||||
|
||||
```
|
||||
@ -31,9 +36,9 @@ cp .env.sample .env
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
The front-end lives at http://localhost:9123/.
|
||||
The front-end lives at http://localhost:8008/ by default, but you can change the port in the `.env` file.
|
||||
|
||||
To get an interactive API documentation go to http://localhost:9124/docs.
|
||||
The interactive API documentation is available at http://localhost:8008/api/docs.
|
||||
|
||||
## What's next
|
||||
|
||||
@ -44,5 +49,4 @@ To get an interactive API documentation go to http://localhost:9124/docs.
|
||||
- [ ] LangChain integration with a custom LLM
|
||||
- [ ] Support for other llama models, quantization, etc.
|
||||
|
||||
|
||||
And a lot more!
|
||||
|
||||
@ -8,12 +8,12 @@ services:
|
||||
- ./api:/usr/src/app/
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
ports:
|
||||
- ${API_PORT}:${API_PORT}
|
||||
- 9124:9124
|
||||
depends_on:
|
||||
- mongodb
|
||||
env_file:
|
||||
- .env
|
||||
command: uvicorn main:app --reload --host 0.0.0.0 --port ${API_PORT}
|
||||
command: uvicorn main:app --reload --host 0.0.0.0 --port 9124 --root-path /api/
|
||||
|
||||
mongodb:
|
||||
image: bitnami/mongodb:latest
|
||||
@ -28,12 +28,18 @@ services:
|
||||
context: ./web
|
||||
dockerfile: ./Dockerfile.web
|
||||
ports:
|
||||
- ${WEB_PORT}:${WEB_PORT}
|
||||
- 9123:9123
|
||||
- 24678:24678
|
||||
volumes:
|
||||
- ./web:/usr/src/app/
|
||||
- /usr/src/app/node_modules
|
||||
command: npm run dev -- --host 0.0.0.0 --port ${WEB_PORT}
|
||||
command: npm run dev -- --host 0.0.0.0 --port 9123
|
||||
nginx:
|
||||
build:
|
||||
context: ./nginx
|
||||
dockerfile: ./Dockerfile.nginx
|
||||
ports:
|
||||
- "${PORT}:80"
|
||||
|
||||
volumes:
|
||||
data:
|
||||
|
||||
3
nginx/Dockerfile.nginx
Normal file
3
nginx/Dockerfile.nginx
Normal file
@ -0,0 +1,3 @@
|
||||
FROM nginx:alpine as deploy
|
||||
|
||||
COPY nginx.conf /etc/nginx/conf.d/default.conf
|
||||
32
nginx/nginx.conf
Normal file
32
nginx/nginx.conf
Normal file
@ -0,0 +1,32 @@
|
||||
server {
|
||||
listen 80; # Adjust the port number if needed
|
||||
server_name localhost;
|
||||
# Proxy requests for the root URL to the web front-end (the `web` service on port 9123)
|
||||
location / {
|
||||
proxy_pass http://web:9123;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
proxy_read_timeout 300s;
|
||||
proxy_connect_timeout 300s;
|
||||
proxy_send_timeout 300s;
|
||||
|
||||
}
|
||||
|
||||
# Proxy requests for /api/ to the API (the `api` service on port 9124); the /api/ prefix is stripped by the rewrite below
|
||||
location /api/ {
|
||||
proxy_pass http://api:9124;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
rewrite ^/api/(.*) /$1 break;
|
||||
|
||||
proxy_read_timeout 300s;
|
||||
proxy_connect_timeout 300s;
|
||||
proxy_send_timeout 300s;
|
||||
|
||||
}
|
||||
}
|
||||
6
package-lock.json
generated
Normal file
6
package-lock.json
generated
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"name": "serge",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {}
|
||||
}
|
||||
@ -1,5 +1,5 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en" data-theme="dark">
|
||||
<html lang="en" data-theme="dark" style="height: 100%; margin: 0;">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<link rel="icon" href="%sveltekit.assets%/favicon.png" />
|
||||
@ -7,7 +7,7 @@
|
||||
<title>Serge - Powered by LLaMa</title>
|
||||
%sveltekit.head%
|
||||
</head>
|
||||
<body data-sveltekit-preload-data="hover">
|
||||
<body data-sveltekit-preload-data="hover" style="height: 100%;">
|
||||
<div style="display: contents">%sveltekit.body%</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
10
web/src/hooks.server.ts
Normal file
10
web/src/hooks.server.ts
Normal file
@ -0,0 +1,10 @@
|
||||
import type { HandleFetch } from "@sveltejs/kit";
|
||||
|
||||
/**
 * SvelteKit `handleFetch` server hook.
 *
 * Rebuilds the outgoing request so that a URL containing the prefix
 * `http://localhost/api/` is sent to `http://api:9124/` instead, then
 * forwards the request with the `fetch` function passed to the hook.
 * Requests whose URL does not contain that exact prefix are forwarded
 * with their URL unchanged.
 */
export const handleFetch = (({ request, fetch }) => {
|
||||
// Clone the request onto the rewritten URL; passing the original request as
// the second argument reuses it as the init for the new Request.
request = new Request(
|
||||
// String.replace with a string pattern rewrites only the first occurrence.
// NOTE(review): the match is hard-coded to host `localhost` with no port —
// confirm this is the URL SvelteKit sees for server-side `/api/...` fetches.
request.url.replace("http://localhost/api/", "http://api:9124/"),
|
||||
request
|
||||
);
|
||||
|
||||
return fetch(request);
|
||||
}) satisfies HandleFetch;
|
||||
@ -37,7 +37,7 @@
|
||||
|
||||
<aside
|
||||
id="default-sidebar"
|
||||
class="fixed top-0 left-0 z-40 w-96 h-screen transition-transform -translate-x-full sm:translate-x-0"
|
||||
class="fixed top-0 left-0 z-40 w-80 h-screen transition-transform -translate-x-full sm:translate-x-0"
|
||||
aria-label="Sidebar"
|
||||
>
|
||||
<div class="h-full px-3 py-4 overflow-y-auto bg-gray-600">
|
||||
@ -60,6 +60,6 @@
|
||||
</div>
|
||||
</aside>
|
||||
|
||||
<div class="p-4 sm:ml-96">
|
||||
<div class="p-4 sm:ml-80 h-full">
|
||||
<slot />
|
||||
</div>
|
||||
|
||||
@ -7,11 +7,9 @@ type t = {
|
||||
};
|
||||
|
||||
export const load: LayoutLoad = async ({ fetch }) => {
|
||||
const r = await fetch("http://api:9124/chats");
|
||||
const r = await fetch("/api/chats");
|
||||
const chats = (await r.json()) as t[];
|
||||
return {
|
||||
chats: chats,
|
||||
};
|
||||
};
|
||||
|
||||
export const csr = false;
|
||||
|
||||
@ -3,14 +3,14 @@ import type { Actions } from "./$types";
|
||||
export const actions = {
|
||||
default: async ({ fetch, request }) => {
|
||||
const formData = await request.formData();
|
||||
const model = formData.get("model");
|
||||
|
||||
let data = new URLSearchParams();
|
||||
if (model) {
|
||||
data.append("model", model.toString());
|
||||
}
|
||||
const convertedFormEntries = Array.from(formData, ([key, value]) => [
|
||||
key,
|
||||
typeof value === "string" ? value : value.name,
|
||||
]);
|
||||
const searchParams = new URLSearchParams(convertedFormEntries);
|
||||
|
||||
const response = await fetch("http://api:9124/chat?" + data.toString(), {
|
||||
const response = await fetch("/api/chat?" + searchParams.toString(), {
|
||||
method: "POST",
|
||||
});
|
||||
|
||||
|
||||
@ -2,18 +2,155 @@
|
||||
import type { PageData } from "./$types";
|
||||
|
||||
export let data: PageData;
|
||||
|
||||
const modelAvailable = data.models.length > 0;
|
||||
|
||||
let temp = 0.1;
|
||||
let top_k: number = 50;
|
||||
let top_p: number = 0.95;
|
||||
|
||||
let max_length: number = 256;
|
||||
let repeat_last_n: number = 64;
|
||||
let repeat_penalty: number = 1.3;
|
||||
|
||||
let preprompt: string =
|
||||
"Below is an instruction that describes a task. Write a response that appropriately completes the request. The response must be accurate, concise and evidence-based whenever possible. A complete answer is always ended by [end of text].";
|
||||
</script>
|
||||
|
||||
<h1 class="text-3xl font-bold text-center pt-5">Say Hi to Serge !</h1>
|
||||
<h1 class="text-3xl font-bold text-center pt-5">Say Hi to Serge!</h1>
|
||||
<h1 class="text-xl text-center pt-2 pb-5">
|
||||
An easy way to chat with Alpaca & other LLaMa based models.
|
||||
</h1>
|
||||
|
||||
<form method="POST" class="p-10">
|
||||
<label for="model" class="label">
|
||||
<span class="label-text">Model</span>
|
||||
</label>
|
||||
<select name="model" class="select select-bordered w-full max-w-xs">
|
||||
{#each data.models as model}
|
||||
<option value={model}>{model}</option>
|
||||
{/each}
|
||||
</select>
|
||||
<button class="btn ml-5">Start a new chat</button>
|
||||
<form method="POST" class="p-5">
|
||||
<div class="w-full pb-20">
|
||||
<div class="mx-auto w-fit pt-5">
|
||||
<button class=" mx-auto btn btn-primary ml-5" disabled={!modelAvailable}
|
||||
>Start a new chat</button
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div
|
||||
tabindex="-1"
|
||||
class="collapse collapse-arrow border-2 rounded-box border-gray-600 bg-base-100"
|
||||
>
|
||||
<input type="checkbox" />
|
||||
<div class="collapse-title text-xl font-medium">Model settings</div>
|
||||
<div class="collapse-content">
|
||||
<div class="grid grid-cols-3 gap-4 p-3 ">
|
||||
<div
|
||||
class="tooltip col-span-2"
|
||||
data-tip="The higher the temperature, the more random the model output."
|
||||
>
|
||||
<label for="temp" class="label-text">Temperature - [{temp}]</label>
|
||||
<input
|
||||
name="temp"
|
||||
type="range"
|
||||
bind:value={temp}
|
||||
min="0.05"
|
||||
max="2"
|
||||
step="0.05"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="flex flex-col tooltip"
|
||||
data-tip="The number of samples to consider for top_k sampling. "
|
||||
>
|
||||
<label for="top_k" class="label-text pb-1">top_k</label>
|
||||
<input
|
||||
class="input input-bordered w-full max-w-xs"
|
||||
name="top_k"
|
||||
type="number"
|
||||
bind:value={top_k}
|
||||
min="0"
|
||||
max="100"
|
||||
/>
|
||||
</div>
|
||||
<div class="col-span-2">
|
||||
<label for="max_length" class="label-text"
|
||||
>Maximum generated text length in tokens - [{max_length}]</label
|
||||
>
|
||||
<input
|
||||
name="max_length"
|
||||
type="range"
|
||||
bind:value={max_length}
|
||||
min="16"
|
||||
max="512"
|
||||
step="16"
|
||||
class="range range-sm mt-auto"
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="flex flex-col tooltip"
|
||||
data-tip="The cumulative probability of the tokens to keep for nucleus sampling. "
|
||||
>
|
||||
<label for="top_p" class="label-text pb-1">top_p</label>
|
||||
<input
|
||||
class="input input-bordered w-full max-w-xs"
|
||||
name="top_p"
|
||||
type="number"
|
||||
bind:value={top_p}
|
||||
min="0"
|
||||
max="1"
|
||||
step="0.025"
|
||||
/>
|
||||
</div>
|
||||
<div class="flex flex-col">
|
||||
<label for="model" class="label-text pb-1"> Model choice </label>
|
||||
<select name="model" class="select select-bordered w-full max-w-xs">
|
||||
{#each data.models as model}
|
||||
<option value={model}>{model}</option>
|
||||
{/each}
|
||||
</select>
|
||||
</div>
|
||||
<div
|
||||
class="flex flex-col tooltip"
|
||||
data-tip="Number of tokens to look back on for deciding to apply the repeat penalty."
|
||||
>
|
||||
<label for="repeat_last_n" class="label-text pb-1"
|
||||
>repeat_last_n</label
|
||||
>
|
||||
<input
|
||||
class="input input-bordered w-full max-w-xs"
|
||||
name="repeat_last_n"
|
||||
type="number"
|
||||
bind:value={repeat_last_n}
|
||||
min="0"
|
||||
max="100"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div
|
||||
class="flex flex-col tooltip"
|
||||
data-tip="The weight of the penalty to avoid repeating the last repeat_last_n tokens. "
|
||||
>
|
||||
<label for="repeat_penalty" class="label-text pb-1"
|
||||
>repeat_penalty</label
|
||||
>
|
||||
<input
|
||||
class="input input-bordered w-full max-w-xs"
|
||||
name="repeat_penalty"
|
||||
type="number"
|
||||
bind:value={repeat_penalty}
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.05"
|
||||
/>
|
||||
</div>
|
||||
<div class="col-span-3 flex flex-col">
|
||||
<label for="preprompt" class="label-text pb-1"
|
||||
>Pre-Prompt for initializing a conversation.</label
|
||||
>
|
||||
<textarea
|
||||
class="textarea h-24 textarea-bordered w-full"
|
||||
name="preprompt"
|
||||
bind:value={preprompt}
|
||||
placeholder="Enter your prompt here"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@ -1,11 +1,9 @@
|
||||
import type { PageLoad } from "./$types";
|
||||
|
||||
export const load: PageLoad = async ({ fetch }) => {
|
||||
const r = await fetch("http://api:9124/models");
|
||||
const r = await fetch("api/models");
|
||||
const models = (await r.json()) as string[];
|
||||
return {
|
||||
models,
|
||||
};
|
||||
};
|
||||
|
||||
export const csr = false;
|
||||
|
||||
@ -10,14 +10,15 @@ export const actions = {
|
||||
data.append("prompt", question.toString());
|
||||
|
||||
const response = await fetch(
|
||||
"http://api:9124/chat/" + params.id + "/question?" + data.toString(),
|
||||
"/api/chat/" + params.id + "/question?" + data.toString(),
|
||||
{
|
||||
method: "POST",
|
||||
}
|
||||
);
|
||||
|
||||
if (response.ok) {
|
||||
return { success: true };
|
||||
const question = await response.json();
|
||||
return question;
|
||||
} else {
|
||||
console.log(response.statusText);
|
||||
}
|
||||
|
||||
@ -1,41 +1,65 @@
|
||||
<script lang="ts">
|
||||
import { navigating } from "$app/stores";
|
||||
import type { PageData } from "./$types";
|
||||
import { enhance } from "$app/forms";
|
||||
|
||||
export let data: PageData;
|
||||
|
||||
const questions = data.props.questions ?? [];
|
||||
|
||||
const startDate = new Date(data.props.created);
|
||||
$: isLoading = false;
|
||||
$: questions = data.props.questions ?? [];
|
||||
$: startDate = new Date(data.props.created);
|
||||
</script>
|
||||
|
||||
<div class="max-w-4xl mx-auto">
|
||||
<div class="max-w-4xl mx-auto h-full max-h-screen relative">
|
||||
<h1 class="text-4xl font-bold">Chat with {data.props.parameters.model}</h1>
|
||||
<h4 class="text-xl font-semibold mb-10">
|
||||
Started on {startDate.toLocaleString("en-US")}
|
||||
</h4>
|
||||
{#each questions as question}
|
||||
<div class="chat chat-end">
|
||||
<div class="chat-bubble chat-bubble-secondary whitespace-pre-line">
|
||||
{question.question}
|
||||
</div>
|
||||
</div>
|
||||
<div class="chat chat-start">
|
||||
<div class="chat-bubble chat-bubble-primary whitespace-pre-line">
|
||||
{question.answer}
|
||||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
|
||||
<form method="POST" class="form-control items-center mt-3">
|
||||
<div class="overflow-y-auto h-[calc(100vh-10rem)] px-10">
|
||||
<div class="h-max pb-32">
|
||||
{#each questions as question}
|
||||
<div class="chat chat-end my-2">
|
||||
<div
|
||||
class="chat-bubble chat-bubble-secondary whitespace-pre-line text-lg"
|
||||
>
|
||||
{question.question}
|
||||
</div>
|
||||
</div>
|
||||
<div class="chat chat-start my-2">
|
||||
<div
|
||||
class="chat-bubble chat-bubble-primary whitespace-pre-line text-lg"
|
||||
>
|
||||
{question.answer}
|
||||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<form
|
||||
method="POST"
|
||||
class="form-control items-center absolute bottom-0 w-full px-5 left-0 h-32 flex flex-row bg-base-100"
|
||||
use:enhance={() => {
|
||||
isLoading = true;
|
||||
|
||||
return async ({ update }) => {
|
||||
isLoading = false;
|
||||
update();
|
||||
};
|
||||
}}
|
||||
>
|
||||
<textarea
|
||||
name="question"
|
||||
class="textarea textarea-bordered h-24 w-full"
|
||||
class="textarea textarea-bordered h-24 w-full text-lg"
|
||||
placeholder="Why is the sky blue?"
|
||||
disabled={isLoading}
|
||||
/>
|
||||
<button
|
||||
type="submit"
|
||||
class={"btn btn-primary max-w-lg m-3" + ($navigating ? "loading" : "")}
|
||||
disabled={isLoading}
|
||||
class={"btn btn-primary max-w-lg m-3 h-24 w-24 text-lg"}
|
||||
class:loading={isLoading}
|
||||
>
|
||||
Send
|
||||
</button>
|
||||
|
||||
@ -25,11 +25,9 @@ type t = {
|
||||
};
|
||||
|
||||
export const load: PageLoad = async ({ fetch, params }) => {
|
||||
const r = await fetch("http://api:9124/chat/" + params.id);
|
||||
const r = await fetch("/api/chat/" + params.id);
|
||||
const data = (await r.json()) as t;
|
||||
return {
|
||||
props: data,
|
||||
};
|
||||
};
|
||||
|
||||
export const csr = false;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import adapter from '@sveltejs/adapter-auto';
|
||||
import adapter from '@sveltejs/adapter-node';
|
||||
import { vitePreprocess } from '@sveltejs/kit/vite';
|
||||
|
||||
/** @type {import('@sveltejs/kit').Config} */
|
||||
@ -12,6 +12,9 @@ const config = {
|
||||
// If your environment is not supported or you settled on a specific environment, switch out the adapter.
|
||||
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
|
||||
adapter: adapter(),
|
||||
csrf: {
|
||||
checkOrigin: false,
|
||||
}
|
||||
},
|
||||
preprocess: vitePreprocess()
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user