Latest changes:

- Added nginx, api & web app on the same port now.
- Enabled CSR through SvelteKit, with a hook for redirecting server-side API requests.
- Implemented menu to pass model parameters on the start page.
- Added a loading indicator while the model is computing
This commit is contained in:
Nathan Sarrazin 2023-03-21 07:11:00 +01:00
parent 4dfa4c92c0
commit a13eeadaed
17 changed files with 284 additions and 65 deletions

View File

@ -1,4 +1,3 @@
DATABASE_URL=mongodb://mongodb:27017/lms
secret_key=youshouldchangethis
API_PORT = 9124
WEB_PORT = 9123
PORT = 8008

View File

@ -2,12 +2,15 @@
![License](https://img.shields.io/github/license/nsarrazin/serge)
![Serge](https://i.imgur.com/JtWV72d.png)
| Home page | Chat |
| :----------------------------------: | :----------------------------------------------: |
| ![](https://i.imgur.com/CRXj9KD.png) | ![Serge - chat](https://i.imgur.com/bnqZyaC.png) |
A chat interface based on `llama.cpp` for running alpaca models.
* **SvelteKit** frontend
* **MongoDB** for storing chat history & parameters
* **FastAPI + beanie** for the API, wrapping calls to `llama.cpp`
- **SvelteKit** frontend
- **MongoDB** for storing chat history & parameters
- **FastAPI + beanie** for the API, wrapping calls to `llama.cpp`
## Getting started
@ -24,6 +27,8 @@ Then put your weights in the `models` folder. If you don't have them you can dow
They are currently the only two models supported. I'm working on expanding support to all the models supported by `llama.cpp`.
Note: `llama.cpp` [recently underwent some change](https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818) that requires model weights to be converted to a new format. Serge picks this up automatically on startup, and will convert your weights to the new format if needed. The old weights will be renamed to `*.bin.old` and the new weights will be named `*.bin`.
Then, you can start the project by running:
```
@ -31,9 +36,9 @@ cp .env.sample .env
docker compose up -d
```
The front-end lives at http://localhost:9123/.
The front-end lives at http://localhost:8008/ by default but you can change the port in the `.env` file.
To get an interactive API documentation go to http://localhost:9124/docs.
The interactive API docs are available at http://localhost:8008/api/docs.
## What's next
@ -44,5 +49,4 @@ To get an interactive API documentation go to http://localhost:9124/docs.
- [ ] LangChain integration with a custom LLM
- [ ] Support for other llama models, quantization, etc.
And a lot more!

View File

@ -8,12 +8,12 @@ services:
- ./api:/usr/src/app/
- /etc/localtime:/etc/localtime:ro
ports:
- ${API_PORT}:${API_PORT}
- 9124:9124
depends_on:
- mongodb
env_file:
- .env
command: uvicorn main:app --reload --host 0.0.0.0 --port ${API_PORT}
command: uvicorn main:app --reload --host 0.0.0.0 --port 9124 --root-path /api/
mongodb:
image: bitnami/mongodb:latest
@ -28,12 +28,18 @@ services:
context: ./web
dockerfile: ./Dockerfile.web
ports:
- ${WEB_PORT}:${WEB_PORT}
- 9123:9123
- 24678:24678
volumes:
- ./web:/usr/src/app/
- /usr/src/app/node_modules
command: npm run dev -- --host 0.0.0.0 --port ${WEB_PORT}
command: npm run dev -- --host 0.0.0.0 --port 9123
nginx:
build:
context: ./nginx
dockerfile: ./Dockerfile.nginx
ports:
- "${PORT}:80"
volumes:
data:

3
nginx/Dockerfile.nginx Normal file
View File

@ -0,0 +1,3 @@
# Minimal reverse-proxy image: stock nginx:alpine with our routing config baked in.
# Fixed: use uppercase `AS` for the build-stage keyword (consistent-casing lint).
FROM nginx:alpine AS deploy

# Replace the default server config with the web/api proxy rules.
COPY nginx.conf /etc/nginx/conf.d/default.conf

32
nginx/nginx.conf Normal file
View File

@ -0,0 +1,32 @@
# Single public entry point: nginx listens on container port 80 (docker-compose
# maps ${PORT} onto it) and routes traffic to the SvelteKit frontend and the
# API backend so both are reachable through one host:port.
server {
listen 80; # Adjust the port number if needed
server_name localhost;
# Proxy requests for the root URL to Service A
# (the `web` container, the SvelteKit server on port 9123).
location / {
proxy_pass http://web:9123;
# Forward original host and client address info to the upstream app.
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Long timeouts — NOTE(review): presumably so slow model computations are
# not cut off mid-response; confirm 300s suffices for the largest models.
proxy_read_timeout 300s;
proxy_connect_timeout 300s;
proxy_send_timeout 300s;
}
# Proxy requests for /api to Service B
# (the `api` container, uvicorn on port 9124).
location /api/ {
proxy_pass http://api:9124;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Strip the /api prefix before forwarding; the backend serves its routes at
# the root (uvicorn is started with --root-path /api/ in docker-compose).
rewrite ^/api/(.*) /$1 break;
# Same long timeouts as above for slow model responses.
proxy_read_timeout 300s;
proxy_connect_timeout 300s;
proxy_send_timeout 300s;
}
}

6
package-lock.json generated Normal file
View File

@ -0,0 +1,6 @@
{
"name": "serge",
"lockfileVersion": 3,
"requires": true,
"packages": {}
}

View File

@ -1,5 +1,5 @@
<!DOCTYPE html>
<html lang="en" data-theme="dark">
<html lang="en" data-theme="dark" style="height: 100%; margin: 0;">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%sveltekit.assets%/favicon.png" />
@ -7,7 +7,7 @@
<title>Serge - Powered by LLaMa</title>
%sveltekit.head%
</head>
<body data-sveltekit-preload-data="hover">
<body data-sveltekit-preload-data="hover" style="height: 100%;">
<div style="display: contents">%sveltekit.body%</div>
</body>
</html>

10
web/src/hooks.server.ts Normal file
View File

@ -0,0 +1,10 @@
import type { HandleFetch } from "@sveltejs/kit";

/**
 * Server-side fetch hook: requests that the SvelteKit server makes to the
 * public `/api` prefix are rewritten to target the `api` container directly,
 * skipping the nginx proxy hop.
 *
 * NOTE(review): only the literal prefix `http://localhost/api/` is matched —
 * assumes the app is reached via localhost; verify for other hostnames.
 */
export const handleFetch: HandleFetch = ({ request, fetch }) => {
  const internalUrl = request.url.replace(
    "http://localhost/api/",
    "http://api:9124/"
  );
  return fetch(new Request(internalUrl, request));
};

View File

@ -37,7 +37,7 @@
<aside
id="default-sidebar"
class="fixed top-0 left-0 z-40 w-96 h-screen transition-transform -translate-x-full sm:translate-x-0"
class="fixed top-0 left-0 z-40 w-80 h-screen transition-transform -translate-x-full sm:translate-x-0"
aria-label="Sidebar"
>
<div class="h-full px-3 py-4 overflow-y-auto bg-gray-600">
@ -60,6 +60,6 @@
</div>
</aside>
<div class="p-4 sm:ml-96">
<div class="p-4 sm:ml-80 h-full">
<slot />
</div>

View File

@ -7,11 +7,9 @@ type t = {
};
export const load: LayoutLoad = async ({ fetch }) => {
const r = await fetch("http://api:9124/chats");
const r = await fetch("/api/chats");
const chats = (await r.json()) as t[];
return {
chats: chats,
};
};
export const csr = false;

View File

@ -3,14 +3,14 @@ import type { Actions } from "./$types";
export const actions = {
default: async ({ fetch, request }) => {
const formData = await request.formData();
const model = formData.get("model");
let data = new URLSearchParams();
if (model) {
data.append("model", model.toString());
}
const convertedFormEntries = Array.from(formData, ([key, value]) => [
key,
typeof value === "string" ? value : value.name,
]);
const searchParams = new URLSearchParams(convertedFormEntries);
const response = await fetch("http://api:9124/chat?" + data.toString(), {
const response = await fetch("/api/chat?" + searchParams.toString(), {
method: "POST",
});

View File

@ -2,18 +2,155 @@
import type { PageData } from "./$types";
export let data: PageData;
const modelAvailable = data.models.length > 0;
let temp = 0.1;
let top_k: number = 50;
let top_p: number = 0.95;
let max_length: number = 256;
let repeat_last_n: number = 64;
let repeat_penalty: number = 1.3;
let preprompt: string =
"Below is an instruction that describes a task. Write a response that appropriately completes the request. The response must be accurate, concise and evidence-based whenever possible. A complete answer is always ended by [end of text].";
</script>
<h1 class="text-3xl font-bold text-center pt-5">Say Hi to Serge !</h1>
<h1 class="text-3xl font-bold text-center pt-5">Say Hi to Serge!</h1>
<h1 class="text-xl text-center pt-2 pb-5">
An easy way to chat with Alpaca & other LLaMa based models.
</h1>
<form method="POST" class="p-10">
<label for="model" class="label">
<span class="label-text">Model</span>
</label>
<select name="model" class="select select-bordered w-full max-w-xs">
{#each data.models as model}
<option value={model}>{model}</option>
{/each}
</select>
<button class="btn ml-5">Start a new chat</button>
<form method="POST" class="p-5">
<div class="w-full pb-20">
<div class="mx-auto w-fit pt-5">
<button class=" mx-auto btn btn-primary ml-5" disabled={!modelAvailable}
>Start a new chat</button
>
</div>
</div>
<div
tabindex="-1"
class="collapse collapse-arrow border-2 rounded-box border-gray-600 bg-base-100"
>
<input type="checkbox" />
<div class="collapse-title text-xl font-medium">Model settings</div>
<div class="collapse-content">
<div class="grid grid-cols-3 gap-4 p-3 ">
<div
class="tooltip col-span-2"
data-tip="The higher the temperature, the more random the model output."
>
<label for="temp" class="label-text">Temperature - [{temp}]</label>
<input
name="temp"
type="range"
bind:value={temp}
min="0.05"
max="2"
step="0.05"
class="range range-sm mt-auto"
/>
</div>
<div
class="flex flex-col tooltip"
data-tip="The number of samples to consider for top_k sampling. "
>
<label for="top_k" class="label-text pb-1">top_k</label>
<input
class="input input-bordered w-full max-w-xs"
name="top_k"
type="number"
bind:value={top_k}
min="0"
max="100"
/>
</div>
<div class="col-span-2">
<label for="max_length" class="label-text"
>Maximum generated text length in tokens - [{max_length}]</label
>
<input
name="max_length"
type="range"
bind:value={max_length}
min="16"
max="512"
step="16"
class="range range-sm mt-auto"
/>
</div>
<div
class="flex flex-col tooltip"
data-tip="The cumulative probability of the tokens to keep for nucleus sampling. "
>
<label for="top_p" class="label-text pb-1">top_p</label>
<input
class="input input-bordered w-full max-w-xs"
name="top_p"
type="number"
bind:value={top_p}
min="0"
max="1"
step="0.025"
/>
</div>
<div class="flex flex-col">
<label for="model" class="label-text pb-1"> Model choice </label>
<select name="model" class="select select-bordered w-full max-w-xs">
{#each data.models as model}
<option value={model}>{model}</option>
{/each}
</select>
</div>
<div
class="flex flex-col tooltip"
data-tip="Number of tokens to look back on for deciding to apply the repeat penalty."
>
<label for="repeat_last_n" class="label-text pb-1"
>repeat_last_n</label
>
<input
class="input input-bordered w-full max-w-xs"
name="repeat_last_n"
type="number"
bind:value={repeat_last_n}
min="0"
max="100"
/>
</div>
<div
class="flex flex-col tooltip"
data-tip="The weight of the penalty to avoid repeating the last repeat_last_n tokens. "
>
<label for="repeat_penalty" class="label-text pb-1"
>repeat_penalty</label
>
<input
class="input input-bordered w-full max-w-xs"
name="repeat_penalty"
type="number"
bind:value={repeat_penalty}
min="0"
max="2"
step="0.05"
/>
</div>
<div class="col-span-3 flex flex-col">
<label for="preprompt" class="label-text pb-1"
>Pre-Prompt for initializing a conversation.</label
>
<textarea
class="textarea h-24 textarea-bordered w-full"
name="preprompt"
bind:value={preprompt}
placeholder="Enter your prompt here"
/>
</div>
</div>
</div>
</div>
</form>

View File

@ -1,11 +1,9 @@
import type { PageLoad } from "./$types";

/**
 * Loads the list of available model names for the start page.
 *
 * Fixed: the fetch URL was relative (`"api/models"`), which resolves against
 * the current route path instead of the site root; every sibling loader uses
 * the absolute `"/api/..."` prefix that nginx proxies to the backend.
 */
export const load: PageLoad = async ({ fetch }) => {
  const r = await fetch("/api/models");
  const models = (await r.json()) as string[];
  return {
    models,
  };
};

View File

@ -10,14 +10,15 @@ export const actions = {
data.append("prompt", question.toString());
const response = await fetch(
"http://api:9124/chat/" + params.id + "/question?" + data.toString(),
"/api/chat/" + params.id + "/question?" + data.toString(),
{
method: "POST",
}
);
if (response.ok) {
return { success: true };
const question = await response.json();
return question;
} else {
console.log(response.statusText);
}

View File

@ -1,41 +1,65 @@
<script lang="ts">
import { navigating } from "$app/stores";
import type { PageData } from "./$types";
import { enhance } from "$app/forms";
export let data: PageData;
const questions = data.props.questions ?? [];
const startDate = new Date(data.props.created);
$: isLoading = false;
$: questions = data.props.questions ?? [];
$: startDate = new Date(data.props.created);
</script>
<div class="max-w-4xl mx-auto">
<div class="max-w-4xl mx-auto h-full max-h-screen relative">
<h1 class="text-4xl font-bold">Chat with {data.props.parameters.model}</h1>
<h4 class="text-xl font-semibold mb-10">
Started on {startDate.toLocaleString("en-US")}
</h4>
{#each questions as question}
<div class="chat chat-end">
<div class="chat-bubble chat-bubble-secondary whitespace-pre-line">
{question.question}
</div>
</div>
<div class="chat chat-start">
<div class="chat-bubble chat-bubble-primary whitespace-pre-line">
{question.answer}
</div>
</div>
{/each}
<form method="POST" class="form-control items-center mt-3">
<div class="overflow-y-auto h-[calc(100vh-10rem)] px-10">
<div class="h-max pb-32">
{#each questions as question}
<div class="chat chat-end my-2">
<div
class="chat-bubble chat-bubble-secondary whitespace-pre-line text-lg"
>
{question.question}
</div>
</div>
<div class="chat chat-start my-2">
<div
class="chat-bubble chat-bubble-primary whitespace-pre-line text-lg"
>
{question.answer}
</div>
</div>
{/each}
</div>
</div>
<form
method="POST"
class="form-control items-center absolute bottom-0 w-full px-5 left-0 h-32 flex flex-row bg-base-100"
use:enhance={() => {
isLoading = true;
return async ({ update }) => {
isLoading = false;
update();
};
}}
>
<textarea
name="question"
class="textarea textarea-bordered h-24 w-full"
class="textarea textarea-bordered h-24 w-full text-lg"
placeholder="Why is the sky blue?"
disabled={isLoading}
/>
<button
type="submit"
class={"btn btn-primary max-w-lg m-3" + ($navigating ? "loading" : "")}
disabled={isLoading}
class={"btn btn-primary max-w-lg m-3 h-24 w-24 text-lg"}
class:loading={isLoading}
>
Send
</button>

View File

@ -25,11 +25,9 @@ type t = {
};
export const load: PageLoad = async ({ fetch, params }) => {
const r = await fetch("http://api:9124/chat/" + params.id);
const r = await fetch("/api/chat/" + params.id);
const data = (await r.json()) as t;
return {
props: data,
};
};
export const csr = false;

View File

@ -1,4 +1,4 @@
import adapter from '@sveltejs/adapter-auto';
import adapter from '@sveltejs/adapter-node';
import { vitePreprocess } from '@sveltejs/kit/vite';
/** @type {import('@sveltejs/kit').Config} */
@ -12,6 +12,9 @@ const config = {
// If your environment is not supported or you settled on a specific environment, switch out the adapter.
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
adapter: adapter(),
csrf: {
checkOrigin: false,
}
},
preprocess: vitePreprocess()
};