From a13eeadaed2befad02656dfb8710a12eb079c041 Mon Sep 17 00:00:00 2001
From: Nathan Sarrazin <sarrazin.nathan@gmail.com>
Date: Tue, 21 Mar 2023 07:11:00 +0100
Subject: [PATCH] Latest changes:

- Added nginx; the API and the web app are now served on the same port.
- Enabled CSR through SvelteKit, with a hook that redirects server-side API requests.
- Implemented a menu to pass model parameters on the start page.
- Added a loading indicator while the model is computing.
---
 .env.sample                              |   3 +-
 README.md                                |  18 ++-
 docker-compose.yml                       |  14 +-
 nginx/Dockerfile.nginx                   |   3 +
 nginx/nginx.conf                         |  32 +++++
 package-lock.json                        |   6 +
 web/src/app.html                         |   4 +-
 web/src/hooks.server.ts                  |  10 ++
 web/src/routes/+layout.svelte            |   4 +-
 web/src/routes/+layout.ts                |   4 +-
 web/src/routes/+page.server.ts           |  12 +-
 web/src/routes/+page.svelte              | 159 +++++++++++++++++++++--
 web/src/routes/+page.ts                  |   4 +-
 web/src/routes/chat/[id]/+page.server.ts |   5 +-
 web/src/routes/chat/[id]/+page.svelte    |  62 ++++++---
 web/src/routes/chat/[id]/+page.ts        |   4 +-
 web/svelte.config.js                     |   5 +-
 17 files changed, 284 insertions(+), 65 deletions(-)
 create mode 100644 nginx/Dockerfile.nginx
 create mode 100644 nginx/nginx.conf
 create mode 100644 package-lock.json
 create mode 100644 web/src/hooks.server.ts

diff --git a/.env.sample b/.env.sample
index eaf8ec9..64365b0 100644
--- a/.env.sample
+++ b/.env.sample
@@ -1,4 +1,3 @@
 DATABASE_URL=mongodb://mongodb:27017/lms
 secret_key=youshouldchangethis
-API_PORT = 9124
-WEB_PORT = 9123
\ No newline at end of file
+PORT = 8008
\ No newline at end of file
diff --git a/README.md b/README.md
index 98cc55d..9ade5a7 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,15 @@
 
 ![License](https://img.shields.io/github/license/nsarrazin/serge)
 
-![Serge](https://i.imgur.com/JtWV72d.png)
+|              Home page               |                       Chat                       |
+| :----------------------------------: | :----------------------------------------------: |
+| ![](https://i.imgur.com/CRXj9KD.png) | ![Serge - chat](https://i.imgur.com/bnqZyaC.png) |
+
 A chat interface based on `llama.cpp` for running alpaca models.
 
-* **SvelteKit** frontend
-* **MongoDB** for storing chat history & parameters
-* **FastAPI + beanie** for the API, wrapping calls to `llama.cpp`
+- **SvelteKit** frontend
+- **MongoDB** for storing chat history & parameters
+- **FastAPI + beanie** for the API, wrapping calls to `llama.cpp`
 
 ## Getting started
 
@@ -24,6 +27,8 @@ Then put your weights in the `models` folder. If you don't have them you can dow
 
 They are currently the only two models supported. I'm working on expanding support to all the models supported by `llama.cpp`.
 
+Note: `llama.cpp` [recently underwent some changes](https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818) that require model weights to be converted to a new format. Serge picks this up automatically on startup, and will convert your weights to the new format if needed. The old weights will be renamed to `*.bin.old` and the new weights will be named `*.bin`.
+
 Then, you can start the project by running:
 
 ```
@@ -31,9 +36,9 @@ cp .env.sample .env
 docker compose up -d
 ```
 
-The front-end lives at http://localhost:9123/.
+The front-end lives at http://localhost:8008/ by default, but you can change the port in the `.env` file.
 
-To get an interactive API documentation go to http://localhost:9124/docs.
+The interactive API docs are available at http://localhost:8008/api/docs.
 
 ## What's next
 
@@ -44,5 +49,4 @@ To get an interactive API documentation go to http://localhost:9124/docs.
 - [ ] LangChain integration with a custom LLM
 - [ ] Support for other llama models, quantization, etc.
 
-
 And a lot more!
diff --git a/docker-compose.yml b/docker-compose.yml
index be9ca99..b65b320 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,12 +8,12 @@ services:
       - ./api:/usr/src/app/
       - /etc/localtime:/etc/localtime:ro 
     ports:
-      - ${API_PORT}:${API_PORT}
+      - 9124:9124
     depends_on:
       - mongodb
     env_file:
       - .env
-    command: uvicorn main:app --reload --host 0.0.0.0 --port ${API_PORT}
+    command: uvicorn main:app --reload --host 0.0.0.0 --port 9124 --root-path /api/
 
   mongodb:
     image: bitnami/mongodb:latest
@@ -28,12 +28,18 @@ services:
       context: ./web
       dockerfile: ./Dockerfile.web
     ports:
-      - ${WEB_PORT}:${WEB_PORT}
+      - 9123:9123
       - 24678:24678
     volumes:
       - ./web:/usr/src/app/
       - /usr/src/app/node_modules
-    command: npm run dev -- --host 0.0.0.0 --port ${WEB_PORT}
+    command: npm run dev -- --host 0.0.0.0 --port 9123
+  nginx:
+    build: 
+      context: ./nginx
+      dockerfile: ./Dockerfile.nginx 
+    ports:
+     - "${PORT}:80"
 
 volumes:
   data:
diff --git a/nginx/Dockerfile.nginx b/nginx/Dockerfile.nginx
new file mode 100644
index 0000000..233b925
--- /dev/null
+++ b/nginx/Dockerfile.nginx
@@ -0,0 +1,3 @@
+FROM nginx:alpine as deploy
+
+COPY nginx.conf /etc/nginx/conf.d/default.conf
\ No newline at end of file
diff --git a/nginx/nginx.conf b/nginx/nginx.conf
new file mode 100644
index 0000000..8f68aac
--- /dev/null
+++ b/nginx/nginx.conf
@@ -0,0 +1,32 @@
+server {
+    listen 80; # Adjust the port number if needed
+    server_name localhost;
+    # Proxy everything not matched below to the SvelteKit web app (web:9123)
+    location / {
+        proxy_pass http://web:9123;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        
+        proxy_read_timeout 300s;
+        proxy_connect_timeout 300s;
+        proxy_send_timeout 300s;
+
+    }
+
+    # Proxy requests for /api/ to the API service (api:9124), stripping the /api prefix
+    location /api/ {
+        proxy_pass http://api:9124;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        rewrite ^/api/(.*) /$1 break;
+        
+        proxy_read_timeout 300s;
+        proxy_connect_timeout 300s;
+        proxy_send_timeout 300s;
+
+    }
+}
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..3d0930e
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,6 @@
+{
+  "name": "serge",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {}
+}
diff --git a/web/src/app.html b/web/src/app.html
index 04a48a4..699f824 100644
--- a/web/src/app.html
+++ b/web/src/app.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en" data-theme="dark">
+<html lang="en" data-theme="dark" style="height: 100%; margin: 0;">
 	<head>
 		<meta charset="utf-8" />
 		<link rel="icon" href="%sveltekit.assets%/favicon.png" />
@@ -7,7 +7,7 @@
 		<title>Serge - Powered by LLaMa</title>
 		%sveltekit.head%
 	</head>
-	<body data-sveltekit-preload-data="hover">
+	<body data-sveltekit-preload-data="hover" style="height: 100%;">
 		<div style="display: contents">%sveltekit.body%</div>
 	</body>
 </html>
diff --git a/web/src/hooks.server.ts b/web/src/hooks.server.ts
new file mode 100644
index 0000000..50a921d
--- /dev/null
+++ b/web/src/hooks.server.ts
@@ -0,0 +1,10 @@
+import type { HandleFetch } from "@sveltejs/kit";
+
+// Send same-origin /api/* server-side fetches straight to the API container, whatever the public host is.
+export const handleFetch = (({ event, request, fetch }) => {
+  const url = new URL(request.url);
+  if (url.origin === event.url.origin && url.pathname.startsWith("/api/")) {
+    request = new Request("http://api:9124" + url.pathname.slice(4) + url.search, request);
+  }
+  return fetch(request);
+}) satisfies HandleFetch;
diff --git a/web/src/routes/+layout.svelte b/web/src/routes/+layout.svelte
index 7271ade..88990c7 100644
--- a/web/src/routes/+layout.svelte
+++ b/web/src/routes/+layout.svelte
@@ -37,7 +37,7 @@
 
 <aside
   id="default-sidebar"
-  class="fixed top-0 left-0 z-40 w-96 h-screen transition-transform -translate-x-full sm:translate-x-0"
+  class="fixed top-0 left-0 z-40 w-80 h-screen transition-transform -translate-x-full sm:translate-x-0"
   aria-label="Sidebar"
 >
   <div class="h-full px-3 py-4 overflow-y-auto bg-gray-600">
@@ -60,6 +60,6 @@
   </div>
 </aside>
 
-<div class="p-4 sm:ml-96">
+<div class="p-4 sm:ml-80 h-full">
   <slot />
 </div>
diff --git a/web/src/routes/+layout.ts b/web/src/routes/+layout.ts
index 82a1d6a..16c6a45 100644
--- a/web/src/routes/+layout.ts
+++ b/web/src/routes/+layout.ts
@@ -7,11 +7,9 @@ type t = {
 };
 
 export const load: LayoutLoad = async ({ fetch }) => {
-  const r = await fetch("http://api:9124/chats");
+  const r = await fetch("/api/chats");
   const chats = (await r.json()) as t[];
   return {
     chats: chats,
   };
 };
-
-export const csr = false;
diff --git a/web/src/routes/+page.server.ts b/web/src/routes/+page.server.ts
index c24f423..8aae6f7 100644
--- a/web/src/routes/+page.server.ts
+++ b/web/src/routes/+page.server.ts
@@ -3,14 +3,14 @@ import type { Actions } from "./$types";
 export const actions = {
   default: async ({ fetch, request }) => {
     const formData = await request.formData();
-    const model = formData.get("model");
 
-    let data = new URLSearchParams();
-    if (model) {
-      data.append("model", model.toString());
-    }
+    const convertedFormEntries = Array.from(formData, ([key, value]) => [
+      key,
+      typeof value === "string" ? value : value.name,
+    ]);
+    const searchParams = new URLSearchParams(convertedFormEntries);
 
-    const response = await fetch("http://api:9124/chat?" + data.toString(), {
+    const response = await fetch("/api/chat?" + searchParams.toString(), {
       method: "POST",
     });
 
diff --git a/web/src/routes/+page.svelte b/web/src/routes/+page.svelte
index 2db7751..334227a 100644
--- a/web/src/routes/+page.svelte
+++ b/web/src/routes/+page.svelte
@@ -2,18 +2,155 @@
   import type { PageData } from "./$types";
 
   export let data: PageData;
+
+  const modelAvailable = data.models.length > 0;
+
+  let temp = 0.1;
+  let top_k: number = 50;
+  let top_p: number = 0.95;
+
+  let max_length: number = 256;
+  let repeat_last_n: number = 64;
+  let repeat_penalty: number = 1.3;
+
+  let preprompt: string =
+    "Below is an instruction that describes a task. Write a response that appropriately completes the request. The response must be accurate, concise and evidence-based whenever possible. A complete answer is always ended by [end of text].";
 </script>
 
-<h1 class="text-3xl font-bold text-center pt-5">Say Hi to Serge !</h1>
+<h1 class="text-3xl font-bold text-center pt-5">Say Hi to Serge!</h1>
+<h1 class="text-xl text-center pt-2 pb-5">
+  An easy way to chat with Alpaca & other LLaMa based models.
+</h1>
 
-<form method="POST" class="p-10">
-  <label for="model" class="label">
-    <span class="label-text">Model</span>
-  </label>
-  <select name="model" class="select select-bordered w-full max-w-xs">
-    {#each data.models as model}
-      <option value={model}>{model}</option>
-    {/each}
-  </select>
-  <button class="btn ml-5">Start a new chat</button>
+<form method="POST" class="p-5">
+  <div class="w-full pb-20">
+    <div class="mx-auto w-fit pt-5">
+      <button class=" mx-auto btn btn-primary ml-5" disabled={!modelAvailable}
+        >Start a new chat</button
+      >
+    </div>
+  </div>
+
+  <div
+    tabindex="-1"
+    class="collapse collapse-arrow border-2 rounded-box border-gray-600 bg-base-100"
+  >
+    <input type="checkbox" />
+    <div class="collapse-title text-xl font-medium">Model settings</div>
+    <div class="collapse-content">
+      <div class="grid grid-cols-3 gap-4 p-3 ">
+        <div
+          class="tooltip col-span-2"
+          data-tip="The higher the temperature, the more random the model output."
+        >
+          <label for="temp" class="label-text">Temperature - [{temp}]</label>
+          <input
+            name="temp"
+            type="range"
+            bind:value={temp}
+            min="0.05"
+            max="2"
+            step="0.05"
+            class="range range-sm mt-auto"
+          />
+        </div>
+        <div
+          class="flex flex-col tooltip"
+          data-tip="The number of samples to consider for top_k sampling. "
+        >
+          <label for="top_k" class="label-text pb-1">top_k</label>
+          <input
+            class="input input-bordered w-full max-w-xs"
+            name="top_k"
+            type="number"
+            bind:value={top_k}
+            min="0"
+            max="100"
+          />
+        </div>
+        <div class="col-span-2">
+          <label for="max_length" class="label-text"
+            >Maximum generated text length in tokens - [{max_length}]</label
+          >
+          <input
+            name="max_length"
+            type="range"
+            bind:value={max_length}
+            min="16"
+            max="512"
+            step="16"
+            class="range range-sm mt-auto"
+          />
+        </div>
+        <div
+          class="flex flex-col tooltip"
+          data-tip="The cumulative probability of the tokens to keep for nucleus sampling. "
+        >
+          <label for="top_p" class="label-text pb-1">top_p</label>
+          <input
+            class="input input-bordered w-full max-w-xs"
+            name="top_p"
+            type="number"
+            bind:value={top_p}
+            min="0"
+            max="1"
+            step="0.025"
+          />
+        </div>
+        <div class="flex flex-col">
+          <label for="model" class="label-text pb-1"> Model choice </label>
+          <select name="model" class="select select-bordered w-full max-w-xs">
+            {#each data.models as model}
+              <option value={model}>{model}</option>
+            {/each}
+          </select>
+        </div>
+        <div
+          class="flex flex-col tooltip"
+          data-tip="Number of tokens to look back on for deciding to apply the repeat penalty."
+        >
+          <label for="repeat_last_n" class="label-text pb-1"
+            >repeat_last_n</label
+          >
+          <input
+            class="input input-bordered w-full max-w-xs"
+            name="repeat_last_n"
+            type="number"
+            bind:value={repeat_last_n}
+            min="0"
+            max="100"
+          />
+        </div>
+
+        <div
+          class="flex flex-col tooltip"
+          data-tip="The weight of the penalty to avoid repeating the last repeat_last_n tokens. "
+        >
+          <label for="repeat_penalty" class="label-text pb-1"
+            >repeat_penalty</label
+          >
+          <input
+            class="input input-bordered w-full max-w-xs"
+            name="repeat_penalty"
+            type="number"
+            bind:value={repeat_penalty}
+            min="0"
+            max="2"
+            step="0.05"
+          />
+        </div>
+        <div class="col-span-3 flex flex-col">
+          <label for="preprompt" class="label-text pb-1"
+            >Pre-Prompt for initializing a conversation.</label
+          >
+          <textarea
+            class="textarea h-24 textarea-bordered w-full"
+            name="preprompt"
+            bind:value={preprompt}
+            placeholder="Enter your prompt here"
+          />
+        </div>
+      </div>
+    </div>
+  </div>
 </form>
diff --git a/web/src/routes/+page.ts b/web/src/routes/+page.ts
index fdeaa31..4e5fcd5 100644
--- a/web/src/routes/+page.ts
+++ b/web/src/routes/+page.ts
@@ -1,11 +1,9 @@
 import type { PageLoad } from "./$types";
 
 export const load: PageLoad = async ({ fetch }) => {
-  const r = await fetch("http://api:9124/models");
+  const r = await fetch("/api/models"); // root-relative so it resolves the same from any route
   const models = (await r.json()) as string[];
   return {
     models,
   };
 };
-
-export const csr = false;
diff --git a/web/src/routes/chat/[id]/+page.server.ts b/web/src/routes/chat/[id]/+page.server.ts
index 1f1c526..a6b3a04 100644
--- a/web/src/routes/chat/[id]/+page.server.ts
+++ b/web/src/routes/chat/[id]/+page.server.ts
@@ -10,14 +10,15 @@ export const actions = {
       data.append("prompt", question.toString());
 
       const response = await fetch(
-        "http://api:9124/chat/" + params.id + "/question?" + data.toString(),
+        "/api/chat/" + params.id + "/question?" + data.toString(),
         {
           method: "POST",
         }
       );
 
       if (response.ok) {
-        return { success: true };
+        const question = await response.json();
+        return question;
       } else {
         console.log(response.statusText);
       }
diff --git a/web/src/routes/chat/[id]/+page.svelte b/web/src/routes/chat/[id]/+page.svelte
index df33c24..b92fafd 100644
--- a/web/src/routes/chat/[id]/+page.svelte
+++ b/web/src/routes/chat/[id]/+page.svelte
@@ -1,41 +1,65 @@
 <script lang="ts">
   import { navigating } from "$app/stores";
   import type { PageData } from "./$types";
+  import { enhance } from "$app/forms";
 
   export let data: PageData;
 
-  const questions = data.props.questions ?? [];
-
-  const startDate = new Date(data.props.created);
+  $: isLoading = false;
+  $: questions = data.props.questions ?? [];
+  $: startDate = new Date(data.props.created);
 </script>
 
-<div class="max-w-4xl mx-auto">
+<div class="max-w-4xl mx-auto h-full max-h-screen relative">
   <h1 class="text-4xl font-bold">Chat with {data.props.parameters.model}</h1>
   <h4 class="text-xl font-semibold mb-10">
     Started on {startDate.toLocaleString("en-US")}
   </h4>
-  {#each questions as question}
-    <div class="chat chat-end">
-      <div class="chat-bubble chat-bubble-secondary whitespace-pre-line">
-        {question.question}
-      </div>
-    </div>
-    <div class="chat chat-start">
-      <div class="chat-bubble chat-bubble-primary whitespace-pre-line">
-        {question.answer}
-      </div>
-    </div>
-  {/each}
 
-  <form method="POST" class="form-control items-center mt-3">
+  <div class="overflow-y-auto h-[calc(100vh-10rem)] px-10">
+    <div class="h-max pb-32">
+      {#each questions as question}
+        <div class="chat chat-end my-2">
+          <div
+            class="chat-bubble chat-bubble-secondary whitespace-pre-line text-lg"
+          >
+            {question.question}
+          </div>
+        </div>
+        <div class="chat chat-start my-2">
+          <div
+            class="chat-bubble chat-bubble-primary whitespace-pre-line text-lg"
+          >
+            {question.answer}
+          </div>
+        </div>
+      {/each}
+    </div>
+  </div>
+
+  <form
+    method="POST"
+    class="form-control items-center absolute bottom-0 w-full px-5 left-0 h-32 flex flex-row bg-base-100"
+    use:enhance={() => {
+      isLoading = true;
+
+      return async ({ update }) => {
+        isLoading = false;
+        update();
+      };
+    }}
+  >
     <textarea
       name="question"
-      class="textarea textarea-bordered h-24 w-full"
+      class="textarea textarea-bordered h-24 w-full text-lg"
       placeholder="Why is the sky blue?"
+      disabled={isLoading}
     />
     <button
       type="submit"
-      class={"btn btn-primary max-w-lg m-3" + ($navigating ? "loading" : "")}
+      disabled={isLoading}
+      class={"btn btn-primary max-w-lg m-3 h-24 w-24 text-lg"}
+      class:loading={isLoading}
     >
       Send
     </button>
diff --git a/web/src/routes/chat/[id]/+page.ts b/web/src/routes/chat/[id]/+page.ts
index 09b062c..d2a5851 100644
--- a/web/src/routes/chat/[id]/+page.ts
+++ b/web/src/routes/chat/[id]/+page.ts
@@ -25,11 +25,9 @@ type t = {
 };
 
 export const load: PageLoad = async ({ fetch, params }) => {
-  const r = await fetch("http://api:9124/chat/" + params.id);
+  const r = await fetch("/api/chat/" + params.id);
   const data = (await r.json()) as t;
   return {
     props: data,
   };
 };
-
-export const csr = false;
diff --git a/web/svelte.config.js b/web/svelte.config.js
index 94be549..4fefc49 100644
--- a/web/svelte.config.js
+++ b/web/svelte.config.js
@@ -1,4 +1,4 @@
-import adapter from '@sveltejs/adapter-auto';
+import adapter from '@sveltejs/adapter-node';
 import { vitePreprocess } from '@sveltejs/kit/vite';
 
 /** @type {import('@sveltejs/kit').Config} */
@@ -12,6 +12,9 @@ const config = {
 		// If your environment is not supported or you settled on a specific environment, switch out the adapter.
 		// See https://kit.svelte.dev/docs/adapters for more information about adapters.
 		adapter: adapter(),	  
+		csrf: {
+			checkOrigin: false,
+		  }	  
 	},
 	preprocess: vitePreprocess()
 };