basic working version
This commit is contained in:
commit
2ae699b0c6
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
api/__pycache__
|
||||
api/magic.dat
|
||||
|
||||
weights/**
|
||||
!weights/put_your_weights_here.txt
|
||||
27
Dockerfile.api
Normal file
27
Dockerfile.api
Normal file
@ -0,0 +1,27 @@
|
||||
# Build stage: compile the llama.cpp `main` binary from source.
FROM ubuntu:latest as builder

# Single RUN layer: update + install + clean so no stale apt cache or
# package lists are baked into the layer.
RUN apt-get update && apt-get install -y \
    build-essential \
    zlib1g-dev \
    libbz2-dev \
    liblzma-dev \
    autoconf \
    git \
    wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /tmp

# The `mmap` branch is required; the sed patch forces the 13B (5120-dim)
# model to load as a single part instead of two.
RUN git clone https://github.com/ggerganov/llama.cpp.git -b mmap
RUN cd llama.cpp && sed -i 's/{ 5120, 2 },/{ 5120, 1 },/g' main.cpp && make

# Runtime stage: only Python + the compiled binary, no build toolchain.
FROM ubuntu:latest
WORKDIR /usr/src/app

# Use apt-get consistently (the `apt` CLI is not script-stable) and clean
# the package lists in the same layer.
RUN apt-get update && apt-get install -y python3-pip && rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip

COPY ./api/requirements.txt requirements.txt
COPY --from=builder /tmp/llama.cpp/main /usr/bin/llama

RUN pip install -r requirements.txt
|
||||
0
Dockerfile.web
Normal file
0
Dockerfile.web
Normal file
67
api/main.py
Normal file
67
api/main.py
Normal file
@ -0,0 +1,67 @@
|
||||
from typing import Union
|
||||
from fastapi import FastAPI
|
||||
import subprocess, os
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
def _drop_prefix(text: str, prefix: str) -> str:
    """Remove an exact leading *prefix* from *text*.

    Unlike ``str.lstrip``, which strips a *character set* and can eat
    leading letters of the payload, this removes the prefix only when it
    matches exactly.
    """
    return text[len(prefix):] if text.startswith(prefix) else text


@app.get("/generate")
def generate(
    model: str = "ggml-alpaca-13b-q4.bin",
    prompt: str = "What is the first letter of the alphabet?",
    temp: float = 0.8,
    top_k: int = 40,
    top_p: float = 0.9,
    repeast_last_n: int = 64,  # NOTE(review): typo, but renaming would break the public query API
    repeat_penalty: float = 1.3,
    stop_token: str = None,
):
    """Run the ``llama`` binary on an Alpaca-style prompt and return the result.

    All parameters arrive as HTTP query parameters via FastAPI.
    ``repeast_last_n``, ``repeat_penalty`` and ``stop_token`` are accepted
    but not forwarded to the binary yet.
    # TODO: forward them once the CLI flag names on the mmap branch are confirmed.

    Returns a dict with:
      - ``"input"``:  the instruction text echoed back by the model
      - ``"output"``: the generated response text
    """
    # Wrap the user's text in the standard Alpaca instruction template.
    prompter = (
        lambda prompt: f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{prompt}

### Response:
"""
    )

    args = (
        "llama",
        "--model",
        "weights/" + model,
        "--prompt",
        prompter(prompt),
        "--temp",
        str(temp),
        "--top_k",
        str(top_k),
        "--top_p",
        str(top_p),
    )

    print(args)  # debug: log the exact command line being executed

    procLlama = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # communicate() waits for the process while draining both pipes.  The
    # previous `while True: poll()` busy-loop pegged a CPU core and could
    # deadlock once the child filled the stdout pipe buffer, since nothing
    # read from it until after exit.
    stdout_data, _stderr_data = procLlama.communicate()
    output = stdout_data.decode("utf-8")

    # llama echoes the prompt, so well-formed output looks like:
    #   ...### Instruction:\n<input>\n\n### Response:\n<answer>
    splits = output.split("###")
    if len(splits) < 3:
        # No recognizable sections: return the raw output rather than
        # crashing with an IndexError (HTTP 500).
        return {"input": prompt, "output": output}

    return {
        # Exact-label removal: str.lstrip(" Instruction:\n") stripped any of
        # those characters and truncated prompts starting with e.g. "In".
        "input": _drop_prefix(splits[1], " Instruction:\n").rstrip("\n"),
        "output": _drop_prefix(splits[2], " Response:\n"),
    }
|
||||
|
||||
|
||||
@app.get("/models")
def models():
    """List the model weight files available under ``weights/``.

    Returns the directory listing minus the placeholder file.  Filtering
    with a comprehension (instead of ``list.remove``) avoids a ValueError
    when ``put_your_weights_here.txt`` has been deleted by the user.
    """
    return [
        name
        for name in os.listdir("weights")
        if name != "put_your_weights_here.txt"
    ]
|
||||
2
api/requirements.txt
Normal file
2
api/requirements.txt
Normal file
@ -0,0 +1,2 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
12
docker-compose.yml
Normal file
12
docker-compose.yml
Normal file
@ -0,0 +1,12 @@
|
||||
version: "3.9"
services:
  api:
    build:
      context: .
      dockerfile: Dockerfile.api
    # --reload pairs with the ./api bind mount below for live development.
    command: uvicorn main:app --reload --host 0.0.0.0 --port 9123
    volumes:
      - ./api:/usr/src/app/
      - ./weights:/usr/src/app/weights
    ports:
      # Quoted per Compose docs: unquoted HOST:CONTAINER can be misparsed by YAML.
      - "9123:9123"
|
||||
0
weights/put_your_weights_here.txt
Normal file
0
weights/put_your_weights_here.txt
Normal file
Loading…
x
Reference in New Issue
Block a user