basic working version

Nathan Sarrazin 2023-03-19 12:09:15 +01:00
commit 2ae699b0c6
7 changed files with 113 additions and 0 deletions

5
.gitignore vendored Normal file

@@ -0,0 +1,5 @@
api/__pycache__
api/magic.dat
weights/**
!weights/put_your_weights_here.txt

27
Dockerfile.api Normal file

@@ -0,0 +1,27 @@
FROM ubuntu:latest as builder
RUN apt-get update && apt-get install -y \
build-essential \
zlib1g-dev \
libbz2-dev \
liblzma-dev \
autoconf \
git \
wget
WORKDIR /tmp
RUN git clone https://github.com/ggerganov/llama.cpp.git -b mmap
# Patch llama.cpp's n_parts table so the 13B model (n_embd 5120) is loaded
# from a single merged weights file instead of two parts, then build.
RUN cd llama.cpp && sed -i 's/{ 5120, 2 },/{ 5120, 1 },/g' main.cpp && make
FROM ubuntu:latest
WORKDIR /usr/src/app
RUN apt-get update && apt-get install -y python3-pip
RUN pip install --upgrade pip
COPY ./api/requirements.txt requirements.txt
RUN pip install -r requirements.txt
COPY --from=builder /tmp/llama.cpp/main /usr/bin/llama
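The builder stage only compiles the llama.cpp main binary; the runtime stage copies it to /usr/bin/llama, which is the command name that api/main.py invokes.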

0
Dockerfile.web Normal file

67
api/main.py Normal file

@@ -0,0 +1,67 @@
from typing import Union
from fastapi import FastAPI
import subprocess, os

app = FastAPI()


@app.get("/generate")
def generate(
    model: str = "ggml-alpaca-13b-q4.bin",
    prompt: str = "What is the first letter of the alphabet?",
    temp: float = 0.8,
    top_k: int = 40,
    top_p: float = 0.9,
    repeat_last_n: int = 64,
    repeat_penalty: float = 1.3,
    stop_token: Union[str, None] = None,
):
    # repeat_last_n, repeat_penalty and stop_token are accepted but not yet
    # forwarded to the llama binary below.
    prompter = (
        lambda prompt: f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{prompt}
### Response:
"""
    )
    args = (
        "llama",
        "--model",
        "weights/" + model,
        "--prompt",
        prompter(prompt),
        "--temp",
        str(temp),
        "--top_k",
        str(top_k),
        "--top_p",
        str(top_p),
    )
    print(args)
    procLlama = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() waits for the process to exit and drains both pipes,
    # which avoids a deadlock if llama writes a lot to stderr.
    stdout, _ = procLlama.communicate()
    output = stdout.decode("utf-8")
    # The prompt template separates sections with "###": splits[1] is the echoed
    # instruction and splits[2] is the generated response.
    splits = output.split("###")
    return {
        "input": splits[1].removeprefix(" Instruction:").strip(),
        "output": splits[2].removeprefix(" Response:").strip(),
    }


@app.get("/models")
def models():
    # List every available weight file except the placeholder kept in the repo.
    files = os.listdir("weights")
    files.remove("put_your_weights_here.txt")
    return files
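Once the container is running, both endpoints can be exercised with any HTTP client. A minimal sketch using only the Python standard library, assuming the API is reachable on localhost:9123 as published in docker-compose.yml:

import json
import urllib.parse
import urllib.request

# Assumed base URL: the port published in docker-compose.yml.
BASE = "http://localhost:9123"

# List the weight files the API can see.
with urllib.request.urlopen(f"{BASE}/models") as resp:
    print(json.load(resp))

# Query the default model through /generate; parameters are query-string arguments.
query = urllib.parse.urlencode({"prompt": "Name three primary colors.", "temp": 0.8})
with urllib.request.urlopen(f"{BASE}/generate?{query}") as resp:
    result = json.load(resp)
print(result["input"])
print(result["output"])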

2
api/requirements.txt Normal file

@@ -0,0 +1,2 @@
fastapi
uvicorn

12
docker-compose.yml Normal file

@@ -0,0 +1,12 @@
version: "3.9"
services:
api:
build:
context: .
dockerfile: Dockerfile.api
command: uvicorn main:app --reload --host 0.0.0.0 --port 9123
volumes:
- ./api:/usr/src/app/
- ./weights:/usr/src/app/weights
ports:
- 9123:9123
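The two volume mounts keep the FastAPI code hot-reloadable and expose the host's ./weights directory at weights/ inside the container, which is the path main.py prepends to the model name. With a weights file in place, the stack can be started with docker compose up --build and queried on port 9123 as in the client sketch above.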
