basic working version
This commit is contained in:
commit
2ae699b0c6
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
api/__pycache__
|
||||
api/magic.dat
|
||||
|
||||
weights/**
|
||||
!weights/put_your_weights_here.txt
|
||||
27
Dockerfile.api
Normal file
27
Dockerfile.api
Normal file
@ -0,0 +1,27 @@
|
||||
# Build stage: compile the llama.cpp `main` binary from source.
FROM ubuntu:latest as builder

# Single RUN layer: update + install + clean so no stale apt cache or
# package lists are baked into the layer.
RUN apt-get update && apt-get install -y \
    build-essential \
    zlib1g-dev \
    libbz2-dev \
    liblzma-dev \
    autoconf \
    git \
    wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /tmp

# The `mmap` branch is required; the sed patch forces the 13B (5120-dim)
# model to load as a single part instead of two.
RUN git clone https://github.com/ggerganov/llama.cpp.git -b mmap
RUN cd llama.cpp && sed -i 's/{ 5120, 2 },/{ 5120, 1 },/g' main.cpp && make

# Runtime stage: only Python + the compiled binary, no build toolchain.
FROM ubuntu:latest
WORKDIR /usr/src/app

# Use apt-get consistently (the `apt` CLI is not script-stable) and clean
# the package lists in the same layer.
RUN apt-get update && apt-get install -y python3-pip && rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip

COPY ./api/requirements.txt requirements.txt
COPY --from=builder /tmp/llama.cpp/main /usr/bin/llama

RUN pip install -r requirements.txt
|
||||
0
Dockerfile.web
Normal file
0
Dockerfile.web
Normal file
67
api/main.py
Normal file
67
api/main.py
Normal file
@ -0,0 +1,67 @@
|
||||
from typing import Union
|
||||
from fastapi import FastAPI
|
||||
import subprocess, os
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
def _drop_prefix(text: str, prefix: str) -> str:
    """Remove an exact leading *prefix* from *text*.

    Unlike ``str.lstrip``, which strips a *character set* and can eat
    leading letters of the payload, this removes the prefix only when it
    matches exactly.
    """
    return text[len(prefix):] if text.startswith(prefix) else text


@app.get("/generate")
def generate(
    model: str = "ggml-alpaca-13b-q4.bin",
    prompt: str = "What is the first letter of the alphabet?",
    temp: float = 0.8,
    top_k: int = 40,
    top_p: float = 0.9,
    repeast_last_n: int = 64,  # NOTE(review): typo, but renaming would break the public query API
    repeat_penalty: float = 1.3,
    stop_token: str = None,
):
    """Run the ``llama`` binary on an Alpaca-style prompt and return the result.

    All parameters arrive as HTTP query parameters via FastAPI.
    ``repeast_last_n``, ``repeat_penalty`` and ``stop_token`` are accepted
    but not forwarded to the binary yet.
    # TODO: forward them once the CLI flag names on the mmap branch are confirmed.

    Returns a dict with:
      - ``"input"``:  the instruction text echoed back by the model
      - ``"output"``: the generated response text
    """
    # Wrap the user's text in the standard Alpaca instruction template.
    prompter = (
        lambda prompt: f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{prompt}

### Response:
"""
    )

    args = (
        "llama",
        "--model",
        "weights/" + model,
        "--prompt",
        prompter(prompt),
        "--temp",
        str(temp),
        "--top_k",
        str(top_k),
        "--top_p",
        str(top_p),
    )

    print(args)  # debug: log the exact command line being executed

    procLlama = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # communicate() waits for the process while draining both pipes.  The
    # previous `while True: poll()` busy-loop pegged a CPU core and could
    # deadlock once the child filled the stdout pipe buffer, since nothing
    # read from it until after exit.
    stdout_data, _stderr_data = procLlama.communicate()
    output = stdout_data.decode("utf-8")

    # llama echoes the prompt, so well-formed output looks like:
    #   ...### Instruction:\n<input>\n\n### Response:\n<answer>
    splits = output.split("###")
    if len(splits) < 3:
        # No recognizable sections: return the raw output rather than
        # crashing with an IndexError (HTTP 500).
        return {"input": prompt, "output": output}

    return {
        # Exact-label removal: str.lstrip(" Instruction:\n") stripped any of
        # those characters and truncated prompts starting with e.g. "In".
        "input": _drop_prefix(splits[1], " Instruction:\n").rstrip("\n"),
        "output": _drop_prefix(splits[2], " Response:\n"),
    }
|
||||
|
||||
|
||||
@app.get("/models")
def models():
    """List the model weight files available under ``weights/``.

    Returns the directory listing minus the placeholder file.  Filtering
    with a comprehension (instead of ``list.remove``) avoids a ValueError
    when ``put_your_weights_here.txt`` has been deleted by the user.
    """
    return [
        name
        for name in os.listdir("weights")
        if name != "put_your_weights_here.txt"
    ]
|
||||
2
api/requirements.txt
Normal file
2
api/requirements.txt
Normal file
@ -0,0 +1,2 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
12
docker-compose.yml
Normal file
12
docker-compose.yml
Normal file
@ -0,0 +1,12 @@
|
||||
version: "3.9"
services:
  api:
    build:
      context: .
      dockerfile: Dockerfile.api
    # --reload pairs with the ./api bind mount below for live development.
    command: uvicorn main:app --reload --host 0.0.0.0 --port 9123
    volumes:
      - ./api:/usr/src/app/
      - ./weights:/usr/src/app/weights
    ports:
      # Quoted per Compose docs: unquoted HOST:CONTAINER can be misparsed by YAML.
      - "9123:9123"
|
||||
0
weights/put_your_weights_here.txt
Normal file
0
weights/put_your_weights_here.txt
Normal file
Loading…
x
Reference in New Issue
Block a user