diff --git a/README.md b/README.md index 6b80fb3..90e241f 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ We currently support the following models: - Airoboros-7B - Airoboros-13B - Airoboros-30B + - Airoboros-65B - Alpaca 🦙 - Alpaca-LoRA-65B - GPT4-Alpaca-LoRA-30B diff --git a/api/src/serge/routers/model.py b/api/src/serge/routers/model.py index c8da2a5..fe64b47 100644 --- a/api/src/serge/routers/model.py +++ b/api/src/serge/routers/model.py @@ -14,25 +14,30 @@ model_router = APIRouter( models_info = { "Airoboros-7B": [ - "TheBloke/airoboros-7b-gpt4-GGML", - "airoboros-7b-gpt4.ggmlv3.q5_1.bin", + "TheBloke/airoboros-7b-gpt4-1.2-GGML", + "airoboros-7b-gpt4-1.2.ggmlv3.q5_1.bin", 5.06e9, ], "Airoboros-7B-q6_K": [ - "TheBloke/airoboros-7b-gpt4-GGML", - "airoboros-7B.ggmlv3.q6_K.bin", + "TheBloke/airoboros-7b-gpt4-1.2-GGML", + "airoboros-7B-1.2.ggmlv3.q6_K.bin", 5.53e9, ], "Airoboros-13B": [ - "TheBloke/airoboros-13b-gpt4-GGML", - "airoboros-13b.ggmlv3.q6_K.bin", + "TheBloke/airoboros-13b-gpt4-1.2-GGML", + "airoboros-13b-gpt4-1.2.ggmlv3.q6_K.bin", 10.7e9, ], "Airoboros-33B": [ - "TheBloke/airoboros-33b-gpt4-GGML", - "airoboros-33b-gpt4.ggmlv3.q6_K.bin", + "TheBloke/airoboros-33b-gpt4-1.2-GGML", + "airoboros-33b-gpt4-1.2.ggmlv3.q6_K.bin", 26.7e9, ], + "Airoboros-65B": [ + "TheBloke/airoboros-65B-gpt4-1.2-GGML", + "airoboros-65B-gpt4-1.2.ggmlv3.q5_K_M.bin", + 46.3e9, + ], "GPT4AlpacaLoRA-30B": [ "TheBloke/gpt4-alpaca-lora-30B-4bit-GGML", "gpt4-alpaca-lora-30b.ggmlv3.q5_1.bin",