From c6c02271e08699d054fd470baac91d2edc949dd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=CE=94BL=C3=98=20=E1=84=83=CE=9E?= Date: Mon, 19 Jun 2023 21:16:41 +0000 Subject: [PATCH] Support for BigTrans, Minotaur, Robin, and Vicuna v1.3 models (#451) * Feature: add Vicuna-v1.3-7B and Vicuna-v1.3-13B * Feature: add BigTrans-13B * Feature: add robin,minotour,chronos-hermes,vicuna,trans * Feature: add robin,minotour,chronos-hermes,vicuna,trans * Docs: update model max ram required * Docs: update model max ram required * Feature: add more quants q2_k,q3_K_L,q4_1,q4_K_M,q8_0 on schema.json * Fix: missing emojis and typo vicuna models q3_K_L * Fix: minotaur * Fix: minotaur * Fix: schame.json enum k-quants * Fix: schame.json enum k-quants --------- Co-authored-by: pabl-o-ce --- README.md | 57 ++++- api/src/serge/data/models.json | 394 +++++++++++++++++++++++++++++++ api/src/serge/schema/schema.json | 9 +- 3 files changed, 451 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 90e241f..3db6ef1 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,8 @@ We currently support the following models: - Alpaca 🦙 - Alpaca-LoRA-65B - GPT4-Alpaca-LoRA-30B +- BigTrans 🗺 + - BigTrans-13B - Chronos 🌑 - Chronos-13B - Chronos-33B @@ -84,10 +86,17 @@ We currently support the following models: - Llama-Supercot-30B - Lazarus 💀 - Lazarus-30B +- Minotaur 🐃 + - Minotaur-15B - Nous 🧠 - Nous-Hermes-13B - OpenAssistant 🎙️ - OpenAssistant-30B +- Robin 🏹 + - Robin-7B + - Robin-13B + - Robin-33B + - Robin-65B - Samantha 👩 - Samantha-7B - Samantha-13B @@ -104,6 +113,8 @@ We currently support the following models: - Vicuna-v1.1-13B - VicUnlocked-30B - VicUnlocked-65B + - Vicuna-v1.3-7B + - Vicuna-v1.3-13B - Wizard 🧙 - Wizard-Mega-13B - Wizard-Vicuna-Uncensored-7B @@ -124,14 +135,44 @@ docker cp ./my_weight.bin serge:/usr/src/app/weights/ LLaMA will crash if you don't have enough available memory for the model: -| Model | RAM Required | -|----------|-----------------| -| 7B | 4.5GB | -| 
7B-q6_K | 8.03GB | -| 13B | 12GB | -| 13B-q6_K | 13.18GB | -| 30B | 20GB | -| 30B-q6_K | 29.19GB | +| Model | Max RAM Required | +|-------------|------------------| +| 7B | 4.5GB | +| 7B-q2_K | 5.37GB | +| 7B-q3_K_L | 6.10GB | +| 7B-q4_1 | 6.71GB | +| 7B-q4_K_M | 6.58GB | +| 7B-q5_1 | 7.56GB | +| 7B-q5_K_M | 7.28GB | +| 7B-q6_K | 8.03GB | +| 7B-q8_0 | 9.66GB | +| 13B | 12GB | +| 13B-q2_K | 8.01GB | +| 13B-q3_K_L | 9.43GB | +| 13B-q4_1 | 10.64GB | +| 13B-q4_K_M | 10.37GB | +| 13B-q5_1 | 12.26GB | +| 13B-q5_K_M | 11.73GB | +| 13B-q6_K | 13.18GB | +| 13B-q8_0 | 16.33GB | +| 33B | 20GB | +| 33B-q2_K | 16.21GB | +| 33B-q3_K_L | 19.78GB | +| 33B-q4_1 | 22.83GB | +| 33B-q4_K_M | 22.12GB | +| 33B-q5_1 | 26.90GB | +| 33B-q5_K_M | 25.55GB | +| 33B-q6_K | 29.19GB | +| 33B-q8_0 | 37.06GB | +| 65B | 50GB | +| 65B-q2_K | 29.95GB | +| 65B-q3_K_L | 37.15GB | +| 65B-q4_1 | 43.31GB | +| 65B-q4_K_M | 41.85GB | +| 65B-q5_1 | 51.47GB | +| 65B-q5_K_M | 48.74GB | +| 65B-q6_K | 56.06GB | +| 65B-q8_0 | 71.87GB | ## 💬 Support diff --git a/api/src/serge/data/models.json b/api/src/serge/data/models.json index 56091df..e82ec01 100644 --- a/api/src/serge/data/models.json +++ b/api/src/serge/data/models.json @@ -66,6 +66,52 @@ "disk_space": 26700000000.0 } ] + }, + { + "name": "Chronos-Hermes-13B", + "repo": "TheBloke/chronos-hermes-13B-GGML", + "files": [ + { + "name": "q2_K", + "filename": "chronos-hermes-13b.ggmlv3.q2_K.bin", + "disk_space": 5510000000.0 + }, + { + "name": "q3_K_L", + "filename": "chronos-hermes-13b.ggmlv3.q3_K_L.bin", + "disk_space": 6930000000.0 + }, + { + "name": "q4_1", + "filename": "chronos-hermes-13b.ggmlv3.q4_1.bin", + "disk_space": 8140000000.0 + }, + { + "name": "q4_K_M", + "filename": "chronos-hermes-13b.ggmlv3.q4_K_M.bin", + "disk_space": 7870000000.0 + }, + { + "name": "q5_1", + "filename": "chronos-hermes-13b.ggmlv3.q5_1.bin", + "disk_space": 9760000000.0 + }, + { + "name": "q5_K_M", + "filename": "chronos-hermes-13b.ggmlv3.q5_K_M.bin", + "disk_space": 
9230000000.0 + }, + { + "name": "q6_K", + "filename": "chronos-hermes-13b.ggmlv3.q6_K.bin", + "disk_space": 10700000000.0 + }, + { + "name": "q8_0", + "filename": "chronos-hermes-13b.ggmlv3.q8_0.bin", + "disk_space": 13800000000.0 + } + ] } ] }, @@ -330,6 +376,98 @@ "disk_space": 58700000000.0 } ] + }, + { + "name": "Vicuna-v1.3-7B", + "repo": "TheBloke/vicuna-7B-v1.3-GGML", + "files": [ + { + "name": "q2_K", + "filename": "vicuna-7b-v1.3.ggmlv3.q2_K.bin", + "disk_space": 2870000000.0 + }, + { + "name": "q3_K_L", + "filename": "vicuna-7b-v1.3.ggmlv3.q3_K_L.bin", + "disk_space": 3600000000.0 + }, + { + "name": "q4_1", + "filename": "vicuna-7b-v1.3.ggmlv3.q4_1.bin", + "disk_space": 4210000000.0 + }, + { + "name": "q4_K_M", + "filename": "vicuna-7b-v1.3.ggmlv3.q4_K_M.bin", + "disk_space": 4080000000.0 + }, + { + "name": "q5_1", + "filename": "vicuna-7b-v1.3.ggmlv3.q5_1.bin", + "disk_space": 5060000000.0 + }, + { + "name": "q5_K_M", + "filename": "vicuna-7b-v1.3.ggmlv3.q5_K_M.bin", + "disk_space": 4780000000.0 + }, + { + "name": "q6_K", + "filename": "vicuna-7b-v1.3.ggmlv3.q6_K.bin", + "disk_space": 5530000000.0 + }, + { + "name": "q8_0", + "filename": "vicuna-7b-v1.3.ggmlv3.q8_0.bin", + "disk_space": 7160000000.0 + } + ] + }, + { + "name": "Vicuna-v1.3-13B", + "repo": "TheBloke/vicuna-13b-v1.3-GGML", + "files": [ + { + "name": "q2_K", + "filename": "vicuna-13b-v1.3.ggmlv3.q2_K.bin", + "disk_space": 5510000000.0 + }, + { + "name": "q3_K_L", + "filename": "vicuna-13b-v1.3.ggmlv3.q3_K_L.bin", + "disk_space": 6930000000.0 + }, + { + "name": "q4_1", + "filename": "vicuna-13b-v1.3.ggmlv3.q4_1.bin", + "disk_space": 8140000000.0 + }, + { + "name": "q4_K_M", + "filename": "vicuna-13b-v1.3.ggmlv3.q4_K_M.bin", + "disk_space": 7870000000.0 + }, + { + "name": "q5_1", + "filename": "vicuna-13b-v1.3.ggmlv3.q5_1.bin", + "disk_space": 9760000000.0 + }, + { + "name": "q5_K_M", + "filename": "vicuna-13b-v1.3.ggmlv3.q5_K_M.bin", + "disk_space": 9230000000.0 + }, + { + 
"name": "q6_K", + "filename": "vicuna-13b-v1.3.ggmlv3.q6_K.bin", + "disk_space": 10700000000.0 + }, + { + "name": "q8_0", + "filename": "vicuna-13b-v1.3.ggmlv3.q8_0.bin", + "disk_space": 13800000000.0 + } + ] } ] }, @@ -484,6 +622,185 @@ } ] }, + { + "name": "Robin", + "models": [ + { + "name": "Robin-7B", + "repo": "TheBloke/robin-7B-v2-GGML", + "files": [ + { + "name": "q2_K", + "filename": "robin-7b.ggmlv3.q2_K.bin", + "disk_space": 28700000000.0 + }, + { + "name": "q3_K_L", + "filename": "robin-7b.ggmlv3.q3_K_L.bin", + "disk_space": 36000000000.0 + }, + { + "name": "q4_1", + "filename": "robin-7b.ggmlv3.q4_1.bin", + "disk_space": 42100000000.0 + }, + { + "name": "q4_K_M", + "filename": "robin-7b.ggmlv3.q4_K_M.bin", + "disk_space": 40800000000.0 + }, + { + "name": "q5_1", + "filename": "robin-7b.ggmlv3.q5_1.bin", + "disk_space": 50600000000.0 + }, + { + "name": "q5_K_M", + "filename": "robin-7b.ggmlv3.q5_K_M.bin", + "disk_space": 47800000000.0 + }, + { + "name": "q6_K", + "filename": "robin-7b.ggmlv3.q6_K.bin", + "disk_space": 55300000000.0 + }, + { + "name": "q8_0", + "filename": "robin-7b.ggmlv3.q8_0.bin", + "disk_space": 71600000000.0 + } + ] + }, + { + "name": "Robin-13B", + "repo": "TheBloke/robin-13B-v2-GGML", + "files": [ + { + "name": "q2_K", + "filename": "robin-13b.ggmlv3.q2_K.bin", + "disk_space": 55100000000.0 + }, + { + "name": "q3_K_L", + "filename": "robin-13b.ggmlv3.q3_K_L.bin", + "disk_space": 69300000000.0 + }, + { + "name": "q4_1", + "filename": "robin-13b.ggmlv3.q4_1.bin", + "disk_space": 81400000000.0 + }, + { + "name": "q4_K_M", + "filename": "robin-13b.ggmlv3.q4_K_M.bin", + "disk_space": 78700000000.0 + }, + { + "name": "q5_1", + "filename": "robin-13b.ggmlv3.q5_1.bin", + "disk_space": 97600000000.0 + }, + { + "name": "q5_K_M", + "filename": "robin-13b.ggmlv3.q5_K_M.bin", + "disk_space": 92300000000.0 + }, + { + "name": "q6_K", + "filename": "robin-13b.ggmlv3.q6_K.bin", + "disk_space": 10700000000.0 + }, + { + "name": "q8_0", + "filename": 
"robin-13b.ggmlv3.q8_0.bin", + "disk_space": 13800000000.0 + } + ] + }, + { + "name": "Robin-33B", + "repo": "TheBloke/robin-33B-v2-GGML", + "files": [ + { + "name": "q2_K", + "filename": "robin-33b.ggmlv3.q2_K.bin", + "disk_space": 13700000000.0 + }, + { + "name": "q3_K_L", + "filename": "robin-33b.ggmlv3.q3_K_L.bin", + "disk_space": 17300000000.0 + }, + { + "name": "q4_1", + "filename": "robin-33b.ggmlv3.q4_1.bin", + "disk_space": 20300000000.0 + }, + { + "name": "q4_K_M", + "filename": "robin-33b.ggmlv3.q4_K_M.bin", + "disk_space": 19600000000.0 + }, + { + "name": "q5_1", + "filename": "robin-33b.ggmlv3.q5_1.bin", + "disk_space": 24400000000.0 + }, + { + "name": "q5_K_M", + "filename": "robin-33b.ggmlv3.q5_K_M.bin", + "disk_space": 23000000000.0 + }, + { + "name": "q6_K", + "filename": "robin-33b.ggmlv3.q6_K.bin", + "disk_space": 26700000000.0 + }, + { + "name": "q8_0", + "filename": "robin-33b.ggmlv3.q8_0.bin", + "disk_space": 34600000000.0 + } + ] + }, + { + "name": "Robin-65B", + "repo": "TheBloke/robin-65B-v2-GGML", + "files": [ + { + "name": "q2_K", + "filename": "robin-65b.ggmlv3.q2_K.bin", + "disk_space": 27500000000.0 + }, + { + "name": "q3_K_L", + "filename": "robin-65b.ggmlv3.q3_K_L.bin", + "disk_space": 34600000000.0 + }, + { + "name": "q4_1", + "filename": "robin-65b.ggmlv3.q4_1.bin", + "disk_space": 40800000000.0 + }, + { + "name": "q4_K_M", + "filename": "robin-65b.ggmlv3.q4_K_M.bin", + "disk_space": 39300000000.0 + }, + { + "name": "q5_1", + "filename": "robin-65b.ggmlv3.q5_1.bin", + "disk_space": 49000000000.0 + }, + { + "name": "q5_K_M", + "filename": "robin-65b.ggmlv3.q5_K_M.bin", + "disk_space": 46200000000.0 + } + ] + } + ] + }, { "name": "misc", "models": [ @@ -541,6 +858,83 @@ "disk_space": 26700000000.0 } ] + }, + { + "name": "BigTrans-13B", + "repo": "TheBloke/BigTrans-13B-GGML", + "files": [ + { + "name": "q2_K", + "filename": "bigtrans-13b.ggmlv3.q2_K.bin", + "disk_space": 56400000000.0 + }, + { + "name": "q3_K_L", + "filename": 
"bigtrans-13b.ggmlv3.q3_K_L.bin", + "disk_space": 70700000000.0 + }, + { + "name": "q4_1", + "filename": "bigtrans-13b.ggmlv3.q4_1.bin", + "disk_space": 82800000000.0 + }, + { + "name": "q4_K_M", + "filename": "bigtrans-13b.ggmlv3.q4_K_M.bin", + "disk_space": 80200000000.0 + }, + { + "name": "q5_1", + "filename": "bigtrans-13b.ggmlv3.q5_1.bin", + "disk_space": 99300000000.0 + }, + { + "name": "q5_K_M", + "filename": "bigtrans-13b.ggmlv3.q5_K_M.bin", + "disk_space": 94000000000.0 + }, + { + "name": "q6_K", + "filename": "bigtrans-13b.ggmlv3.q6_K.bin", + "disk_space": 10900000000.0 + }, + { + "name": "q8_0", + "filename": "bigtrans-13b.ggmlv3.q8_0.bin", + "disk_space": 14100000000.0 + } + ] + }, + { + "name": "Minotaur-15B", + "repo": "TheBloke/minotaur-15B-GGML", + "files": [ + { + "name": "q4_0", + "filename": "minotaur-15b.ggmlv3.q4_0.bin", + "disk_space": 10700000000.0 + }, + { + "name": "q4_1", + "filename": "minotaur-15b.ggmlv3.q4_1.bin", + "disk_space": 11900000000.0 + }, + { + "name": "q5_0", + "filename": "minotaur-15b.ggmlv3.q5_0.bin", + "disk_space": 13100000000.0 + }, + { + "name": "q5_1", + "filename": "minotaur-15b.ggmlv3.q5_1.bin", + "disk_space": 14300000000.0 + }, + { + "name": "q8_0", + "filename": "minotaur-15b.ggmlv3.q8_0.bin", + "disk_space": 20100000000.0 + } + ] } ] } diff --git a/api/src/serge/schema/schema.json b/api/src/serge/schema/schema.json index a8e63c6..016cc13 100644 --- a/api/src/serge/schema/schema.json +++ b/api/src/serge/schema/schema.json @@ -73,9 +73,16 @@ "Name": { "type": "string", "enum": [ + "q2_K", + "q3_K_L", + "q4_0", + "q4_1", + "q4_K_M", + "q5_0", "q5_1", + "q5_K_M", "q6_K", - "q5_K_M" + "q8_0" ], "title": "Name" }