diff --git a/README.md b/README.md index a333f69..91e2512 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ Instructions for setting up Serge on Kubernetes can be found in the [wiki](https | **Alfred** | 40B | | **CodeLLaMA** | 7B, 13B | | **Falcon** | 7B, 7B-Instruct, 40B, 40B-Instruct | -| **LLaMA 2** | 7B, 13B, 70B | +| **LLaMA 2** | 7B, 7B-Chat, 13B, 13B-Chat, 70B, 70B-Chat, 70B-OASST | | **Med42** | 70B | | **Meditron** | 7B, 70B | | **Mistral** | 7B, 7B-Instruct, 7B-OpenOrca | diff --git a/api/src/serge/data/models.json b/api/src/serge/data/models.json index f435931..3d6c4ba 100644 --- a/api/src/serge/data/models.json +++ b/api/src/serge/data/models.json @@ -1,20 +1,4 @@ [ - { - "name": "Notus", - "models": [ - { - "name": "Notus-40B", - "repo": "TheBloke/notus-7B-v1-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "notus-7b-v1.Q4_K_M.gguf", - "disk_space": 4370000000.0 - } - ] - } - ] - }, { "name": "Alfred", "models": [ @@ -25,39 +9,181 @@ { "name": "q4_K_M", "filename": "alfred-40b-1023.Q4_K_M.gguf", - "disk_space": 25500000000.0 + "disk_space": 25452369600.0 } ] } ] - }, - { - "name": "PsyMedRP", + }, + { + "name": "CodeLLaMA", "models": [ { - "name": "PsyMedRP-v1-13B", - "repo": "TheBloke/PsyMedRP-v1-13B-GGUF", + "name": "CodeLlama-7B", + "repo": "TheBloke/CodeLlama-7B-GGUF", "files": [ { "name": "q4_K_M", - "filename": "psymedrp-v1-13b.Q4_K_M.gguf", - "disk_space": 7870000000.0 + "filename": "codellama-7b.Q4_K_M.gguf", + "disk_space": 4081095360.0 } ] }, { - "name": "PsyMedRP-v1-20B", - "repo": "TheBloke/PsyMedRP-v1-20B-GGUF", + "name": "CodeLlama-7B-Instruct", + "repo": "TheBloke/CodeLlama-7B-Instruct-GGUF", "files": [ { "name": "q4_K_M", - "filename": "psymedrp-v1-20b.Q4_K_M.gguf", - "disk_space": 12000000000.0 + "filename": "codellama-7b-instruct.Q4_K_M.gguf", + "disk_space": 4081095360.0 } ] - } + }, + { + "name": "CodeLlama-13B-Instruct", + "repo": "TheBloke/CodeLlama-13B-Instruct-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "codellama-13b-instruct.Q4_K_M.gguf", + "disk_space": 7866070016.0 + } + ] + } ] - }, + }, + { + "name": "Falcon", + "models": [ + { + "name": "Falcon-7B", + "repo": "maddes8cht/tiiuae-falcon-7b-gguf", + "files": [ + { + "name": "q4_K_M", + "filename": "tiiuae-falcon-7b-Q4_K_M.gguf", + "disk_space": 4975385792.0 + } + ] + }, + { + "name": "Falcon-7B-Instruct", + "repo": "maddes8cht/tiiuae-falcon-7b-instruct-gguf", + "files": [ + { + "name": "q4_K_M", + "filename": "tiiuae-falcon-7b-instruct-Q4_K_M.gguf", + "disk_space": 4975385792.0 + } + ] + }, + { + "name": "Falcon-40B", + "repo": "maddes8cht/tiiuae-falcon-40b-gguf", + "files": [ + { + "name": "q4_K_M", + "filename": "tiiuae-falcon-40b-Q4_K_M.gguf", + "disk_space": 25452629728.0 + } + ] + }, + { + "name": "Falcon-40B-Instruct", + "repo": "maddes8cht/tiiuae-falcon-40b-instruct-gguf", + "files": [ + { + "name": "q4_K_M", + "filename": "tiiuae-falcon-40b-instruct-Q4_K_M.gguf", + "disk_space": 25452629728.0 + } + ] + } + ] + }, + { + "name": "LLaMA_2", + "models": [ + { + "name": "LLaMA2-7B", + "repo": "TheBloke/Llama-2-7B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "llama-2-7b.Q4_K_M.gguf", + "disk_space": 4081004224.0 + } + ] + }, + { + "name": "LLaMA2-7B-Chat", + "repo": "TheBloke/Llama-2-7B-Chat-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "llama-2-7b-chat.Q4_K_M.gguf", + "disk_space": 4081004224.0 + } + ] + }, + { + "name": "LLaMA2-13B", + "repo": "TheBloke/Llama-2-13B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "llama-2-13b.Q4_K_M.gguf", + "disk_space": 7865956224.0 + } + ] + }, + { + "name": "LLaMA2-13B-Chat", + "repo": "TheBloke/Llama-2-13B-chat-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "llama-2-13b-chat.Q4_K_M.gguf", + "disk_space": 7865956224.0 + } + ] + }, + { + "name": "LLaMA2-70B", + "repo": "TheBloke/Llama-2-70B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "llama-2-70b.Q4_K_M.gguf", + "disk_space": 41422910368.0 + } + ] + }, + { + "name": "LLaMA2-70B-Chat", + "repo": "TheBloke/Llama-2-70B-Chat-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "llama-2-70b-chat.Q4_K_M.gguf", + "disk_space": 41422910368.0 + } + ] + }, + { + "name": "Llama2-70B-SFT-OASST", + "repo": "TheBloke/Llama-2-70B-OASST-1-200-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "llama-2-70b-oasst-1-200.Q4_K_M.gguf", + "disk_space": 41423092064.0 + } + ] + } + ] + }, { "name": "Med42", "models": [ @@ -68,28 +194,77 @@ { "name": "q4_K_M", "filename": "med42-70b.Q4_K_M.gguf", - "disk_space": 41400000000.0 + "disk_space": 41422910368.0 } ] } ] - }, - { - "name": "StarlingLM", + }, + { + "name": "Meditron", "models": [ { - "name": "Starling-LM-7B-Alpha", - "repo": "TheBloke/Starling-LM-7B-alpha-GGUF", + "name": "Meditron-7B", + "repo": "TheBloke/meditron-7B-GGUF", "files": [ { "name": "q4_K_M", - "filename": "starling-lm-7b-alpha.Q4_K_M.gguf", - "disk_space": 4370000000.0 + "filename": "meditron-7b.Q4_K_M.gguf", + "disk_space": 4081004352.0 + } + ] + }, + { + "name": "Meditron-70B", + "repo": "TheBloke/meditron-70B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "meditron-70b.Q4_K_M.gguf", + "disk_space": 41422910464.0 } ] } ] - }, + }, + { + "name": "Mistral", + "models": [ + { + "name": "Mistral-7B-Instruct", + "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "mistral-7b-instruct-v0.1.Q4_K_M.gguf", + "disk_space": 4368438944.0 + } + ] + }, + { + "name": "Mistral-7B", + "repo": "TheBloke/Mistral-7B-v0.1-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "mistral-7b-v0.1.Q4_K_M.gguf", + "disk_space": 4368438912.0 + } + ] + }, + { + "name": "Mistral-7B-OpenOrca", + "repo": "TheBloke/Mistral-7B-OpenOrca-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "mistral-7b-openorca.Q4_K_M.gguf", + "disk_space": 4368450304.0 + } + ] + } + ] + }, { "name": "NeuralChat", "models": [ @@ -100,12 +275,28 @@ { "name": "q4_K_M", "filename": "neural-chat-7b-v3-2.Q4_K_M.gguf", - "disk_space": 4370000000.0 + "disk_space": 4368438976.0 } ] } ] }, + { + "name": "Notus", + "models": [ + { + "name": "Notus-7B-v1", + "repo": "TheBloke/notus-7B-v1-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "notus-7b-v1.Q4_K_M.gguf", + "disk_space": 4368439424.0 + } + ] + } + ] + }, { "name": "OpenChat", "models": [ @@ -116,7 +307,7 @@ { "name": "q4_K_M", "filename": "openchat_3.5.Q4_K_M.gguf", - "disk_space": 4370000000.0 + "disk_space": 4368450304.0 } ] } @@ -132,7 +323,7 @@ { "name": "q4_K_M", "filename": "openlm-research-open_llama_3b_v2-Q4_K_M.gguf", - "disk_space": 2580000000.0 + "disk_space": 2580116096.0 } ] }, @@ -143,7 +334,7 @@ { "name": "q4_K_M", "filename": "openlm-research-open_llama_7b_v2-Q4_K_M.gguf", - "disk_space": 4080000000.0 + "disk_space": 4081016096.0 } ] }, @@ -154,12 +345,28 @@ { "name": "q4_K_M", "filename": "openlm-research-open_llama_13b-Q4_K_M.gguf", - "disk_space": 7869900000.0 + "disk_space": 7865990752.0 } ] } ] }, + { + "name": "SlimOrca", + "models": [ + { + "name": "SlimOrca-13B", + "repo": "TheBloke/SlimOrca-13B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "slimorca-13b.Q4_K_M.gguf", + "disk_space": 7865956352.0 + } + ] + } + ] + }, { "name": "Orca_2", "models": [ @@ -170,7 +377,7 @@ { "name": "q4_K_M", "filename": "orca-2-7b.Q4_K_M.gguf", - "disk_space": 4080000000.0 + "disk_space": 4081021376.0 } ] }, @@ -181,88 +388,55 @@ { "name": "q4_K_M", "filename": "orca-2-13b.Q4_K_M.gguf", - "disk_space": 7870000000.0 + "disk_space": 7865977664.0 } ] } ] - }, - { - "name": "Meditron", + }, + { + "name": "PsyMedRP", "models": [ { - "name": "Meditron-7B", - "repo": "TheBloke/meditron-7B-GGUF", + "name": "PsyMedRP-v1-13B", + "repo": "TheBloke/PsyMedRP-v1-13B-GGUF", "files": [ { "name": "q4_K_M", - "filename": "meditron-7b.Q4_K_M.gguf", - "disk_space": 4080000000.0 + "filename": "psymedrp-v1-13b.Q4_K_M.gguf", + "disk_space": 7865956288.0 } ] - }, + }, { - "name": "Meditron-70B", - "repo": "TheBloke/meditron-70B-GGUF", + "name": "PsyMedRP-v1-20B", + "repo": "TheBloke/PsyMedRP-v1-20B-GGUF", "files": [ { "name": "q4_K_M", - "filename": "meditron-70b.Q4_K_M.gguf", - "disk_space": 41400000000.0 + "filename": "psymedrp-v1-20b.Q4_K_M.gguf", + "disk_space": 12042208736.0 + } + ] + } + ] + }, + { + "name": "StarlingLM", + "models": [ + { + "name": "Starling-LM-7B-Alpha", + "repo": "TheBloke/Starling-LM-7B-alpha-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "starling-lm-7b-alpha.Q4_K_M.gguf", + "disk_space": 4368450368.0 } ] } ] }, - { - "name": "Falcon", - "models": [ - { - "name": "Falcon-7b", - "repo": "maddes8cht/tiiuae-falcon-7b-gguf", - "files": [ - { - "name": "q4_K_M", - "filename": "tiiuae-falcon-7b-Q4_K_M.gguf", - "disk_space": 4980000000.0 - } - ] - }, - { - "name": "Falcon-7b-instruct", - "repo": "maddes8cht/tiiuae-falcon-7b-instruct-gguf", - "files": [ - { - "name": "q4_K_M", - "filename": "tiiuae-falcon-7b-instruct-Q4_K_M.gguf", - "disk_space": 4980000000.0 - } - ] - }, - { - "name": "Falcon-40b", - "repo": "maddes8cht/tiiuae-falcon-40b-gguf", - "files": [ - { - "name": "q4_K_M", - "filename": "tiiuae-falcon-40b-Q4_K_M.gguf", - "disk_space": 25500000000.0 - } - ] - }, - { - "name": "Falcon-40b-instruct", - "repo": "maddes8cht/tiiuae-falcon-40b-instruct-gguf", - "files": [ - { - "name": "q4_K_M", - "filename": "tiiuae-falcon-40b-instruct-Q4_K_M.gguf", - "disk_space": 25500000000.0 - } - ] - } - ] - }, { "name": "Vicuna", "models": [ @@ -273,7 +447,7 @@ { "name": "q4_K_M", "filename": "vicuna-7b-v1.5.Q4_K_M.gguf", - "disk_space": 4080000000.0 + "disk_space": 4081004224.0 } ] }, @@ -284,45 +458,7 @@ { "name": "q4_K_M", "filename": "vicuna-13b-v1.5.Q4_K_M.gguf", - "disk_space": 7869900000.0 - } - ] - } - ] - }, - { - "name": "CodeLLaMA", - "models": [ - { - "name": "CodeLlama-7B", - "repo": "TheBloke/CodeLlama-7B-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "codellama-7b.Q4_K_M.gguf", - "disk_space": 4080000000.0 - } - ] - }, - { - "name": "CodeLlama-7B-Instruct", - "repo": "TheBloke/CodeLlama-7B-Instruct-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "codellama-7b-instruct.Q4_K_M.gguf", - "disk_space": 4080000000.0 - } - ] - }, - { - "name": "CodeLlama-13B-Instruct", - "repo": "TheBloke/CodeLlama-13B-Instruct-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "codellama-13b-instruct.Q4_K_M.gguf", - "disk_space": 7870000000.0 + "disk_space": 7865956224.0 } ] } @@ -338,7 +474,7 @@ { "name": "q4_K_M", "filename": "zephyr-7b-alpha.Q4_K_M.gguf", - "disk_space": 4370000000.0 + "disk_space": 4368438976.0 } ] }, @@ -349,86 +485,10 @@ { "name": "q4_K_M", "filename": "zephyr-7b-beta.Q4_K_M.gguf", - "disk_space": 4370000000.0 + "disk_space": 4368438976.0 } ] } ] - }, - { - "name": "Mistral", - "models": [ - { - "name": "Mistral-7B-Instruct", - "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "mistral-7b-instruct-v0.1.Q4_K_M.gguf", - "disk_space": 4370000000.0 - } - ] - }, - { - "name": "Mistral-7B", - "repo": "TheBloke/Mistral-7B-v0.1-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "mistral-7b-v0.1.Q4_K_M.gguf", - "disk_space": 4370000000.0 - } - ] - }, - { - "name": "Mistral-7B-OpenOrca", - "repo": "TheBloke/Mistral-7B-OpenOrca-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "mistral-7b-openorca.Q4_K_M.gguf", - "disk_space": 4370000000.0 - } - ] - } - ] - }, - { - "name": "LLaMA", - "models": [ - { - "name": "LLaMA2-7B-Chat", - "repo": "TheBloke/Llama-2-7B-Chat-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "llama-2-7b-chat.Q4_K_M.gguf", - "disk_space": 4080000000.0 - } - ] - }, - { - "name": "LLaMA2-13B-Chat", - "repo": "TheBloke/Llama-2-13B-chat-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "llama-2-13b-chat.Q4_K_M.gguf", - "disk_space": 7870000000.0 - } - ] - }, - { - "name": "LLaMA2-70B-Chat", - "repo": "TheBloke/Llama-2-70B-Chat-GGUF", - "files": [ - { - "name": "q4_K_M", - "filename": "llama-2-70b-chat.Q4_K_M.gguf", - "disk_space": 41420000000.0 - } - ] - } - ] - } + } ]