Compare commits

...

4 Commits

Author SHA1 Message Date
sweep-ai[bot]
761e0dcfce
feat: Updated README.md 2024-02-26 01:45:43 +00:00
sweep-ai[bot]
eadd93ca5a
feat: Add support for loading .gguf and .bin model 2024-02-26 01:44:36 +00:00
sweep-ai[bot]
105f0fdbe5
feat: Updated charts/serge/values.yaml 2024-02-26 01:43:40 +00:00
sweep-ai[bot]
f408cac78b
feat: Updated docker-compose.yml 2024-02-26 01:42:39 +00:00
4 changed files with 84 additions and 1 deletions

View File

@ -48,7 +48,9 @@ Then, just visit http://localhost:8008, You can find the API documentation at ht
## 🖥️ Windows
Ensure you have Docker Desktop installed, WSL2 configured, and enough free RAM to run models.
To make Serge recognize `.gguf` model files, edit `values.yaml` for Helm deployments or set the appropriate environment variables. For Docker deployments, make sure your `docker run` command or `docker-compose.yml` includes volume mappings for the directories that contain the `.gguf` model files.
## ☁️ Kubernetes
Instructions for setting up Serge on Kubernetes can be found in the [wiki](https://github.com/serge-chat/serge/wiki/Integrating-Serge-in-your-orchestration#kubernetes-example).
@ -59,6 +61,10 @@ Instructions for setting up Serge on Kubernetes can be found in the [wiki](https
|:-------------:|:-------|
| **Alfred** | 40B-1023 |
| **BioMistral** | 7B |
The `scripts/model_loader.py` script lets Serge support multiple model file extensions, including `.gguf`: it checks a model file's extension against the configured list before loading it.
| **Kunoichi** | 7B-GGUF |
| **Code** | 13B, 33B |
| **CodeLLaMA** | 7B, 7B-Instruct, 7B-Python, 13B, 13B-Instruct, 13B-Python, 34B, 34B-Instruct, 34B-Python |
| **Gemma** | 2B, 2B-Instruct, 7B, 7B-Instruct |
@ -93,7 +99,6 @@ Additional models can be requested by opening a GitHub issue. Other models are a
## ⚠️ Memory Usage
LLaMA will crash if you don't have enough available memory for the model:
## 💬 Support
Need help? Join our [Discord](https://discord.gg/62Hc6FEYQH)

View File

@ -8,6 +8,8 @@ image:
repository: ghcr.io/serge-chat/serge
image:
pullPolicy: IfNotPresent
# Model file extensions to be recognized by the application.
modelFileExtensions: ['.bin', '.gguf']
# Overrides the image tag whose default is the chart appVersion.
tag: "main"

View File

@ -12,3 +12,5 @@ services:
# Named volumes declared for this Compose project.
# Ensure the service's volume mappings include the directories that hold
# .gguf model files.
# NOTE(review): the trailing bare `volumes:` key was removed. It repeated this
# top-level key, and a parser that keeps the last duplicate would drop the
# `weights` and `datadb` declarations.
volumes:
  weights:
  datadb:

74
scripts/model_loader.py Normal file
View File

@ -0,0 +1,74 @@
import configparser
import os
from typing import List
class ModelLoader:
    """Validate and load model files whose extension is on an allow-list.

    The allow-list is read from the ``modelFileExtensions`` option of the
    ``[ModelConfig]`` section of an INI file, written as a comma-separated
    string such as ``.bin, .gguf``.
    """

    # Fallback allow-list used when the config file, section, or option is
    # missing, or when the option contains no usable entry.
    DEFAULT_MODEL_EXTENSIONS = ('.bin', '.gguf')

    def __init__(self, config_path: str = 'config.ini'):
        """Create a loader and read its allowed extensions.

        Args:
            config_path: Path of the INI file to read. Defaults to
                ``config.ini`` resolved against the working directory.
        """
        self.model_extensions = self._load_model_extensions(config_path)

    def _load_model_extensions(self, config_path: str = 'config.ini') -> List[str]:
        """Return the normalized list of allowed model file extensions.

        Entries are stripped of surrounding whitespace, lower-cased, given a
        leading dot if they lack one, and de-duplicated in order. Empty
        entries (for example from a trailing comma) are dropped so that files
        without an extension are never accepted by accident.

        Args:
            config_path: Path of the INI file to read.

        Returns:
            The configured extensions, or a copy of
            ``DEFAULT_MODEL_EXTENSIONS`` when nothing usable is configured.
        """
        config = configparser.ConfigParser()
        # read() skips files that do not exist, leaving the parser empty;
        # the fallback below then covers the missing section/option.
        config.read(config_path)
        raw_value = config.get('ModelConfig', 'modelFileExtensions', fallback='')

        extensions: List[str] = []
        for entry in raw_value.split(','):
            ext = entry.strip().lower()
            if not ext:
                continue
            if not ext.startswith('.'):
                ext = '.' + ext
            if ext not in extensions:
                extensions.append(ext)
        return extensions or list(self.DEFAULT_MODEL_EXTENSIONS)

    def validate_model_file(self, file_path: str) -> bool:
        """Return True if *file_path* is an existing file with an allowed extension.

        Args:
            file_path: Path of the candidate model file.

        Returns:
            False when the path does not exist, is not a regular file, or has
            an extension outside ``self.model_extensions``; True otherwise.
            The extension comparison ignores case.
        """
        # isfile() is already False for paths that do not exist.
        if not os.path.isfile(file_path):
            return False
        _, ext = os.path.splitext(file_path)
        # Lower-case both sides so an allow-list assigned by hand with
        # upper-case entries still matches.
        allowed = {candidate.lower() for candidate in self.model_extensions}
        return ext.lower() in allowed

    def load_model(self, file_path: str):
        """Load the model stored at *file_path*.

        Args:
            file_path: Path of the model file to load.

        Raises:
            ValueError: If the file fails ``validate_model_file``.
        """
        if not self.validate_model_file(file_path):
            raise ValueError(f"Model file {file_path} is not valid or supported.")
        # Assuming the application uses a generic load function for models
        # This part would be replaced with the actual model loading logic
        print(f"Loading model from {file_path}")
        # Load the model here
# Unit tests for ModelLoader: file validation and load_model error handling
import unittest
from unittest.mock import patch
class TestModelLoader(unittest.TestCase):
    """Unit tests for ModelLoader's file validation and load_model paths."""

    def setUp(self):
        # Pin the allow-list instead of reading config.ini from the working
        # directory, so the suite gives the same result in any environment.
        with patch.object(ModelLoader, '_load_model_extensions',
                          return_value=['.bin', '.gguf']):
            self.loader = ModelLoader()

    # Stacked @patch decorators inject mocks bottom-up, hence the
    # (mock_isfile, mock_exists) parameter order below.
    @patch('os.path.exists', return_value=True)
    @patch('os.path.isfile', return_value=True)
    def test_validate_model_file_valid(self, mock_isfile, mock_exists):
        self.assertTrue(self.loader.validate_model_file("model.bin"))

    @patch('os.path.exists', return_value=True)
    @patch('os.path.isfile', return_value=True)
    def test_validate_model_file_valid_gguf(self, mock_isfile, mock_exists):
        self.assertTrue(self.loader.validate_model_file("model.gguf"))

    @patch('os.path.exists', return_value=False)
    def test_validate_model_file_nonexistent(self, mock_exists):
        self.assertFalse(self.loader.validate_model_file("nonexistent.bin"))

    @patch('os.path.exists', return_value=True)
    @patch('os.path.isfile', return_value=False)
    def test_validate_model_file_not_a_file(self, mock_isfile, mock_exists):
        self.assertFalse(self.loader.validate_model_file("directory"))

    @patch('os.path.exists', return_value=True)
    @patch('os.path.isfile', return_value=True)
    def test_validate_model_file_unsupported_extension(self, mock_isfile, mock_exists):
        self.assertFalse(self.loader.validate_model_file("unsupported.txt"))

    @patch.object(ModelLoader, 'validate_model_file', return_value=True)
    def test_load_model_valid(self, mock_validate):
        with patch('builtins.print') as mock_print:
            self.loader.load_model("model.bin")
            mock_print.assert_called_with("Loading model from model.bin")

    @patch.object(ModelLoader, 'validate_model_file', return_value=False)
    def test_load_model_invalid(self, mock_validate):
        with self.assertRaises(ValueError):
            self.loader.load_model("invalid.txt")
# Run the unit tests only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()