Compare commits

...

25 Commits

Author SHA1 Message Date
Juan Calderon-Perez
c0322a829f
Merge branch 'main' into gpu-support 2024-03-29 22:48:45 -04:00
Juan Calderon-Perez
38d2245a8e
Merge branch 'main' into gpu-support 2024-02-25 16:03:32 -05:00
Juan Calderon-Perez
9dc8f42793
Merge branch 'main' into gpu-support 2024-02-24 17:01:01 -05:00
Olivier DEBAUCHE
235d65ca12
Update llama-cpp-python (#1138)
* Update serge.env

* Update deploy.sh

Update path

* Update dev.sh

update path

* Update serge.env

* Update serge.env

Bump version of Llama cpp python to v0.2.44
2024-02-18 10:00:49 -05:00
Juan Calderon-Perez
ca84dcc14b
Merge branch 'main' into gpu-support 2024-02-14 00:07:00 -05:00
Juan Calderon-Perez
da4bdefa6b
Merge branch 'main' into gpu-support 2024-02-13 21:54:48 -05:00
Juan Calderon-Perez
b65e7abd10
Merge branch 'main' into gpu-support 2024-02-12 22:55:49 -05:00
Olivier DEBAUCHE
8e35f238c3
Add GPU support (#1056)
* Update dev.sh

* Update deploy.sh

* Update serge.env

---------

Co-authored-by: Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
2024-02-12 22:54:26 -05:00
Juan Calderon-Perez
fe52b4cc80
Merge branch 'main' into gpu-support 2024-02-04 20:01:54 -05:00
Juan Calderon-Perez
8af20835fb
Merge branch 'main' into gpu-support 2024-01-09 22:44:43 -05:00
Juan Calderon-Perez
e00be78178
Merge branch 'main' into gpu-support 2024-01-08 08:13:50 -05:00
Juan Calderon-Perez
85c9892304
Merge branch 'main' into gpu-support 2024-01-04 00:16:09 -05:00
Juan Calderon-Perez
0f3358a7b6
Merge branch 'main' into gpu-support 2023-12-27 00:30:09 -05:00
Juan Calderon-Perez
9701d12fa2
Merge branch 'main' into gpu-support 2023-12-22 09:58:14 -05:00
Juan Calderon-Perez
7523f785dd
Merge branch 'main' into gpu-support 2023-12-21 07:54:39 -05:00
Juan Calderon-Perez
0b125ae892
Merge branch 'main' into gpu-support 2023-12-20 09:01:28 -05:00
Juan Calderon-Perez
c9bffc0ef4
Merge branch 'main' into gpu-support 2023-12-19 08:27:30 -05:00
Juan Calderon-Perez
6f102a6e8f
Merge branch 'main' into gpu-support 2023-12-17 23:47:56 -05:00
Juan Calderon-Perez
a766ce65af
Merge branch 'main' into gpu-support 2023-12-17 22:21:07 -05:00
Juan Calderon-Perez
e05495639d
Merge branch 'main' into gpu-support 2023-12-13 09:33:35 -05:00
Juan Calderon-Perez
cc2022beb3
Merge branch 'main' into gpu-support 2023-12-09 09:49:24 -05:00
Juan Calderon-Perez
c956afcab1
Merge branch 'main' into gpu-support 2023-12-06 20:38:46 -05:00
Juan Calderon-Perez
e6f625acff
Merge branch 'main' into gpu-support 2023-12-05 20:29:46 -05:00
Juan Calderon-Perez
2418ef07f8
Add suffix to CI 2023-12-04 08:18:27 -05:00
Juan Calderon-Perez
2d3d82ac95
Implement support for CUDA 2023-12-04 08:11:17 -05:00
11 changed files with 146 additions and 8 deletions

View File

@ -9,7 +9,7 @@ tests
_releaser
_site
CONTRIBUTING.md
Dockerfile
docker/
docker-compose.yml
docker-compose.dev.yml
.vscode/

1
.gitattributes vendored
View File

@ -1 +0,0 @@
*.sh eol=lf

72
.github/workflows/docker-gpu.yml vendored Normal file
View File

@ -0,0 +1,72 @@
# CI workflow: build and publish the CUDA-enabled Serge image
# (docker/Dockerfile.gpu) to GHCR, tagged with a "-cuda" suffix.
name: Docker (CUDA Support)

on:
  push:
    branches:
      - "main"
    # Skip builds for docs/config-only changes that cannot affect the image.
    paths-ignore:
      - "**.md"
      - LICENSE
      - "docker-compose.yml"
      - "docker-compose.dev.yml"
      - ".github/ISSUE_TEMPLATE/*.yml"
      - ".github/dependabot.yml"
      - ".github/release-drafter.yml"
  pull_request:
    branches:
      - "*"
    # Only run on PRs that touch the GPU image or its entrypoint scripts.
    paths:
      - "docker/Dockerfile.gpu"
      - "scripts/deploy.sh"
      - "scripts/dev.sh"
  workflow_dispatch:
  release:
    types: [published, edited]

jobs:
  build-and-publish-image:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Derive image tags/labels; suffix=-cuda distinguishes this variant
      # from the CPU image published by the sibling workflow.
      - name: Docker metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            ghcr.io/serge-chat/serge
          flavor: |
            suffix=-cuda,onlatest=true
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}
            type=semver,pattern={{major}}.{{minor}}

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # PR builds are verification-only; never push from a pull request.
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and Publish Docker Image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: docker/Dockerfile.gpu
          push: ${{ github.event_name != 'pull_request' }}
          target: release
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

View File

@ -16,8 +16,7 @@ on:
branches:
- "*"
paths:
- "Dockerfile"
- "Dockerfile.dev"
- "docker/Dockerfile"
- "scripts/deploy.sh"
- "scripts/dev.sh"
workflow_dispatch:
@ -61,6 +60,7 @@ jobs:
uses: docker/build-push-action@v5
with:
context: .
file: docker/Dockerfile
push: ${{ github.event_name != 'pull_request' }}
target: release
cache-from: type=gha

View File

@ -3,7 +3,7 @@ services:
restart: on-failure
build:
context: .
dockerfile: Dockerfile.dev
dockerfile: docker/Dockerfile.dev
volumes:
- ./web:/usr/src/app/web/:z
- ./api:/usr/src/app/api/:z

52
docker/Dockerfile.gpu Normal file
View File

@ -0,0 +1,52 @@
# ---------------------------------------
# Base image for redis: only the server/cli binaries are copied out below.
FROM redis:7-bookworm AS redis
# ---------------------------------------
# Build frontend
FROM node:20-bookworm-slim AS frontend
WORKDIR /usr/src/app
# Copy lockfiles first so the `npm ci` layer stays cached until deps change.
COPY ./web/package.json ./web/package-lock.json ./
RUN npm ci
COPY ./web /usr/src/app/web/
# node_modules lives one level up; Node's upward module resolution finds it.
WORKDIR /usr/src/app/web/
RUN npm run build
# ---------------------------------------
# Runtime environment (GPU wheels are selected at runtime by deploy.sh
# via SERGE_GPU_*_SUPPORT in serge.env, so the base stays plain Python).
FROM python:3.11-slim-bookworm AS release
# Set ENV
ENV NODE_ENV='production'
ENV TZ=Etc/UTC
WORKDIR /usr/src/app
# Copy artifacts from the earlier stages and the repo.
COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
COPY ./api /usr/src/app/api
COPY scripts/deploy.sh /usr/src/app/deploy.sh
COPY scripts/serge.env /usr/src/app/serge.env
COPY vendor/requirements.txt /usr/src/app/requirements.txt
# Install api dependencies; apt update+install and cleanup share one layer
# so no stale lists or pip cache are baked into the image (DL3009/DL3042).
RUN apt-get update \
    && apt-get install -y --no-install-recommends dumb-init \
    && pip install --no-cache-dir ./api \
    && pip install --no-cache-dir -r /usr/src/app/requirements.txt \
    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* \
    && chmod 755 /usr/src/app/deploy.sh \
    && chmod 755 /usr/local/bin/redis-server \
    && chmod 755 /usr/local/bin/redis-cli \
    && mkdir -p /etc/redis \
    && mkdir -p /data/db \
    && mkdir -p /usr/src/app/weights \
    && echo "appendonly yes" >> /etc/redis/redis.conf \
    && echo "dir /data/db/" >> /etc/redis/redis.conf
# NOTE(review): container runs as root; adding a non-root USER would be
# safer but needs write-path checks (/data/db, /usr/src/app/weights) first.
EXPOSE 8008
# dumb-init as PID 1 reaps zombies and forwards signals to deploy.sh.
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD ["/bin/bash", "-c", "/usr/src/app/deploy.sh"]

View File

@ -19,14 +19,20 @@ detect_cpu_features() {
echo "basic"
fi
}
# Check if the CPU architecture is aarch64/arm64
if [ "$cpu_arch" = "aarch64" ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://gaby.github.io/arm64-wheels/"
else
# Use @smartappli provided wheels
cpu_feature=$(detect_cpu_features)
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
else
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
fi
fi
echo "Recommended install command for llama-cpp-python: $pip_command"

View File

@ -26,7 +26,14 @@ if [ "$cpu_arch" = "aarch64" ]; then
else
# Use @smartappli provided wheels
cpu_feature=$(detect_cpu_features)
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
else
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
fi
fi
echo "Recommended install command for llama-cpp-python: $pip_command"

View File

@ -1,3 +1,5 @@
# GPU wheel selection for llama-cpp-python: read by scripts/deploy.sh and
# scripts/dev.sh to pick the extra pip index (cu122 / rocm5.6.1 / cpu).
SERGE_GPU_NVIDIA_SUPPORT=false
SERGE_GPU_AMD_SUPPORT=false
# llama-cpp-python version pinned for the pip install in deploy.sh/dev.sh.
LLAMA_PYTHON_VERSION=0.2.50
# Network stack toggles — presumably control listener binding in the
# deploy script; TODO confirm usage (not visible in this diff).
SERGE_ENABLE_IPV4=true
SERGE_ENABLE_IPV6=false