Compare commits
25 Commits
main
...
gpu-suppor
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0322a829f | ||
|
|
38d2245a8e | ||
|
|
9dc8f42793 | ||
|
|
235d65ca12 | ||
|
|
ca84dcc14b | ||
|
|
da4bdefa6b | ||
|
|
b65e7abd10 | ||
|
|
8e35f238c3 | ||
|
|
fe52b4cc80 | ||
|
|
8af20835fb | ||
|
|
e00be78178 | ||
|
|
85c9892304 | ||
|
|
0f3358a7b6 | ||
|
|
9701d12fa2 | ||
|
|
7523f785dd | ||
|
|
0b125ae892 | ||
|
|
c9bffc0ef4 | ||
|
|
6f102a6e8f | ||
|
|
a766ce65af | ||
|
|
e05495639d | ||
|
|
cc2022beb3 | ||
|
|
c956afcab1 | ||
|
|
e6f625acff | ||
|
|
2418ef07f8 | ||
|
|
2d3d82ac95 |
@ -9,7 +9,7 @@ tests
|
|||||||
_releaser
|
_releaser
|
||||||
_site
|
_site
|
||||||
CONTRIBUTING.md
|
CONTRIBUTING.md
|
||||||
Dockerfile
|
docker/
|
||||||
docker-compose.yml
|
docker-compose.yml
|
||||||
docker-compose.dev.yml
|
docker-compose.dev.yml
|
||||||
.vscode/
|
.vscode/
|
||||||
|
|||||||
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -1 +0,0 @@
|
|||||||
*.sh eol=lf
|
|
||||||
72
.github/workflows/docker-gpu.yml
vendored
Normal file
72
.github/workflows/docker-gpu.yml
vendored
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
name: Docker (CUDA Support)
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- "main"
|
||||||
|
paths-ignore:
|
||||||
|
- "**.md"
|
||||||
|
- LICENSE
|
||||||
|
- "docker-compose.yml"
|
||||||
|
- "docker-compose.dev.yml"
|
||||||
|
- ".github/ISSUE_TEMPLATE/*.yml"
|
||||||
|
- ".github/dependabot.yml"
|
||||||
|
- ".github/release-drafter.yml"
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- "*"
|
||||||
|
paths:
|
||||||
|
- "docker/Dockerfile.gpu"
|
||||||
|
- "scripts/deploy.sh"
|
||||||
|
- "scripts/dev.sh"
|
||||||
|
workflow_dispatch:
|
||||||
|
release:
|
||||||
|
types: [published, edited]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-publish-image:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
- name: Docker metadata
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: |
|
||||||
|
ghcr.io/serge-chat/serge
|
||||||
|
flavor: |
|
||||||
|
suffix=-cuda,onlatest=true
|
||||||
|
tags: |
|
||||||
|
type=ref,event=branch
|
||||||
|
type=ref,event=pr
|
||||||
|
type=semver,pattern={{version}}
|
||||||
|
type=semver,pattern={{major}}
|
||||||
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Login to GitHub Container Registry
|
||||||
|
if: github.event_name != 'pull_request'
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ghcr.io
|
||||||
|
username: ${{ github.repository_owner }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Build and Publish Docker Image
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
file: docker/Dockerfile.gpu
|
||||||
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
|
target: release
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
platforms: linux/amd64
|
||||||
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
4
.github/workflows/docker.yml
vendored
4
.github/workflows/docker.yml
vendored
@ -16,8 +16,7 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- "*"
|
- "*"
|
||||||
paths:
|
paths:
|
||||||
- "Dockerfile"
|
- "docker/Dockerfile"
|
||||||
- "Dockerfile.dev"
|
|
||||||
- "scripts/deploy.sh"
|
- "scripts/deploy.sh"
|
||||||
- "scripts/dev.sh"
|
- "scripts/dev.sh"
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
@ -61,6 +60,7 @@ jobs:
|
|||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
|
file: docker/Dockerfile
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
target: release
|
target: release
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
|
|||||||
@ -3,7 +3,7 @@ services:
|
|||||||
restart: on-failure
|
restart: on-failure
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile.dev
|
dockerfile: docker/Dockerfile.dev
|
||||||
volumes:
|
volumes:
|
||||||
- ./web:/usr/src/app/web/:z
|
- ./web:/usr/src/app/web/:z
|
||||||
- ./api:/usr/src/app/api/:z
|
- ./api:/usr/src/app/api/:z
|
||||||
|
|||||||
52
docker/Dockerfile.gpu
Normal file
52
docker/Dockerfile.gpu
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# ---------------------------------------
|
||||||
|
# Base image for redis
|
||||||
|
FROM redis:7-bookworm as redis
|
||||||
|
|
||||||
|
# ---------------------------------------
|
||||||
|
# Build frontend
|
||||||
|
FROM node:20-bookworm-slim as frontend
|
||||||
|
|
||||||
|
WORKDIR /usr/src/app
|
||||||
|
COPY ./web/package.json ./web/package-lock.json ./
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
|
COPY ./web /usr/src/app/web/
|
||||||
|
WORKDIR /usr/src/app/web/
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# ---------------------------------------
|
||||||
|
# Runtime environment
|
||||||
|
FROM python:3.11-slim-bookworm as release
|
||||||
|
|
||||||
|
# Set ENV
|
||||||
|
ENV NODE_ENV='production'
|
||||||
|
ENV TZ=Etc/UTC
|
||||||
|
WORKDIR /usr/src/app
|
||||||
|
|
||||||
|
# Copy artifacts
|
||||||
|
COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
|
||||||
|
COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
|
||||||
|
COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
|
||||||
|
COPY ./api /usr/src/app/api
|
||||||
|
COPY scripts/deploy.sh /usr/src/app/deploy.sh
|
||||||
|
COPY scripts/serge.env /usr/src/app/serge.env
|
||||||
|
COPY vendor/requirements.txt /usr/src/app/requirements.txt
|
||||||
|
|
||||||
|
# Install api dependencies
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends dumb-init \
|
||||||
|
&& pip install --no-cache-dir ./api \
|
||||||
|
&& pip install -r /usr/src/app/requirements.txt \
|
||||||
|
&& apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* \
|
||||||
|
&& chmod 755 /usr/src/app/deploy.sh \
|
||||||
|
&& chmod 755 /usr/local/bin/redis-server \
|
||||||
|
&& chmod 755 /usr/local/bin/redis-cli \
|
||||||
|
&& mkdir -p /etc/redis \
|
||||||
|
&& mkdir -p /data/db \
|
||||||
|
&& mkdir -p /usr/src/app/weights \
|
||||||
|
&& echo "appendonly yes" >> /etc/redis/redis.conf \
|
||||||
|
&& echo "dir /data/db/" >> /etc/redis/redis.conf
|
||||||
|
|
||||||
|
EXPOSE 8008
|
||||||
|
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||||
|
CMD ["/bin/bash", "-c", "/usr/src/app/deploy.sh"]
|
||||||
@ -19,14 +19,20 @@ detect_cpu_features() {
|
|||||||
echo "basic"
|
echo "basic"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check if the CPU architecture is aarch64/arm64
|
# Check if the CPU architecture is aarch64/arm64
|
||||||
if [ "$cpu_arch" = "aarch64" ]; then
|
if [ "$cpu_arch" = "aarch64" ]; then
|
||||||
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://gaby.github.io/arm64-wheels/"
|
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://gaby.github.io/arm64-wheels/"
|
||||||
else
|
else
|
||||||
# Use @smartappli provided wheels
|
# Use @smartappli provided wheels
|
||||||
cpu_feature=$(detect_cpu_features)
|
cpu_feature=$(detect_cpu_features)
|
||||||
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
|
|
||||||
|
if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
|
||||||
|
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
|
||||||
|
elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
|
||||||
|
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
|
||||||
|
else
|
||||||
|
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Recommended install command for llama-cpp-python: $pip_command"
|
echo "Recommended install command for llama-cpp-python: $pip_command"
|
||||||
|
|||||||
@ -26,7 +26,14 @@ if [ "$cpu_arch" = "aarch64" ]; then
|
|||||||
else
|
else
|
||||||
# Use @smartappli provided wheels
|
# Use @smartappli provided wheels
|
||||||
cpu_feature=$(detect_cpu_features)
|
cpu_feature=$(detect_cpu_features)
|
||||||
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
|
|
||||||
|
if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
|
||||||
|
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
|
||||||
|
elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
|
||||||
|
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
|
||||||
|
else
|
||||||
|
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Recommended install command for llama-cpp-python: $pip_command"
|
echo "Recommended install command for llama-cpp-python: $pip_command"
|
||||||
|
|||||||
@ -1,3 +1,5 @@
|
|||||||
|
SERGE_GPU_NVIDIA_SUPPORT=false
|
||||||
|
SERGE_GPU_AMD_SUPPORT=false
|
||||||
LLAMA_PYTHON_VERSION=0.2.50
|
LLAMA_PYTHON_VERSION=0.2.50
|
||||||
SERGE_ENABLE_IPV4=true
|
SERGE_ENABLE_IPV4=true
|
||||||
SERGE_ENABLE_IPV6=false
|
SERGE_ENABLE_IPV6=false
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user