# Contributor: Marian Buschsieweke <marian.buschsieweke@posteo.net>
maintainer="Hugo Osvaldo Barrera <hugo@whynothugo.nl>"
pkgname=llama.cpp
pkgver=0.0.9006
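# Upstream tags plain build numbers (bNNNN) rather than versions; the 0.0.
# prefix turns the build number into a valid Alpine pkgver.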
_buildver=${pkgver#0.0.}
pkgrel=0
pkgdesc="LLM inference in C/C++ (with Vulkan GPU acceleration)"
# armhf, armv7, riscv64, x86: build fails
# ppc64le: internal compiler error
arch="all !armv7 !armhf !ppc64le !riscv64 !x86"
|
|
url="https://github.com/ggml-org/llama.cpp"
|
|
license="MIT"
|
|
pkgusers="llama-server"
|
|
pkggroups="llama-server"
|
|
makedepends="
|
|
cmake
|
|
glslang-dev
|
|
jq
|
|
linux-headers
|
|
openblas-dev
|
|
openssl-dev
|
|
samurai
|
|
shaderc
|
|
spirv-headers
|
|
vulkan-loader-dev
|
|
"
checkdepends="bash py3-jinja2"
install="$pkgname.pre-install"
# convert_hf_to_gguf: disabled; too many missing depends.
subpackages="
|
|
$pkgname-vulkan
|
|
$pkgname-dev
|
|
$pkgname-libs
|
|
$pkgname-extras
|
|
llama-server:server
|
|
llama-server-openrc:server_openrc
|
|
"
source="$pkgname-b$_buildver.tar.gz::https://github.com/ggml-org/llama.cpp/archive/refs/tags/b$_buildver.tar.gz
	$pkgname-b$_buildver.json::https://api.github.com/repos/ggml-org/llama.cpp/git/ref/tags/b$_buildver
	llama-server.initd
	llama-server.confd
	0001-Fix-link-failures-in-s390x.patch
	0002-Install-libraries-into-LLAMA_LIB_INSTALL_DIR.patch
	0003-cmake-add-flag-to-use-system-httplib.patch
	"
builddir="$srcdir/$pkgname-b$_buildver"
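# Libraries are installed into a private directory: ldpath points abuild's
# dependency tracer at it, and sonameprefix namespaces the generated so:
# provides so they cannot clash with other packages shipping the same sonames.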
ldpath="/usr/lib/$pkgname"
sonameprefix="$pkgname:"

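# With GGML_BACKEND_DL, GGML_CPU_ALL_VARIANTS builds one CPU backend per
# SIMD feature level and the best one is picked at runtime; these variants
# land in the -cpu subpackage.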
case "$CARCH" in
|
|
loongarch64)
|
|
_ggml_cpu_all_variants=OFF
|
|
;;
|
|
*)
|
|
_ggml_cpu_all_variants=ON
|
|
subpackages="$pkgname-cpu $subpackages"
|
|
;;
|
|
esac
|
|
|
|
case "$CARCH" in
|
|
riscv64) _lto=OFF ;;
|
|
*) _lto=ON ;;
|
|
esac
|
|
|
|
# The bundled ggml and the standalone ggml have diverging APIs.
# llama.cpp won't build with the standalone one.
# Upstream recommends using the vendored one.

build() {
	_commit=$(jq -r '.object.sha' <"$srcdir/$pkgname-b$_buildver.json")

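	# GGML_NATIVE=OFF avoids tuning for the build machine's CPU so binaries
	# run on any machine of the target arch; GGML_BACKEND_DL builds the ggml
	# backends as runtime-loadable plugins installed under GGML_BACKEND_DIR.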
	cmake -B build -G Ninja -Wno-dev \
		-DBUILD_SHARED_LIBS=ON \
		-DCMAKE_BUILD_TYPE=Release \
		-DCMAKE_INSTALL_PREFIX=/usr \
		-DGGML_ALL_WARNINGS=OFF \
		-DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
		-DGGML_BACKEND_DIR=/usr/lib/llama.cpp \
		-DGGML_BACKEND_DL=ON \
		-DGGML_BLAS=ON \
		-DGGML_BLAS_VENDOR=OpenBLAS \
		-DGGML_BUILD_EXAMPLES=OFF \
		-DGGML_CPU_ALL_VARIANTS=$_ggml_cpu_all_variants \
		-DGGML_CUDA_FA_ALL_QUANTS=ON \
		-DGGML_LIB_INSTALL_DIR=/usr/lib/llama.cpp \
		-DGGML_LTO=$_lto \
		-DGGML_NATIVE=OFF \
		-DGGML_RPC=ON \
		-DGGML_VULKAN=ON \
		-DLLAMA_BUILD_COMMIT="$_commit" \
		-DLLAMA_BUILD_NUMBER="$_buildver" \
		-DLLAMA_LIB_INSTALL_DIR=/usr/lib/llama.cpp
	cmake --build build
}

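# ctest -E takes a single exclusion regex, so the skip list is assembled as
# one long alternation.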
check() {
	local skip="test-arg-parser" # test fails
	skip="$skip|test-backend-ops" # segfaults
	skip="$skip|test-download-model" # requires network
	skip="$skip|test-eval-callback-download-model" # requires network
	skip="$skip|test-eval-callback" # requires network
	skip="$skip|test-state-restore-fragmented" # requires network
	skip="$skip|test-thread-safety" # requires network
	skip="$skip|test-tokenizers-ggml-vocabs" # fails on CI (only when network is available)
	skip="$skip|test-llama-archs" # https://github.com/ggml-org/llama.cpp/issues/20611
	skip="$skip|test-opt" # hangs, needs research

	case "$CARCH" in
	ppc64le) skip="$skip|test-opt" ;; # only rpc backend available
	s390x) return ;; # requires big-endian models
	esac

	ctest --test-dir build -E "$skip"
}

package() {
	DESTDIR="$pkgdir" cmake --install build
	install -Dm 644 LICENSE "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
	install -Dm 755 "$srcdir/llama-server.initd" "$pkgdir/etc/init.d/llama-server"
	install -Dm 644 "$srcdir/llama-server.confd" "$pkgdir/etc/conf.d/llama-server"
	install -dm 755 -o llama-server -g llama-server "$pkgdir/var/lib/llama-server"

	# Don't install tests.
	find "$pkgdir/usr/bin/" -name "test-*" -exec rm {} \;

	# TODO: missing dependencies for convert_hf_to_gguf
	rm "$pkgdir/usr/bin/convert_hf_to_gguf.py"
}

libs() {
	pkgdesc="$pkgdesc (shared libraries)"
	amove usr/lib/llama.cpp
	amove usr/lib/libllama-common.so.*
}

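# install_if pulls a backend in automatically wherever its prerequisites are
# installed, without making it a hard dependency of -libs.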
cpu() {
	install_if="$pkgname-libs=$pkgver-r$pkgrel"
	depends=""
	amove usr/lib/llama.cpp/libggml-cpu-*.so
}

vulkan() {
	install_if="$pkgname-libs=$pkgver-r$pkgrel vulkan-loader"
	depends=""
	amove usr/lib/llama.cpp/libggml-vulkan.so
}

extras() {
	pkgdesc="llama.cpp additional binaries"
	amove usr/bin/llama-batched \
		usr/bin/llama-batched-bench \
		usr/bin/llama-completion \
		usr/bin/llama-debug \
		usr/bin/llama-diffusion-cli \
		usr/bin/llama-embedding \
		usr/bin/llama-eval-callback \
		usr/bin/llama-finetune \
		usr/bin/llama-fit-params \
		usr/bin/llama-gen-docs \
		usr/bin/llama-gguf \
		usr/bin/llama-gguf-hash \
		usr/bin/llama-gguf-split \
		usr/bin/llama-idle \
		usr/bin/llama-imatrix \
		usr/bin/llama-lookahead \
		usr/bin/llama-lookup \
		usr/bin/llama-lookup-create \
		usr/bin/llama-lookup-merge \
		usr/bin/llama-lookup-stats \
		usr/bin/llama-mtmd-cli \
		usr/bin/llama-parallel \
		usr/bin/llama-passkey \
		usr/bin/llama-perplexity \
		usr/bin/llama-quantize \
		usr/bin/llama-retrieval \
		usr/bin/llama-save-load-state \
		usr/bin/llama-simple \
		usr/bin/llama-speculative \
		usr/bin/llama-speculative-simple \
		usr/bin/llama-tokenize \
		usr/bin/rpc-server
}

server() {
	pkgdesc="llama.cpp server"
	amove usr/bin/llama-server
}

server_openrc() {
	pkgdesc="llama.cpp server (OpenRC init scripts)"
	depends="llama-server=$pkgver-r$pkgrel openrc"
	install_if="llama-server=$pkgver-r$pkgrel openrc"
	amove etc/init.d/llama-server
	amove etc/conf.d/llama-server
}

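# Not wired into subpackages yet; kept for when the missing Python
# dependencies below are packaged.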
convert_hf_to_gguf() {
	# FIXME: all except py3-numpy are missing.
	# py3-pytorch is a huge effort to package.
	depends="
		py3-gguf
		py3-numpy
		py3-pytorch
		py3-safetensors
		py3-sentencepiece
		py3-transformers
		"

	amove usr/bin/convert_hf_to_gguf.py
}

sha512sums="
|
|
9a4e21519b069721692ad4927935793d96a93c25e59a50d8436e535e0d393202cf178ea2b236f6e760dcb0073654f87804ecb3feaa7750882e9e280cbc222d56 llama.cpp-b9006.tar.gz
|
|
117e3e60b0f941a5326274976065629137e962ce7db99575aeed2ff66863f4d8be28b78210430d6500ff1f4b1be348056bb871f783d1d2c466846f6e089c3f74 llama.cpp-b9006.json
|
|
3429f39441b96c91d73484219b350a6f0562b2310b9c6a2eb8a774f6cd9839bf07fef9fb5211aa027afa151981b1fb04e76c55bc3f8fc5b422b99c51ade20014 llama-server.initd
|
|
4165b6c288267b10ad063521b67e8fdfbfc35f2a9959f01fe3aedda214000c385dd646167b5f78e61124ccf9e40233f9fa3ecb9b0dc014efd774d009403f779c llama-server.confd
|
|
5bf9c70c018a5e3b41d1457bd5ac491a7f4b711b75c162ec858a10ee0ecae868fe5742bf25e0b9118355b6788f00743b97c605759c0906d31b984b195fd16884 0001-Fix-link-failures-in-s390x.patch
|
|
09cab3d4db89b95954851ebde6172c0401554478ef787288329364e6f3a419c72241c901eb77a8cac489e52a6c649847fcddd09904301b1fd7964d74124e863e 0002-Install-libraries-into-LLAMA_LIB_INSTALL_DIR.patch
|
|
fc7ffcd4b1a5e66e9cf21ef6bbf5f579962bd6873b0ed2ff669f4918f5f3e2b29830d4365c2d2abbc81965c4483968e8a87e1004fddd0a0eb65b423f1982e0da 0003-cmake-add-flag-to-use-system-httplib.patch
|
|
"
|