# Contributor: Marian Buschsieweke <marian.buschsieweke@posteo.net>
maintainer="Hugo Osvaldo Barrera <hugo@whynothugo.nl>"
pkgname=llama.cpp
pkgver=0.0.8611
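# Upstream tags releases as bNNNN; strip the 0.0. prefix to map
# pkgver to the upstream tag (e.g. pkgver=0.0.8611 -> tag b8611).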
_buildver=${pkgver#0.0.}
pkgrel=0
pkgdesc="LLM inference in C/C++ (with Vulkan GPU acceleration)"
# armhf, armv7, riscv64, x86: build fails
# ppc64le: internal compiler error
arch="all !armv7 !armhf !ppc64le !riscv64 !x86"
url="https://github.com/ggml-org/llama.cpp"
license="MIT"
pkgusers="llama-server"
pkggroups="llama-server"
makedepends="
cmake
glslang-dev
jq
linux-headers
openblas-dev
openssl-dev
samurai
shaderc
vulkan-loader-dev
"
checkdepends="bash py3-jinja2"
install="$pkgname.pre-install"
# convert_hf_to_gguf: disabled; too many missing depends.
subpackages="
$pkgname-vulkan
$pkgname-dev
$pkgname-extras
$pkgname-openrc
"
source="$pkgname-b$_buildver.tar.gz::https://github.com/ggml-org/llama.cpp/archive/refs/tags/b$_buildver.tar.gz
$pkgname-b$_buildver.json::https://api.github.com/repos/ggml-org/llama.cpp/git/ref/tags/b$_buildver
llama-server.initd
llama-server.confd
0001-Fix-link-failures-in-s390x.patch
0001-Install-libraries-into-LLAMA_LIB_INSTALL_DIR.patch
"
builddir="$srcdir/$pkgname-b$_buildver"
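# Shared libraries and backends live in a private directory; register it
# for library scanning and prefix the sonames so they don't collide with
# a standalone ggml package.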
ldpath="/usr/lib/$pkgname"
sonameprefix="$pkgname:"
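# GGML_CPU_ALL_VARIANTS builds several CPU backends tuned for different
# microarchitectures and picks one at runtime; variant builds are
# unavailable on loongarch64, so it gets a single CPU backend.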
case "$CARCH" in
loongarch64)
_ggml_cpu_all_variants=OFF
;;
*)
_ggml_cpu_all_variants=ON
subpackages="$pkgname-cpu $subpackages"
;;
esac
case "$CARCH" in
riscv64) _lto=OFF ;;
*) _lto=ON ;;
esac
# The bundled ggml and the standalone ggml have diverging APIs.
# llama.cpp won't build with the standalone one.
# Upstream recommends using the vendored one.
build() {
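# Recover the tag's commit hash from the GitHub ref JSON
# (shape: {"object": {"sha": "<commit>", ...}, ...}).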
_commit=$(jq -r '.object.sha' < "$srcdir/$pkgname-b$_buildver.json")
cmake -B build -G Ninja -Wno-dev \
-DBUILD_SHARED_LIBS=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/usr \
-DGGML_ALL_WARNINGS=OFF \
-DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
-DGGML_BACKEND_DIR=/usr/lib/llama.cpp \
-DGGML_BACKEND_DL=ON \
-DGGML_BLAS=ON \
-DGGML_BLAS_VENDOR=OpenBLAS \
-DGGML_BUILD_EXAMPLES=OFF \
-DGGML_CPU_ALL_VARIANTS=$_ggml_cpu_all_variants \
-DGGML_CUDA_FA_ALL_QUANTS=ON \
-DGGML_LIB_INSTALL_DIR=/usr/lib/llama.cpp \
-DLLAMA_LIB_INSTALL_DIR=/usr/lib/llama.cpp \
-DGGML_LTO=$_lto \
-DGGML_NATIVE=OFF \
-DGGML_RPC=ON \
-DLLAMA_BUILD_COMMIT="$_commit" \
-DLLAMA_BUILD_NUMBER="$_buildver" \
-DGGML_VULKAN=ON
cmake --build build
}
check() {
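# ctest -E excludes tests whose names match this regex;
# alternatives are joined with |.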
local skip="test-arg-parser" # test fails
skip="$skip|test-backend-ops" # segfaults
skip="$skip|test-download-model" # requires network
skip="$skip|test-eval-callback-download-model" # requires network
skip="$skip|test-eval-callback" # requires network
skip="$skip|test-state-restore-fragmented" # requires network
skip="$skip|test-thread-safety" # requires network
skip="$skip|test-tokenizers-ggml-vocabs" # fails on CI when network is available
skip="$skip|test-llama-archs" # https://github.com/ggml-org/llama.cpp/issues/20611
skip="$skip|test-opt" # hangs, needs research
case "$CARCH" in
s390x) return ;; # requires big-endian models
esac
ctest --test-dir build -E "$skip"
}
package() {
DESTDIR="$pkgdir" cmake --install build
install -Dm 644 LICENSE "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
install -Dm 755 "$srcdir/llama-server.initd" "$pkgdir/etc/init.d/llama-server"
install -Dm 644 "$srcdir/llama-server.confd" "$pkgdir/etc/conf.d/llama-server"
install -dm 755 -o llama-server -g llama-server "$pkgdir/var/lib/llama-server"
# Don't install tests.
find "$pkgdir/usr/bin/" -name "test-*" -exec rm {} \;
# TODO: missing dependencies for convert_hf_to_gguf
rm "$pkgdir/usr/bin/convert_hf_to_gguf.py"
}
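# With GGML_BACKEND_DL=ON the compute backends are loadable plugins;
# split them into subpackages so e.g. the vulkan backend is only
# installed where vulkan-loader is present.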
cpu() {
install_if="$pkgname=$pkgver-r$pkgrel"
depends=""
amove usr/lib/llama.cpp/libggml-cpu-*.so
}
vulkan() {
install_if="$pkgname=$pkgver-r$pkgrel vulkan-loader"
depends=""
amove usr/lib/llama.cpp/libggml-vulkan.so
}
extras() {
pkgdesc="llama.cpp additional binaries"
amove usr/bin/llama-batched \
usr/bin/llama-batched-bench \
usr/bin/llama-completion \
usr/bin/llama-debug \
usr/bin/llama-diffusion-cli \
usr/bin/llama-embedding \
usr/bin/llama-eval-callback \
usr/bin/llama-finetune \
usr/bin/llama-fit-params \
usr/bin/llama-gen-docs \
usr/bin/llama-gguf \
usr/bin/llama-gguf-hash \
usr/bin/llama-gguf-split \
usr/bin/llama-idle \
usr/bin/llama-imatrix \
usr/bin/llama-lookahead \
usr/bin/llama-lookup \
usr/bin/llama-lookup-create \
usr/bin/llama-lookup-merge \
usr/bin/llama-lookup-stats \
usr/bin/llama-mtmd-cli \
usr/bin/llama-parallel \
usr/bin/llama-passkey \
usr/bin/llama-perplexity \
usr/bin/llama-quantize \
usr/bin/llama-retrieval \
usr/bin/llama-save-load-state \
usr/bin/llama-simple \
usr/bin/llama-speculative \
usr/bin/llama-speculative-simple \
usr/bin/llama-tokenize \
usr/bin/rpc-server
}
convert_hf_to_gguf() {
# FIXME: all except py3-numpy are missing.
# py3-pytorch is a huge effort to package.
depends="
py3-gguf
py3-numpy
py3-pytorch
py3-safetensors
py3-sentencepiece
py3-transformers
"
amove usr/bin/convert_hf_to_gguf.py
}
sha512sums="
b50b25e4df858aef348c2f63b16a49ce6aa471881ab8e5948f39164453536bd337dc07a4c81f6f647e2e26132c28703b861a904d4f7016efc9df90daf28dabe1 llama.cpp-b8611.tar.gz
6cd741c200769fcd462e49b8ba617b90eac8a6507d6add1b94cda342193737907ab80e2426ee9abdc9ace82918d8da0514203ac525e6f4b4df46b41347848ac8 llama.cpp-b8611.json
f267b37021bd67f870538d8ff3ae631192e75e4792dffc0aedc3f0d21a695ca6272468f218b9e2870fb1ff9f9ae6a885dfc5741d27f7add2f5a8078599995f30 llama-server.initd
4165b6c288267b10ad063521b67e8fdfbfc35f2a9959f01fe3aedda214000c385dd646167b5f78e61124ccf9e40233f9fa3ecb9b0dc014efd774d009403f779c llama-server.confd
1280aeb6d3700bd3e4a765dcceb2074ccc47a774dfd3ac876e38d64616050459c1c87ab17783daeaf37f4fff43a18eba55cd47b35a8f1e92679b79c5a637dfaf 0001-Fix-link-failures-in-s390x.patch
6b1a981fb8456c1c10b428b97f8b475b9dd4ccd1f013a54c6565903f9aded2591332e3ce4bf3a161ef253f4ee19ad499b7a43626f16fd5d4e341b63e29bc5a29 0001-Install-libraries-into-LLAMA_LIB_INSTALL_DIR.patch
"