# Contributor: Marian Buschsieweke <marian.buschsieweke@posteo.net>
maintainer="Hugo Osvaldo Barrera <hugo@whynothugo.nl>"
pkgname=llama.cpp
pkgver=0.0.9006
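# Upstream tags plain build numbers (bNNNN) rather than versions; the 0.0.
# prefix turns the build number into a valid Alpine pkgver.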
_buildver=${pkgver#0.0.}
pkgrel=0
pkgdesc="LLM inference in C/C++ (with Vulkan GPU acceleration)"
# armhf, armv7, riscv64, x86: build fails
# ppc64le: internal compiler error
arch="all !armv7 !armhf !ppc64le !riscv64 !x86"
|
|
url="https://github.com/ggml-org/llama.cpp"
|
|
license="MIT"
|
|
pkgusers="llama-server"
|
|
pkggroups="llama-server"
|
|
makedepends="
|
|
cmake
|
|
glslang-dev
|
|
jq
|
|
linux-headers
|
|
openblas-dev
|
|
openssl-dev
|
|
samurai
|
|
shaderc
|
|
spirv-headers
|
|
vulkan-loader-dev
|
|
"
checkdepends="bash py3-jinja2"
install="$pkgname.pre-install"
# convert_hf_to_gguf: disabled; too many missing depends.
subpackages="
|
|
$pkgname-vulkan
|
|
$pkgname-dev
|
|
$pkgname-libs
|
|
$pkgname-extras
|
|
llama-server:server
|
|
llama-server-openrc:server_openrc
|
|
"
source="$pkgname-b$_buildver.tar.gz::https://github.com/ggml-org/llama.cpp/archive/refs/tags/b$_buildver.tar.gz
	$pkgname-b$_buildver.json::https://api.github.com/repos/ggml-org/llama.cpp/git/ref/tags/b$_buildver
	llama-server.initd
	llama-server.confd
	0001-Fix-link-failures-in-s390x.patch
	0002-Install-libraries-into-LLAMA_LIB_INSTALL_DIR.patch
	0003-cmake-add-flag-to-use-system-httplib.patch
	"
builddir="$srcdir/$pkgname-b$_buildver"
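# Libraries are installed into a private directory: ldpath points abuild's
# dependency tracer at it, and sonameprefix namespaces the generated so:
# provides so they cannot clash with other packages shipping the same sonames.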
ldpath="/usr/lib/$pkgname"
sonameprefix="$pkgname:"

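# With GGML_BACKEND_DL, GGML_CPU_ALL_VARIANTS builds one CPU backend per
# SIMD feature level and the best one is picked at runtime; these variants
# land in the -cpu subpackage.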
case "$CARCH" in
|
|
loongarch64)
|
|
_ggml_cpu_all_variants=OFF
|
|
;;
|
|
*)
|
|
_ggml_cpu_all_variants=ON
|
|
subpackages="$pkgname-cpu $subpackages"
|
|
;;
|
|
esac
|
|
|
|
case "$CARCH" in
|
|
riscv64) _lto=OFF ;;
|
|
*) _lto=ON ;;
|
|
esac
|
|
|
|
# The bundled ggml and the standalone ggml have diverging APIs.
# llama.cpp won't build with the standalone one.
# Upstream recommends using the vendored one.

build() {
	_commit=$(jq -r '.object.sha' <"$srcdir/$pkgname-b$_buildver.json")

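	# GGML_NATIVE=OFF avoids tuning for the build machine's CPU so binaries
	# run on any machine of the target arch; GGML_BACKEND_DL builds the ggml
	# backends as runtime-loadable plugins installed under GGML_BACKEND_DIR.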
	cmake -B build -G Ninja -Wno-dev \
		-DBUILD_SHARED_LIBS=ON \
		-DCMAKE_BUILD_TYPE=Release \
		-DCMAKE_INSTALL_PREFIX=/usr \
		-DGGML_ALL_WARNINGS=OFF \
		-DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
		-DGGML_BACKEND_DIR=/usr/lib/llama.cpp \
		-DGGML_BACKEND_DL=ON \
		-DGGML_BLAS=ON \
		-DGGML_BLAS_VENDOR=OpenBLAS \
		-DGGML_BUILD_EXAMPLES=OFF \
		-DGGML_CPU_ALL_VARIANTS=$_ggml_cpu_all_variants \
		-DGGML_CUDA_FA_ALL_QUANTS=ON \
		-DGGML_LIB_INSTALL_DIR=/usr/lib/llama.cpp \
		-DGGML_LTO=$_lto \
		-DGGML_NATIVE=OFF \
		-DGGML_RPC=ON \
		-DGGML_VULKAN=ON \
		-DLLAMA_BUILD_COMMIT="$_commit" \
		-DLLAMA_BUILD_NUMBER="$_buildver" \
		-DLLAMA_LIB_INSTALL_DIR=/usr/lib/llama.cpp
	cmake --build build
}

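# ctest -E takes a single exclusion regex, so the skip list is assembled as
# one long alternation.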
check() {
	local skip="test-arg-parser" # test fails
	skip="$skip|test-backend-ops" # segfaults
	skip="$skip|test-download-model" # requires network
	skip="$skip|test-eval-callback-download-model" # requires network
	skip="$skip|test-eval-callback" # requires network
	skip="$skip|test-state-restore-fragmented" # requires network
	skip="$skip|test-thread-safety" # requires network
	skip="$skip|test-tokenizers-ggml-vocabs" # fails on CI (only when network is available)
	skip="$skip|test-llama-archs" # https://github.com/ggml-org/llama.cpp/issues/20611
	skip="$skip|test-opt" # hangs, needs research

	case "$CARCH" in
	ppc64le) skip="$skip|test-opt" ;; # only rpc backend available
	s390x) return ;; # requires big-endian models
	esac

	ctest --test-dir build -E "$skip"
}

package() {
	DESTDIR="$pkgdir" cmake --install build
	install -Dm 644 LICENSE "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
	install -Dm 755 "$srcdir/llama-server.initd" "$pkgdir/etc/init.d/llama-server"
	install -Dm 644 "$srcdir/llama-server.confd" "$pkgdir/etc/conf.d/llama-server"
	install -dm 755 -o llama-server -g llama-server "$pkgdir/var/lib/llama-server"

	# Don't install tests.
	find "$pkgdir/usr/bin/" -name "test-*" -exec rm {} \;

	# TODO: missing dependencies for convert_hf_to_gguf
	rm "$pkgdir/usr/bin/convert_hf_to_gguf.py"
}

libs() {
	pkgdesc="$pkgdesc (shared libraries)"
	amove usr/lib/llama.cpp
	amove usr/lib/libllama-common.so.*
}

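# install_if pulls a backend in automatically wherever its prerequisites are
# installed, without making it a hard dependency of -libs.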
cpu() {
	install_if="$pkgname-libs=$pkgver-r$pkgrel"
	depends=""
	amove usr/lib/llama.cpp/libggml-cpu-*.so
}

vulkan() {
	install_if="$pkgname-libs=$pkgver-r$pkgrel vulkan-loader"
	depends=""
	amove usr/lib/llama.cpp/libggml-vulkan.so
}

extras() {
	pkgdesc="llama.cpp additional binaries"
	amove usr/bin/llama-batched \
		usr/bin/llama-batched-bench \
		usr/bin/llama-completion \
		usr/bin/llama-debug \
		usr/bin/llama-diffusion-cli \
		usr/bin/llama-embedding \
		usr/bin/llama-eval-callback \
		usr/bin/llama-finetune \
		usr/bin/llama-fit-params \
		usr/bin/llama-gen-docs \
		usr/bin/llama-gguf \
		usr/bin/llama-gguf-hash \
		usr/bin/llama-gguf-split \
		usr/bin/llama-idle \
		usr/bin/llama-imatrix \
		usr/bin/llama-lookahead \
		usr/bin/llama-lookup \
		usr/bin/llama-lookup-create \
		usr/bin/llama-lookup-merge \
		usr/bin/llama-lookup-stats \
		usr/bin/llama-mtmd-cli \
		usr/bin/llama-parallel \
		usr/bin/llama-passkey \
		usr/bin/llama-perplexity \
		usr/bin/llama-quantize \
		usr/bin/llama-retrieval \
		usr/bin/llama-save-load-state \
		usr/bin/llama-simple \
		usr/bin/llama-speculative \
		usr/bin/llama-speculative-simple \
		usr/bin/llama-tokenize \
		usr/bin/rpc-server
}

server() {
	pkgdesc="llama.cpp server"
	amove usr/bin/llama-server
}

server_openrc() {
	pkgdesc="llama.cpp server (OpenRC init scripts)"
	depends="llama-server=$pkgver-r$pkgrel openrc"
	install_if="llama-server=$pkgver-r$pkgrel openrc"
	amove etc/init.d/llama-server
	amove etc/conf.d/llama-server
}

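# Not wired into subpackages yet; kept for when the missing Python
# dependencies below are packaged.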
convert_hf_to_gguf() {
	# FIXME: all except py3-numpy are missing.
	# py3-pytorch is a huge effort to package.
	depends="
		py3-gguf
		py3-numpy
		py3-pytorch
		py3-safetensors
		py3-sentencepiece
		py3-transformers
		"

	amove usr/bin/convert_hf_to_gguf.py
}

sha512sums="
|
|
9a4e21519b069721692ad4927935793d96a93c25e59a50d8436e535e0d393202cf178ea2b236f6e760dcb0073654f87804ecb3feaa7750882e9e280cbc222d56 llama.cpp-b9006.tar.gz
|
|
117e3e60b0f941a5326274976065629137e962ce7db99575aeed2ff66863f4d8be28b78210430d6500ff1f4b1be348056bb871f783d1d2c466846f6e089c3f74 llama.cpp-b9006.json
|
|
3429f39441b96c91d73484219b350a6f0562b2310b9c6a2eb8a774f6cd9839bf07fef9fb5211aa027afa151981b1fb04e76c55bc3f8fc5b422b99c51ade20014 llama-server.initd
|
|
4165b6c288267b10ad063521b67e8fdfbfc35f2a9959f01fe3aedda214000c385dd646167b5f78e61124ccf9e40233f9fa3ecb9b0dc014efd774d009403f779c llama-server.confd
|
|
5bf9c70c018a5e3b41d1457bd5ac491a7f4b711b75c162ec858a10ee0ecae868fe5742bf25e0b9118355b6788f00743b97c605759c0906d31b984b195fd16884 0001-Fix-link-failures-in-s390x.patch
|
|
09cab3d4db89b95954851ebde6172c0401554478ef787288329364e6f3a419c72241c901eb77a8cac489e52a6c649847fcddd09904301b1fd7964d74124e863e 0002-Install-libraries-into-LLAMA_LIB_INSTALL_DIR.patch
|
|
fc7ffcd4b1a5e66e9cf21ef6bbf5f579962bd6873b0ed2ff669f4918f5f3e2b29830d4365c2d2abbc81965c4483968e8a87e1004fddd0a0eb65b423f1982e0da 0003-cmake-add-flag-to-use-system-httplib.patch
|
|
"
|