build_library: Change extraction script to xz and rewrite to be simpler

This version writes fewer temporary files and tries cpio multiple times
for concatenated archives again.

Signed-off-by: James Le Cuirot <jlecuirot@microsoft.com>
This commit is contained in:
James Le Cuirot 2025-04-15 09:40:01 +01:00
parent d1a38bfa94
commit 14398067d5
No known key found for this signature in database
GPG Key ID: 1226415D00DD3137

View File

@ -7,51 +7,35 @@
# This will create one or more out-dir/rootfs-N directories that contain the contents of the initramfs.
set -euo pipefail
# check for unzstd. Will abort the script with an error message if the tool is not present.
unzstd -V >/dev/null
# check for xzcat. Will abort the script with an error message if the tool is not present.
xzcat -V >/dev/null
# Print all arguments as a single message on stderr and abort the script.
# Arguments: $@ - words of the error message (joined with spaces)
# Returns:   never returns; exits the shell with status 1
fail() {
  # printf instead of echo so that a message such as "-n" or "-e ..." is
  # printed literally rather than being interpreted as an echo option.
  printf '%s\n' "${*}" >&2
  exit 1
}
# Stolen from extract-vmlinux and modified.
#
# Scan an image for every occurrence of a header and try to decompress the
# data starting at each occurrence.
# Arguments:
#   $1 - header: byte sequence marking the start of a compressed stream
#        (fed to "tr", so octal escapes such as \265 are usable)
#   $2 - no_idea: placeholder token used by the tr/grep offset trick below
#   $3 - tool: decompressor command; it is run as a single quoted word and
#        reads the candidate data on stdin
#   $4 - image: file to scan
#   $5 - tmp: scratch directory; "${tmp}/out" is used as a temporary file
#   $6 - output_basename: prefix for result files, which are named
#        "<output_basename>-<first word of tool>-at-<pos>"
try_decompress() {
local header="${1}"
local no_idea="${2}"
local tool="${3}"
local image="${4}"
local tmp="${5}"
local output_basename="${6}"
local pos
# First space-separated word of the tool string, used in output file names.
local tool_filename=$(echo "${tool}" | cut -f1 -d' ')
# The obscure use of the "tr" filter is to work around older versions of
# "grep" that report the byte offset of the line instead of the pattern.
# Try to find the header and decompress from here.
for pos in $(tr "${header}\n${no_idea}" "\n${no_idea}=" < "${image}" |
grep --text --byte-offset --only-matching "^${no_idea}")
do
# grep prints "offset:match"; keep only the offset.
pos=${pos%%:*}
# Disable error handling, because we will be potentially
# giving the tool garbage or a valid archive with some garbage
# appended to it. So let the tool extract the valid archive
# and then complain about the garbage at the end, but don't
# fail the script because of it.
set +e; tail "-c+${pos}" "${image}" | "${tool}" >"${tmp}/out" 2>/dev/null; set -e;
# Keep the result only if the tool produced any output at all.
if [ -s "${tmp}/out" ]; then
mv "${tmp}/out" "${output_basename}-${tool_filename}-at-${pos}"
else
rm -f "${tmp}/out"
fi
done
# Print the byte offset of every candidate xz stream header in a file.
# Arguments: $1 - file to scan (treated as binary data)
# Outputs:   one decimal offset per line on stdout, counted from 0 as
#            reported by grep --byte-offset; nothing if no header is found
find_xz_headers() {
  # The xz magic is FD 37 7A 58 5A 00, but a bash string cannot hold a NUL
  # byte, so the pattern grep actually receives is the first five bytes.
  # The extra false positives are harmless to callers that tolerate
  # decompression failures.
  #
  # LC_ALL=C forces byte-wise matching: 0xFD is not valid UTF-8, so matching
  # it under a multibyte locale is not something to rely on.
  LC_ALL=C grep --fixed-strings --text --byte-offset --only-matching \
    -- $'\xFD\x37\x7A\x58\x5A\x00' "$1" | cut -d: -f1
}
# Look for zstd-compressed data in an image and decompress every candidate
# with unzstd, by delegating to try_decompress.
# Arguments:
#   $1 - image: file to scan
#   $2 - tmp: scratch directory handed to try_decompress
#   $3 - output_basename: prefix for the decompressed output files
try_unzstd_decompress() {
local image="${1}"
local tmp="${2}"
local output_basename="${3}"
# '(\265/\375' is the byte sequence 28 B5 2F FD, which should be the zstd
# frame magic number; "xxx" is just the placeholder token for try_decompress.
try_decompress '(\265/\375' xxx unzstd "${image}" "${tmp}" "${output_basename}"
# Decompress the xz stream that starts at a given offset of a file.
# Arguments: $1 - file to read
#            $2 - 0-based byte offset; anything after the last ':' is dropped
# Outputs:   whatever xzcat manages to decompress, on stdout
# Returns:   always 0, even when the data is not valid xz
decompress_at() {
  local src="$1"
  local offset="${2%:*}"
  # tail -c+N counts bytes from 1, hence the +1 on the 0-based offset.
  # Data may not really be a valid xz, so allow for errors.
  tail "-c+$(( ${offset} + 1 ))" "${src}" | xzcat 2>/dev/null || true
}
# Unpack a file as one or more concatenated cpio archives.
# Globals:   out        (read)    - directory receiving the rootfs-N dirs
#            ROOTFS_IDX (written) - index of the next rootfs-N dir to use;
#                                   advanced once per directory that is kept
# Arguments: $1 - candidate file
# Returns:   0, including when the file does not look like a cpio archive
try_extract() {
  local archive="$1"
  local magic dest

  # cpio can do strange things when given garbage, so do a basic check.
  # 070701 is the magic of the "new ASCII" cpio format. NUL bytes are
  # stripped explicitly so bash does not warn about them on binary input;
  # bash would drop them from the substitution anyway.
  magic=$(head -c6 -- "${archive}" | tr -d '\0') || return 0
  [[ ${magic} == 070701 ]] || return 0

  # There may be multiple concatenated archives so try cpio till it fails.
  # The redirection is on the loop, so every cpio invocation reads from the
  # same open file descriptor instead of starting over.
  while cpio --quiet --extract --make-directories \
      --directory="${out}/rootfs-${ROOTFS_IDX}" \
      --nonmatching 'dev/*' 2>/dev/null; do
    ROOTFS_IDX=$(( ROOTFS_IDX + 1 ))
  done < "${archive}"

  # The last (failed) cpio attempt may have left nothing, an empty directory
  # or a partially populated one. Only a directory that still has content is
  # kept and consumes an index; a missing one must not advance ROOTFS_IDX,
  # otherwise a total failure would look like a successful extraction.
  dest="${out}/rootfs-${ROOTFS_IDX}"
  if [[ -d ${dest} ]]; then
    rmdir -- "${dest}" 2>/dev/null || ROOTFS_IDX=$(( ROOTFS_IDX + 1 ))
  fi
}
me="${0##*/}"
@ -65,37 +49,22 @@ if [[ ! -s "${image}" ]]; then
fi
mkdir -p "${out}"
tmp=$(mktemp --directory /tmp/eifv-XXXXXX)
trap "rm -rf ${tmp}" EXIT
tmp_dec="${tmp}/decompress"
mkdir "${tmp_dec}"
fr_prefix="${tmp}/first-round"
tmp=$(mktemp --directory eifv-XXXXXX)
trap 'rm -rf -- "${tmp}"' EXIT
ROOTFS_IDX=0
# Run one decompression round over an image and unpack every cpio archive
# that the round produced.
# Globals:   out        (read)    - directory receiving the rootfs-N dirs
#            ROOTFS_IDX (written) - incremented for every cpio archive found
# Arguments: $1 - image: file to scan for compressed data
#            $2 - tmp_dec: scratch directory for the decompressor
#            $3 - round_prefix: path prefix of the files the round creates
perform_round() {
  local image="${1}"
  local tmp_dec="${2}"
  local round_prefix="${3}"
  # Keep the loop variable out of the global namespace.
  local rnd
  try_unzstd_decompress "${image}" "${tmp_dec}" "${round_prefix}"
  for rnd in "${round_prefix}"*; do
    if [[ $(file --brief "${rnd}") =~ 'cpio archive' ]]; then
      mkdir -p "${out}/rootfs-${ROOTFS_IDX}"
      # On Linux 6.10, the first rootfs is an extra ghost rootfs of 336K, that has a corrupted CPIO
      # so extraction errors are tolerated. The redirect target is quoted:
      # unquoted, a path with whitespace is an "ambiguous redirect" that the
      # "|| true" would silently hide.
      cpio --quiet --extract --make-directories --directory="${out}/rootfs-${ROOTFS_IDX}" --nonmatching 'dev/*' < "${rnd}" || true
      ROOTFS_IDX=$(( ROOTFS_IDX + 1 ))
    fi
  done
}
shopt -s nullglob
perform_round "${image}" "${tmp_dec}" "${fr_prefix}"
for fr in "${fr_prefix}"*; do
fr_files="${fr}-files"
fr_dec="${fr_files}/decompress"
mkdir -p "${fr_dec}"
sr_prefix="${fr_files}/second-round"
perform_round "${fr}" "${fr_dec}" "${sr_prefix}"
# arm64 kernels are not compressed, so try decompressing once.
# Other kernels are compressed, so also try decompressing twice.
# OFF1 walks over every candidate xz header offset found in the image.
for OFF1 in $(find_xz_headers "${image}")
do
# First pass: the result may already be a cpio archive, or it may be a
# decompressed kernel that itself embeds further xz streams.
decompress_at "${image}" "${OFF1}" > "${tmp}/initrd.maybe_cpio_or_elf"
try_extract "${tmp}/initrd.maybe_cpio_or_elf"
# Second pass: scan the first-pass output for nested xz headers (OFF2).
for OFF2 in $(find_xz_headers "${tmp}/initrd.maybe_cpio_or_elf")
do
decompress_at "${tmp}/initrd.maybe_cpio_or_elf" "${OFF2}" > "${tmp}/initrd.maybe_cpio"
try_extract "${tmp}/initrd.maybe_cpio"
done
done
if [[ ${ROOTFS_IDX} -eq 0 ]]; then