armbian_build/lib/tools/info/output-gha-matrix.py
#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2023 Ricardo Pardini <ricardo@pardini.net>
# This file is a part of the Armbian Build Framework https://github.com/armbian/build/
#
import json
import logging
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from common import armbian_utils
from common import gha

# Prepare logging
armbian_utils.setup_logging()
log: logging.Logger = logging.getLogger("output-gha-matrix")

def resolve_gha_runner_tags_via_pipeline_gha_config(inputs: dict, artifact_name: str, artifact_arch: str):
    log.debug(f"Resolving GHA runner tags for artifact/image '{artifact_name}' '{artifact_arch}'")
    # if there is no config, default to "ubuntu-latest" as a last resort
    ret = "ubuntu-latest"
    if "pipeline" not in inputs:
        log.warning(f"No 'pipeline' config in input, defaulting to '{ret}'")
        return ret
    pipeline = inputs["pipeline"]
    if "gha" not in pipeline:
        log.warning(f"No 'gha' config in input.pipeline, defaulting to '{ret}'")
        return ret
    gha_config = pipeline["gha"]  # local name avoids shadowing the imported 'gha' module
    if (gha_config is None) or ("runners" not in gha_config):
        log.warning(f"No 'runners' config in input.pipeline.gha, defaulting to '{ret}'")
        return ret
    runners = gha_config["runners"]
    if "default" in runners:
        ret = runners["default"]
        log.debug(f"Found 'default' config in input.pipeline.gha.runners, defaulting to '{ret}'")
    # First, 'by-name'.
    if "by-name" in runners:
        by_names = runners["by-name"]
        if artifact_name in by_names:
            ret = by_names[artifact_name]
            log.debug(f"Found 'by-name' value '{artifact_name}' config in input.pipeline.gha.runners, using '{ret}'")
    # Then 'by-name-and-arch', which is more specific and wins over 'by-name'.
    artifact_name_and_arch = f"{artifact_name}{f'-{artifact_arch}' if artifact_arch is not None else ''}"
    if "by-name-and-arch" in runners:
        by_names_and_archs = runners["by-name-and-arch"]
        if artifact_name_and_arch in by_names_and_archs:
            ret = by_names_and_archs[artifact_name_and_arch]
            log.debug(f"Found 'by-name-and-arch' value '{artifact_name_and_arch}' config in input.pipeline.gha.runners, using '{ret}'")
    log.debug(f"Resolved GHA runs_on for name:'{artifact_name}' arch:'{artifact_arch}' to runs_on:'{ret}'")
    return ret
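
# A minimal sketch of the pipeline config shape the resolver above walks (hypothetical runner
# tags, inferred from its lookups; not copied from any actual armbian config):
#   {"pipeline": {"gha": {"runners": {
#       "default": "ubuntu-latest",
#       "by-name": {"kernel": "big-x86-runner"},
#       "by-name-and-arch": {"kernel-arm64": "arm64-self-hosted"},
#   }}}}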

def generate_matrix_images(info) -> list[dict]:
    # one matrix entry per image
    matrix = []
    for image_id in info["images"]:
        image = info["images"][image_id]
        if armbian_utils.get_from_env("IMAGES_ONLY_OUTDATED_ARTIFACTS") == "yes":
            log.info(f"IMAGES_ONLY_OUTDATED_ARTIFACTS is set: outdated artifacts: {image['outdated_artifacts_count']} for image {image_id}")
            skip = image["outdated_artifacts_count"] == 0
            if skip:
                log.warning(f"Skipping image {image_id} because it has no outdated artifacts")
                continue
        if armbian_utils.get_from_env("SKIP_IMAGES") == "yes":
            log.warning(f"Skipping image {image_id} because SKIP_IMAGES=yes")
            continue
        desc = f"{image['image_file_id']} {image_id}"
        inputs = image['in']
        image_arch = image['out']['ARCH']
        runs_on = resolve_gha_runner_tags_via_pipeline_gha_config(inputs, "image", image_arch)
        cmds = (armbian_utils.map_to_armbian_params(inputs["vars"], True) + inputs["configs"])  # an image build is the "build" command, omitted here
        invocation = " ".join(cmds)
        item = {"desc": desc, "runs_on": runs_on, "invocation": invocation}
        matrix.append(item)
    return matrix
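
# Each resulting matrix item is shaped like this (illustrative values only):
#   {"desc": "<image_file_id> <image_id>", "runs_on": "ubuntu-latest", "invocation": "BOARD=... BRANCH=... ..."}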

def generate_matrix_artifacts(info):
    # one matrix entry per artifact
    matrix = []
    for artifact_id in info["artifacts"]:
        artifact = info["artifacts"][artifact_id]
        skip = bool(artifact["oci"]["up-to-date"])  # skip artifacts already up-to-date in the OCI registry
        if skip:
            continue
        artifact_name = artifact['in']['artifact_name']
        desc = f"{artifact['out']['artifact_name']}={artifact['out']['artifact_version']}"
        inputs = artifact['in']['original_inputs']
        artifact_arch = None
        # Try to get the arch via the inputs to the artifact...
        if "inputs" in artifact['in']:
            if "ARCH" in artifact['in']['inputs']:
                artifact_arch = artifact['in']['inputs']['ARCH']
        runs_on = resolve_gha_runner_tags_via_pipeline_gha_config(inputs, artifact_name, artifact_arch)
        cmds = (["artifact"] + armbian_utils.map_to_armbian_params(inputs["vars"], True) + inputs["configs"])
        invocation = " ".join(cmds)
        item = {"desc": desc, "runs_on": runs_on, "invocation": invocation}
        matrix.append(item)
    return matrix
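
# Usage sketch, inferred from the argv handling below (not an official CLI reference):
#   output-gha-matrix.py <artifacts|images> <path/to/info.json> [num_chunks]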

# generate images or artifacts?
type_gen = sys.argv[1]

# read the outdated artifacts+images info JSON file passed as the second argument
with open(sys.argv[2]) as f:
    info = json.load(f)

matrix = None
if type_gen == "artifacts":
    matrix = generate_matrix_artifacts(info)
elif type_gen == "images":
    matrix = generate_matrix_images(info)
else:
    log.error(f"Unknown type: {type_gen}")
    sys.exit(1)

# The third argument, if present, is the number of chunks wanted.
ideal_chunk_size = 150
max_chunk_size = 250

# check if sys.argv[3] exists...
if len(sys.argv) >= 4:
    num_chunks = int(sys.argv[3])
else:
    log.warning(f"Number of chunks not specified. Calculating automatically; matrix: {len(matrix)}, ideal chunk size: {ideal_chunk_size}.")
    # derive num_chunks by dividing the matrix size by the ideal chunk size and adding one;
    # note that an exact multiple of ideal_chunk_size thus yields one extra, empty chunk (padded with a fake entry below).
    num_chunks = int(len(matrix) / ideal_chunk_size) + 1
log.warning(f"Number of chunks: {num_chunks}")

matrix_hard_limit = 17 * 30  # @TODO: maybe 17*50 later

# if over the hard limit, just slice to the limit and warn about the lost jobs
if len(matrix) > matrix_hard_limit:
    log.warning(f"Matrix size is over the hard limit of {matrix_hard_limit}, slicing to that limit. Matrix is incomplete.")
    matrix = matrix[:matrix_hard_limit]

# distribute the matrix items round-robin across the chunks, keeping every chunk about the same size
chunks = [[] for _ in range(num_chunks)]
for i, item in enumerate(matrix):
    chunks[i % num_chunks].append(item)
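# For example (hypothetical), 5 items across num_chunks=2 land as:
#   chunks[0] = [item0, item2, item4] and chunks[1] = [item1, item3]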

# ensure chunks are not too big, pad empty ones, and set per-chunk GHA outputs
for i, chunk in enumerate(chunks):
    if len(chunk) > ideal_chunk_size:
        log.warning(f"Chunk '{i + 1}' is bigger than ideal: {len(chunk)}")
    if len(chunk) > max_chunk_size:
        log.error(f"Chunk '{i + 1}' is too big: {len(chunk)}")
        sys.exit(1)
    # For the full matrix, we can't have empty chunks; use a "really" field to mark a fake entry added just to make it non-empty.
    if len(chunk) == 0:
        log.warning(f"Chunk '{i + 1}' for '{type_gen}' is empty, adding fake invocation.")
        chunks[i] = [
            {"desc": "Fake matrix element so matrix is not empty", "runs_on": "ubuntu-latest", "invocation": "none", "really": "no",
             "shost": "no", "fdepth": "1"}
        ]
    else:
        for item in chunk:
            item["really"] = "yes"
            # For each item in the chunk, check whether it will run on a GitHub-hosted runner or a self-hosted one, and set
            # matrix variables accordingly: shost (yes/no) and, more specifically, fdepth, which is "0" for self-hosted and
            # "1" for GitHub-hosted. The reasoning: non-shallow git clones are much faster on self-hosted runners, but much
            # slower on GitHub-hosted ones, so use shallow clones on GitHub-hosted runners only.
            if item["runs_on"] == "ubuntu-latest":
                item["shost"] = "no"
                item["fdepth"] = "1"  # use a string; "1" is shallow, "0" is full
            else:
                item["shost"] = "yes"
                item["fdepth"] = "0"  # use a string; "1" is shallow, "0" is full
    # Directly set outputs for _each_ GHA chunk here; dump chunks[i], not the loop variable, so a padded fake entry is included.
    gha.set_gha_output(f"{type_gen}-chunk-json-{i + 1}", json.dumps({"include": chunks[i]}))
    # An output used to test for an empty matrix; deliberately based on the real (pre-padding) content.
    gha.set_gha_output(f"{type_gen}-chunk-not-empty-{i + 1}", "yes" if len(chunk) > 0 else "no")
    gha.set_gha_output(f"{type_gen}-chunk-size-{i + 1}", len(chunk))

# massage the chunks so they're objects with an "include" key, the way GHA likes it.
all_chunks = {}
for i, chunk in enumerate(chunks):
    log.info(f"Chunk {i + 1} has {len(chunk)} elements.")
    all_chunks[f"chunk{i + 1}"] = {"include": chunk}

print(json.dumps(all_chunks))