From 65f583ce429aa61f709a5194346e18009afda582 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Thu, 5 Mar 2026 21:29:51 -0800 Subject: [PATCH] allow specific precompiled images to be rebuilt Signed-off-by: Rahul Sharma --- .github/workflows/precompiled.yaml | 54 ++++++++++++++++++++++--- scripts/precompiled.sh | 26 ++++++++++-- tests/scripts/ci-precompiled-helpers.sh | 17 ++++++-- tests/scripts/findkernelversion.sh | 38 +++++++++++++---- 4 files changed, 115 insertions(+), 20 deletions(-) diff --git a/.github/workflows/precompiled.yaml b/.github/workflows/precompiled.yaml index ddf17633c..424c7e316 100644 --- a/.github/workflows/precompiled.yaml +++ b/.github/workflows/precompiled.yaml @@ -19,6 +19,28 @@ on: schedule: - cron: '00 09 * * *' workflow_dispatch: + inputs: + driver_branch: + description: 'Driver branch to build (e.g. 535). Leave empty for all.' + required: false + default: '' + kernel_flavor: + description: 'Kernel flavor to build (e.g. azure). Leave empty for all.' + required: false + default: '' + dist: + description: 'Distribution to build (e.g. ubuntu22.04). Leave empty for all.' + required: false + default: '' + lts_kernel: + description: 'LTS kernel series to build (e.g. 6.8). Leave empty for all.' + required: false + default: '' + force_rebuild: + description: 'Force rebuild and re-publish even if the image tag already exists' + required: false + type: boolean + default: false jobs: set-driver-version-matrix: @@ -35,22 +57,38 @@ jobs: id: extract_driver_branch run: | # get driver_branch - DRIVER_BRANCH=("535" "570" "580") + if [ -n "${{ inputs.driver_branch }}" ]; then + DRIVER_BRANCH=("${{ inputs.driver_branch }}") + else + DRIVER_BRANCH=("535" "570" "580") + fi driver_branch_json=$(printf '%s\n' "${DRIVER_BRANCH[@]}" | jq -R . | jq -cs .) echo "driver_branch=$driver_branch_json" >> $GITHUB_OUTPUT # get kernel flavors - KERNEL_FLAVORS=("aws" "azure" "azure-fde" "generic" "nvidia" "oracle") + if [ -n "${{ inputs.kernel_flavor }}" ]; then + KERNEL_FLAVORS=("${{ inputs.kernel_flavor }}") + else + KERNEL_FLAVORS=("aws" "azure" "azure-fde" "generic" "nvidia" "oracle") + fi kernel_flavors_json=$(printf '%s\n' "${KERNEL_FLAVORS[@]}" | jq -R . | jq -cs .) echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT # get ubuntu distributions - DIST=("ubuntu22.04" "ubuntu24.04") + if [ -n "${{ inputs.dist }}" ]; then + DIST=("${{ inputs.dist }}") + else + DIST=("ubuntu22.04" "ubuntu24.04") + fi dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .) echo "dist=$dist_json" >> $GITHUB_OUTPUT # LTS_KERNEL setup - LTS_KERNEL=("5.15" "6.8") + if [ -n "${{ inputs.lts_kernel }}" ]; then + LTS_KERNEL=("${{ inputs.lts_kernel }}") + else + LTS_KERNEL=("5.15" "6.8") + fi lts_kernel_json=$(printf '%s\n' "${LTS_KERNEL[@]}" | jq -R . | jq -cs .) echo "lts_kernel=$lts_kernel_json" >> $GITHUB_OUTPUT @@ -200,6 +238,7 @@ jobs: DIST: ${{ matrix.dist }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} LTS_KERNEL: ${{ matrix.lts_kernel }} + FORCE_REBUILD: ${{ inputs.force_rebuild || 'false' }} run: | kernel_flavors_json='${{ needs.set-driver-version-matrix.outputs.kernel_flavors }}' KERNEL_FLAVORS=($(echo "$kernel_flavors_json" | jq -r '.[]')) @@ -213,7 +252,12 @@ jobs: done)) fi source ./tests/scripts/ci-precompiled-helpers.sh - KERNEL_VERSIONS=($(get_kernel_versions_to_test KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST $LTS_KERNEL)) + KERNEL_VERSIONS=($(get_kernel_versions_to_test KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST $LTS_KERNEL $FORCE_REBUILD)) + rc=$? + if [[ $rc -ne 0 ]]; then + echo "registry connectivity error while determining kernel versions to test" >&2 + exit 1 + fi if [ -z "$KERNEL_VERSIONS" ]; then # no new kernel release echo "Skipping e2e tests" diff --git a/scripts/precompiled.sh b/scripts/precompiled.sh index 13311ca11..5f181dd2e 100755 --- a/scripts/precompiled.sh +++ b/scripts/precompiled.sh @@ -54,18 +54,36 @@ function pushBaseImage(){ make IMAGE_NAME=${IMAGE_NAME} DRIVER_BRANCH=${DRIVER_BRANCH} KERNEL_FLAVOR=${KERNEL_FLAVOR} push-base-${BASE_TARGET} } +function imageDigest(){ + regctl image digest --list "$1" 2>/dev/null +} + +function manifestsMatch(){ + local src_digest + local dst_digest + src_digest=$(imageDigest "$1") || { echo "failed to get digest for $1 - assuming manifests differ"; return 1; } + dst_digest=$(imageDigest "$2") || { echo "failed to get digest for $2 - assuming manifests differ"; return 1; } + [ "$src_digest" = "$dst_digest" ] +} + function pushImage(){ # check if image exists in output registry # note: DIST is in the form "signed_", so we drop the '*_' prefix # to extract the distribution string. - local out_image=${OUT_IMAGE_NAME}:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST##*_} + local tag=${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST##*_} + local out_image=${OUT_IMAGE_NAME}:${tag} + local in_image=${IMAGE_NAME}:${tag} if imageExists "$out_image"; then echo "image tag already exists in output registry - $out_image" if [ "$FORCE_PUSH" != "true" ]; then - echo "exiting" - return 0 + if manifestsMatch "$in_image" "$out_image"; then + echo "source and destination manifests match - skipping push" + return 0 + fi + echo "source and destination manifests differ - pushing updated image" + else + echo "overwriting image tag - $out_image" fi - echo "overwriting image tag - $out_image" fi # push the image make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${DRIVER_BRANCH} push-${DIST} diff --git a/tests/scripts/ci-precompiled-helpers.sh b/tests/scripts/ci-precompiled-helpers.sh index 3050049db..77117a5ea 100644 --- a/tests/scripts/ci-precompiled-helpers.sh +++ b/tests/scripts/ci-precompiled-helpers.sh @@ -1,6 +1,6 @@ get_kernel_versions_to_test() { - if [[ "$#" -ne 4 ]]; then - echo " Error:$0 must be called with KERNEL_FLAVORS DRIVER_BRANCHES DIST LTS_KERNEL" >&2 + if [[ "$#" -lt 4 ]]; then + echo " Error:$0 must be called with KERNEL_FLAVORS DRIVER_BRANCHES DIST LTS_KERNEL [FORCE_REBUILD]" >&2 exit 1 fi @@ -8,11 +8,19 @@ get_kernel_versions_to_test() { local -a DRIVER_BRANCHES=("${!2}") local DIST="$3" local LTS_KERNEL="$4" + local FORCE_REBUILD="${5:-false}" kernel_versions=() + local had_errors=false for kernel_flavor in "${KERNEL_FLAVORS[@]}"; do for DRIVER_BRANCH in "${DRIVER_BRANCHES[@]}"; do - source ./tests/scripts/findkernelversion.sh "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" "$LTS_KERNEL" >&2 + regctl_error=false + source ./tests/scripts/findkernelversion.sh "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" "$LTS_KERNEL" "$FORCE_REBUILD" >&2 + if [[ "$regctl_error" == true ]]; then + echo "skipping ${kernel_flavor}/${DRIVER_BRANCH} due to registry connectivity error" >&2 + had_errors=true + break + fi if [[ "$should_continue" == true ]]; then break fi @@ -28,4 +36,7 @@ get_kernel_versions_to_test() { kernel_versions[$i]="${kernel_versions[$i]}-$DIST" done echo "${kernel_versions[@]}" + if [[ "$had_errors" == true ]]; then + return 1 + fi } diff --git a/tests/scripts/findkernelversion.sh b/tests/scripts/findkernelversion.sh index 9731a39c7..a7687e9c6 100755 --- a/tests/scripts/findkernelversion.sh +++ b/tests/scripts/findkernelversion.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [[ $# -ne 4 ]]; then - echo " KERNEL_FLAVOR DRIVER_BRANCH DIST LTS_KERNEL are required" +if [[ $# -lt 4 ]]; then + echo " KERNEL_FLAVOR DRIVER_BRANCH DIST LTS_KERNEL [FORCE_REBUILD] are required" exit 1 fi @@ -9,6 +9,7 @@ export KERNEL_FLAVOR="${1}" export DRIVER_BRANCH="${2}" export DIST="${3}" export LTS_KERNEL="${4}" +FORCE_REBUILD="${5:-false}" export REGCTL_VERSION=v0.7.1 mkdir -p bin @@ -32,12 +33,33 @@ if [ -n "$artifact" ]; then fi # calculate driver tag -status_nvcr=0 -status_ghcr=0 -regctl tag ls nvcr.io/nvidia/driver | grep "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$" || status_nvcr=$? -regctl tag ls ghcr.io/nvidia/driver | grep "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$" || status_ghcr=$? -if [[ $status_nvcr -eq 0 || $status_ghcr -eq 0 ]]; then +nvcr_tags=$(regctl tag ls nvcr.io/nvidia/driver 2>&1) +nvcr_status=$? +if [[ $nvcr_status -ne 0 ]]; then + echo "failed to list tags from nvcr.io/nvidia/driver (exit $nvcr_status): $nvcr_tags" >&2 export should_continue=false -else + export regctl_error=true + return 1 +fi + +if echo "$nvcr_tags" | grep -q "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$"; then + echo "image tag ${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST} already exists on nvcr.io - rebuild not allowed" >&2 + export should_continue=false +elif [[ "$FORCE_REBUILD" == "true" ]]; then + echo "force rebuild requested for ${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}" >&2 export should_continue=true +else + ghcr_tags=$(regctl tag ls ghcr.io/nvidia/driver 2>&1) + ghcr_status=$? + if [[ $ghcr_status -ne 0 ]]; then + echo "failed to list tags from ghcr.io/nvidia/driver (exit $ghcr_status): $ghcr_tags" >&2 + export should_continue=false + export regctl_error=true + return 1 + fi + if echo "$ghcr_tags" | grep -q "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$"; then + export should_continue=false + else + export should_continue=true + fi fi