furtka/scripts/smoke-vm.sh
Daniel Maksymilian Syrnicki d499907613
Some checks failed
Build ISO / smoke-vm (push) Blocked by required conditions
Build ISO / build-iso (push) Successful in 24m28s
CI / test (push) Successful in 3m1s
CI / validate-json (push) Successful in 55s
CI / markdown-links (push) Successful in 37s
CI / lint (push) Failing after 13m19s
feat(ci): auto-boot every main-ISO in smoke VM on .165 Proxmox
After build-iso, a new smoke-vm job uploads the freshly built ISO to
the test Proxmox at 192.168.178.165 via PVE API token, boots it in a
fresh VM (VMID range 9000-9099, MAC derived from commit SHA so the
runner can find the DHCP IP by scanning the LAN), and curls :5000 to
confirm the webinstaller answers HTTP 200. Last 5 smoke VMs + their
ISOs are kept for post-mortem; older ones are purged. continue-on-error
on the smoke job so a VM-side flake doesn't mark the ISO build red.

Shortens the feedback loop on ISO regressions from "next manual VM
test session" (days) to "next push" (minutes) — the 2026-04-15/16 VM
sessions each found real boot-time bugs that unit tests missed.

Docs at docs/smoke-vm.md. Requires Forgejo secrets PVE_TEST_HOST and
PVE_TEST_TOKEN (dedicated smoke@pve!ci PVE token, privilege-separated).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 11:41:44 +02:00

214 lines
7.9 KiB
Bash
Executable file

#!/usr/bin/env bash
# Smoke-test a freshly built Furtka live ISO by booting it in a VM on the
# Proxmox test host (defaults to $PVE_TEST_HOST) and checking that the
# webinstaller answers HTTP 200 on :5000.
#
# Usage: ./scripts/smoke-vm.sh <iso-path>
#
# Required env:
# PVE_TEST_HOST IP/hostname of the test node (e.g. 192.168.178.165)
# PVE_TEST_TOKEN "user@realm!tokenid=secret" single string
#
# Optional env:
# PVE_TEST_NODE PVE node name; auto-detected from /nodes if empty
# PVE_TEST_ISO_STORAGE default "local"
# PVE_TEST_DISK_STORAGE default "local-lvm"
# PVE_TEST_BRIDGE default "vmbr0"
# PVE_TEST_VMID_MIN default 9000
# PVE_TEST_VMID_MAX default 9099
# PVE_TEST_KEEP how many past smoke VMs to retain (default 5)
# PVE_TEST_BOOT_TIMEOUT seconds to wait for :5000 (default 180)
# SMOKE_SHA commit SHA used in name/tag/MAC; defaults to git HEAD
#
# Exits 0 iff the ISO booted and :5000 returned 200. Old smoke VMs + ISOs
# are pruned after the test whether it passed or failed; only the newest
# PVE_TEST_KEEP VMs are meant to stay behind, so a failed build's VM remains
# available for post-mortem while the oldest retained run is evicted.
# Abort on command failure, on use of an unset variable, and when any stage
# of a pipeline fails.
set -euo pipefail

# --- Inputs ----------------------------------------------------------------
# $1 is the ISO to test; the host and token have no sensible default.
ISO_PATH="${1:?usage: $0 <iso-path>}"
[[ -f "$ISO_PATH" ]] || { echo "iso not found: $ISO_PATH" >&2; exit 1; }
: "${PVE_TEST_HOST:?PVE_TEST_HOST must be set}"
: "${PVE_TEST_TOKEN:?PVE_TEST_TOKEN must be set}"

# --- Optional knobs with their defaults -------------------------------------
ISO_STORAGE="${PVE_TEST_ISO_STORAGE:-local}"
DISK_STORAGE="${PVE_TEST_DISK_STORAGE:-local-lvm}"
BRIDGE="${PVE_TEST_BRIDGE:-vmbr0}"
VMID_MIN="${PVE_TEST_VMID_MIN:-9000}"
VMID_MAX="${PVE_TEST_VMID_MAX:-9099}"
KEEP="${PVE_TEST_KEEP:-5}"
BOOT_TIMEOUT="${PVE_TEST_BOOT_TIMEOUT:-180}"

# The numeric knobs are later used in shell arithmetic and spliced into
# generated Python, so reject anything that is not a plain decimal integer
# up front instead of failing obscurely after the ISO has been uploaded.
# Leading zeros are refused because (( )) would read them as octal.
for knob in VMID_MIN VMID_MAX KEEP BOOT_TIMEOUT; do
  if ! [[ "${!knob}" =~ ^(0|[1-9][0-9]*)$ ]]; then
    echo "PVE_TEST_${knob} must be a non-negative integer, got '${!knob}'" >&2
    exit 1
  fi
done
unset knob
if (( VMID_MIN > VMID_MAX )); then
  echo "PVE_TEST_VMID_MIN ($VMID_MIN) exceeds PVE_TEST_VMID_MAX ($VMID_MAX)" >&2
  exit 1
fi

# Commit identity used for the VM name, tag, ISO file name and MAC address.
# Outside a git checkout the placeholder "unknownunknown" is used instead.
SHA="${SMOKE_SHA:-$(git rev-parse HEAD 2>/dev/null || echo unknownunknown)}"
SHORT_SHA="${SHA:0:12}"

# Base URL of the PVE REST API on the test host.
API="https://${PVE_TEST_HOST}:8006/api2/json"
# Call the PVE API through curl, authenticated with the PVE_TEST_TOKEN token.
# Globals:   PVE_TEST_TOKEN (read)
# Arguments: passed straight through to curl (URL, --request, --form, ...).
# Outputs:   the response body on stdout, curl diagnostics on stderr.
# Returns:   curl's exit status; HTTP errors are reported as failures
#            because of --fail-with-body.
#
# The Authorization header is handed to curl on stdin ("--header @-")
# instead of on the command line, so the token secret does not appear in
# the process list. As a consequence the caller's stdin never reaches curl.
# -k turns off TLS certificate verification.
# NOTE(review): presumably the test node uses a self-signed certificate —
# confirm before pointing this at anything else.
api() {
  curl --silent --show-error --fail-with-body -k \
    --header @- \
    "$@" <<<"Authorization: PVEAPIToken=${PVE_TEST_TOKEN}"
}
# Unwrap a PVE response: read the {"data": <payload>} JSON document from
# stdin and print the payload the way Python's print() renders it.
jget() {
  python3 -c '
import json
import sys

envelope = json.load(sys.stdin)
print(envelope["data"])
'
}
# Decide which PVE node to talk to. An explicit PVE_TEST_NODE wins;
# otherwise ask the API for its node list and take the first entry.
if [[ -n "${PVE_TEST_NODE:-}" ]]; then
  NODE="$PVE_TEST_NODE"
else
  NODE="$(api "$API/nodes" | python3 -c '
import json
import sys

available = json.load(sys.stdin)["data"]
if not available:
    sys.exit("no nodes returned from PVE")
first = available[0]
print(first["node"])
')"
fi
printf '==> node=%s sha=%s iso=%s\n' "$NODE" "$SHORT_SHA" "$(basename "$ISO_PATH")"

# Name of the ISO on the PVE side, and its volume id on the ISO storage.
ISO_NAME="furtka-${SHORT_SHA}.iso"
VOLID="${ISO_STORAGE}:iso/${ISO_NAME}"
# Wait for an asynchronous PVE task to finish.
# Globals:   API, NODE (read)
# Arguments: $1 - task UPID, as found in the "data" field of a PVE response
#            $2 - seconds to wait before giving up (default 120)
# Returns:   0 if the task stopped with exit status OK (or warnings only),
#            1 if it failed, timed out, or $1 does not look like a UPID.
wait_task() {
  local upid="$1" timeout="${2:-120}"
  local deadline state
  if [[ "$upid" != UPID:* ]]; then
    echo "!! expected a task UPID from PVE, got '${upid}'" >&2
    return 1
  fi
  deadline=$((SECONDS + timeout))
  while (( SECONDS < deadline )); do
    # A failed status query is treated as "still running" and retried until
    # the deadline, so a transient API hiccup does not fail the run.
    state="$(api "$API/nodes/$NODE/tasks/$upid/status" 2>/dev/null | python3 -c '
import json, sys
task = json.load(sys.stdin)["data"]
if task.get("status") == "stopped":
    print(task.get("exitstatus") or "unknown exit status")
else:
    print("running")
' 2>/dev/null || echo "running")"
    case "$state" in
      running) sleep 1 ;;
      OK | WARNINGS*) return 0 ;;
      *)
        echo "!! task $upid failed: $state" >&2
        return 1
        ;;
    esac
  done
  echo "!! task $upid still running after ${timeout}s" >&2
  return 1
}

# --- Step 1: upload ISO ----------------------------------------------------
# PVE's upload endpoint errors if the file exists. Delete first so re-runs
# on the same SHA (e.g. workflow re-dispatch) work.
if api "$API/nodes/$NODE/storage/$ISO_STORAGE/content/$VOLID" \
  --output /dev/null 2>/dev/null; then
  echo "==> removing stale ISO $VOLID"
  delete_upid="$(api --request DELETE \
    "$API/nodes/$NODE/storage/$ISO_STORAGE/content/$VOLID" | jget)"
  # Only wait when the removal was handed to a background task; any other
  # answer (e.g. null) is taken to mean it already completed.
  if [[ "$delete_upid" == UPID:* ]]; then
    wait_task "$delete_upid" 60 || exit 1
  fi
fi
echo "==> uploading ISO as $ISO_NAME"
# The HTTP upload only stages the file; PVE moves it into the storage in a
# background task. Wait for that task so the VM never boots from a missing
# or half-copied ISO.
upload_upid="$(api --request POST "$API/nodes/$NODE/storage/$ISO_STORAGE/upload" \
  --form "content=iso" \
  --form "filename=@${ISO_PATH};filename=${ISO_NAME}" \
  | jget)"
wait_task "$upload_upid" 300 || { echo "!! ISO upload did not complete" >&2; exit 1; }

# --- Step 2: pick a free VMID in the reserved range ------------------------
# List VMs on the node, filter by range, pick the lowest integer not in use.
USED="$(api "$API/nodes/$NODE/qemu" | python3 -c '
import json, sys
data = json.load(sys.stdin)["data"]
print(" ".join(str(v["vmid"]) for v in data))
')"
VMID=""
for ((id = VMID_MIN; id <= VMID_MAX; id++)); do
  if ! [[ " $USED " == *" $id "* ]]; then
    VMID="$id"
    break
  fi
done
[[ -n "$VMID" ]] || { echo "no free VMID in ${VMID_MIN}-${VMID_MAX}" >&2; exit 1; }

# Derive a stable MAC from the SHA. BC:24:11 is Proxmox's assigned OUI.
# The tail must be six hex digits; when the SHA does not start with six hex
# characters (e.g. the "unknownunknown" placeholder, or a short custom
# SMOKE_SHA) fall back to the low 24 bits of its CRC so the MAC stays valid
# and still depends only on the SHA string.
if [[ "$SHORT_SHA" =~ ^[0-9A-Fa-f]{6} ]]; then
  MAC_TAIL="${SHORT_SHA:0:6}"
else
  sha_crc="$(printf '%s' "$SHORT_SHA" | cksum)"
  printf -v MAC_TAIL '%06x' "$(( ${sha_crc%% *} & 0xFFFFFF ))"
fi
MAC_TAIL="$(echo "$MAC_TAIL" | tr 'a-z' 'A-Z')"
MAC="BC:24:11:${MAC_TAIL:0:2}:${MAC_TAIL:2:2}:${MAC_TAIL:4:2}"

echo "==> creating VM $VMID name=furtka-smoke-${SHORT_SHA} mac=$MAC"
# VM creation is a background task too: the POST returns a UPID right away.
# Starting the VM before that task has finished would race with it, and a
# creation failure would otherwise go unnoticed.
create_upid="$(api --request POST "$API/nodes/$NODE/qemu" \
  --data-urlencode "vmid=$VMID" \
  --data-urlencode "name=furtka-smoke-${SHORT_SHA}" \
  --data-urlencode "tags=furtka;smoke;sha-${SHORT_SHA}" \
  --data-urlencode "cores=2" \
  --data-urlencode "memory=4096" \
  --data-urlencode "bios=ovmf" \
  --data-urlencode "machine=q35" \
  --data-urlencode "ostype=l26" \
  --data-urlencode "scsihw=virtio-scsi-single" \
  --data-urlencode "efidisk0=${DISK_STORAGE}:1,efitype=4m,pre-enrolled-keys=0" \
  --data-urlencode "scsi0=${DISK_STORAGE}:20,discard=on,ssd=1" \
  --data-urlencode "ide2=${VOLID},media=cdrom" \
  --data-urlencode "boot=order=ide2;scsi0" \
  --data-urlencode "net0=virtio=${MAC},bridge=${BRIDGE},firewall=0" \
  | jget)"
wait_task "$create_upid" 120 || { echo "!! creation of VM $VMID failed" >&2; exit 1; }

echo "==> starting VM $VMID"
api --request POST "$API/nodes/$NODE/qemu/$VMID/status/start" > /dev/null
# --- Step 3: discover the VM's IP by MAC -----------------------------------
# The live ISO has no qemu-guest-agent, so PVE can't tell us the IP.
# We scan the LAN from the runner and match on our derived MAC.
#
# Optional env (this step only):
#   PVE_TEST_SCAN_CIDR   network handed to the nmap ping sweep
#                        (default 192.168.178.0/24)
#
# Result: IP holds the address (empty if not found) and SMOKE_RC is 0 on
# success, 1 when the MAC never showed up.
SCAN_CIDR="${PVE_TEST_SCAN_CIDR:-192.168.178.0/24}"
DISCOVERY_TIMEOUT=150
MAC_LOWER="$(echo "$MAC" | tr 'A-Z' 'a-z')"
IP=""
# Start from a clean verdict so a SMOKE_RC inherited from the environment
# cannot influence the outcome.
SMOKE_RC=0

have_arp_scan=0
have_nmap=0
if command -v arp-scan >/dev/null 2>&1; then have_arp_scan=1; fi
if command -v nmap >/dev/null 2>&1; then have_nmap=1; fi

if (( have_arp_scan == 0 && have_nmap == 0 )); then
  # Without a scanner there is nothing to wait for; skip the polling loop.
  echo "!! neither arp-scan nor nmap is installed; cannot scan for $MAC" >&2
else
  deadline=$((SECONDS + DISCOVERY_TIMEOUT))
  while (( SECONDS < deadline )); do
    if (( have_arp_scan )); then
      # `|| true`: with `set -e` and pipefail a failing scan (sudo refused,
      # no usable interface, ...) would otherwise abort the whole script
      # instead of falling through to nmap. awk deliberately keeps reading
      # after the first hit so arp-scan is never killed by SIGPIPE.
      IP="$(sudo arp-scan --localnet --quiet --ignoredups 2>/dev/null \
        | awk -v m="$MAC_LOWER" '!hit && tolower($2) == m { print $1; hit = 1 }' \
        || true)"
    fi
    if [[ -z "$IP" ]] && (( have_nmap )); then
      # The ping sweep only populates the neighbour table; the address is
      # then read back from `ip neigh`.
      sudo nmap -sn -T4 "$SCAN_CIDR" >/dev/null 2>&1 || true
      IP="$(ip neigh show \
        | awk -v m="$MAC_LOWER" \
          '!hit && tolower($5) == m && $1 ~ /^[0-9]/ { print $1; hit = 1 }' \
        || true)"
    fi
    [[ -n "$IP" ]] && break
    sleep 5
  done
fi

if [[ -z "$IP" ]]; then
  echo "!! never saw $MAC on the LAN within ${DISCOVERY_TIMEOUT}s" >&2
  SMOKE_RC=1
else
  echo "==> VM $VMID is at $IP (mac $MAC)"
fi
# --- Step 4: smoke the webinstaller ----------------------------------------
# Poll http://$IP:5000/ every 5 s for up to BOOT_TIMEOUT seconds and pass
# only when a response with status exactly 200 is seen. The step is skipped
# when SMOKE_RC is already non-zero (no IP was found earlier).
SMOKE_RC="${SMOKE_RC:-0}"
if [[ "$SMOKE_RC" -eq 0 ]]; then
  echo "==> polling http://${IP}:5000 (timeout ${BOOT_TIMEOUT}s)"
  # Assume failure until a probe succeeds, so a zero timeout (loop body
  # never runs) cannot be reported as a pass.
  SMOKE_RC=1
  end=$((SECONDS + BOOT_TIMEOUT))
  while (( SECONDS < end )); do
    # Compare the status code itself: --fail alone would also accept other
    # 2xx/3xx answers. curl prints 000 when no HTTP response was received;
    # `|| true` keeps such a probe from tripping `set -e`.
    http_code="$(curl --silent --max-time 5 --output /dev/null \
      --write-out '%{http_code}' "http://${IP}:5000/" || true)"
    if [[ "$http_code" == "200" ]]; then
      echo "==> :5000 answered 200 — smoke passed"
      SMOKE_RC=0
      break
    fi
    sleep 5
  done
  if [[ "$SMOKE_RC" -ne 0 ]]; then
    echo "!! :5000 never returned 200 on ${IP}" >&2
  fi
fi
# --- Step 5: prune old smoke VMs + ISOs ------------------------------------
# Keep the KEEP most recently created VMs of the reserved VMID range and
# delete the rest together with the ISO each one booted from.
#
# Recency is taken from the creation time recorded in each VM's config
# ("meta: ...,ctime=<epoch>"), not from the VMID: VMIDs are handed out
# lowest-free-first, so after the range wraps the newest VM can carry the
# smallest id. The VM created by this run is always ranked newest. VMs
# whose creation time cannot be read rank oldest, newest VMID first.
#
# Pruning is best-effort: problems are reported but never abort the script,
# so the smoke verdict in SMOKE_RC is still what decides the exit status.

# Read "<ctime> <vmid>" lines on stdin and print the vmids that fall outside
# the $1 newest entries (newest ctime first, ties broken by higher vmid).
select_prune_victims() {
  local keep="$1"
  awk 'NF == 2' \
    | LC_ALL=C sort -k1,1nr -k2,2nr \
    | awk -v keep="$keep" 'NR > keep + 0 { print $2 }'
}

echo "==> pruning smoke VMs, keeping last $KEEP"
# The range bounds go to Python as arguments rather than being spliced into
# the program text.
IN_RANGE="$(api "$API/nodes/$NODE/qemu" | python3 -c '
import json, sys
lo, hi = int(sys.argv[1]), int(sys.argv[2])
for vm in json.load(sys.stdin)["data"]:
    if lo <= int(vm["vmid"]) <= hi:
        print(vm["vmid"])
' "$VMID_MIN" "$VMID_MAX")" || {
  echo " (could not list VMs; skipping prune)" >&2
  IN_RANGE=""
}

RANKED=""
# Word-splitting is intended here: IN_RANGE is a list of integer vmids.
for id in $IN_RANGE; do
  if [[ "$id" == "${VMID:-}" ]]; then
    # Sentinel larger than any real epoch: this run's VM is the newest.
    ctime=99999999999
  else
    ctime="$(api "$API/nodes/$NODE/qemu/$id/config" 2>/dev/null | python3 -c '
import json, re, sys
meta = json.load(sys.stdin)["data"].get("meta", "")
m = re.search(r"(?:^|,)ctime=(\d+)", meta)
print(m.group(1) if m else 0)
' 2>/dev/null || true)"
  fi
  RANKED+="${ctime:-0} ${id}"$'\n'
done
TO_DROP="$(printf '%s' "$RANKED" | select_prune_victims "$KEEP")"

for old in $TO_DROP; do
  echo " dropping VM $old"
  # Find the ISO the VM was booted from so we can delete it after.
  OLD_ISO="$(api "$API/nodes/$NODE/qemu/$old/config" | python3 -c '
import json, sys, re
cfg = json.load(sys.stdin)["data"]
for k in ("ide0","ide1","ide2","ide3","sata0","sata1","sata2","sata3"):
    v = cfg.get(k, "")
    m = re.match(r"([^,]+),.*media=cdrom", v)
    if m and m.group(1).endswith(".iso"):
        print(m.group(1)); break
' || true)"
  # Stop (ignore errors if already stopped). The stop request is
  # asynchronous, so give the VM up to 30 s to leave the "running" state
  # before asking PVE to delete it.
  api --request POST "$API/nodes/$NODE/qemu/$old/status/stop" \
    --output /dev/null 2>/dev/null || true
  stop_deadline=$((SECONDS + 30))
  while (( SECONDS < stop_deadline )); do
    vm_state="$(api "$API/nodes/$NODE/qemu/$old/status/current" 2>/dev/null | python3 -c '
import json, sys
print(json.load(sys.stdin)["data"]["status"])
' 2>/dev/null || true)"
    [[ "$vm_state" == "running" ]] || break
    sleep 1
  done
  # /qemu/<id> DELETE is async; the call returns a UPID but for our purposes
  # "fire and forget" is fine — next prune will retry if it didn't land.
  if ! api --request DELETE \
    "$API/nodes/$NODE/qemu/$old?purge=1&destroy-unreferenced-disks=1" \
    --output /dev/null; then
    echo " (delete of $old failed; skipping)"
    # Leave the ISO in place too, so the surviving VM stays bootable and the
    # next prune can retry both.
    continue
  fi
  if [[ -n "$OLD_ISO" && "$OLD_ISO" != "$VOLID" ]]; then
    echo " dropping ISO $OLD_ISO"
    api --request DELETE "$API/nodes/$NODE/storage/$ISO_STORAGE/content/$OLD_ISO" \
      --output /dev/null 2>/dev/null || true
  fi
done
# Report the smoke verdict as the script's exit status: SMOKE_RC is 0 when
# :5000 answered and 1 when the VM was never found or never answered.
exit "$SMOKE_RC"