furtka/scripts/smoke-vm.sh
Daniel Maksymilian Syrnicki f4f7d853ba
Some checks failed
CI / lint (pull_request) Successful in 1m3s
CI / test (pull_request) Successful in 1m23s
CI / markdown-links (pull_request) Has been cancelled
CI / validate-json (pull_request) Has been cancelled
chore(smoke): surface PVE response body on API failure
api() was swallowing Proxmox's error body because callers pipe its
output to /dev/null. With a bare "curl: (22) 403" in the log we can't
tell which permission is missing. Now we capture the response body,
print it to stderr on failure, and only emit it to stdout on success.

No behaviour change on the happy path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 14:06:09 +02:00

226 lines
8.4 KiB
Bash
Executable file

#!/usr/bin/env bash
# Smoke-test a freshly built Furtka live ISO by booting it in a VM on the
# Proxmox test host (given by $PVE_TEST_HOST) and checking that the
# webinstaller answers HTTP 200 on :5000.
#
# Usage: ./scripts/smoke-vm.sh <iso-path>
#
# Required env:
# PVE_TEST_HOST IP/hostname of the test node (e.g. 192.168.178.165)
# PVE_TEST_TOKEN "user@realm!tokenid=secret" single string
#
# Optional env:
# PVE_TEST_NODE PVE node name; auto-detected from /nodes if empty
# PVE_TEST_ISO_STORAGE default "local"
# PVE_TEST_DISK_STORAGE default "local-lvm"
# PVE_TEST_BRIDGE default "vmbr0"
# PVE_TEST_VMID_MIN default 9000
# PVE_TEST_VMID_MAX default 9099
# PVE_TEST_KEEP how many past smoke VMs to retain (default 5)
# PVE_TEST_BOOT_TIMEOUT seconds to wait for :5000 (default 180)
# SMOKE_SHA commit SHA used in name/tag/MAC; defaults to git HEAD
#
# Exits 0 iff the ISO booted and :5000 returned 200. Old VMs + ISOs are pruned
# after the test regardless of its outcome, so a failed build's VM stays behind
# for post-mortem (at the cost of dropping an older retained run).
# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Positional argument: path to the ISO under test; it must be a regular file.
ISO_PATH="${1:?usage: $0 <iso-path>}"
if [[ ! -f "$ISO_PATH" ]]; then
  echo "iso not found: $ISO_PATH" >&2
  exit 1
fi

# Mandatory connection settings; the script aborts with the given message when
# either one is unset or empty.
: "${PVE_TEST_HOST:?PVE_TEST_HOST must be set}"
: "${PVE_TEST_TOKEN:?PVE_TEST_TOKEN must be set}"

# Optional settings with their defaults.
ISO_STORAGE="${PVE_TEST_ISO_STORAGE:-local}"
DISK_STORAGE="${PVE_TEST_DISK_STORAGE:-local-lvm}"
BRIDGE="${PVE_TEST_BRIDGE:-vmbr0}"
VMID_MIN="${PVE_TEST_VMID_MIN:-9000}"
VMID_MAX="${PVE_TEST_VMID_MAX:-9099}"
KEEP="${PVE_TEST_KEEP:-5}"
BOOT_TIMEOUT="${PVE_TEST_BOOT_TIMEOUT:-180}"

# Commit identifier: an explicit SMOKE_SHA wins, then git HEAD, then a
# placeholder when git cannot resolve HEAD.
if [[ -n "${SMOKE_SHA:-}" ]]; then
  SHA="$SMOKE_SHA"
else
  SHA="$(git rev-parse HEAD 2>/dev/null || echo unknownunknown)"
fi
# Only the first 12 characters are used in names, tags and the MAC.
SHORT_SHA="${SHA:0:12}"

# Base URL of the PVE JSON API on the test host.
API="https://${PVE_TEST_HOST}:8006/api2/json"
#######################################
# Call the Proxmox API with the smoke-test token.
#
# On a curl failure (including non-2xx, via --fail-with-body) the captured
# output is printed to stderr before the failure is returned. Otherwise the
# error body is lost when callers redirect stdout to /dev/null, leaving only
# "curl: (22)" with no hint about which permission is missing.
#
# Globals:   PVE_TEST_TOKEN (read)
# Arguments: passed verbatim to curl (URL, --request, --form, ...)
# Outputs:   response body on stdout on success; diagnostics on stderr on
#            failure. curl's own stderr is folded into the captured text.
#            If a caller passes --output, curl writes the body there, so only
#            curl's error text is available for the diagnostic.
# Returns:   0 on success, curl's exit status otherwise
#######################################
api() {
  local body rc=0
  # `|| rc=$?` matters: the script runs under `set -e`, and a bare failing
  # `body=$(curl ...)` would abort the whole script right here, before the
  # diagnostics below could ever be printed.
  # -k: skip TLS certificate verification for the test host.
  body=$(curl --silent --show-error --fail-with-body -k \
    --header "Authorization: PVEAPIToken=${PVE_TEST_TOKEN}" \
    "$@" 2>&1) || rc=$?
  if (( rc != 0 )); then
    {
      echo "!! PVE API call failed (rc=$rc)"
      # "$*" holds only the caller's arguments; the token header is added
      # above and therefore never appears in the log.
      echo "!! request: $*"
      if [[ -n "$body" ]]; then
        echo "!! response: $body"
      fi
    } >&2
    return "$rc"
  fi
  printf '%s' "$body"
}
# PVE wraps every payload as {"data": <payload>}. Read such a JSON document
# from stdin and print the value stored under "data" (as Python's print()
# renders it).
jget() {
  python3 -c '
import json
import sys

envelope = json.load(sys.stdin)
print(envelope["data"])
'
}
# Auto-detect the node name if PVE_TEST_NODE is not given: take the first
# entry returned by /nodes. The embedded Python exits non-zero (with a message
# on stderr) when the list is empty; with pipefail + errexit that aborts the
# script.
NODE="${PVE_TEST_NODE:-}"
if [[ -z "$NODE" ]]; then
  NODE="$(api "$API/nodes" | python3 -c '
import json
import sys

nodes = json.load(sys.stdin)["data"]
if not nodes:
    sys.exit("no nodes returned from PVE")
print(nodes[0]["node"])
')"
fi
echo "==> node=$NODE sha=$SHORT_SHA iso=$(basename "$ISO_PATH")"

# The ISO is stored on PVE under a name derived from the commit; VOLID is the
# "<storage>:iso/<file>" identifier used in the content URL and the ide2 drive.
ISO_NAME="furtka-${SHORT_SHA}.iso"
VOLID="${ISO_STORAGE}:iso/${ISO_NAME}"
# --- Step 1: upload ISO (or reuse if same SHA already on PVE) ---------------
# The ISO bytes for a given commit SHA are reproducible. When
# furtka-<sha>.iso is already present in PVE storage from an earlier smoke run
# it is reused as-is, which sidesteps DELETE-permission friction and saves
# roughly two minutes on re-runs.
iso_content_url="$API/nodes/$NODE/storage/$ISO_STORAGE/content/$VOLID"
# The probe is expected to fail when the ISO is absent, so its output and
# diagnostics are discarded.
if ! api "$iso_content_url" --output /dev/null 2>/dev/null; then
  echo "==> uploading ISO as $ISO_NAME"
  api --request POST "$API/nodes/$NODE/storage/$ISO_STORAGE/upload" \
    --form "content=iso" \
    --form "filename=@${ISO_PATH};filename=${ISO_NAME}" \
    > /dev/null
else
  echo "==> reusing existing ISO $VOLID"
fi
# --- Step 2: pick a free VMID in the reserved range ------------------------
# Fetch the VMIDs of all VMs on the node as one space-separated string, then
# walk the reserved range upwards and take the first id that is not in it.
USED="$(api "$API/nodes/$NODE/qemu" | python3 -c '
import json
import sys

vms = json.load(sys.stdin)["data"]
print(" ".join(str(vm["vmid"]) for vm in vms))
')"
VMID=""
id=$((VMID_MIN))
while (( id <= VMID_MAX )); do
  # Pad both sides with spaces so that e.g. 900 does not match inside 9000.
  case " $USED " in
    *" $id "*)
      ;;
    *)
      VMID="$id"
      break
      ;;
  esac
  id=$((id + 1))
done
if [[ -z "$VMID" ]]; then
  echo "no free VMID in ${VMID_MIN}-${VMID_MAX}" >&2
  exit 1
fi
#######################################
# Derive a stable MAC address from a commit SHA.
#
# The first three octets are BC:24:11 (Proxmox's assigned OUI); the last three
# come from the first six hex digits of the SHA. When those six characters are
# not all hex digits (for example the "unknownunknown" placeholder used when
# git cannot resolve HEAD), the tail is taken from the low 24 bits of the
# input's cksum CRC instead, so the result is always a syntactically valid
# MAC and still deterministic for a given input.
#
# Arguments: $1 - SHA (or any identifying string)
# Outputs:   MAC in upper-case colon notation on stdout
#######################################
mac_for_sha() {
  local sha="${1:-}" tail crc
  tail="$(printf '%s' "${sha:0:6}" | tr 'a-f' 'A-F')"
  if [[ ! "$tail" =~ ^[0-9A-F]{6}$ ]]; then
    # cksum prints "<crc> <size>"; keep the CRC and fold it to three octets.
    crc="$(printf '%s' "$sha" | cksum)"
    printf -v tail '%06X' $(( ${crc%% *} & 0xFFFFFF ))
  fi
  printf 'BC:24:11:%s:%s:%s\n' "${tail:0:2}" "${tail:2:2}" "${tail:4:2}"
}
MAC="$(mac_for_sha "$SHORT_SHA")"
# Create the smoke VM, booting from the uploaded ISO (ide2) ahead of an empty
# scsi0 disk, with the derived MAC on net0. Then start it. The API responses
# are discarded; a failure aborts the script via errexit.
vm_name="furtka-smoke-${SHORT_SHA}"
echo "==> creating VM $VMID name=${vm_name} mac=$MAC"
vm_params=(
  "vmid=$VMID"
  "name=${vm_name}"
  "tags=furtka;smoke;sha-${SHORT_SHA}"
  "cores=2"
  "memory=4096"
  "bios=ovmf"
  "machine=q35"
  "ostype=l26"
  "scsihw=virtio-scsi-single"
  "efidisk0=${DISK_STORAGE}:1,efitype=4m,pre-enrolled-keys=0"
  "scsi0=${DISK_STORAGE}:20,discard=on,ssd=1"
  "ide2=${VOLID},media=cdrom"
  "boot=order=ide2;scsi0"
  "net0=virtio=${MAC},bridge=${BRIDGE},firewall=0"
)
# Each parameter becomes its own --data-urlencode option, in list order.
create_args=()
for vm_param in "${vm_params[@]}"; do
  create_args+=(--data-urlencode "$vm_param")
done
api --request POST "$API/nodes/$NODE/qemu" "${create_args[@]}" > /dev/null

echo "==> starting VM $VMID"
api --request POST "$API/nodes/$NODE/qemu/$VMID/status/start" > /dev/null
# --- Step 3: discover the VM's IP by MAC -----------------------------------
# The live ISO has no qemu-guest-agent, so PVE can't tell us the IP.
# We scan the LAN from the runner and match on our derived MAC.
#
# Optional env read here:
#   PVE_TEST_SUBNET      CIDR swept by the nmap fallback
#                        (default 192.168.178.0/24)
#   PVE_TEST_IP_TIMEOUT  seconds to wait for the MAC to appear (default 150)
SCAN_SUBNET="${PVE_TEST_SUBNET:-192.168.178.0/24}"
IP_TIMEOUT="${PVE_TEST_IP_TIMEOUT:-150}"
MAC_LOWER="$(printf '%s' "$MAC" | tr 'A-Z' 'a-z')"
IP=""
# Start from a known state so a stray SMOKE_RC in the environment cannot
# decide the outcome.
SMOKE_RC=0
deadline=$((SECONDS + IP_TIMEOUT))
while (( SECONDS < deadline )); do
  # The scans are best-effort. `|| true` keeps errexit + pipefail from killing
  # the script when a scanner fails (e.g. sudo refused) or is hit by SIGPIPE
  # after awk exits on the first match; IP still receives whatever awk printed.
  if command -v arp-scan >/dev/null 2>&1; then
    IP="$(sudo arp-scan --localnet --quiet --ignoredups 2>/dev/null \
      | awk -v m="$MAC_LOWER" 'tolower($2) == m { print $1; exit }')" || true
  fi
  if [[ -z "$IP" ]] && command -v nmap >/dev/null 2>&1; then
    # The ping sweep is only run to populate the neighbour table, which is
    # then searched for our MAC.
    sudo nmap -sn -T4 "$SCAN_SUBNET" >/dev/null 2>&1 || true
    IP="$(ip neigh show \
      | awk -v m="$MAC_LOWER" 'tolower($5) == m && $1 ~ /^[0-9]/ { print $1; exit }')" || true
  fi
  if [[ -n "$IP" ]]; then
    break
  fi
  sleep 5
done
if [[ -z "$IP" ]]; then
  echo "!! never saw $MAC on the LAN within ${IP_TIMEOUT}s" >&2
  # Marks the run as failed; the HTTP probe is skipped but pruning still runs.
  SMOKE_RC=1
else
  echo "==> VM $VMID is at $IP (mac $MAC)"
fi
# --- Step 4: smoke the webinstaller ----------------------------------------
# Poll http://<ip>:5000/ every 5 seconds until curl succeeds or BOOT_TIMEOUT
# elapses. Skipped entirely when IP discovery already failed (SMOKE_RC != 0).
# NOTE(review): `curl --fail` treats any HTTP status below 400 as success, so
# a redirect would also count as a pass although the log line says 200.
SMOKE_RC="${SMOKE_RC:-0}"
if [[ "$SMOKE_RC" -eq 0 ]]; then
  echo "==> polling http://${IP}:5000 (timeout ${BOOT_TIMEOUT}s)"
  # Assume failure until a probe actually succeeds. Otherwise a timeout of 0
  # (the loop body never runs) would be reported as a pass.
  SMOKE_RC=1
  end=$((SECONDS + BOOT_TIMEOUT))
  while (( SECONDS < end )); do
    if curl --silent --fail --max-time 5 --output /dev/null "http://${IP}:5000/"; then
      echo "==> :5000 answered 200 — smoke passed"
      SMOKE_RC=0
      break
    fi
    sleep 5
  done
  if [[ "$SMOKE_RC" -ne 0 ]]; then
    echo "!! :5000 never returned 200 on ${IP}" >&2
  fi
fi
# --- Step 5: prune old smoke VMs + ISOs ------------------------------------
echo "==> pruning smoke VMs, keeping last $KEEP"
# List the VMs in the reserved range, highest vmid first, and print every vmid
# beyond the first KEEP. Step 2 hands out the LOWEST free vmid, so the VM
# created by this run is not necessarily among the highest ones; it is
# therefore exempted explicitly (and counted against KEEP) so it stays behind
# for post-mortem. With KEEP=0 nothing is exempted.
# The values are passed as arguments rather than spliced into Python source.
# Pruning is best-effort: if the listing fails we warn and skip it, so the
# exit status still reflects the smoke result alone.
TO_DROP="$(api "$API/nodes/$NODE/qemu" | python3 -c '
import json
import sys

lo, hi, keep, current = map(int, sys.argv[1:5])
vmids = [int(vm["vmid"]) for vm in json.load(sys.stdin)["data"]]
vmids = sorted((v for v in vmids if lo <= v <= hi), reverse=True)
if keep > 0 and current in vmids:
    vmids.remove(current)
    keep -= 1
for vmid in vmids[keep:]:
    print(vmid)
' "$VMID_MIN" "$VMID_MAX" "$KEEP" "$VMID")" || {
  echo "!! could not list VMs for pruning; skipping prune" >&2
  TO_DROP=""
}
# TO_DROP is a whitespace-separated list of integer vmids, so word-splitting
# is intended here.
# shellcheck disable=SC2086
for old in $TO_DROP; do
  echo " dropping VM $old"
  # Find the ISO the VM was booted from so we can delete it after: the first
  # ide*/sata* drive marked media=cdrom whose volume name ends in ".iso".
  # A lookup failure just leaves OLD_ISO empty.
  OLD_ISO="$(api "$API/nodes/$NODE/qemu/$old/config" | python3 -c '
import json
import re
import sys

cfg = json.load(sys.stdin)["data"]
for key in ("ide0", "ide1", "ide2", "ide3", "sata0", "sata1", "sata2", "sata3"):
    match = re.match(r"([^,]+),.*media=cdrom", cfg.get(key, ""))
    if match and match.group(1).endswith(".iso"):
        print(match.group(1))
        break
' || true)"
  # Stop (ignore errors if already stopped), then purge.
  api --request POST "$API/nodes/$NODE/qemu/$old/status/stop" \
    --output /dev/null 2>/dev/null || true
  # /qemu/<id> DELETE is async; the call returns a UPID but for our purposes
  # "fire and forget" is fine — next prune will retry if it didn't land.
  api --request DELETE "$API/nodes/$NODE/qemu/$old?purge=1&destroy-unreferenced-disks=1" \
    --output /dev/null || echo " (delete of $old failed; skipping)"
  # Never delete the ISO this run is using, even if an old VM shares it.
  if [[ -n "$OLD_ISO" && "$OLD_ISO" != "$VOLID" ]]; then
    echo " dropping ISO $OLD_ISO"
    api --request DELETE "$API/nodes/$NODE/storage/$ISO_STORAGE/content/$OLD_ISO" \
      --output /dev/null 2>/dev/null || true
  fi
done
# The exit status reflects the smoke result only.
exit "$SMOKE_RC"