"""Furtka self-update logic. The runtime layout (see also ``webinstaller/app.py`` slice 1b): /opt/furtka/ ├── versions/ │ ├── 26.0-alpha/ first install extracted here │ └── 26.1-alpha/ next version, after one update └── current -> versions/26.1-alpha This module handles the transition between versions. Flow: 1. ``check_update()`` queries the Forgejo releases API for the latest tag. 2. ``prepare_update()`` downloads the tarball + sha256 sidecar, verifies it, extracts into ``/opt/furtka/versions//_staging`` and moves it to ``versions//`` on successful extract. 3. ``apply_update()`` flips ``/opt/furtka/current``, reloads caddy, and restarts furtka-reconcile + furtka-api. Then health-checks the API. On failure it flips the symlink back. The full pipeline is wrapped in ``run_update()`` for the CLI, which also writes stage-by-stage progress to ``/var/lib/furtka/update-state.json`` so the web UI can poll progress without touching the (restarting) API. Paths can be overridden via the ``FURTKA_ROOT`` env var so tests can point the updater at a tmpdir. """ from __future__ import annotations import fcntl import json import os import shutil import subprocess import time import urllib.error import urllib.request from dataclasses import dataclass from pathlib import Path from furtka import _release_common as _rc FORGEJO_HOST = os.environ.get("FURTKA_FORGEJO_HOST", "forgejo.sourcegate.online") FORGEJO_REPO = os.environ.get("FURTKA_FORGEJO_REPO", "daniel/furtka") _FURTKA_ROOT = Path(os.environ.get("FURTKA_ROOT", "/opt/furtka")) _STATE_DIR = Path(os.environ.get("FURTKA_STATE_DIR", "/var/lib/furtka")) _CADDYFILE_LIVE = Path(os.environ.get("FURTKA_CADDYFILE_PATH", "/etc/caddy/Caddyfile")) _CADDY_SNIPPET_DIR = Path( os.environ.get("FURTKA_CADDY_SNIPPET_DIR", str(_CADDYFILE_LIVE.parent / "furtka.d")) ) _CADDY_HTTPS_SNIPPET_DIR = Path( os.environ.get("FURTKA_CADDY_HTTPS_SNIPPET_DIR", str(_CADDYFILE_LIVE.parent / "furtka-https.d")) ) _SYSTEMD_DIR = Path(os.environ.get("FURTKA_SYSTEMD_DIR", "/etc/systemd/system")) _HOSTNAME_FILE = Path(os.environ.get("FURTKA_HOSTNAME_FILE", "/etc/hostname")) _CADDYFILE_HOSTNAME_MARKER = "__FURTKA_HOSTNAME__" class UpdateError(RuntimeError): """Any failure in the update flow that should surface to the caller.""" def furtka_root() -> Path: return _FURTKA_ROOT def versions_dir() -> Path: return furtka_root() / "versions" def current_symlink() -> Path: return furtka_root() / "current" def state_path() -> Path: return _STATE_DIR / "update-state.json" def lock_path() -> Path: return Path(os.environ.get("FURTKA_LOCK_PATH", "/run/furtka/update.lock")) @dataclass(frozen=True) class UpdateCheck: current: str latest: str update_available: bool tarball_url: str | None sha256_url: str | None def read_current_version() -> str: """Return the string in /VERSION, or "dev" if it can't be read.""" try: return (current_symlink() / "VERSION").read_text().strip() or "dev" except (FileNotFoundError, NotADirectoryError, OSError): return "dev" def _forgejo_api(path: str) -> dict | list: return _rc.forgejo_api(FORGEJO_HOST, FORGEJO_REPO, path, error_cls=UpdateError) _version_tuple = _rc.version_tuple def check_update() -> UpdateCheck: """Return current + latest versions and whether an update is available. Forgejo's /releases/latest endpoint skips anything marked as a pre-release, so during the CalVer alpha/beta stage where every tag carries a suffix, that endpoint always 404s. Query the paginated /releases list instead and take the first entry — Forgejo returns them newest-first, including pre-releases. """ current = read_current_version() releases = _forgejo_api("/releases?limit=1") if not isinstance(releases, list) or not releases: raise UpdateError("no releases published yet") release = releases[0] latest = str(release.get("tag_name") or "").strip() if not latest: raise UpdateError("latest release has empty tag_name") tarball_url = None sha256_url = None for asset in release.get("assets") or []: name = asset.get("name") or "" url = asset.get("browser_download_url") or "" if name.endswith(".tar.gz") and "furtka-" in name: tarball_url = url elif name.endswith(".tar.gz.sha256"): sha256_url = url available = latest != current and _version_tuple(latest) > _version_tuple(current) return UpdateCheck( current=current, latest=latest, update_available=available, tarball_url=tarball_url, sha256_url=sha256_url, ) def _download(url: str, dest: Path) -> None: _rc.download(url, dest, error_cls=UpdateError) _sha256_of = _rc.sha256_of def verify_tarball(tarball: Path, expected_sha: str) -> None: _rc.verify_tarball(tarball, expected_sha, error_cls=UpdateError) def _parse_sha256_sidecar(text: str) -> str: return _rc.parse_sha256_sidecar(text, error_cls=UpdateError) def _extract_tarball(tarball: Path, dest: Path) -> str: return _rc.extract_tarball(tarball, dest, error_cls=UpdateError) def _current_hostname() -> str: """Read the box's hostname from /etc/hostname, falling back to 'furtka'. Used to substitute the __FURTKA_HOSTNAME__ marker in the shipped Caddyfile so Caddy's `tls internal` sees a real name to issue a leaf cert for. """ try: name = _HOSTNAME_FILE.read_text().strip() except (FileNotFoundError, PermissionError, OSError): return "furtka" return name or "furtka" def _maybe_migrate_preserve_https() -> None: """26.14 → 26.15 migration: if the box already had the force-HTTPS redirect snippet on disk, that means the user explicitly opted into HTTPS under the old regime. Under the new opt-in regime, HTTPS also requires a separate listener snippet — write it here so the user's HTTPS doesn't silently break when the Caddyfile refresh removes the default hostname block. """ redirect_snippet = _CADDY_SNIPPET_DIR / "redirect.caddyfile" https_snippet = _CADDY_HTTPS_SNIPPET_DIR / "https.caddyfile" if not redirect_snippet.is_file() or https_snippet.is_file(): return hostname = _current_hostname() https_snippet.write_text( f"{hostname}.local, {hostname} {{\n\ttls internal\n\timport furtka_routes\n}}\n" ) def _refresh_caddyfile(source: Path) -> bool: """Copy the shipped Caddyfile to /etc/caddy/ iff it differs. Returns True if the file changed (so caddy needs more than a bare reload). Substitutes __FURTKA_HOSTNAME__ with the current hostname before comparing and writing — same rendering the webinstaller applies at install time, so a self-update lands byte-identical content when nothing else changed. """ if not source.is_file(): return False # Snippet dirs for the /api/furtka/https/force toggle. Pre-HTTPS # installs don't have them; ensure both so the Caddyfile's glob # imports can't trip an older Caddy on missing paths during the # first reload. furtka-https.d is new in 26.15-alpha — older boxes # upgrading across this version line won't have it on disk yet. _CADDY_SNIPPET_DIR.mkdir(mode=0o755, parents=True, exist_ok=True) _CADDY_HTTPS_SNIPPET_DIR.mkdir(mode=0o755, parents=True, exist_ok=True) # Migration: pre-26.15 Caddyfile always served :443 via tls internal, # so a box that had the "force HTTPS" redirect toggle ON relied on # HTTPS being there implicitly. After this Caddyfile refresh the # hostname block is gone, so the redirect would 301 to a dead :443. # Preserve intent by writing the HTTPS listener snippet too. _maybe_migrate_preserve_https() rendered = source.read_text().replace(_CADDYFILE_HOSTNAME_MARKER, _current_hostname()) if _CADDYFILE_LIVE.is_file() and rendered == _CADDYFILE_LIVE.read_text(): return False _CADDYFILE_LIVE.parent.mkdir(parents=True, exist_ok=True) _CADDYFILE_LIVE.write_text(rendered) return True def _link_new_units(unit_dir: Path) -> list[str]: """`systemctl link` any unit file in unit_dir that isn't already symlinked into /etc/systemd/system/. Returns the list of newly-linked unit names. Newly-linked `.timer` units are additionally `systemctl enable`d so that a self-update introducing a timer (e.g. 26.5 → 26.6 adding furtka-catalog-sync.timer) activates it automatically — the installer's enable list only applies to fresh installs. A linked-but-disabled timer never fires on its own, so without this step catalog sync would never happen on upgraded boxes. """ if not unit_dir.is_dir(): return [] linked = [] for unit_file in sorted(unit_dir.iterdir()): if unit_file.suffix not in (".service", ".timer"): continue target = _SYSTEMD_DIR / unit_file.name if target.exists() or target.is_symlink(): continue _run(["systemctl", "link", str(unit_file)]) if unit_file.suffix == ".timer": _run(["systemctl", "enable", unit_file.name]) linked.append(unit_file.name) return linked def write_state(stage: str, **extra) -> None: state_path().parent.mkdir(parents=True, exist_ok=True) tmp = state_path().with_suffix(".tmp") payload = {"stage": stage, "updated_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"), **extra} tmp.write_text(json.dumps(payload, indent=2)) tmp.replace(state_path()) def read_state() -> dict: try: return json.loads(state_path().read_text()) except (FileNotFoundError, json.JSONDecodeError): return {} def acquire_lock(): path = lock_path() path.parent.mkdir(parents=True, exist_ok=True) fh = path.open("w") try: fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB) except BlockingIOError as e: fh.close() raise UpdateError("another update is already in progress") from e return fh def _run(cmd: list[str]) -> None: proc = subprocess.run(cmd, capture_output=True, text=True, check=False) if proc.returncode != 0: raise UpdateError( f"{' '.join(cmd)} exited {proc.returncode}: {(proc.stderr or proc.stdout).strip()}" ) def _health_check(url: str, deadline_s: float = 30.0) -> bool: """Poll *url* until we get *any* response from the Python server. Treats any 2xx-4xx response as "server is up". A 401 on /api/apps after the 26.11-alpha auth-guard shipped is a perfectly valid signal that the new code imported + the socket is listening — rejecting the request is still "alive". Only 5xx or connection- level failures count as unhealthy. Rationale: pre-26.13 this function hit /api/apps and expected 200, which silently broke every upgrade across the auth boundary (26.10 → 26.11+) and auto-rolled back. Now we just need proof the new process came up. """ end = time.time() + deadline_s while time.time() < end: try: with urllib.request.urlopen(url, timeout=3) as resp: # Any 2xx/3xx → alive. urllib follows redirects by # default, so a 302 → /login resolves to /login's 200. if resp.status < 500: return True except urllib.error.HTTPError as e: # 4xx → server is up, just refused us (auth, bad request, # whatever). Counts as healthy for the "did it come back" # check. 5xx → genuinely broken, don't accept. if 400 <= e.code < 500: return True except urllib.error.URLError: # Connection refused / DNS / timeout → not up yet, retry. pass time.sleep(1) return False def prepare_update(check: UpdateCheck, download_dir: Path | None = None) -> tuple[Path, str]: """Download + verify the tarball. Returns (tarball_path, version).""" if not check.tarball_url or not check.sha256_url: raise UpdateError("release is missing tarball or sha256 asset") dl_dir = download_dir or (_STATE_DIR / "updates") dl_dir.mkdir(parents=True, exist_ok=True) tarball = dl_dir / f"furtka-{check.latest}.tar.gz" sha_file = dl_dir / f"furtka-{check.latest}.tar.gz.sha256" write_state("downloading", latest=check.latest) _download(check.tarball_url, tarball) _download(check.sha256_url, sha_file) write_state("verifying", latest=check.latest) expected = _parse_sha256_sidecar(sha_file.read_text()) verify_tarball(tarball, expected) return tarball, check.latest def apply_update(tarball: Path, version: str) -> None: """Extract, flip the symlink, restart services. Raises on failure. Caller is expected to have verified the sha256 already — but we re-check here against the on-disk file anyway (TOCTOU). """ current = current_symlink() versions = versions_dir() versions.mkdir(parents=True, exist_ok=True) write_state("extracting", latest=version) staging = versions / f"_staging-{version}" if staging.exists(): shutil.rmtree(staging) actual_version = _extract_tarball(tarball, staging) if actual_version != version: shutil.rmtree(staging, ignore_errors=True) raise UpdateError(f"tarball VERSION ({actual_version}) doesn't match expected ({version})") target = versions / version if target.exists(): shutil.rmtree(target) staging.rename(target) # mktemp-style 700 default on the staging dir carries through the # rename; Caddy (non-root) needs 755 to traverse /opt/furtka/current/. target.chmod(0o755) write_state("swapping", latest=version) previous = None if current.is_symlink(): previous = os.readlink(current) current.unlink() try: current.symlink_to(target) except OSError as e: if previous: current.symlink_to(previous) raise UpdateError(f"symlink swap failed: {e}") from e write_state("restarting", latest=version) try: # Copy new Caddyfile into /etc/caddy/ if the release changed routes. # reload always runs afterwards to flush the file-handle cache so the # symlink flip takes effect even when Caddyfile itself didn't change. _refresh_caddyfile(target / "assets" / "Caddyfile") _run(["systemctl", "reload", "caddy"]) # Pick up any new systemd unit files added by this release. Existing # linked units don't need relinking — daemon-reload rereads them. _link_new_units(target / "assets" / "systemd") _run(["systemctl", "daemon-reload"]) _run(["systemctl", "try-restart", "furtka-reconcile.service"]) _run(["systemctl", "restart", "furtka-api.service"]) except UpdateError as e: _rollback(previous, version, f"service restart failed: {e}") raise write_state("verifying", latest=version) ok = _health_check("http://127.0.0.1:7000/api/apps", deadline_s=30.0) if not ok: _rollback(previous, version, "health check failed after restart") raise UpdateError("new version failed health check — rolled back") write_state("done", version=version) def _rollback(previous_target: str | None, failed_version: str, reason: str) -> None: current = current_symlink() if previous_target: if current.is_symlink(): current.unlink() current.symlink_to(previous_target) # Best-effort restart on the previous target — if it fails too the # box is in a hard state, but we can only surface the reason. subprocess.run(["systemctl", "restart", "furtka-api.service"], check=False) write_state( "rolled_back", failed_version=failed_version, restored_to=previous_target or "(none)", reason=reason, ) def run_update() -> UpdateCheck: """End-to-end user-initiated update. Blocks on the lock. Returns the UpdateCheck so callers can see what happened. Re-raises UpdateError on any failure; the state file records the stage. """ with acquire_lock(): check = check_update() if not check.update_available: write_state("done", version=check.current, note="already up to date") return check tarball, version = prepare_update(check) apply_update(tarball, version) return check def rollback() -> str: """Roll back to the most recent non-current version slot. Returns the version we rolled back to, or raises if nothing to roll back to.""" current = current_symlink() if not current.is_symlink(): raise UpdateError("/opt/furtka/current is not a symlink — can't roll back") current_target = Path(os.readlink(current)).name slots = sorted( (p.name for p in versions_dir().iterdir() if p.is_dir() and not p.name.startswith("_")), key=_version_tuple, reverse=True, ) candidates = [s for s in slots if s != current_target] if not candidates: raise UpdateError("no other version slots available to roll back to") target_name = candidates[0] target = versions_dir() / target_name current.unlink() current.symlink_to(target) subprocess.run(["systemctl", "daemon-reload"], check=False) subprocess.run(["systemctl", "restart", "furtka-api.service"], check=False) write_state("rolled_back_manual", restored_to=target_name) return target_name