furtka/furtka/updater.py
Daniel Maksymilian Syrnicki 8fbe67ffb9
Some checks failed
Build ISO / build-iso (push) Waiting to run
CI / lint (push) Failing after 2m11s
CI / test (push) Successful in 2m8s
CI / validate-json (push) Successful in 55s
CI / markdown-links (push) Successful in 25s
Deploy site / deploy (push) Successful in 8s
fix(https): restore TLS handshake — name hostname + correct PKI path
Closes #10. Two linked bugs in 26.4-alpha's Phase 1 HTTPS made the
force-HTTPS toggle fatal: every SNI handshake on :443 died with
SSL_ERROR_INTERNAL_ERROR_ALERT, so the toggle redirected users from
working HTTP to broken HTTPS.

Root cause 1: bare `:443 { tls internal }` gives Caddy no hostname to
issue a leaf cert for, so /var/lib/caddy/certificates/ stayed empty and
Caddy sent TLS `internal_error` on every handshake. Fix: the :443 block
is now `__FURTKA_HOSTNAME__.local, __FURTKA_HOSTNAME__ { tls internal }`,
with the marker substituted by webinstaller/app.py at install time and
by furtka.updater._refresh_caddyfile on self-update (reads /etc/hostname,
falls back to "furtka"). `auto_https disable_redirects` keeps Caddy's
built-in redirect out of the way of the /settings toggle.

Root cause 2: furtka/https.py and the /rootCA.crt handler both referenced
/var/lib/caddy/.local/share/caddy/pki/authorities/local/ — a path that
doesn't exist. caddy.service sets XDG_DATA_HOME=/var/lib, so Caddy's
storage is /var/lib/caddy/ directly. Fix: both paths corrected.

Verified on the 192.168.178.110 smoke VM by swapping the Caddyfile in,
reloading, handshaking, restoring: TLS 1.3 handshake succeeds, leaf cert
issued under /var/lib/caddy/certificates/local/, /rootCA.crt returns 200.

Tests: new cases assert the Caddyfile ships the hostname placeholder,
the webinstaller substitutes it, _refresh_caddyfile re-substitutes from
/etc/hostname on update, and the asset sets auto_https disable_redirects.
Unit tests still stub the Caddy reload — the real handshake regression
needs a smoke-VM integration test (follow-up, separate from this fix).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 11:39:48 +02:00

457 lines
17 KiB
Python

"""Furtka self-update logic.
The runtime layout (see also ``webinstaller/app.py`` slice 1b):
/opt/furtka/
├── versions/
│ ├── 26.0-alpha/ first install extracted here
│ └── 26.1-alpha/ next version, after one update
└── current -> versions/26.1-alpha
This module handles the transition between versions. Flow:
1. ``check_update()`` queries the Forgejo releases API for the latest tag.
2. ``prepare_update()`` downloads the tarball + sha256 sidecar and verifies
the tarball against the hash in the sidecar.
3. ``apply_update()`` extracts the tarball into
``/opt/furtka/versions/_staging-<ver>``, renames it to ``versions/<ver>/``
on successful extract, flips ``/opt/furtka/current``, reloads caddy, and
restarts furtka-reconcile + furtka-api. Then it health-checks the API. On
failure it flips the symlink back.
The full pipeline is wrapped in ``run_update()`` for the CLI, which also
writes stage-by-stage progress to ``/var/lib/furtka/update-state.json`` so
the web UI can poll progress without touching the (restarting) API.
Paths can be overridden via the ``FURTKA_ROOT`` env var so tests can point
the updater at a tmpdir.
"""
from __future__ import annotations
import fcntl
import hashlib
import json
import os
import shutil
import subprocess
import tarfile
import time
import urllib.error
import urllib.request
from dataclasses import dataclass
from pathlib import Path
# Forgejo instance and repository slug used to build the releases API URL.
# Both can be overridden through the environment.
FORGEJO_HOST = os.environ.get("FURTKA_FORGEJO_HOST", "forgejo.sourcegate.online")
FORGEJO_REPO = os.environ.get("FURTKA_FORGEJO_REPO", "daniel/furtka")
# Install root that holds ``versions/`` and the ``current`` symlink.
_FURTKA_ROOT = Path(os.environ.get("FURTKA_ROOT", "/opt/furtka"))
# Directory for the update state file and the default download location.
_STATE_DIR = Path(os.environ.get("FURTKA_STATE_DIR", "/var/lib/furtka"))
# Live Caddyfile that _refresh_caddyfile() compares against and rewrites.
_CADDYFILE_LIVE = Path(os.environ.get("FURTKA_CADDYFILE_PATH", "/etc/caddy/Caddyfile"))
# Snippet directory created by _refresh_caddyfile(); defaults to ``furtka.d``
# next to the live Caddyfile.
_CADDY_SNIPPET_DIR = Path(
os.environ.get("FURTKA_CADDY_SNIPPET_DIR", str(_CADDYFILE_LIVE.parent / "furtka.d"))
)
# Where _link_new_units() looks for already-installed unit files.
_SYSTEMD_DIR = Path(os.environ.get("FURTKA_SYSTEMD_DIR", "/etc/systemd/system"))
# File _current_hostname() reads the machine's hostname from.
_HOSTNAME_FILE = Path(os.environ.get("FURTKA_HOSTNAME_FILE", "/etc/hostname"))
# Placeholder in the shipped Caddyfile that is replaced by the hostname.
_CADDYFILE_HOSTNAME_MARKER = "__FURTKA_HOSTNAME__"
class UpdateError(RuntimeError):
    """Raised for any update-flow failure that must be reported to the caller."""
def furtka_root() -> Path:
    """Return the install root (``FURTKA_ROOT`` override, else ``/opt/furtka``)."""
    root = _FURTKA_ROOT
    return root
def versions_dir() -> Path:
    """Return the directory under the install root that holds one slot per version."""
    return furtka_root().joinpath("versions")
def current_symlink() -> Path:
    """Return the path of the ``current`` link under the install root."""
    return furtka_root().joinpath("current")
def state_path() -> Path:
    """Return the path of the JSON file that records update progress."""
    return _STATE_DIR.joinpath("update-state.json")
def lock_path() -> Path:
    """Return the update lock file path.

    Read from ``FURTKA_LOCK_PATH`` on every call, defaulting to
    ``/run/furtka/update.lock``.
    """
    configured = os.environ.get("FURTKA_LOCK_PATH", "/run/furtka/update.lock")
    return Path(configured)
@dataclass(frozen=True)
class UpdateCheck:
    """Immutable result of comparing the installed version with the newest release."""

    # Version string of the running install.
    current: str
    # Tag name of the newest published release.
    latest: str
    # True when ``latest`` differs from and sorts after ``current``.
    update_available: bool
    # Download URL of the release tarball asset, or None if the release has none.
    tarball_url: str | None
    # Download URL of the ``.tar.gz.sha256`` sidecar asset, or None if missing.
    sha256_url: str | None
def read_current_version() -> str:
    """Return the contents of ``<current>/VERSION``, or ``"dev"``.

    ``"dev"`` is returned both when the file cannot be read and when it is
    empty after stripping whitespace.
    """
    try:
        text = (current_symlink() / "VERSION").read_text()
    except OSError:
        # FileNotFoundError and NotADirectoryError are OSError subclasses,
        # so this single clause covers every case the file can't be read.
        return "dev"
    stripped = text.strip()
    return stripped if stripped else "dev"
def _forgejo_api(path: str) -> dict | list:
    """GET ``path`` below the configured repository's Forgejo API and decode it.

    ``path`` is appended verbatim to
    ``https://<FORGEJO_HOST>/api/v1/repos/<FORGEJO_REPO>``. Returns the
    decoded JSON document (an object or, for listings, an array).

    Raises ``UpdateError`` for any transport or decoding failure.
    """
    import http.client

    url = f"https://{FORGEJO_HOST}/api/v1/repos/{FORGEJO_REPO}{path}"
    req = urllib.request.Request(url, headers={"Accept": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            body = resp.read()
    except (OSError, http.client.HTTPException) as e:
        # URLError/HTTPError are OSError subclasses. Errors raised while the
        # response is being read (timeouts, resets, truncated bodies) are
        # NOT wrapped in URLError by urllib, so catch the wider families.
        raise UpdateError(f"forgejo api {url}: {e}") from e
    try:
        return json.loads(body)
    except ValueError as e:
        # JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        raise UpdateError(f"forgejo api {url}: {e}") from e
def _version_tuple(v: str) -> tuple:
    """Build a sort key for CalVer tags such as ``26.1-alpha`` or ``26.2``.

    The key is ``(year, release, stage_rank, suffix)``. Stage ranks are
    alpha=0, beta=1, rc=2 and 3 for a tag without suffix, so a stable release
    sorts after its own pre-releases. An unrecognised suffix ranks -1. A tag
    whose ``<year>.<release>`` head is not two integers yields
    ``(-1, -1, -1, v)``.
    """
    unparseable = (-1, -1, -1, v)
    numeric_part, _sep, stage = v.partition("-")
    pieces = numeric_part.split(".", 1)
    if len(pieces) != 2:
        return unparseable
    try:
        year, release = int(pieces[0]), int(pieces[1])
    except ValueError:
        return unparseable
    if stage == "":
        return (year, release, 3, "")
    known_stages = (("alpha", 0), ("beta", 1), ("rc", 2))
    rank = next((r for prefix, r in known_stages if stage.startswith(prefix)), -1)
    return (year, release, rank, stage)
def check_update() -> UpdateCheck:
    """Compare the installed version with the newest published release.

    Forgejo's /releases/latest endpoint skips anything marked as a
    pre-release, so while every tag carries an alpha/beta suffix that
    endpoint always 404s. The paginated /releases list is queried instead and
    its first entry is used. Forgejo returns releases newest-first, including
    pre-releases.

    Raises ``UpdateError`` when no release exists or its tag name is empty.
    """
    installed = read_current_version()
    listing = _forgejo_api("/releases?limit=1")
    if not (isinstance(listing, list) and listing):
        raise UpdateError("no releases published yet")
    newest = listing[0]
    tag = str(newest.get("tag_name") or "").strip()
    if not tag:
        raise UpdateError("latest release has empty tag_name")

    tarball_url = None
    sha256_url = None
    for asset in newest.get("assets") or []:
        asset_name = asset.get("name") or ""
        download_url = asset.get("browser_download_url") or ""
        is_tarball = asset_name.endswith(".tar.gz") and "furtka-" in asset_name
        if is_tarball:
            tarball_url = download_url
            continue
        if asset_name.endswith(".tar.gz.sha256"):
            sha256_url = download_url

    is_newer = tag != installed and _version_tuple(tag) > _version_tuple(installed)
    return UpdateCheck(
        current=installed,
        latest=tag,
        update_available=is_newer,
        tarball_url=tarball_url,
        sha256_url=sha256_url,
    )
def _download(url: str, dest: Path) -> None:
    """Stream ``url`` into ``dest``, creating parent directories as needed.

    Raises ``UpdateError`` if the request, the transfer or the local write
    fails. A partially written ``dest`` is removed on failure so a truncated
    file is never left behind for a later step to pick up.
    """
    import http.client

    dest.parent.mkdir(parents=True, exist_ok=True)
    req = urllib.request.Request(url)
    try:
        with urllib.request.urlopen(req, timeout=60) as resp, dest.open("wb") as f:
            shutil.copyfileobj(resp, f)
    except (OSError, http.client.HTTPException) as e:
        # URLError is an OSError subclass. Mid-transfer failures (timeouts,
        # connection resets, short reads) are raised unwrapped by the
        # response object, hence the wider catch.
        try:
            dest.unlink()
        except OSError:
            # Best-effort cleanup: the file may never have been created.
            pass
        raise UpdateError(f"download {url}: {e}") from e
def _sha256_of(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``, read in 1 MiB pieces."""
    digest = hashlib.sha256()
    piece_size = 1024 * 1024
    with path.open("rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()
def verify_tarball(tarball: Path, expected_sha: str) -> None:
    """Raise ``UpdateError`` unless ``tarball`` hashes to ``expected_sha``.

    The comparison is an exact string match against the hex digest.
    """
    actual = _sha256_of(tarball)
    if actual == expected_sha:
        return
    raise UpdateError(f"sha256 mismatch: expected {expected_sha}, got {actual}")
def _parse_sha256_sidecar(text: str) -> str:
    """Extract the digest from the first non-empty line of a sha256 sidecar.

    Understands both the GNU layout (``<hash>  <file>``) and the BSD-tagged
    layout (``SHA256 (<file>) = <hash>``). A recognised 64-character hex
    digest is returned lower-cased so it compares equal to
    ``hashlib``'s ``hexdigest()``. If neither the first nor the last token
    looks like a SHA-256 digest, the first token is returned unchanged (the
    historical behaviour) and the later comparison decides.

    Raises ``UpdateError`` when the sidecar contains no text at all.
    """
    line = text.strip().split("\n", 1)[0].strip()
    if not line:
        raise UpdateError("empty sha256 sidecar")
    tokens = line.split()
    hex_digits = set("0123456789abcdef")
    # GNU puts the digest first, the BSD tag format puts it last.
    for token in (tokens[0], tokens[-1]):
        candidate = token.lower()
        if len(candidate) == 64 and set(candidate) <= hex_digits:
            return candidate
    return tokens[0]
def _member_is_unsafe(member: tarfile.TarInfo) -> bool:
    """Return True for entries the unfiltered extraction fallback must refuse."""
    if member.isdev():
        # Character/block devices and FIFOs have no place in a release.
        return True
    if member.issym() or member.islnk():
        if os.path.isabs(member.linkname):
            return True
        # Symlink targets resolve relative to the link's own directory;
        # hard-link targets are paths relative to the archive root.
        base = os.path.dirname(member.name) if member.issym() else ""
        resolved = os.path.normpath(os.path.join(base, member.linkname))
        return resolved == ".." or resolved.startswith("../")
    return False


def _extract_tarball(tarball: Path, dest: Path) -> str:
    """Extract ``tarball`` into ``dest`` and return the VERSION read from its root.

    Raises ``UpdateError`` when an entry would land outside ``dest``, when
    the archive cannot be read or extracted, or when the extracted tree has no
    ``VERSION`` file at its root.
    """
    dest.mkdir(parents=True, exist_ok=True)
    # Documented feature probe for extraction filters. Unlike catching
    # TypeError from extractall(), it cannot be confused with a TypeError
    # raised for some unrelated reason.
    has_data_filter = hasattr(tarfile, "data_filter")
    try:
        with tarfile.open(tarball, "r:gz") as tf:
            # Defensive: refuse entries whose own path would escape dest.
            for member in tf.getmembers():
                if member.name.startswith(("/", "..")) or ".." in Path(member.name).parts:
                    raise UpdateError(f"refusing tarball entry {member.name!r}")
                # Without the "data" filter nothing else stops link-escape
                # or device-node entries, so check those by hand.
                if not has_data_filter and _member_is_unsafe(member):
                    raise UpdateError(f"refusing tarball entry {member.name!r}")
            if has_data_filter:
                tf.extractall(dest, filter="data")
            else:
                tf.extractall(dest)
    except (tarfile.TarError, OSError) as e:
        # Covers corrupt archives, entries rejected by the filter and
        # filesystem errors, so callers only ever see UpdateError.
        raise UpdateError(f"extracting {tarball}: {e}") from e
    version_file = dest / "VERSION"
    if not version_file.is_file():
        raise UpdateError("tarball has no VERSION file at root")
    return version_file.read_text().strip()
def _current_hostname() -> str:
    """Return the machine's hostname from the hostname file, or ``"furtka"``.

    Used to substitute the ``__FURTKA_HOSTNAME__`` marker in the shipped
    Caddyfile so Caddy's ``tls internal`` sees a real name to issue a leaf
    cert for.

    Blank lines and ``#`` comment lines are skipped, and only the first
    whitespace-delimited token of the first remaining line is used, so stray
    content can never be pasted into the Caddyfile. Falls back to
    ``"furtka"`` when the file is unreadable, undecodable or holds no name.
    """
    try:
        raw = _HOSTNAME_FILE.read_text()
    except (OSError, UnicodeDecodeError):
        return "furtka"
    for line in raw.splitlines():
        candidate = line.strip()
        if not candidate or candidate.startswith("#"):
            continue
        return candidate.split()[0]
    return "furtka"
def _refresh_caddyfile(source: Path) -> bool:
    """Install the shipped Caddyfile as the live one when its content differs.

    The ``__FURTKA_HOSTNAME__`` marker in ``source`` is replaced with the
    current hostname before comparing and writing, so an update whose
    rendered content equals the live file writes nothing.

    Returns True if the live file was (re)written, False if ``source`` is
    missing or the rendered content already matches.
    """
    if not source.is_file():
        return False
    # The snippet dir is created up front; installs that predate it would
    # otherwise lack the directory on the first reload after the update.
    _CADDY_SNIPPET_DIR.mkdir(mode=0o755, parents=True, exist_ok=True)
    template = source.read_text()
    rendered = template.replace(_CADDYFILE_HOSTNAME_MARKER, _current_hostname())
    live = _CADDYFILE_LIVE
    already_current = live.is_file() and live.read_text() == rendered
    if already_current:
        return False
    live.parent.mkdir(parents=True, exist_ok=True)
    live.write_text(rendered)
    return True
def _link_new_units(unit_dir: Path) -> list[str]:
    """Run ``systemctl link`` for unit files not yet present in the systemd dir.

    Only ``.service`` and ``.timer`` files directly inside ``unit_dir`` are
    considered, in sorted order. A unit counts as present when a file or a
    symlink (even a dangling one) of the same name exists in the systemd
    directory. Returns the names of the units linked by this call.
    """
    newly_linked = []
    if not unit_dir.is_dir():
        return newly_linked
    unit_files = (
        entry for entry in sorted(unit_dir.iterdir()) if entry.suffix in (".service", ".timer")
    )
    for unit in unit_files:
        installed = _SYSTEMD_DIR / unit.name
        already_present = installed.exists() or installed.is_symlink()
        if not already_present:
            _run(["systemctl", "link", str(unit)])
            newly_linked.append(unit.name)
    return newly_linked
def write_state(stage: str, **extra) -> None:
    """Persist the current update stage plus any extra fields as JSON.

    The payload is written to a ``.tmp`` sibling first and then moved over
    the state file, so a reader never sees a half-written document.
    """
    destination = state_path()
    destination.parent.mkdir(parents=True, exist_ok=True)
    record = {"stage": stage, "updated_at": time.strftime("%Y-%m-%dT%H:%M:%S%z")}
    record.update(extra)
    scratch = destination.with_suffix(".tmp")
    scratch.write_text(json.dumps(record, indent=2))
    scratch.replace(destination)
def read_state() -> dict:
    """Return the decoded state file, or ``{}`` if it is missing or not valid JSON."""
    try:
        raw = state_path().read_text()
        return json.loads(raw)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}
def acquire_lock():
    """Take the exclusive update lock without waiting and return the open file.

    The lock is held for as long as the returned file object stays open;
    closing it (for example by leaving a ``with`` block) releases the lock.

    Raises ``UpdateError`` if another process already holds the lock. Any
    other failure from ``flock`` is re-raised after the file is closed, so the
    descriptor never leaks.
    """
    path = lock_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    fh = path.open("w")
    try:
        fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError as e:
        fh.close()
        raise UpdateError("another update is already in progress") from e
    except BaseException:
        # Don't leak the descriptor on unexpected flock failures.
        fh.close()
        raise
    return fh
def _run(cmd: list[str]) -> None:
    """Run ``cmd`` and raise ``UpdateError`` unless it exits with status 0.

    Output is captured; on a non-zero exit the error message carries stderr,
    or stdout when stderr is empty. A command that cannot be started at all
    (missing or non-executable binary) is reported as ``UpdateError`` too, so
    callers that roll back on ``UpdateError`` also cover that case.
    """
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
    except OSError as e:
        raise UpdateError(f"{' '.join(cmd)} could not be started: {e}") from e
    if proc.returncode != 0:
        raise UpdateError(
            f"{' '.join(cmd)} exited {proc.returncode}: {(proc.stderr or proc.stdout).strip()}"
        )
def _health_check(url: str, deadline_s: float = 30.0) -> bool:
    """Poll ``url`` about once per second until it answers with HTTP 200.

    Returns True on the first 200 response and False once ``deadline_s``
    seconds have passed without one. Every failed attempt is retried,
    including connections the restarting service drops or leaves half-open.
    """
    import http.client

    end = time.time() + deadline_s
    while time.time() < end:
        try:
            with urllib.request.urlopen(url, timeout=3) as resp:
                if resp.status == 200:
                    return True
        except (OSError, http.client.HTTPException):
            # URLError/HTTPError are OSError subclasses. A service that is
            # mid-restart can also produce resets, timeouts or a dropped
            # connection while the response is read; urllib raises those
            # unwrapped, and they must count as "not up yet".
            pass
        time.sleep(1)
    return False
def prepare_update(check: UpdateCheck, download_dir: Path | None = None) -> tuple[Path, str]:
    """Download the release tarball and its sha256 sidecar, then verify them.

    Files land in ``download_dir``, or in ``updates/`` under the state
    directory when none is given. Progress is recorded through
    ``write_state`` ("downloading", then "verifying").

    Returns ``(tarball_path, version)``. Raises ``UpdateError`` if the
    release lacks either asset, a download fails, or the hash does not match.
    """
    if not (check.tarball_url and check.sha256_url):
        raise UpdateError("release is missing tarball or sha256 asset")
    workdir = download_dir or (_STATE_DIR / "updates")
    workdir.mkdir(parents=True, exist_ok=True)
    archive = workdir / f"furtka-{check.latest}.tar.gz"
    sidecar = workdir / f"furtka-{check.latest}.tar.gz.sha256"

    write_state("downloading", latest=check.latest)
    _download(check.tarball_url, archive)
    _download(check.sha256_url, sidecar)

    write_state("verifying", latest=check.latest)
    wanted_hash = _parse_sha256_sidecar(sidecar.read_text())
    verify_tarball(archive, wanted_hash)
    return archive, check.latest
def apply_update(tarball: Path, version: str) -> None:
    """Extract ``tarball``, flip the ``current`` symlink and restart services.

    The tarball is NOT re-hashed here: the caller must already have verified
    it (``prepare_update`` does). The only content check performed is that
    the VERSION file inside the tarball equals ``version``.

    Stages recorded through ``write_state``: "extracting", "swapping",
    "restarting", "verifying", "done". If a restart step or the health check
    fails, the symlink is pointed back at the previous target.

    Raises ``UpdateError`` on any failure.
    """
    current = current_symlink()
    versions = versions_dir()
    versions.mkdir(parents=True, exist_ok=True)

    write_state("extracting", latest=version)
    staging = versions / f"_staging-{version}"
    if staging.exists():
        shutil.rmtree(staging)
    actual_version = _extract_tarball(tarball, staging)
    if actual_version != version:
        shutil.rmtree(staging, ignore_errors=True)
        raise UpdateError(f"tarball VERSION ({actual_version}) doesn't match expected ({version})")
    target = versions / version
    if target.exists():
        shutil.rmtree(target)
    staging.rename(target)
    # The staging dir's mode carries through the rename; Caddy (non-root)
    # needs 755 to traverse /opt/furtka/current/.
    target.chmod(0o755)

    write_state("swapping", latest=version)
    previous = os.readlink(current) if current.is_symlink() else None
    # Build the new link under a temporary name and rename it over
    # ``current``. The rename is atomic, so there is never a moment with no
    # ``current`` at all, and a failure leaves the old link untouched.
    pending = current.with_name(current.name + ".new")
    try:
        if pending.is_symlink() or pending.exists():
            pending.unlink()
        pending.symlink_to(target)
        os.replace(pending, current)
    except OSError as e:
        try:
            pending.unlink()
        except OSError:
            # Best-effort cleanup: the temporary link may not exist.
            pass
        raise UpdateError(f"symlink swap failed: {e}") from e

    write_state("restarting", latest=version)
    try:
        # Copy new Caddyfile into /etc/caddy/ if the release changed routes.
        # reload always runs afterwards so the symlink flip takes effect
        # even when the Caddyfile itself didn't change.
        _refresh_caddyfile(target / "assets" / "Caddyfile")
        _run(["systemctl", "reload", "caddy"])
        # Pick up any new systemd unit files added by this release. Existing
        # linked units don't need relinking: daemon-reload rereads them.
        _link_new_units(target / "assets" / "systemd")
        _run(["systemctl", "daemon-reload"])
        _run(["systemctl", "try-restart", "furtka-reconcile.service"])
        _run(["systemctl", "restart", "furtka-api.service"])
    except (UpdateError, OSError) as e:
        # OSError covers file-level failures while refreshing the Caddyfile;
        # those must trigger the rollback just like a failed systemctl call.
        _rollback(previous, version, f"service restart failed: {e}")
        if isinstance(e, UpdateError):
            raise
        raise UpdateError(f"service restart failed: {e}") from e

    write_state("verifying", latest=version)
    ok = _health_check("http://127.0.0.1:7000/api/apps", deadline_s=30.0)
    if not ok:
        _rollback(previous, version, "health check failed after restart")
        raise UpdateError("new version failed health check — rolled back")

    write_state("done", version=version)
def _rollback(previous_target: str | None, failed_version: str, reason: str) -> None:
    """Point ``current`` back at ``previous_target`` and record the rollback.

    With no previous target the symlink is left alone and only the
    "rolled_back" state (with ``restored_to`` set to "(none)") is written.
    """
    link = current_symlink()
    if previous_target:
        if link.is_symlink():
            link.unlink()
        link.symlink_to(previous_target)
        # Best-effort restart on the previous target. If that fails as well
        # the box is in a hard state, and all we can do is surface the reason.
        subprocess.run(["systemctl", "restart", "furtka-api.service"], check=False)
    restored = previous_target or "(none)"
    write_state(
        "rolled_back",
        failed_version=failed_version,
        restored_to=restored,
        reason=reason,
    )
def run_update() -> UpdateCheck:
    """End-to-end user-initiated update, guarded by the update lock.

    The lock is taken without waiting: if another update holds it, this
    fails immediately with ``UpdateError``.

    Returns the UpdateCheck so callers can see what happened. ``UpdateError``
    from any step propagates to the caller; the state file records the stage
    that was reached.
    """
    with acquire_lock():
        check = check_update()
        if check.update_available:
            tarball, version = prepare_update(check)
            apply_update(tarball, version)
        else:
            write_state("done", version=check.current, note="already up to date")
        return check
def rollback() -> str:
    """Switch ``current`` to the highest-versioned slot other than the active one.

    Slots are the directories under the versions dir whose names don't start
    with ``_``, ordered by ``_version_tuple``. Afterwards systemd is reloaded
    and furtka-api restarted on a best-effort basis, and the
    "rolled_back_manual" state is written.

    Returns the name of the slot now active. Raises ``UpdateError`` if
    ``current`` is not a symlink or no other slot exists.
    """
    link = current_symlink()
    if not link.is_symlink():
        raise UpdateError("/opt/furtka/current is not a symlink — can't roll back")
    active = Path(os.readlink(link)).name
    slot_names = [
        entry.name
        for entry in versions_dir().iterdir()
        if entry.is_dir() and not entry.name.startswith("_")
    ]
    slot_names.sort(key=_version_tuple, reverse=True)
    # NOTE(review): the pick is "highest version that isn't active", which
    # may be NEWER than the active one. Confirm that this is intended.
    target_name = next((name for name in slot_names if name != active), None)
    if target_name is None:
        raise UpdateError("no other version slots available to roll back to")
    destination = versions_dir() / target_name
    link.unlink()
    link.symlink_to(destination)
    for unit_cmd in (
        ["systemctl", "daemon-reload"],
        ["systemctl", "restart", "furtka-api.service"],
    ):
        subprocess.run(unit_cmd, check=False)
    write_state("rolled_back_manual", restored_to=target_name)
    return target_name