diff --git a/CHANGELOG.md b/CHANGELOG.md index ed6b137..90da56b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,42 @@ This project uses calendar versioning: `YY.N-stage` (e.g. `26.0-alpha` = 2026, r ## [Unreleased] +## [26.13-alpha] - 2026-04-21 + +### Fixed + +- **Upgrade path from pre-auth releases actually works.** 26.11-alpha + introduced `from werkzeug.security import ...` in `furtka/auth.py`, + but werkzeug isn't installed on the target system — core runs as + system Python with stdlib only, and `flask>=3.0` in `pyproject.toml` + is never pip-installed on the box. Fresh boxes from the 26.11/26.12 + ISO without a manually-installed werkzeug crashed on import; boxes + upgrading from pre-26.11 got double-broken by that plus the health + check below. Replaced the werkzeug dependency with a stdlib-only + `furtka/passwd.py` that uses `hashlib.pbkdf2_hmac` for new hashes + and parses werkzeug's `scrypt:N:r:p$salt$hex` format for backward + compatibility — existing `users.json` files created on the rare + boxes that did have werkzeug keep working after this upgrade, no + re-setup needed. `from werkzeug.security import ...` is gone from + the import chain entirely; `pyproject.toml`'s flask dep stays only + for the live-ISO webinstaller. +- **Self-update no longer auto-rolls-back when crossing the auth + boundary.** `updater._health_check` pinged `/api/apps` and demanded + a 200, which meant every 26.10 → 26.11+ upgrade hit the post-restart + check, got a 401 (auth guard), and treated that as "server dead" + → rollback. Now any 2xx–4xx response counts as "server alive"; only + connection-level failures or 5xx fail the check. 5xx still triggers + rollback because that means the new process is up but broken. 
+- **Install lock closes its race window.** `POST /api/apps/install` + used to release the fcntl lock immediately after the sync + pre-validation so the systemd-run child could re-acquire it — + leaving a tiny gap where a second POST could slip in, pass the lock + check, and return 202. Both child processes would start, one would + win the in-child lock, the other would die silently. Now the API + also reads `install-state.json` and refuses with 409 if the stage + is non-terminal (`pulling_image`, `creating_volumes`, + `starting_container`). The fcntl lock stays as belt-and-suspenders. + ## [26.12-alpha] - 2026-04-21 ### Changed @@ -243,7 +279,8 @@ First tagged snapshot. Pre-alpha — the installer does not yet boot, but the de - **Containers:** Docker + Compose - **License:** AGPL-3.0 -[Unreleased]: https://forgejo.sourcegate.online/daniel/furtka/compare/26.12-alpha...HEAD +[Unreleased]: https://forgejo.sourcegate.online/daniel/furtka/compare/26.13-alpha...HEAD +[26.13-alpha]: https://forgejo.sourcegate.online/daniel/furtka/releases/tag/26.13-alpha [26.12-alpha]: https://forgejo.sourcegate.online/daniel/furtka/releases/tag/26.12-alpha [26.11-alpha]: https://forgejo.sourcegate.online/daniel/furtka/releases/tag/26.11-alpha [26.10-alpha]: https://forgejo.sourcegate.online/daniel/furtka/releases/tag/26.10-alpha diff --git a/furtka/api.py b/furtka/api.py index 9095754..7f7c479 100644 --- a/furtka/api.py +++ b/furtka/api.py @@ -692,6 +692,9 @@ def _do_get_settings(name): } +_INSTALL_TERMINAL_STAGES = frozenset({"done", "error"}) + + def _do_install(name, settings=None): """Kick off an app install. Synchronous sync-phase + async docker-phase. @@ -703,6 +706,22 @@ def _do_install(name, settings=None): """ import subprocess + # Reject if the state file reports a non-terminal install. The + # fcntl lock below catches the same race, but only *after* the API + # releases it to let the systemd-run child grab it — a competing + # POST can sneak in during that tiny window. 
Reading the state + # first closes that gap: as long as a previous install hasn't + # written "done" or "error", we refuse. + current_state = install_runner.read_state() + current_stage = current_state.get("stage", "") if isinstance(current_state, dict) else "" + if current_stage and current_stage not in _INSTALL_TERMINAL_STAGES: + return 409, { + "error": ( + f"another install is in progress ({current_state.get('app', '?')}" + f" at {current_stage})" + ) + } + # Fast-fail if another install is already in flight. Lock lives under # /run/ so a previous reboot clears it automatically. try: diff --git a/furtka/auth.py b/furtka/auth.py index 526760a..b14818b 100644 --- a/furtka/auth.py +++ b/furtka/auth.py @@ -1,15 +1,20 @@ """Login-guard primitives for the Furtka UI. -One admin, one password. Passwords are PBKDF2-hashed via werkzeug (already -pulled in by the flask runtime dep), stored in /var/lib/furtka/users.json -with mode 0600. Sessions live in memory — a systemctl restart logs -everyone out again, which is fine for an alpha single-user box. +One admin, one password. Passwords are PBKDF2-SHA256 hashed via +``furtka.passwd`` (stdlib-only — hashlib.pbkdf2_hmac / hashlib.scrypt), +stored in /var/lib/furtka/users.json with mode 0600. Sessions live in +memory — a systemctl restart logs everyone out again, which is fine +for an alpha single-user box. -On upgrade from 26.10-alpha the users.json file does not exist yet; the -api's GET /login detects this via `setup_needed()` and renders a first- -run form that POSTs to /login as if it were a setup submit. Fresh installs -get the file pre-populated by the webinstaller so the setup step is -skipped. +On upgrade from pre-auth Furtka the users.json file does not exist +yet; the api's GET /login detects this via ``setup_needed()`` and +renders a first-run form that POSTs to /login as if it were a setup +submit. Fresh installs get the file pre-populated by the webinstaller +so the setup step is skipped. 
+ +Hash format is compatible with werkzeug.security — 26.11 / 26.12 boxes +that happened to have werkzeug installed can carry their users.json +forward without re-setup; see ``furtka.passwd`` for the scrypt reader. """ from __future__ import annotations @@ -20,8 +25,8 @@ import threading from dataclasses import dataclass from datetime import UTC, datetime, timedelta -from werkzeug.security import check_password_hash, generate_password_hash - +from furtka.passwd import hash_password as _hash_password +from furtka.passwd import verify_password as _verify_password from furtka.paths import users_file COOKIE_NAME = "furtka_session" @@ -29,13 +34,13 @@ COOKIE_TTL_SECONDS = 7 * 24 * 3600 # one week def hash_password(plain: str) -> str: - """PBKDF2-SHA256 via werkzeug. Cost default (~600k iterations).""" - return generate_password_hash(plain) + """PBKDF2-SHA256 via stdlib. 600k iterations (OWASP 2023).""" + return _hash_password(plain) def verify_password(plain: str, hashed: str) -> bool: - # werkzeug's check_password_hash is constant-time. - return check_password_hash(hashed, plain) + """Constant-time compare. Accepts stdlib + legacy werkzeug formats.""" + return _verify_password(plain, hashed) def load_users() -> dict: diff --git a/furtka/passwd.py b/furtka/passwd.py new file mode 100644 index 0000000..9ece1e1 --- /dev/null +++ b/furtka/passwd.py @@ -0,0 +1,95 @@ +"""Stdlib-only password hashing, compatible with werkzeug's hash format. + +Why this exists: 26.11-alpha introduced auth via ``werkzeug.security``, +but the target system doesn't have ``werkzeug`` installed (Core runs as +system Python with only the stdlib — pyproject.toml's ``flask>=3.0`` +dep is never pip-installed on the box). Fresh installs from a 26.11 / +26.12 ISO crashed on import; upgrades from pre-auth versions were +double-broken by that plus a too-strict updater health check. 
+ +Fix: replace werkzeug with stdlib equivalents using the same hash +**format** so existing ``users.json`` files created by 26.11 / 26.12 on +the rare boxes that happened to have werkzeug installed (Medion, .196 +after manual pacman) still verify. + +Format: ``method$salt$hex`` + - ``pbkdf2:hash:iterations`` — what we generate by default here + - ``scrypt:N:r:p`` — what werkzeug's default produces +Both are implemented via ``hashlib`` which has been stdlib since 3.6. +""" + +from __future__ import annotations + +import hashlib +import hmac +import secrets + +_PBKDF2_HASH = "sha256" +_PBKDF2_ITERATIONS = 600_000 +_SALT_LEN = 16 + + +def hash_password(password: str) -> str: + """Return a ``pbkdf2:sha256:iterations$salt$hex`` hash of *password*. + + PBKDF2-SHA256 over UTF-8. 600k iterations — same as werkzeug's + default in the 3.x series, roughly OWASP 2023's recommendation. + """ + if not isinstance(password, str): + raise TypeError("password must be str") + salt = secrets.token_urlsafe(_SALT_LEN)[:_SALT_LEN] + dk = hashlib.pbkdf2_hmac( + _PBKDF2_HASH, password.encode("utf-8"), salt.encode("utf-8"), _PBKDF2_ITERATIONS + ) + return f"pbkdf2:{_PBKDF2_HASH}:{_PBKDF2_ITERATIONS}${salt}${dk.hex()}" + + +def verify_password(password: str, hashed: str) -> bool: + """Constant-time verify *password* against a stored *hashed* value. + + Accepts both our own pbkdf2 hashes and legacy werkzeug scrypt + hashes in ``scrypt:N:r:p$salt$hex`` form — so users.json files + written by 26.11 / 26.12 keep working after upgrade. 
+ """ + if not isinstance(password, str) or not isinstance(hashed, str): + return False + try: + method, salt, expected = hashed.split("$", 2) + except ValueError: + return False + parts = method.split(":") + if not parts: + return False + algo = parts[0] + pw_bytes = password.encode("utf-8") + salt_bytes = salt.encode("utf-8") + try: + if algo == "pbkdf2": + if len(parts) < 3: + return False + inner_hash = parts[1] + iterations = int(parts[2]) + dk = hashlib.pbkdf2_hmac(inner_hash, pw_bytes, salt_bytes, iterations) + elif algo == "scrypt": + # werkzeug: scrypt:N:r:p, dklen=64, maxmem=132 MiB. Without + # the explicit maxmem we'd hit OpenSSL's default memory cap + # and throw ValueError on N >= 32768. 
+ if len(parts) < 4: + return False + n = int(parts[1]) + r = int(parts[2]) + p = int(parts[3]) + dk = hashlib.scrypt( + pw_bytes, + salt=salt_bytes, + n=n, + r=r, + p=p, + dklen=64, + maxmem=132 * 1024 * 1024, + ) + else: + return False + except (ValueError, TypeError, OverflowError): + return False + return hmac.compare_digest(dk.hex(), expected) diff --git a/furtka/updater.py b/furtka/updater.py index 88e2088..b78ea03 100644 --- a/furtka/updater.py +++ b/furtka/updater.py @@ -255,13 +255,35 @@ def _run(cmd: list[str]) -> None: def _health_check(url: str, deadline_s: float = 30.0) -> bool: + """Poll *url* until we get *any* response from the Python server. + + Treats any 2xx-4xx response as "server is up". A 401 on + /api/apps after the 26.11-alpha auth-guard shipped is a perfectly + valid signal that the new code imported + the socket is listening + — rejecting the request is still "alive". Only 5xx or connection- + level failures count as unhealthy. + + Rationale: pre-26.13 this function hit /api/apps and expected 200, + which silently broke every upgrade across the auth boundary (26.10 + → 26.11+) and auto-rolled back. Now we just need proof the new + process came up. + """ end = time.time() + deadline_s while time.time() < end: try: with urllib.request.urlopen(url, timeout=3) as resp: - if resp.status == 200: + # Any 2xx/3xx → alive. urllib follows redirects by + # default, so a 302 → /login resolves to /login's 200. + if resp.status < 500: return True + except urllib.error.HTTPError as e: + # 4xx → server is up, just refused us (auth, bad request, + # whatever). Counts as healthy for the "did it come back" + # check. 5xx → genuinely broken, don't accept. + if 400 <= e.code < 500: + return True except urllib.error.URLError: + # Connection refused / DNS / timeout → not up yet, retry. 
pass time.sleep(1) return False diff --git a/pyproject.toml b/pyproject.toml index f964c71..f9fea3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "furtka" -version = "26.12-alpha" +version = "26.13-alpha" description = "Open-source home server OS — simple enough for everyone." requires-python = ">=3.11" readme = "README.md" diff --git a/tests/test_api.py b/tests/test_api.py index 1b35d26..84f9371 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -949,6 +949,36 @@ def test_do_install_returns_409_when_locked(fake_dirs, no_docker, no_systemd_run fh.close() +def test_do_install_returns_409_when_state_reports_running(fake_dirs, no_docker, no_systemd_run): + """Closes the race window where _do_install had already released + the fcntl lock (so the systemd-run child could grab it) but a + second POST tried to start a new install while the first was still + mid-flight. The state file's non-terminal stage is the reliable + "someone else is installing" signal.""" + _, bundled = fake_dirs + _write_bundled(bundled, "fileshare", env_example="A=real") + api.install_runner.write_state("pulling_image", app="jellyfin") + status, body = api._do_install("fileshare") + assert status == 409 + assert "in progress" in body["error"] + assert "jellyfin" in body["error"] + assert "pulling_image" in body["error"] + + +def test_do_install_goes_through_after_terminal_state(fake_dirs, no_docker, no_systemd_run): + """After a successful or failed install, the state file stays at + done/error — a new install must be accepted, not blocked.""" + _, bundled = fake_dirs + _write_bundled(bundled, "fileshare", env_example="A=real") + api.install_runner.write_state("done", app="previous", version="1.0.0") + status, _ = api._do_install("fileshare") + assert status == 202 + + api.install_runner.write_state("error", app="previous", error="oops") + status, _ = api._do_install("fileshare") + assert status == 202 + + def 
test_do_install_status_returns_state(fake_dirs): # Write state directly, then GET it via the status handler. api.install_runner.write_state("pulling_image", app="jellyfin") diff --git a/tests/test_passwd.py b/tests/test_passwd.py new file mode 100644 index 0000000..ee7c0f9 --- /dev/null +++ b/tests/test_passwd.py @@ -0,0 +1,74 @@ +"""Tests for furtka.passwd — stdlib-only password hashing. + +The primary contract: hash/verify roundtrips cleanly, AND the verifier +accepts the werkzeug hash format that 26.11 / 26.12 boxes wrote to +``users.json``. Losing that backward compat would lock out existing +admins after a 26.13+ upgrade. +""" + +from __future__ import annotations + +from furtka import passwd + + +def test_hash_roundtrip(): + h = passwd.hash_password("hunter2") + assert passwd.verify_password("hunter2", h) + assert not passwd.verify_password("wrong", h) + + +def test_hash_is_salted(): + # Two separate hashes of the same password must diverge. + a = passwd.hash_password("same-pw") + b = passwd.hash_password("same-pw") + assert a != b + assert passwd.verify_password("same-pw", a) + assert passwd.verify_password("same-pw", b) + + +def test_generated_hash_format(): + # Shape is pbkdf2:hash:iterations$salt$hex + h = passwd.hash_password("x") + parts = h.split("$", 2) + assert len(parts) == 3 + method, salt, digest = parts + assert method.startswith("pbkdf2:sha256:") + assert salt + # digest is hex of pbkdf2_hmac sha256 → 64 hex chars + assert len(digest) == 64 + assert all(c in "0123456789abcdef" for c in digest) + + +def test_verify_werkzeug_scrypt_hash(): + """Known werkzeug scrypt hash generated by 26.11 / 26.12 boxes. + + Captured live off a .196 test VM after its auth bootstrap: + username=daniel, password=test-admin-pw1 + Hash format: scrypt:32768:8:1$salt$hex + If this regresses, every existing box that upgraded via 26.11 and + set a password gets locked out on the next upgrade. 
+ """ + known = ( + "scrypt:32768:8:1$yWZUqJodowt9ieI1$" + "2d1059b3564da7492b4aa3c2be7fff6fef06085e5e1bfd52e897948c58246b7a" + "9603400355b7264f61c4436eba7bf8c947adec3d7a76be03b50efb4227e15a80" + ) + assert passwd.verify_password("test-admin-pw1", known) + assert not passwd.verify_password("wrong-password", known) + + +def test_verify_rejects_malformed_hashes(): + # Empty / missing delimiters / unknown method / bad int — all False. + assert not passwd.verify_password("x", "") + assert not passwd.verify_password("x", "nothingspecial") + assert not passwd.verify_password("x", "pbkdf2:sha256:600000") # no $salt$digest + assert not passwd.verify_password("x", "pbkdf2$salt$digest") # missing hash + iter + assert not passwd.verify_password("x", "bcrypt:12$salt$digest") # unsupported algo + assert not passwd.verify_password("x", "pbkdf2:sha256:abc$salt$digest") # bad iter int + + +def test_verify_rejects_nonstring_inputs(): + # Defensive: users.json can be corrupted or have nulls. + assert not passwd.verify_password(None, "pbkdf2:sha256:1000$salt$digest") # type: ignore[arg-type] + assert not passwd.verify_password("x", None) # type: ignore[arg-type] + assert not passwd.verify_password("x", 12345) # type: ignore[arg-type] diff --git a/tests/test_updater.py b/tests/test_updater.py index 55ef155..acbaac1 100644 --- a/tests/test_updater.py +++ b/tests/test_updater.py @@ -224,6 +224,76 @@ def test_refresh_caddyfile_substitutes_hostname_placeholder(updater, tmp_path): assert updater._refresh_caddyfile(src) is False +def test_health_check_treats_4xx_as_healthy(updater, monkeypatch): + """26.11+ auth makes /api/apps return 401 on unauth requests. If the + health check treated that as "down", every pre-auth → auth upgrade + auto-rolls back. 
Server responding at all is enough signal for the + health check.""" + import urllib.error + + calls = {"n": 0} + + class _FakeResp: + def __init__(self, code): + self.status = code + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def raising_401(url, timeout): + calls["n"] += 1 + raise urllib.error.HTTPError(url, 401, "Unauthorized", {}, None) + + monkeypatch.setattr("urllib.request.urlopen", raising_401) + assert updater._health_check("http://127.0.0.1:7000/api/apps", deadline_s=2.0) is True + # One call was enough — early exit on 4xx, no retry loop. + assert calls["n"] == 1 + + +def test_health_check_rejects_5xx(updater, monkeypatch): + """500s mean the server is up but broken — that's NOT healthy. + Distinguishes auth refusals (4xx = healthy) from real runtime + errors (5xx = unhealthy, roll back).""" + import urllib.error + + def raising_500(url, timeout): + raise urllib.error.HTTPError(url, 500, "Internal Server Error", {}, None) + + monkeypatch.setattr("urllib.request.urlopen", raising_500) + assert updater._health_check("http://127.0.0.1:7000/api/apps", deadline_s=1.5) is False + + +def test_health_check_retries_on_connection_refused(updater, monkeypatch): + """While furtka-api is still starting, urlopen raises URLError. 
+ The loop must keep polling until the server comes up or deadline.""" + import urllib.error + + calls = {"n": 0} + + def flaky(url, timeout): + calls["n"] += 1 + if calls["n"] < 3: + raise urllib.error.URLError("connection refused") + + class _Resp: + status = 200 + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + return _Resp() + + monkeypatch.setattr("urllib.request.urlopen", flaky) + assert updater._health_check("http://127.0.0.1:7000/api/apps", deadline_s=10.0) is True + assert calls["n"] == 3 + + def test_current_hostname_falls_back_when_file_missing(updater, monkeypatch, tmp_path): monkeypatch.setenv("FURTKA_HOSTNAME_FILE", str(tmp_path / "missing")) import importlib