From f0268305ae0768feb3b5a7af4c560a106b3ed8cc055f485e9fdc359cc9ac0cfa Mon Sep 17 00:00:00 2001 From: Tyler J King Date: Wed, 15 Apr 2026 20:36:00 -0400 Subject: [PATCH] docs(spire): revocation cascade timing + Keylime SPIRE server config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the trust withdrawal cascade: Keylime breach → posture degraded → sessions downgraded → SPIRE re-attestation fails → SVIDs expire → service mTLS fails → quorum degrades No new code for the cascade — it's emergent from existing re-attestation behavior + the Keylime attestor plugin. SPIRE federation handles cross-edge propagation through standard certificate expiration. Three timing profiles: Standard (~1hr), Enhanced (~15min), Critical (~5min) with SVID TTL configuration guidance. Example SPIRE server config with Keylime attestor + k8s_psat fallback for nodes without hardware TPM. Signed-off-by: Tyler King Signed-off-by: Tyler J King --- deploy/cascade-timing.md | 63 ++++++++++++++++++++++++++++ deploy/spire-server-keylime.yaml | 72 ++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 deploy/cascade-timing.md create mode 100644 deploy/spire-server-keylime.yaml diff --git a/deploy/cascade-timing.md b/deploy/cascade-timing.md new file mode 100644 index 0000000..a422fe2 --- /dev/null +++ b/deploy/cascade-timing.md @@ -0,0 +1,63 @@ +# Revocation Cascade Timing + +When Keylime detects a TPM attestation failure, trust withdrawal cascades +through the stack automatically. No custom revocation protocol — each +layer reacts to the posture change produced by the layer above it. 
+ +## Cascade Stages + +| # | Stage | Default Interval | Trigger | +|---|-------|-----------------|---------| +| 1 | Keylime detects failure | 300s (pull) / varies (push) | TPM quote mismatch, IMA violation, boot integrity | +| 2 | Posture evaluator updates ConfigMap | 300s | Keylime verdict changes | +| 3 | Bascule session downgrade | 30s | posture-current ConfigMap poll | +| 4 | SPIRE re-attestation fails | varies | Keylime attestor reads ConfigMap | +| 5 | SVID expiry (identity loss) | 3600s (1 hour) | SPIRE agent can't renew | +| 6 | Service mTLS failures | immediate after 5 | Peer cert expired/missing | +| 7 | Quorum degradation | immediate after 6 | SPIRE federation — other members see expired SVIDs | + +## Timing Profiles + +### Standard (default) — ~1 hour total cascade + +Acceptable for most workloads. The SVID TTL provides a grace period for +transient failures. + +### Enhanced — ~15 minute total cascade + +```yaml +# SVID TTL +default_x509_svid_ttl: 15m +# SPIRE re-attestation (implicit via SVID rotation interval) +# Keylime poll: default 300s is fine +``` + +Higher renewal overhead, faster trust withdrawal. + +### Critical — ~5 minute total cascade + +```yaml +default_x509_svid_ttl: 5m +``` + +Significant renewal overhead. Use only for high-security workloads +where fast trust withdrawal justifies the cost. Note that the ~5 minute figure tracks the SVID TTL; at the default intervals in the table above, stages 1–3 can add up to roughly 10 more minutes (300s + 300s + 30s) unless the Keylime and posture-evaluator intervals are shortened as well. + +## Cross-Edge (Quorum) Propagation + +When one quorum member's edge fails Keylime attestation: + +1. That edge's SPIRE server stops issuing SVIDs (Keylime attestor rejects) +2. Existing SVIDs expire (per TTL) +3. Other members' services see TLS handshake failures (expired peer cert) +4. No explicit cross-member notification needed — SPIRE federation's + standard certificate lifecycle handles it + +This is automatic. SPIRE federation trusts Member A's SVIDs via the +bundle exchange. When those SVIDs expire and aren't renewed, Member B's +services can't verify them. 
+ +## Configuration + +See `spire-server-keylime.yaml` for the SPIRE server configuration with +the Keylime node attestor plugin and timing parameters. diff --git a/deploy/spire-server-keylime.yaml b/deploy/spire-server-keylime.yaml new file mode 100644 index 0000000..0148dd9 --- /dev/null +++ b/deploy/spire-server-keylime.yaml @@ -0,0 +1,72 @@ +# SPIRE Server configuration with Keylime node attestor. +# +# Replaces tpm_devid attestor with Keylime-backed attestation. +# Keylime is the single TPM authority; this plugin queries its +# attestation results from the posture-current ConfigMap. +# +# See deploy/cascade-timing.md for revocation cascade timing. + +server: + bind_address: 0.0.0.0 + bind_port: 8081 + data_dir: /var/lib/spire/server + log_level: INFO + trust_domain: guild-a.guildhouse.io + ca_ttl: 8760h # 1 year + default_x509_svid_ttl: 1h + default_jwt_svid_ttl: 5m + +plugins: + NodeAttestor: + # Primary: Keylime-backed attestation for nodes with hardware TPM. + # Reads posture-current ConfigMap via volume mount. + keylime: + plugin_cmd: /opt/spire/plugins/keylime-attestor + plugin_data: + source: configmap + posture_configmap_path: /var/run/posture/posture-current + max_attestation_age_secs: 600 + + # Fallback: K8s PSAT for cloud nodes without hardware TPM. 
+ k8s_psat: + plugin_data: + clusters: + local: + service_account_allow_list: + - spire:spire-agent + + KeyManager: + guildhouse_substrate: + plugin_cmd: /opt/spire/plugins/substrate-keymanager + plugin_data: + trust_domain: guild-a.guildhouse.io + governance_addr: governance.quartermaster.svc.cluster.local:50051 + notary_addr: notary.quartermaster.svc.cluster.local:50051 + cluster_id: guild-a + governance_epoch_seconds: 300 + + CredentialComposer: + guildhouse_ssh: + plugin_cmd: /opt/spire/plugins/ssh-credential-composer + plugin_data: + trust_domain: guild-a.guildhouse.io + governance_addr: governance.quartermaster.svc.cluster.local:50051 + default_cert_ttl: 5m + max_cert_ttl: 1h + + Notifier: + guildhouse_governance: + plugin_cmd: /opt/spire/plugins/governance-notifier + plugin_data: + governance_addr: governance.quartermaster.svc.cluster.local:50051 + ceremony_addr: ceremony.bascule.svc.cluster.local:50052 + notary_addr: notary.quartermaster.svc.cluster.local:50051 + cluster_id: guild-a + trust_domain: guild-a.guildhouse.io + governance_epoch_seconds: 300 + + DataStore: + sql: + plugin_data: + database_type: sqlite3 + connection_string: /var/lib/spire/server/datastore.sqlite3