//! Corpus directory gate — the live killswitch. //! //! Verifies two properties before a binary is allowed to execute: //! //! 1. The binary name is present in the corpus directory keyed by CID //! (existing directory-name check). //! 2. The binary's on-disk SHA-256 matches the CID its directory is //! named for (content-verification added as part of the CID-content //! verification audit fix). //! //! Property 2 closes the gap where an attacker with write access to the //! corpus directory could plant a malicious binary under a legitimate //! CID and have it execute with that CID's privileges. With content //! verification, the corpus directory can still be tampered with, but //! the tampered binary will not run. //! //! Mismatches emit Chronicle-shaped structured tracing events //! (`target: "chronicle"`) with the event_type constants from //! [`crate::chronicle_events`], so tamper incidents remain forensically //! complete rather than denied-and-forgotten. use std::path::{Path, PathBuf}; use sha2::{Digest, Sha256}; use tracing::warn; use crate::chronicle_events as events; /// Result of a corpus check. #[derive(Debug)] pub enum CorpusCheckResult { /// Binary found in corpus and its content hashes to the expected CID. Allowed, /// Corpus is ungoverned — no check performed. Ungoverned, /// Corpus directory not found on this host (not an error, host may not have it mounted). NotMounted, /// Binary not in corpus directory — denied (killswitch active). Denied { command: String, corpus_cid: String }, /// Binary present but content does not hash to the expected CID. /// Denied — the file has been tampered with or was placed under a /// CID directory that does not match its content. ContentMismatch { command: String, corpus_cid: String, actual_cid: String, path: PathBuf, }, /// Binary present but could not be read for hashing. Fail-closed: /// a read we cannot verify is a verification we cannot complete. ReadFailed { command: String, corpus_cid: String, path: PathBuf, detail: String, }, } /// Default corpus base directory. pub const DEFAULT_CORPUS_BASE: &str = "/opt/substrate/corpus"; /// Check if a command is authorized in the corpus directory. /// /// `base_dir` overrides the default /opt/substrate/corpus (set via GSH_CORPUS_DIR env). /// Returns Ok(result) always. Caller decides whether to block on Denied, /// ContentMismatch, or ReadFailed — all three are execution-denied /// states. pub fn corpus_check(corpus_cid: &str, command: &str) -> CorpusCheckResult { corpus_check_with_base(corpus_cid, command, DEFAULT_CORPUS_BASE) } /// corpus_check with an explicit base directory. pub fn corpus_check_with_base(corpus_cid: &str, command: &str, base_dir: &str) -> CorpusCheckResult { if corpus_cid == "sha256:ungoverned" { return CorpusCheckResult::Ungoverned; } let corpus_dir = Path::new(base_dir).join(corpus_cid); if !corpus_dir.exists() { return CorpusCheckResult::NotMounted; } // Extract command name (first word, basename only): let cmd_name = command.split_whitespace().next().unwrap_or(command); let cmd_name = Path::new(cmd_name) .file_name() .map(|n| n.to_string_lossy().to_string()) .unwrap_or_else(|| cmd_name.to_string()); let binary_path = corpus_dir.join(&cmd_name); if !binary_path.exists() { return CorpusCheckResult::Denied { command: cmd_name, corpus_cid: corpus_cid.to_string(), }; } // Content verification: hash the binary on disk and compare to the // CID the directory is named for. OCI registries make the admission // layer's CID→content binding cryptographic; this re-check protects // against post-admission filesystem tampering. match std::fs::read(&binary_path) { Ok(bytes) => { let actual_cid = format!("sha256:{}", hex::encode(Sha256::digest(&bytes))); if actual_cid == corpus_cid { CorpusCheckResult::Allowed } else { warn!( target: "chronicle", event_type = events::CID_MISMATCH_EXECUTION_CONTENT_MISMATCH, claimed_cid = corpus_cid, actual_cid = %actual_cid, context = %binary_path.display(), actor = "gsh", severity = "error", command = %cmd_name, "Corpus binary content does not match CID directory name (tamper signal)" ); CorpusCheckResult::ContentMismatch { command: cmd_name, corpus_cid: corpus_cid.to_string(), actual_cid, path: binary_path, } } } Err(e) => { let detail = e.to_string(); warn!( target: "chronicle", event_type = events::CID_MISMATCH_EXECUTION_READ_FAILED, claimed_cid = corpus_cid, context = %binary_path.display(), actor = "gsh", severity = "error", command = %cmd_name, detail = %detail, "Could not read corpus binary for hash verification (fail-closed)" ); CorpusCheckResult::ReadFailed { command: cmd_name, corpus_cid: corpus_cid.to_string(), path: binary_path, detail, } } } } #[cfg(test)] mod tests { //! Scenario coverage map (execution half of the CID-content //! verification audit fix): //! //! - **Valid CID, content matches: execution allowed** — //! `binary_with_matching_content_is_allowed`. //! - **Valid CID at admission, tampered content at execution: //! execution denies** — `tampered_content_triggers_content_mismatch`. //! - **Missing binary where directory exists: denied (existing //! behavior preserved as sanity check)** — //! `missing_binary_in_corpus_is_denied`. //! - **Binary present but unreadable: denied fail-closed** — //! `unreadable_binary_triggers_read_failed`. //! - **Sentinel: ungoverned CID** — `ungoverned_skips_check`. //! - **Sentinel: corpus directory not mounted on host** — //! `missing_corpus_dir_reports_not_mounted`. //! //! The admission half (forged CID rejected at CRD reconcile) is //! covered in corpus-operator::verifier. use super::*; /// Write bytes to `dir/cid/name` and return the path so the caller can /// pass a matching CID for the happy path or a different one to /// simulate tamper. fn write_binary(dir: &Path, cid: &str, name: &str, contents: &[u8]) -> PathBuf { let corpus_dir = dir.join(cid); std::fs::create_dir_all(&corpus_dir).unwrap(); let path = corpus_dir.join(name); std::fs::write(&path, contents).unwrap(); path } fn cid_of(bytes: &[u8]) -> String { format!("sha256:{}", hex::encode(Sha256::digest(bytes))) } #[test] fn ungoverned_skips_check() { assert!(matches!( corpus_check("sha256:ungoverned", "anything"), CorpusCheckResult::Ungoverned )); } #[test] fn missing_corpus_dir_reports_not_mounted() { assert!(matches!( corpus_check("sha256:nonexistent", "kubectl"), CorpusCheckResult::NotMounted )); } #[test] fn binary_with_matching_content_is_allowed() { let dir = tempfile::tempdir().unwrap(); let contents = b"#!/bin/sh\necho kubectl\n"; let cid = cid_of(contents); write_binary(dir.path(), &cid, "kubectl", contents); let base = dir.path().to_str().unwrap(); assert!(matches!( corpus_check_with_base(&cid, "kubectl get pods -n test", base), CorpusCheckResult::Allowed )); } #[test] fn missing_binary_in_corpus_is_denied() { let dir = tempfile::tempdir().unwrap(); let contents = b"kubectl"; let cid = cid_of(contents); write_binary(dir.path(), &cid, "kubectl", contents); let base = dir.path().to_str().unwrap(); assert!(matches!( corpus_check_with_base(&cid, "helm install", base), CorpusCheckResult::Denied { .. } )); } #[test] fn tampered_content_triggers_content_mismatch() { let dir = tempfile::tempdir().unwrap(); let claimed = cid_of(b"original-kubectl-content"); // Write content that hashes to something OTHER than the claimed CID // but store it under the claimed CID's directory — the tamper case. write_binary(dir.path(), &claimed, "kubectl", b"malicious-replacement"); let base = dir.path().to_str().unwrap(); match corpus_check_with_base(&claimed, "kubectl", base) { CorpusCheckResult::ContentMismatch { corpus_cid, actual_cid, .. } => { assert_eq!(corpus_cid, claimed); assert_ne!(actual_cid, claimed); } other => panic!("expected ContentMismatch, got {other:?}"), } } /// Place a directory at the path where the binary should live; the /// `exists()` check passes but `read()` fails. Verifies the fail-closed /// path: an unreadable binary is denied rather than allowed. #[test] fn unreadable_binary_triggers_read_failed() { let dir = tempfile::tempdir().unwrap(); let claimed = cid_of(b"any-content"); let corpus_dir = dir.path().join(&claimed); // Make a directory at the binary path — it satisfies `exists()` but // `read()` will fail with EISDIR or similar. std::fs::create_dir_all(corpus_dir.join("kubectl")).unwrap(); let base = dir.path().to_str().unwrap(); match corpus_check_with_base(&claimed, "kubectl", base) { CorpusCheckResult::ReadFailed { corpus_cid, command, .. } => { assert_eq!(corpus_cid, claimed); assert_eq!(command, "kubectl"); } other => panic!("expected ReadFailed, got {other:?}"), } } }