gsh/libgsh/src/corpus.rs
Tyler J King 91f027ae61 libgsh: complete scenario coverage for corpus_check execution paths
Adds the ReadFailed scenario (binary path resolves to a directory so
exists() succeeds but read() fails) and a scenarios coverage map at the
top of the test module. The map links each test to the audit fix
scenarios:

- valid CID, content matches: Allowed
- valid CID at admission, tampered content at execution: ContentMismatch
- missing binary where directory exists: Denied (sanity preserved)
- binary present but unreadable: ReadFailed (fail-closed)

Plus the existing sentinels for ungoverned-CID and corpus-not-mounted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Tyler J King <tking@guildhouse.dev>
2026-04-25 03:18:56 -04:00

279 lines
10 KiB
Rust

//! Corpus directory gate — the live killswitch.
//!
//! Verifies two properties before a binary is allowed to execute:
//!
//! 1. The binary name is present in the corpus directory keyed by CID
//! (existing directory-name check).
//! 2. The binary's on-disk SHA-256 matches the CID its directory is
//! named for (content-verification added as part of the CID-content
//! verification audit fix).
//!
//! Property 2 closes the gap where an attacker with write access to the
//! corpus directory could plant a malicious binary under a legitimate
//! CID and have it execute with that CID's privileges. With content
//! verification, the corpus directory can still be tampered with, but
//! the tampered binary will not run.
//!
//! Mismatches emit Chronicle-shaped structured tracing events
//! (`target: "chronicle"`) with the event_type constants from
//! [`crate::chronicle_events`], so tamper incidents remain forensically
//! complete rather than denied-and-forgotten.
use std::path::{Path, PathBuf};
use sha2::{Digest, Sha256};
use tracing::warn;
use crate::chronicle_events as events;
/// Result of a corpus check.
///
/// Only [`Allowed`](Self::Allowed) means the binary was found and its
/// content verified. [`Denied`](Self::Denied),
/// [`ContentMismatch`](Self::ContentMismatch) and
/// [`ReadFailed`](Self::ReadFailed) are execution-denied states.
/// [`Ungoverned`](Self::Ungoverned) and [`NotMounted`](Self::NotMounted)
/// mean no verification took place; how to treat them is the caller's
/// decision.
///
/// The type is `#[must_use]` because dropping the result skips the gate
/// entirely, and it derives `Clone`/`PartialEq`/`Eq` so callers and tests
/// can store and compare outcomes directly.
#[must_use = "ignoring a corpus check result means the killswitch is never enforced"]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CorpusCheckResult {
    /// Binary found in corpus and its content hashes to the expected CID.
    Allowed,
    /// Corpus is ungoverned — no check performed.
    Ungoverned,
    /// Corpus directory not found on this host (not an error, host may not have it mounted).
    NotMounted,
    /// Binary not in corpus directory — denied (killswitch active).
    Denied {
        /// Command name that was looked up.
        command: String,
        /// CID of the corpus directory that was searched.
        corpus_cid: String,
    },
    /// Binary present but content does not hash to the expected CID.
    /// Denied — the file has been tampered with or was placed under a
    /// CID directory that does not match its content.
    ContentMismatch {
        /// Command name that was looked up.
        command: String,
        /// CID the corpus directory is named for (the claimed CID).
        corpus_cid: String,
        /// `sha256:<hex>` digest computed from the bytes found on disk.
        actual_cid: String,
        /// Path of the file that was hashed.
        path: PathBuf,
    },
    /// Binary present but could not be read for hashing. Fail-closed:
    /// a read we cannot verify is a verification we cannot complete.
    ReadFailed {
        /// Command name that was looked up.
        command: String,
        /// CID the corpus directory is named for.
        corpus_cid: String,
        /// Path of the file that could not be read.
        path: PathBuf,
        /// Display text of the underlying I/O error.
        detail: String,
    },
}
/// Default corpus base directory.
///
/// [`corpus_check`] looks for per-CID corpus directories beneath this
/// path. Use [`corpus_check_with_base`] to check against a different base.
pub const DEFAULT_CORPUS_BASE: &str = "/opt/substrate/corpus";
/// Check if a command is authorized in the corpus directory under
/// [`DEFAULT_CORPUS_BASE`].
///
/// This is a thin wrapper around [`corpus_check_with_base`]. It takes no
/// base-directory argument; callers that need to override the base
/// (NOTE(review): an earlier revision of this comment mentioned a
/// `GSH_CORPUS_DIR` env override — that lookup is not performed here,
/// confirm where it lives) must call [`corpus_check_with_base`] directly.
///
/// The return value is a plain [`CorpusCheckResult`], not a `Result`:
/// every outcome, including I/O failure, is reported as a variant. The
/// caller decides whether to block on `Denied`, `ContentMismatch`, or
/// `ReadFailed` — all three are execution-denied states.
pub fn corpus_check(corpus_cid: &str, command: &str) -> CorpusCheckResult {
corpus_check_with_base(corpus_cid, command, DEFAULT_CORPUS_BASE)
}
/// corpus_check with an explicit base directory.
///
/// Looks for the command's binary at `base_dir/corpus_cid/<name>`, where
/// `<name>` is the final path component of the first whitespace-separated
/// word of `command` (so `"/usr/bin/kubectl get pods"` is looked up as
/// `kubectl`).
///
/// Outcomes, in the order they are decided:
///
/// - `Ungoverned` — `corpus_cid` is the `sha256:ungoverned` sentinel; the
///   filesystem is not consulted.
/// - `NotMounted` — `base_dir/corpus_cid` does not exist.
/// - `Denied` — the command has no usable file name (empty, `.`, `..`,
///   `/`, or a path ending in `..`), or no entry with that name exists in
///   the corpus directory.
/// - `ReadFailed` — the entry exists but could not be read (fail-closed).
/// - `ContentMismatch` — the entry's SHA-256 differs from `corpus_cid`.
/// - `Allowed` — the entry's SHA-256 equals `corpus_cid`.
///
/// `ContentMismatch` and `ReadFailed` additionally emit a `warn!` event on
/// the `chronicle` target.
pub fn corpus_check_with_base(
    corpus_cid: &str,
    command: &str,
    base_dir: &str,
) -> CorpusCheckResult {
    if corpus_cid == "sha256:ungoverned" {
        return CorpusCheckResult::Ungoverned;
    }

    let corpus_dir = Path::new(base_dir).join(corpus_cid);
    if !corpus_dir.exists() {
        return CorpusCheckResult::NotMounted;
    }

    // Extract command name (first word, basename only).
    let first_word = command.split_whitespace().next().unwrap_or(command);
    let cmd_name = match Path::new(first_word).file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => {
            // No basename: the word is empty, `.`, `/`, or ends in `..`.
            // Joining such a token would resolve to the corpus directory
            // itself, one of its ancestors, or (for an absolute token) a
            // path outside the corpus altogether. Nothing in the corpus can
            // carry that name, so deny without probing the filesystem and
            // without raising a misleading read-failure event.
            return CorpusCheckResult::Denied {
                command: first_word.to_string(),
                corpus_cid: corpus_cid.to_string(),
            };
        }
    };

    let binary_path = corpus_dir.join(&cmd_name);
    if !binary_path.exists() {
        return CorpusCheckResult::Denied {
            command: cmd_name,
            corpus_cid: corpus_cid.to_string(),
        };
    }

    // Content verification: hash the binary on disk and compare to the
    // CID the directory is named for. OCI registries make the admission
    // layer's CID→content binding cryptographic; this re-check protects
    // against post-admission filesystem tampering.
    match std::fs::read(&binary_path) {
        Ok(bytes) => {
            let actual_cid = format!("sha256:{}", hex::encode(Sha256::digest(&bytes)));
            if actual_cid == corpus_cid {
                CorpusCheckResult::Allowed
            } else {
                warn!(
                    target: "chronicle",
                    event_type = events::CID_MISMATCH_EXECUTION_CONTENT_MISMATCH,
                    claimed_cid = corpus_cid,
                    actual_cid = %actual_cid,
                    context = %binary_path.display(),
                    actor = "gsh",
                    severity = "error",
                    command = %cmd_name,
                    "Corpus binary content does not match CID directory name (tamper signal)"
                );
                CorpusCheckResult::ContentMismatch {
                    command: cmd_name,
                    corpus_cid: corpus_cid.to_string(),
                    actual_cid,
                    path: binary_path,
                }
            }
        }
        Err(e) => {
            // Fail-closed: an entry that exists but cannot be read (for
            // example a directory sitting at the binary's path) is denied.
            let detail = e.to_string();
            warn!(
                target: "chronicle",
                event_type = events::CID_MISMATCH_EXECUTION_READ_FAILED,
                claimed_cid = corpus_cid,
                context = %binary_path.display(),
                actor = "gsh",
                severity = "error",
                command = %cmd_name,
                detail = %detail,
                "Could not read corpus binary for hash verification (fail-closed)"
            );
            CorpusCheckResult::ReadFailed {
                command: cmd_name,
                corpus_cid: corpus_cid.to_string(),
                path: binary_path,
                detail,
            }
        }
    }
}
#[cfg(test)]
mod tests {
    //! Scenario coverage map (execution half of the CID-content
    //! verification audit fix):
    //!
    //! - **Valid CID, content matches: execution allowed** —
    //!   `binary_with_matching_content_is_allowed` and
    //!   `path_qualified_command_is_checked_by_basename`.
    //! - **Valid CID at admission, tampered content at execution:
    //!   execution denies** — `tampered_content_triggers_content_mismatch`.
    //! - **Missing binary where directory exists: denied (existing
    //!   behavior preserved as sanity check)** —
    //!   `missing_binary_in_corpus_is_denied`.
    //! - **Binary present but unreadable: denied fail-closed** —
    //!   `unreadable_binary_triggers_read_failed`.
    //! - **Sentinel: ungoverned CID** — `ungoverned_skips_check`.
    //! - **Sentinel: corpus directory not mounted on host** —
    //!   `missing_corpus_dir_reports_not_mounted`.
    //!
    //! Every filesystem scenario runs against a temporary base directory
    //! so the outcome does not depend on what is mounted on the host.
    //!
    //! The admission half (forged CID rejected at CRD reconcile) is
    //! covered in corpus-operator::verifier.
    use super::*;

    /// Write bytes to `dir/cid/name` and return the path so the caller can
    /// pass a matching CID for the happy path or a different one to
    /// simulate tamper.
    fn write_binary(dir: &Path, cid: &str, name: &str, contents: &[u8]) -> PathBuf {
        let corpus_dir = dir.join(cid);
        std::fs::create_dir_all(&corpus_dir).unwrap();
        let path = corpus_dir.join(name);
        std::fs::write(&path, contents).unwrap();
        path
    }

    /// CID (`sha256:<hex>`) of `bytes`, in the same format the check uses.
    fn cid_of(bytes: &[u8]) -> String {
        format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
    }

    /// The temporary directory's path as the `&str` base the API expects.
    fn base_of(dir: &tempfile::TempDir) -> &str {
        dir.path()
            .to_str()
            .expect("temporary directory path should be valid UTF-8")
    }

    #[test]
    fn ungoverned_skips_check() {
        assert!(matches!(
            corpus_check("sha256:ungoverned", "anything"),
            CorpusCheckResult::Ungoverned
        ));
    }

    #[test]
    fn missing_corpus_dir_reports_not_mounted() {
        // An empty temporary base guarantees the CID directory is absent,
        // regardless of what exists under the default base on this host.
        let dir = tempfile::tempdir().unwrap();
        assert!(matches!(
            corpus_check_with_base("sha256:nonexistent", "kubectl", base_of(&dir)),
            CorpusCheckResult::NotMounted
        ));
    }

    #[test]
    fn binary_with_matching_content_is_allowed() {
        let dir = tempfile::tempdir().unwrap();
        let contents = b"#!/bin/sh\necho kubectl\n";
        let cid = cid_of(contents);
        write_binary(dir.path(), &cid, "kubectl", contents);
        assert!(matches!(
            corpus_check_with_base(&cid, "kubectl get pods -n test", base_of(&dir)),
            CorpusCheckResult::Allowed
        ));
    }

    /// A command given with a leading directory is looked up by its final
    /// path component only.
    #[test]
    fn path_qualified_command_is_checked_by_basename() {
        let dir = tempfile::tempdir().unwrap();
        let contents = b"#!/bin/sh\necho kubectl\n";
        let cid = cid_of(contents);
        write_binary(dir.path(), &cid, "kubectl", contents);
        assert!(matches!(
            corpus_check_with_base(&cid, "/usr/local/bin/kubectl get pods", base_of(&dir)),
            CorpusCheckResult::Allowed
        ));
    }

    #[test]
    fn missing_binary_in_corpus_is_denied() {
        let dir = tempfile::tempdir().unwrap();
        let contents = b"kubectl";
        let cid = cid_of(contents);
        write_binary(dir.path(), &cid, "kubectl", contents);
        match corpus_check_with_base(&cid, "helm install", base_of(&dir)) {
            CorpusCheckResult::Denied {
                command,
                corpus_cid,
            } => {
                assert_eq!(command, "helm");
                assert_eq!(corpus_cid, cid);
            }
            other => panic!("expected Denied, got {other:?}"),
        }
    }

    #[test]
    fn tampered_content_triggers_content_mismatch() {
        let dir = tempfile::tempdir().unwrap();
        let claimed = cid_of(b"original-kubectl-content");
        let planted: &[u8] = b"malicious-replacement";
        // Write content that hashes to something OTHER than the claimed CID
        // but store it under the claimed CID's directory — the tamper case.
        let written = write_binary(dir.path(), &claimed, "kubectl", planted);
        match corpus_check_with_base(&claimed, "kubectl", base_of(&dir)) {
            CorpusCheckResult::ContentMismatch {
                command,
                corpus_cid,
                actual_cid,
                path,
            } => {
                assert_eq!(command, "kubectl");
                assert_eq!(corpus_cid, claimed);
                assert_ne!(actual_cid, claimed);
                // The reported CID is the digest of what is really on disk.
                assert_eq!(actual_cid, cid_of(planted));
                assert_eq!(path, written);
            }
            other => panic!("expected ContentMismatch, got {other:?}"),
        }
    }

    /// Place a directory at the path where the binary should live; the
    /// `exists()` check passes but `read()` fails. Verifies the fail-closed
    /// path: an unreadable binary is denied rather than allowed.
    #[test]
    fn unreadable_binary_triggers_read_failed() {
        let dir = tempfile::tempdir().unwrap();
        let claimed = cid_of(b"any-content");
        let blocked = dir.path().join(&claimed).join("kubectl");
        // Make a directory at the binary path — it satisfies `exists()` but
        // `read()` will fail with EISDIR or similar.
        std::fs::create_dir_all(&blocked).unwrap();
        match corpus_check_with_base(&claimed, "kubectl", base_of(&dir)) {
            CorpusCheckResult::ReadFailed {
                corpus_cid,
                command,
                path,
                ..
            } => {
                assert_eq!(corpus_cid, claimed);
                assert_eq!(command, "kubectl");
                assert_eq!(path, blocked);
            }
            other => panic!("expected ReadFailed, got {other:?}"),
        }
    }
}