diff --git a/Cargo.lock b/Cargo.lock index c324166..5cc561d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -911,6 +911,7 @@ dependencies = [ "sha2", "tempfile", "thiserror 2.0.18", + "tracing", ] [[package]] @@ -1756,9 +1757,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.36" diff --git a/Cargo.toml b/Cargo.toml index 076b6bb..97f0cf3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,3 +23,4 @@ dirs = "5" reedline = "0.38" colored = "2" atty = "0.2" +tracing = "0.1" diff --git a/gsh/src/main.rs b/gsh/src/main.rs index 10be1f3..3d5d0b9 100644 --- a/gsh/src/main.rs +++ b/gsh/src/main.rs @@ -244,10 +244,40 @@ fn run(args: Args) -> Result { eprintln!("gsh: command '{}' not in corpus {} (killswitch active)", command, corpus_cid); return Ok(3); } + libgsh::CorpusCheckResult::ContentMismatch { + command, + corpus_cid, + actual_cid, + path, + } => { + eprintln!( + "gsh: command '{}' content does not match CID {} (found {}, path {}): execution denied (tamper signal)", + command, + corpus_cid, + actual_cid, + path.display() + ); + return Ok(3); + } + libgsh::CorpusCheckResult::ReadFailed { + command, + corpus_cid, + path, + detail, + } => { + eprintln!( + "gsh: command '{}' in corpus {} could not be read for hash verification ({}); execution denied fail-closed (path {})", + command, + corpus_cid, + detail, + path.display() + ); + return Ok(3); + } libgsh::CorpusCheckResult::NotMounted => { eprintln!("gsh: corpus directory not found (host may not have corpus mounted)"); } - _ => {} + 
libgsh::CorpusCheckResult::Allowed | libgsh::CorpusCheckResult::Ungoverned => {} } if args.dry_run { diff --git a/libgsh/Cargo.toml b/libgsh/Cargo.toml index e7e0d5c..37a793f 100644 --- a/libgsh/Cargo.toml +++ b/libgsh/Cargo.toml @@ -13,6 +13,7 @@ sha2 = { workspace = true } hex = { workspace = true } chrono = { workspace = true } dirs = { workspace = true } +tracing = { workspace = true } [dev-dependencies] diff --git a/libgsh/src/chronicle_events.rs b/libgsh/src/chronicle_events.rs new file mode 100644 index 0000000..9bd8d8f --- /dev/null +++ b/libgsh/src/chronicle_events.rs @@ -0,0 +1,40 @@ +//! Chronicle-shaped event schemas for CID verification (execution path). +//! +//! Stable `event_type` constants emitted via structured tracing with +//! `target: "chronicle"`. The field shape matches what the post-io_uring +//! substrate-chronicle emission API is expected to require, so migrating +//! to direct Chronicle emission is a mechanical translation once that API +//! stabilizes: replace `tracing::warn!(target: "chronicle", ...)` with the +//! emitter call using the same field names. +//! +//! # Event envelope +//! +//! Fields carried by the events (present on every event unless noted): +//! +//! - `event_type`: &'static str — one of the constants below. +//! - `claimed_cid`: &str — the CID under which the binary is authorized. +//! - `actual_cid`: &str — the hash of the binary as it appears on disk +//! (omitted when hashing failed outright). +//! - `context`: &str — binary path (and where available, session id). +//! - `actor`: &str — "gsh" (this process). SPIFFE SVID at execution time +//! is not currently carried; once the mutation schema grows an identity +//! field, this becomes the workload identity. +//! - `severity`: &str — "error" (mismatches are security events). +//! - `detail`: Display — the underlying IO error string (read-failed events only). +//! +//! # Event types + +/// The binary at the corpus-directory path hashes to a different CID than +/// the one the session was authorized under.
This is the tamper signal: +/// either the corpus directory was modified after admission, or the +/// directory name never matched its content and the admission layer did +/// not catch it. Either way, execution is denied. +pub const CID_MISMATCH_EXECUTION_CONTENT_MISMATCH: &str = + "cid_mismatch_execution_content_mismatch"; + +/// The file at the expected path exists but could not be read for +/// hashing (IO error, permissions, truncation). Execution is denied +/// fail-closed; a read we cannot verify is a verification we cannot +/// complete. +pub const CID_MISMATCH_EXECUTION_READ_FAILED: &str = + "cid_mismatch_execution_read_failed"; diff --git a/libgsh/src/corpus.rs b/libgsh/src/corpus.rs index f394164..4fff4d7 100644 --- a/libgsh/src/corpus.rs +++ b/libgsh/src/corpus.rs @@ -1,11 +1,35 @@ //! Corpus directory gate — the live killswitch. +//! +//! Verifies two properties before a binary is allowed to execute: +//! +//! 1. The binary name is present in the corpus directory keyed by CID +//! (existing directory-name check). +//! 2. The binary's on-disk SHA-256 matches the CID its directory is +//! named for (content-verification added as part of the CID-content +//! verification audit fix). +//! +//! Property 2 closes the gap where an attacker with write access to the +//! corpus directory could plant a malicious binary under a legitimate +//! CID and have it execute with that CID's privileges. With content +//! verification, the corpus directory can still be tampered with, but +//! the tampered binary will not run. +//! +//! Mismatches emit Chronicle-shaped structured tracing events +//! (`target: "chronicle"`) with the event_type constants from +//! [`crate::chronicle_events`], so tamper incidents remain forensically +//! complete rather than denied-and-forgotten. -use std::path::Path; +use std::path::{Path, PathBuf}; + +use sha2::{Digest, Sha256}; +use tracing::warn; + +use crate::chronicle_events as events; /// Result of a corpus check. 
#[derive(Debug)] pub enum CorpusCheckResult { - /// Binary found in corpus — allowed. + /// Binary found in corpus and its content hashes to the expected CID. Allowed, /// Corpus is ungoverned — no check performed. Ungoverned, @@ -13,6 +37,23 @@ pub enum CorpusCheckResult { NotMounted, /// Binary not in corpus directory — denied (killswitch active). Denied { command: String, corpus_cid: String }, + /// Binary present but content does not hash to the expected CID. + /// Denied — the file has been tampered with or was placed under a + /// CID directory that does not match its content. + ContentMismatch { + command: String, + corpus_cid: String, + actual_cid: String, + path: PathBuf, + }, + /// Binary present but could not be read for hashing. Fail-closed: + /// a read we cannot verify is a verification we cannot complete. + ReadFailed { + command: String, + corpus_cid: String, + path: PathBuf, + detail: String, + }, } /// Default corpus base directory. @@ -21,7 +62,9 @@ pub const DEFAULT_CORPUS_BASE: &str = "/opt/substrate/corpus"; /// Check if a command is authorized in the corpus directory. /// /// `base_dir` overrides the default /opt/substrate/corpus (set via GSH_CORPUS_DIR env). -/// Returns Ok(result) always. Caller decides whether to block on Denied. +/// Always returns a `CorpusCheckResult` directly (no `Result` wrapper). Caller decides whether to block on Denied, +/// ContentMismatch, or ReadFailed — all three are execution-denied +/// states.
pub fn corpus_check(corpus_cid: &str, command: &str) -> CorpusCheckResult { corpus_check_with_base(corpus_cid, command, DEFAULT_CORPUS_BASE) } @@ -44,12 +87,62 @@ pub fn corpus_check_with_base(corpus_cid: &str, command: &str, base_dir: &str) - .map(|n| n.to_string_lossy().to_string()) .unwrap_or_else(|| cmd_name.to_string()); - if corpus_dir.join(&cmd_name).exists() { - CorpusCheckResult::Allowed - } else { - CorpusCheckResult::Denied { + let binary_path = corpus_dir.join(&cmd_name); + if !binary_path.exists() { + return CorpusCheckResult::Denied { command: cmd_name, corpus_cid: corpus_cid.to_string(), + }; + } + + // Content verification: hash the binary on disk and compare to the + // CID the directory is named for. OCI registries make the admission + // layer's CID→content binding cryptographic; this re-check protects + // against post-admission filesystem tampering. + match std::fs::read(&binary_path) { + Ok(bytes) => { + let actual_cid = format!("sha256:{}", hex::encode(Sha256::digest(&bytes))); + if actual_cid == corpus_cid { + CorpusCheckResult::Allowed + } else { + warn!( + target: "chronicle", + event_type = events::CID_MISMATCH_EXECUTION_CONTENT_MISMATCH, + claimed_cid = corpus_cid, + actual_cid = %actual_cid, + context = %binary_path.display(), + actor = "gsh", + severity = "error", + command = %cmd_name, + "Corpus binary content does not match CID directory name (tamper signal)" + ); + CorpusCheckResult::ContentMismatch { + command: cmd_name, + corpus_cid: corpus_cid.to_string(), + actual_cid, + path: binary_path, + } + } + } + Err(e) => { + let detail = e.to_string(); + warn!( + target: "chronicle", + event_type = events::CID_MISMATCH_EXECUTION_READ_FAILED, + claimed_cid = corpus_cid, + context = %binary_path.display(), + actor = "gsh", + severity = "error", + command = %cmd_name, + detail = %detail, + "Could not read corpus binary for hash verification (fail-closed)" + ); + CorpusCheckResult::ReadFailed { + command: cmd_name, + corpus_cid: 
corpus_cid.to_string(), + path: binary_path, + detail, + } } } } @@ -58,8 +151,23 @@ pub fn corpus_check_with_base(corpus_cid: &str, command: &str, base_dir: &str) - mod tests { use super::*; + /// Write `contents` to `dir/cid/name` and return the path of the written + /// file. Callers derive the CID separately (see `cid_of`): a matching + /// CID exercises the happy path, a different one simulates tamper. + fn write_binary(dir: &Path, cid: &str, name: &str, contents: &[u8]) -> PathBuf { + let corpus_dir = dir.join(cid); + std::fs::create_dir_all(&corpus_dir).unwrap(); + let path = corpus_dir.join(name); + std::fs::write(&path, contents).unwrap(); + path + } + + fn cid_of(bytes: &[u8]) -> String { + format!("sha256:{}", hex::encode(Sha256::digest(bytes))) + } + #[test] - fn test_ungoverned_skips_check() { + fn ungoverned_skips_check() { assert!(matches!( corpus_check("sha256:ungoverned", "anything"), CorpusCheckResult::Ungoverned @@ -67,7 +175,7 @@ mod tests { } #[test] - fn test_missing_corpus_dir() { + fn missing_corpus_dir_reports_not_mounted() { assert!(matches!( corpus_check("sha256:nonexistent", "kubectl"), CorpusCheckResult::NotMounted @@ -75,21 +183,52 @@ mod tests { } #[test] - fn test_corpus_with_real_dir() { + fn binary_with_matching_content_is_allowed() { let dir = tempfile::tempdir().unwrap(); - let cid = "sha256:test-corpus"; - let corpus_dir = dir.path().join(cid); - std::fs::create_dir_all(&corpus_dir).unwrap(); - std::fs::write(corpus_dir.join("kubectl"), "").unwrap(); + let contents = b"#!/bin/sh\necho kubectl\n"; + let cid = cid_of(contents); + write_binary(dir.path(), &cid, "kubectl", contents); let base = dir.path().to_str().unwrap(); assert!(matches!( - corpus_check_with_base(cid, "kubectl get pods -n test", base), + corpus_check_with_base(&cid, "kubectl get pods -n test", base), CorpusCheckResult::Allowed )); + } + + #[test] + fn missing_binary_in_corpus_is_denied() { + let dir = tempfile::tempdir().unwrap(); + let contents = b"kubectl"; + let cid = cid_of(contents); +
write_binary(dir.path(), &cid, "kubectl", contents); + + let base = dir.path().to_str().unwrap(); assert!(matches!( - corpus_check_with_base(cid, "helm install", base), + corpus_check_with_base(&cid, "helm install", base), CorpusCheckResult::Denied { .. } )); } + + #[test] + fn tampered_content_triggers_content_mismatch() { + let dir = tempfile::tempdir().unwrap(); + let claimed = cid_of(b"original-kubectl-content"); + // Write content that hashes to something OTHER than the claimed CID + // but store it under the claimed CID's directory — the tamper case. + write_binary(dir.path(), &claimed, "kubectl", b"malicious-replacement"); + + let base = dir.path().to_str().unwrap(); + match corpus_check_with_base(&claimed, "kubectl", base) { + CorpusCheckResult::ContentMismatch { + corpus_cid, + actual_cid, + .. + } => { + assert_eq!(corpus_cid, claimed); + assert_ne!(actual_cid, claimed); + } + other => panic!("expected ContentMismatch, got {other:?}"), + } + } } diff --git a/libgsh/src/lib.rs b/libgsh/src/lib.rs index ab35d27..a61113f 100644 --- a/libgsh/src/lib.rs +++ b/libgsh/src/lib.rs @@ -1,4 +1,5 @@ pub mod ac; +pub mod chronicle_events; pub mod classifier; pub mod config; pub mod corpus;