diff --git a/org-ops-core/Cargo.toml b/org-ops-core/Cargo.toml
index 0b38d83..cadc512 100644
--- a/org-ops-core/Cargo.toml
+++ b/org-ops-core/Cargo.toml
@@ -11,6 +11,7 @@ serde_json = "1"
 anyhow = "1"
 reqwest = { version = "0.12", features = ["json", "blocking"] }
 rand = "0.8"
+sha1 = "0.10"
 sha2 = "0.10"
 base64 = "0.22"
 urlencoding = "2"
diff --git a/org-ops-core/src/git_hash.rs b/org-ops-core/src/git_hash.rs
new file mode 100644
index 0000000..bd58803
--- /dev/null
+++ b/org-ops-core/src/git_hash.rs
@@ -0,0 +1,171 @@
+// Copyright 2026 Guildhouse Dev
+// SPDX-License-Identifier: Apache-2.0
+
+//! Git-compatible content hashing.
+//!
+//! Computes SHA-1 hashes identical to `git hash-object`, allowing
+//! content to be addressed by the same identifier git would assign
+//! to it as a blob. This bridges custom CID generation with git's
+//! native merkle tree.
+//!
+//! # Format
+//!
+//! Git blob hash: `SHA-1("blob {len}\0{content}")`
+//!
+//! Returned as `"gitsha1:{hex}"` for use in CID fields, or as raw
+//! 20-byte / 40-char hex values for direct git interop.
+
+use sha1::{Digest, Sha1};
+
+/// Compute the git blob hash of `content`.
+///
+/// Returns the same SHA-1 that `echo -n {content} | git hash-object --stdin`
+/// would produce. Result is a 40-character lowercase hex string.
+pub fn git_blob_hash(content: &[u8]) -> String {
+    // Single source of truth: hex-encode the raw digest.
+    hex::encode(git_blob_hash_bytes(content))
+}
+
+/// Compute the git blob hash and return it with the `gitsha1:` prefix.
+///
+/// This is the CID format used in CorpusEntry and test evidence fields
+/// to indicate the hash was computed using git's blob algorithm.
+pub fn git_blob_cid(content: &[u8]) -> String {
+    format!("gitsha1:{}", git_blob_hash(content))
+}
+
+/// Compute the git blob hash of `content` as raw bytes.
+///
+/// Hashes `"blob {len}\0"` followed by `content` with SHA-1 and returns
+/// the 20-byte digest; [`git_blob_hash`] is the hex encoding of this value.
+pub fn git_blob_hash_bytes(content: &[u8]) -> [u8; 20] {
+    let header = format!("blob {}\0", content.len());
+    let mut hasher = Sha1::new();
+    hasher.update(header.as_bytes());
+    hasher.update(content);
+    hasher.finalize().into()
+}
+
+/// Derive a 16-byte BPF map key from a CID string.
+///
+/// Supports both legacy `sha256:{hex}` and new `gitsha1:{hex}` formats.
+///
+/// - `gitsha1:{40 hex chars}` → first 16 bytes of the decoded SHA-1 hash
+/// - `sha256:{64 hex chars}` → first 16 bytes of the ASCII CID string,
+///   label included (legacy compat)
+/// - anything else, including a `gitsha1:` CID whose payload is not
+///   exactly 40 hex characters → first 16 bytes of the string (best
+///   effort), zero-padded when the string is shorter than 16 bytes
+pub fn bpf_key_from_cid(cid: &str) -> [u8; 16] {
+    let mut key = [0u8; 16];
+
+    // New format: decode the hex payload and keep the leading 16 of
+    // the 20 digest bytes.
+    if let Some(hex_str) = cid.strip_prefix("gitsha1:") {
+        if let Ok(digest) = hex::decode(hex_str) {
+            if digest.len() == 20 {
+                key.copy_from_slice(&digest[..16]);
+                return key;
+            }
+        }
+        // Malformed payload: fall through to the ASCII prefix below so
+        // distinct bad CIDs do not all collapse onto the all-zero key.
+    }
+
+    // Legacy format (sha256:{hex} or raw): use first 16 bytes of
+    // the CID string as-is. This preserves backward compatibility
+    // with existing BPF maps that key on ASCII prefix bytes.
+    let bytes = cid.as_bytes();
+    let len = bytes.len().min(16);
+    key[..len].copy_from_slice(&bytes[..len]);
+    key
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_git_blob_hash_hello_world() {
+        // Must match: echo -n "hello world" | git hash-object --stdin
+        let hash = git_blob_hash(b"hello world");
+        assert_eq!(hash, "95d09f2b10159347eece71399a7e2e907ea3df4f");
+    }
+
+    #[test]
+    fn test_git_blob_hash_empty() {
+        // echo -n "" | git hash-object --stdin
+        let hash = git_blob_hash(b"");
+        assert_eq!(hash, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
+    }
+
+    #[test]
+    fn test_git_blob_cid_format() {
+        let cid = git_blob_cid(b"hello world");
+        assert!(cid.starts_with("gitsha1:"));
+        assert_eq!(cid, "gitsha1:95d09f2b10159347eece71399a7e2e907ea3df4f");
+    }
+
+    #[test]
+    fn test_git_blob_hash_bytes() {
+        let bytes = git_blob_hash_bytes(b"hello world");
+        assert_eq!(bytes.len(), 20);
+        assert_eq!(hex::encode(bytes), "95d09f2b10159347eece71399a7e2e907ea3df4f");
+    }
+
+    #[test]
+    fn test_bpf_key_from_gitsha1_cid() {
+        let cid = "gitsha1:95d09f2b10159347eece71399a7e2e907ea3df4f";
+        let key = bpf_key_from_cid(cid);
+        // First 16 bytes of the decoded SHA-1
+        assert_eq!(key.len(), 16);
+        let expected = hex::decode("95d09f2b10159347eece71399a7e2e90").unwrap();
+        assert_eq!(&key[..], &expected[..]);
+        // Must NOT contain ASCII label bytes
+        assert!(key[0] != b's' && key[0] != b'g');
+    }
+
+    #[test]
+    fn test_bpf_key_from_malformed_gitsha1_cid() {
+        // Non-hex and truncated payloads fall back to the ASCII prefix
+        // instead of yielding an all-zero key.
+        let key = bpf_key_from_cid("gitsha1:not-hex-at-all");
+        assert_eq!(&key[..], b"gitsha1:not-hex-");
+        let short = bpf_key_from_cid("gitsha1:95d0");
+        assert_eq!(&short[..12], b"gitsha1:95d0");
+        assert_eq!(&short[12..], &[0u8; 4]);
+    }
+
+    #[test]
+    fn test_bpf_key_from_legacy_sha256_cid() {
+        let cid = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
+        let key = bpf_key_from_cid(cid);
+        // Legacy: first 16 bytes of the ASCII string "sha256:abcdef..."
+        assert_eq!(&key[..7], b"sha256:");
+        assert_eq!(key[7], b'a');
+    }
+
+    #[test]
+    fn test_bpf_key_length_always_16() {
+        let key1 = bpf_key_from_cid("gitsha1:95d09f2b10159347eece71399a7e2e907ea3df4f");
+        let key2 = bpf_key_from_cid("sha256:abcd");
+        let key3 = bpf_key_from_cid("short");
+        assert_eq!(key1.len(), 16);
+        assert_eq!(key2.len(), 16);
+        assert_eq!(key3.len(), 16);
+    }
+
+    #[test]
+    fn test_git_blob_hash_is_40_hex_chars() {
+        let hash = git_blob_hash(b"any content here");
+        assert_eq!(hash.len(), 40);
+        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
+    }
+
+    #[test]
+    fn test_git_blob_hash_deterministic() {
+        let h1 = git_blob_hash(b"test data");
+        let h2 = git_blob_hash(b"test data");
+        assert_eq!(h1, h2);
+    }
+}
diff --git a/org-ops-core/src/lib.rs b/org-ops-core/src/lib.rs
index a9b136d..0ff3fcd 100644
--- a/org-ops-core/src/lib.rs
+++ b/org-ops-core/src/lib.rs
@@ -11,6 +11,7 @@ pub mod apply_gate;
 pub mod auth_commands;
 pub mod chronicle_client;
 pub mod config;
+pub mod git_hash;
 pub mod test_evidence;
 pub mod display;
 pub mod git_commands;
diff --git a/org-ops-core/src/test_evidence.rs b/org-ops-core/src/test_evidence.rs
index 31ab663..e3733cc 100644
--- a/org-ops-core/src/test_evidence.rs
+++ b/org-ops-core/src/test_evidence.rs
@@ -1,9 +1,7 @@
 //! Test run evidence for governed playbooks.
 //!
 //! TestRunResult captures the outcome of running a playbook against
-//! a test/staging environment. Content-addressed by CID.
-
-use sha2::{Digest, Sha256};
+//! a test/staging environment. Content-addressed by git blob hash.
 
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
 pub struct TestRunResult {
@@ -33,12 +31,18 @@ pub struct TestEnvironment {
 }
 
 impl TestRunResult {
-    /// Compute content-addressed CID.
+    /// Compute content-addressed CID using git blob hash.
+    ///
+    /// Returns `"gitsha1:{40 hex chars}"`. The hash is identical to what
+    /// `git hash-object --stdin` would produce for the JSON serialization,
+    /// allowing test results to be addressed by the same identifier git
+    /// would assign if the content were committed as a blob.
+    ///
+    /// If serialization fails, the empty string is hashed instead, so the
+    /// result is the CID of git's empty blob.
     pub fn compute_cid(&self) -> String {
         let canonical = serde_json::to_string(self).unwrap_or_default();
-        let mut hasher = Sha256::new();
-        hasher.update(canonical.as_bytes());
-        format!("sha256:{:x}", hasher.finalize())
+        crate::git_hash::git_blob_cid(canonical.as_bytes())
     }
 
     /// Compute operational confidence score (0-100).
@@ -80,11 +84,20 @@ impl TestRunResult {
 }
 
 /// Load test result from local file by CID.
+///
+/// The CID is used verbatim as the file name under `./test-results/`, so
+/// both legacy `sha256:` and new `gitsha1:` CIDs resolve as long as the
+/// result was saved under that exact CID. No cross-format fallback is
+/// attempted.
 pub fn load_test_result(cid: &str) -> anyhow::Result {
     let path = format!("./test-results/{}.json", cid);
-    let content =
-        std::fs::read_to_string(&path).map_err(|_| anyhow::anyhow!("Test result not found: {}", cid))?;
+    let content = std::fs::read_to_string(&path).map_err(|err| match err.kind() {
+        std::io::ErrorKind::NotFound => anyhow::anyhow!("Test result not found: {}", cid),
+        // Keep the cause for permission and other I/O failures instead of
+        // reporting them as a missing result.
+        _ => anyhow::anyhow!("Failed to read test result {}: {}", path, err),
+    })?;
     Ok(serde_json::from_str(&content)?)
 }
 
 /// Save test result and return CID.