feat(org-ops-core): add git_blob_hash utility, migrate test_evidence CID
Add git_hash module that computes SHA-1 blob hashes identical to
`git hash-object --stdin`. Includes BPF map key extraction that
handles both legacy sha256: and new gitsha1: CID formats.
Migrate TestRunResult::compute_cid() from custom SHA-256 to git
blob hash. New CID format: `gitsha1:{40 hex chars}`. File storage
path uses the full CID as filename (backward compatible for reads
since old files retain their sha256: names).
New dependency: sha1 0.10 (RustCrypto, same family as sha2).
See cid-reconciliation-audit.md Sites 1, 3.
Signed-off-by: Tyler King <tking@guildhouse.dev>
This commit is contained in:
parent
fdaf39eff2
commit
8ed9bf6413
4 changed files with 175 additions and 10 deletions
|
|
@ -11,6 +11,7 @@ serde_json = "1"
|
|||
anyhow = "1"
|
||||
reqwest = { version = "0.12", features = ["json", "blocking"] }
|
||||
rand = "0.8"
|
||||
sha1 = "0.10"
|
||||
sha2 = "0.10"
|
||||
base64 = "0.22"
|
||||
urlencoding = "2"
|
||||
|
|
|
|||
154
org-ops-core/src/git_hash.rs
Normal file
154
org-ops-core/src/git_hash.rs
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
// Copyright 2026 Guildhouse Dev
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
//! Git-compatible content hashing.
|
||||
//!
|
||||
//! Computes SHA-1 hashes identical to `git hash-object`, allowing
|
||||
//! content to be addressed by the same identifier git would assign
|
||||
//! to it as a blob. This bridges custom CID generation with git's
|
||||
//! native merkle tree.
|
||||
//!
|
||||
//! # Format
|
||||
//!
|
||||
//! Git blob hash: `SHA-1("blob {len}\0{content}")`
|
||||
//!
|
||||
//! Returned as `"gitsha1:{hex}"` for use in CID fields, or as raw
|
||||
//! 20-byte / 40-char hex values for direct git interop.
|
||||
|
||||
use sha1::{Digest, Sha1};
|
||||
|
||||
/// Compute the git blob hash of `content`.
|
||||
///
|
||||
/// Returns the same SHA-1 that `echo -n {content} | git hash-object --stdin`
|
||||
/// would produce. Result is a 40-character lowercase hex string.
|
||||
pub fn git_blob_hash(content: &[u8]) -> String {
|
||||
let header = format!("blob {}\0", content.len());
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(header.as_bytes());
|
||||
hasher.update(content);
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
/// Compute the git blob hash and return it with the `gitsha1:` prefix.
|
||||
///
|
||||
/// This is the CID format used in CorpusEntry and test evidence fields
|
||||
/// to indicate the hash was computed using git's blob algorithm.
|
||||
pub fn git_blob_cid(content: &[u8]) -> String {
|
||||
format!("gitsha1:{}", git_blob_hash(content))
|
||||
}
|
||||
|
||||
/// Extract raw hash bytes (20 bytes) from a git blob hash.
|
||||
///
|
||||
/// Returns `None` if the input is not a valid 40-char hex string.
|
||||
pub fn git_blob_hash_bytes(content: &[u8]) -> [u8; 20] {
|
||||
let header = format!("blob {}\0", content.len());
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(header.as_bytes());
|
||||
hasher.update(content);
|
||||
hasher.finalize().into()
|
||||
}
|
||||
|
||||
/// Parse a CID string and extract the raw hash bytes for BPF map keys.
|
||||
///
|
||||
/// Supports both legacy `sha256:{hex}` and new `gitsha1:{hex}` formats.
|
||||
/// Returns the first 16 bytes of the hash for use as a BPF map key prefix.
|
||||
///
|
||||
/// - `gitsha1:{40 hex chars}` → first 16 bytes of the SHA-1 hash
|
||||
/// - `sha256:{64 hex chars}` → first 16 bytes of the ASCII prefix (legacy compat)
|
||||
/// - anything else → first 16 bytes of the string (best effort)
|
||||
pub fn bpf_key_from_cid(cid: &str) -> [u8; 16] {
|
||||
let mut key = [0u8; 16];
|
||||
if let Some(hex_str) = cid.strip_prefix("gitsha1:") {
|
||||
// New format: decode hex to raw bytes, take first 16
|
||||
if let Ok(bytes) = hex::decode(hex_str) {
|
||||
let len = bytes.len().min(16);
|
||||
key[..len].copy_from_slice(&bytes[..len]);
|
||||
}
|
||||
} else {
|
||||
// Legacy format (sha256:{hex} or raw): use first 16 bytes of
|
||||
// the CID string as-is. This preserves backward compatibility
|
||||
// with existing BPF maps that key on ASCII prefix bytes.
|
||||
let bytes = cid.as_bytes();
|
||||
let len = bytes.len().min(16);
|
||||
key[..len].copy_from_slice(&bytes[..len]);
|
||||
}
|
||||
key
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Output of `echo -n "hello world" | git hash-object --stdin`.
    const HELLO_WORLD_SHA1: &str = "95d09f2b10159347eece71399a7e2e907ea3df4f";

    /// Output of `echo -n "" | git hash-object --stdin` (git's empty blob).
    const EMPTY_BLOB_SHA1: &str = "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391";

    #[test]
    fn test_git_blob_hash_hello_world() {
        assert_eq!(git_blob_hash(b"hello world"), HELLO_WORLD_SHA1);
    }

    #[test]
    fn test_git_blob_hash_empty() {
        assert_eq!(git_blob_hash(b""), EMPTY_BLOB_SHA1);
    }

    #[test]
    fn test_git_blob_cid_format() {
        let cid = git_blob_cid(b"hello world");
        assert!(cid.starts_with("gitsha1:"));
        assert_eq!(cid, format!("gitsha1:{}", HELLO_WORLD_SHA1));
    }

    #[test]
    fn test_git_blob_hash_bytes() {
        let bytes = git_blob_hash_bytes(b"hello world");
        assert_eq!(bytes.len(), 20);
        assert_eq!(hex::encode(bytes), HELLO_WORLD_SHA1);
    }

    #[test]
    fn test_bpf_key_from_gitsha1_cid() {
        let cid = format!("gitsha1:{}", HELLO_WORLD_SHA1);
        let key = bpf_key_from_cid(&cid);
        // The key is the first 16 bytes of the decoded SHA-1 ...
        let expected = hex::decode("95d09f2b10159347eece71399a7e2e90").unwrap();
        assert_eq!(&key[..], &expected[..]);
        // ... and must not start with the ASCII label bytes.
        assert!(key[0] != b's' && key[0] != b'g');
    }

    #[test]
    fn test_bpf_key_from_legacy_sha256_cid() {
        let cid = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
        let key = bpf_key_from_cid(cid);
        // Legacy keys are the first 16 ASCII bytes of the CID string:
        // the 7-byte "sha256:" label followed by 9 hex characters.
        assert_eq!(&key[..7], b"sha256:");
        assert_eq!(key[7], b'a');
        assert_eq!(&key, b"sha256:abcdef123");
    }

    #[test]
    fn test_bpf_key_length_always_16() {
        // The `[u8; 16]` return type already fixes the length, so check what
        // can actually vary: which bytes are copied and how short inputs are
        // padded.
        let key1 = bpf_key_from_cid("gitsha1:95d09f2b10159347eece71399a7e2e907ea3df4f");
        assert_eq!(key1.len(), 16);
        assert_eq!(
            key1.to_vec(),
            hex::decode("95d09f2b10159347eece71399a7e2e90").unwrap()
        );

        let key2 = bpf_key_from_cid("sha256:abcd");
        assert_eq!(key2.len(), 16);
        assert_eq!(&key2[..11], b"sha256:abcd");
        assert!(key2[11..].iter().all(|&b| b == 0));

        let key3 = bpf_key_from_cid("short");
        assert_eq!(key3.len(), 16);
        assert_eq!(&key3[..5], b"short");
        assert!(key3[5..].iter().all(|&b| b == 0));
    }

    #[test]
    fn test_git_blob_hash_is_40_hex_chars() {
        let hash = git_blob_hash(b"any content here");
        assert_eq!(hash.len(), 40);
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn test_git_blob_hash_deterministic() {
        let first = git_blob_hash(b"test data");
        let second = git_blob_hash(b"test data");
        assert_eq!(first, second);
    }
}
|
||||
|
|
@ -11,6 +11,7 @@ pub mod apply_gate;
|
|||
pub mod auth_commands;
|
||||
pub mod chronicle_client;
|
||||
pub mod config;
|
||||
pub mod git_hash;
|
||||
pub mod test_evidence;
|
||||
pub mod display;
|
||||
pub mod git_commands;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,7 @@
|
|||
//! Test run evidence for governed playbooks.
|
||||
//!
|
||||
//! TestRunResult captures the outcome of running a playbook against
|
||||
//! a test/staging environment. Content-addressed by CID.
|
||||
|
||||
use sha2::{Digest, Sha256};
|
||||
//! a test/staging environment. Content-addressed by git blob hash.
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct TestRunResult {
|
||||
|
|
@ -33,12 +31,15 @@ pub struct TestEnvironment {
|
|||
}
|
||||
|
||||
impl TestRunResult {
|
||||
/// Compute content-addressed CID.
|
||||
/// Compute content-addressed CID using git blob hash.
|
||||
///
|
||||
/// Returns `"gitsha1:{40 hex chars}"`. The hash is identical to what
|
||||
/// `git hash-object --stdin` would produce for the JSON serialization,
|
||||
/// allowing test results to be addressed by the same identifier git
|
||||
/// would assign if the content were committed as a blob.
|
||||
pub fn compute_cid(&self) -> String {
|
||||
let canonical = serde_json::to_string(self).unwrap_or_default();
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(canonical.as_bytes());
|
||||
format!("sha256:{:x}", hasher.finalize())
|
||||
crate::git_hash::git_blob_cid(canonical.as_bytes())
|
||||
}
|
||||
|
||||
/// Compute operational confidence score (0-100).
|
||||
|
|
@ -80,11 +81,19 @@ impl TestRunResult {
|
|||
}
|
||||
|
||||
/// Load test result from local file by CID.
|
||||
///
|
||||
/// Supports both legacy `sha256:` and new `gitsha1:` CID formats.
|
||||
pub fn load_test_result(cid: &str) -> anyhow::Result<TestRunResult> {
|
||||
let path = format!("./test-results/{}.json", cid);
|
||||
let content =
|
||||
std::fs::read_to_string(&path).map_err(|_| anyhow::anyhow!("Test result not found: {}", cid))?;
|
||||
Ok(serde_json::from_str(&content)?)
|
||||
match std::fs::read_to_string(&path) {
|
||||
Ok(content) => Ok(serde_json::from_str(&content)?),
|
||||
Err(_) => {
|
||||
// No fallback is attempted here: the file is looked up under the
|
||||
// exact CID given, so a legacy result is found only when it is
|
||||
// requested by its original sha256: CID (its on-disk file name).
|
||||
Err(anyhow::anyhow!("Test result not found: {}", cid))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Save test result and return CID.
|
||||
|
|
|
|||
Loading…
Reference in a new issue