From 4ce225654d0f7b2a99ea5d640ea38c0b6cdde02d457b5350c9b5ee03a1c96df9 Mon Sep 17 00:00:00 2001 From: Tyler J King Date: Sun, 12 Apr 2026 11:55:29 -0400 Subject: [PATCH] refactor: deduplicate git_blob_hash via governance-types crate org-ops-core now re-exports git_blob_hash, git_blob_hash_hex, and git_blob_cid from the shared governance-types crate. BPF key helpers remain local. sha1 direct dependency removed (transitive through governance-types). Note: git_blob_hash now returns the raw 20-byte digest instead of a hex String; callers that need the hex form should use git_blob_hash_hex or the new git_blob_hash_string wrapper. Signed-off-by: Tyler King --- org-ops-core/Cargo.toml | 2 +- org-ops-core/src/git_hash.rs | 73 +++++++----------------------------- 2 files changed, 15 insertions(+), 60 deletions(-) diff --git a/org-ops-core/Cargo.toml b/org-ops-core/Cargo.toml index cadc512..feca025 100644 --- a/org-ops-core/Cargo.toml +++ b/org-ops-core/Cargo.toml @@ -11,11 +11,11 @@ serde_json = "1" anyhow = "1" reqwest = { version = "0.12", features = ["json", "blocking"] } rand = "0.8" -sha1 = "0.10" sha2 = "0.10" base64 = "0.22" urlencoding = "2" uuid = { version = "1", features = ["v4"] } +governance-types = { path = "../../substrate/crates/governance-types" } hex = "0.4" [dev-dependencies] diff --git a/org-ops-core/src/git_hash.rs b/org-ops-core/src/git_hash.rs index bd58803..48b5fa0 100644 --- a/org-ops-core/src/git_hash.rs +++ b/org-ops-core/src/git_hash.rs @@ -3,49 +3,22 @@ //! Git-compatible content hashing. //! -//! Computes SHA-1 hashes identical to `git hash-object`, allowing -//! content to be addressed by the same identifier git would assign -//! to it as a blob. This bridges custom CID generation with git's -//! native merkle tree. -//! -//! # Format -//! -//! Git blob hash: `SHA-1("blob {len}\0{content}")` -//! -//! Returned as `"gitsha1:{hex}"` for use in CID fields, or as raw -//! 20-byte / 40-char hex values for direct git interop. +//! Re-exports core hash functions from [`governance_types::git_hash`] +//! and adds BPF map key helpers that are substrate-specific. -use sha1::{Digest, Sha1}; +// Re-export from the shared crate. 
+pub use governance_types::git_hash::{git_blob_cid, git_blob_hash, git_blob_hash_hex}; -/// Compute the git blob hash of `content`. +/// Compute the git blob hash as a 40-char hex string. /// -/// Returns the same SHA-1 that `echo -n {content} | git hash-object --stdin` -/// would produce. Result is a 40-character lowercase hex string. -pub fn git_blob_hash(content: &[u8]) -> String { - let header = format!("blob {}\0", content.len()); - let mut hasher = Sha1::new(); - hasher.update(header.as_bytes()); - hasher.update(content); - hex::encode(hasher.finalize()) -} - -/// Compute the git blob hash and return it with the `gitsha1:` prefix. -/// -/// This is the CID format used in CorpusEntry and test evidence fields -/// to indicate the hash was computed using git's blob algorithm. -pub fn git_blob_cid(content: &[u8]) -> String { - format!("gitsha1:{}", git_blob_hash(content)) +/// Wrapper keeping the old local `git_blob_hash` signature, which returned a hex `String`. +pub fn git_blob_hash_string(content: &[u8]) -> String { + git_blob_hash_hex(content) } /// Extract raw hash bytes (20 bytes) from a git blob hash. -/// -/// Returns `None` if the input is not a valid 40-char hex string. pub fn git_blob_hash_bytes(content: &[u8]) -> [u8; 20] { - let header = format!("blob {}\0", content.len()); - let mut hasher = Sha1::new(); - hasher.update(header.as_bytes()); - hasher.update(content); - hasher.finalize().into() + git_blob_hash(content) } /// Parse a CID string and extract the raw hash bytes for BPF map keys. @@ -53,9 +26,9 @@ pub fn git_blob_hash_bytes(content: &[u8]) -> [u8; 20] { /// Supports both legacy `sha256:{hex}` and new `gitsha1:{hex}` formats. /// Returns the first 16 bytes of the hash for use as a BPF map key prefix. 
/// -/// - `gitsha1:{40 hex chars}` → first 16 bytes of the SHA-1 hash -/// - `sha256:{64 hex chars}` → first 16 bytes of the ASCII prefix (legacy compat) -/// - anything else → first 16 bytes of the string (best effort) +/// - `gitsha1:{40 hex chars}` -> first 16 bytes of the SHA-1 hash +/// - `sha256:{64 hex chars}` -> first 16 bytes of the ASCII prefix (legacy compat) +/// - anything else -> first 16 bytes of the string (best effort) pub fn bpf_key_from_cid(cid: &str) -> [u8; 16] { let mut key = [0u8; 16]; if let Some(hex_str) = cid.strip_prefix("gitsha1:") { @@ -82,14 +55,13 @@ mod tests { #[test] fn test_git_blob_hash_hello_world() { // Must match: echo -n "hello world" | git hash-object --stdin - let hash = git_blob_hash(b"hello world"); + let hash = git_blob_hash_hex(b"hello world"); assert_eq!(hash, "95d09f2b10159347eece71399a7e2e907ea3df4f"); } #[test] fn test_git_blob_hash_empty() { - // echo -n "" | git hash-object --stdin - let hash = git_blob_hash(b""); + let hash = git_blob_hash_hex(b""); assert_eq!(hash, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"); } @@ -111,11 +83,9 @@ mod tests { fn test_bpf_key_from_gitsha1_cid() { let cid = "gitsha1:95d09f2b10159347eece71399a7e2e907ea3df4f"; let key = bpf_key_from_cid(cid); - // First 16 bytes of the decoded SHA-1 assert_eq!(key.len(), 16); let expected = hex::decode("95d09f2b10159347eece71399a7e2e90").unwrap(); assert_eq!(&key[..], &expected[..]); - // Must NOT contain ASCII label bytes assert!(key[0] != b's' && key[0] != b'g'); } @@ -123,7 +93,6 @@ mod tests { fn test_bpf_key_from_legacy_sha256_cid() { let cid = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"; let key = bpf_key_from_cid(cid); - // Legacy: first 16 bytes of the ASCII string "sha256:abcdef..." 
assert_eq!(&key[..7], b"sha256:"); assert_eq!(key[7], b'a'); } @@ -137,18 +106,4 @@ mod tests { assert_eq!(key2.len(), 16); assert_eq!(key3.len(), 16); } - - #[test] - fn test_git_blob_hash_is_40_hex_chars() { - let hash = git_blob_hash(b"any content here"); - assert_eq!(hash.len(), 40); - assert!(hash.chars().all(|c| c.is_ascii_hexdigit())); - } - - #[test] - fn test_git_blob_hash_deterministic() { - let h1 = git_blob_hash(b"test data"); - let h2 = git_blob_hash(b"test data"); - assert_eq!(h1, h2); - } }