refactor: deduplicate git_blob_hash via governance-types crate

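org-ops-core now re-exports git_blob_hash, git_blob_hash_hex, and
git_blob_cid from the shared governance-types crate instead of
carrying its own copies. A git_blob_hash_string wrapper keeps a
String-returning entry point, and the BPF key helpers remain local.
The direct sha1 dependency is removed; it is now pulled in
transitively through governance-types.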

Signed-off-by: Tyler King <tking@guildhouse.dev>
This commit is contained in:
Tyler J King 2026-04-12 11:55:29 -04:00
parent 8ed9bf6413
commit 4ce225654d
2 changed files with 15 additions and 60 deletions

View file

@ -11,11 +11,11 @@ serde_json = "1"
anyhow = "1"
reqwest = { version = "0.12", features = ["json", "blocking"] }
rand = "0.8"
sha1 = "0.10"
sha2 = "0.10"
base64 = "0.22"
urlencoding = "2"
uuid = { version = "1", features = ["v4"] }
governance-types = { path = "../../substrate/crates/governance-types" }
hex = "0.4"
[dev-dependencies]

View file

@ -3,49 +3,22 @@
//! Git-compatible content hashing.
//!
//! Computes SHA-1 hashes identical to `git hash-object`, allowing
//! content to be addressed by the same identifier git would assign
//! to it as a blob. This bridges custom CID generation with git's
//! native merkle tree.
//!
//! # Format
//!
//! Git blob hash: `SHA-1("blob {len}\0{content}")`
//!
//! Returned as `"gitsha1:{hex}"` for use in CID fields, or as raw
//! 20-byte / 40-char hex values for direct git interop.
//! Re-exports core hash functions from [`governance_types::git_hash`]
//! and adds BPF map key helpers that are substrate-specific.
use sha1::{Digest, Sha1};
// Re-export from the shared crate.
pub use governance_types::git_hash::{git_blob_cid, git_blob_hash, git_blob_hash_hex};
/// Compute the git blob hash of `content`.
/// Compute the git blob hash as a 40-char hex string.
///
/// Returns the same SHA-1 that `echo -n {content} | git hash-object --stdin`
/// would produce. Result is a 40-character lowercase hex string.
pub fn git_blob_hash(content: &[u8]) -> String {
let header = format!("blob {}\0", content.len());
let mut hasher = Sha1::new();
hasher.update(header.as_bytes());
hasher.update(content);
hex::encode(hasher.finalize())
}
/// Compute the git blob hash and return it with the `gitsha1:` prefix.
///
/// This is the CID format used in CorpusEntry and test evidence fields
/// to indicate the hash was computed using git's blob algorithm.
pub fn git_blob_cid(content: &[u8]) -> String {
format!("gitsha1:{}", git_blob_hash(content))
/// Convenience wrapper matching the previous API that returned `String`.
pub fn git_blob_hash_string(content: &[u8]) -> String {
git_blob_hash_hex(content)
}
/// Compute the git blob hash of `content` as raw bytes (20 bytes).
///
/// Always returns the full 20-byte digest; this function is infallible.
pub fn git_blob_hash_bytes(content: &[u8]) -> [u8; 20] {
let header = format!("blob {}\0", content.len());
let mut hasher = Sha1::new();
hasher.update(header.as_bytes());
hasher.update(content);
hasher.finalize().into()
git_blob_hash(content)
}
/// Parse a CID string and extract the raw hash bytes for BPF map keys.
@ -53,9 +26,9 @@ pub fn git_blob_hash_bytes(content: &[u8]) -> [u8; 20] {
/// Supports both legacy `sha256:{hex}` and new `gitsha1:{hex}` formats.
/// Returns the first 16 bytes of the hash for use as a BPF map key prefix.
///
/// - `gitsha1:{40 hex chars}` → first 16 bytes of the SHA-1 hash
/// - `sha256:{64 hex chars}` → first 16 bytes of the ASCII prefix (legacy compat)
/// - anything else → first 16 bytes of the string (best effort)
/// - `gitsha1:{40 hex chars}` -> first 16 bytes of the SHA-1 hash
/// - `sha256:{64 hex chars}` -> first 16 bytes of the ASCII prefix (legacy compat)
/// - anything else -> first 16 bytes of the string (best effort)
pub fn bpf_key_from_cid(cid: &str) -> [u8; 16] {
let mut key = [0u8; 16];
if let Some(hex_str) = cid.strip_prefix("gitsha1:") {
@ -82,14 +55,13 @@ mod tests {
#[test]
fn test_git_blob_hash_hello_world() {
// Must match: echo -n "hello world" | git hash-object --stdin
let hash = git_blob_hash(b"hello world");
let hash = git_blob_hash_hex(b"hello world");
assert_eq!(hash, "95d09f2b10159347eece71399a7e2e907ea3df4f");
}
#[test]
fn test_git_blob_hash_empty() {
// echo -n "" | git hash-object --stdin
let hash = git_blob_hash(b"");
let hash = git_blob_hash_hex(b"");
assert_eq!(hash, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
}
@ -111,11 +83,9 @@ mod tests {
fn test_bpf_key_from_gitsha1_cid() {
let cid = "gitsha1:95d09f2b10159347eece71399a7e2e907ea3df4f";
let key = bpf_key_from_cid(cid);
// First 16 bytes of the decoded SHA-1
assert_eq!(key.len(), 16);
let expected = hex::decode("95d09f2b10159347eece71399a7e2e90").unwrap();
assert_eq!(&key[..], &expected[..]);
// Must NOT contain ASCII label bytes
assert!(key[0] != b's' && key[0] != b'g');
}
@ -123,7 +93,6 @@ mod tests {
fn test_bpf_key_from_legacy_sha256_cid() {
let cid = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
let key = bpf_key_from_cid(cid);
// Legacy: first 16 bytes of the ASCII string "sha256:abcdef..."
assert_eq!(&key[..7], b"sha256:");
assert_eq!(key[7], b'a');
}
@ -137,18 +106,4 @@ mod tests {
assert_eq!(key2.len(), 16);
assert_eq!(key3.len(), 16);
}
#[test]
fn test_git_blob_hash_is_40_hex_chars() {
let hash = git_blob_hash(b"any content here");
assert_eq!(hash.len(), 40);
assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
}
#[test]
fn test_git_blob_hash_deterministic() {
let h1 = git_blob_hash(b"test data");
let h2 = git_blob_hash(b"test data");
assert_eq!(h1, h2);
}
}