refactor: deduplicate git_blob_hash via governance-types crate
org-ops-core now re-exports git_blob_hash, git_blob_hash_hex, and git_blob_cid from the shared governance-types crate. BPF key helpers remain local. sha1 direct dependency removed (transitive through governance-types). Signed-off-by: Tyler King <tking@guildhouse.dev>
This commit is contained in:
parent
8ed9bf6413
commit
4ce225654d
2 changed files with 15 additions and 60 deletions
|
|
@ -11,11 +11,11 @@ serde_json = "1"
|
|||
anyhow = "1"
|
||||
reqwest = { version = "0.12", features = ["json", "blocking"] }
|
||||
rand = "0.8"
|
||||
sha1 = "0.10"
|
||||
sha2 = "0.10"
|
||||
base64 = "0.22"
|
||||
urlencoding = "2"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
governance-types = { path = "../../substrate/crates/governance-types" }
|
||||
hex = "0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
|||
|
|
@ -3,49 +3,22 @@
|
|||
|
||||
//! Git-compatible content hashing.
|
||||
//!
|
||||
//! Computes SHA-1 hashes identical to `git hash-object`, allowing
|
||||
//! content to be addressed by the same identifier git would assign
|
||||
//! to it as a blob. This bridges custom CID generation with git's
|
||||
//! native merkle tree.
|
||||
//!
|
||||
//! # Format
|
||||
//!
|
||||
//! Git blob hash: `SHA-1("blob {len}\0{content}")`
|
||||
//!
|
||||
//! Returned as `"gitsha1:{hex}"` for use in CID fields, or as raw
|
||||
//! 20-byte / 40-char hex values for direct git interop.
|
||||
//! Re-exports core hash functions from [`governance_types::git_hash`]
|
||||
//! and adds BPF map key helpers that are substrate-specific.
|
||||
|
||||
use sha1::{Digest, Sha1};
|
||||
// Re-export from the shared crate.
|
||||
pub use governance_types::git_hash::{git_blob_cid, git_blob_hash, git_blob_hash_hex};
|
||||
|
||||
/// Compute the git blob hash of `content`.
|
||||
/// Compute the git blob hash as a 40-char hex string.
|
||||
///
|
||||
/// Returns the same SHA-1 that `echo -n {content} | git hash-object --stdin`
|
||||
/// would produce. Result is a 40-character lowercase hex string.
|
||||
pub fn git_blob_hash(content: &[u8]) -> String {
|
||||
let header = format!("blob {}\0", content.len());
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(header.as_bytes());
|
||||
hasher.update(content);
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
/// Compute the git blob hash and return it with the `gitsha1:` prefix.
|
||||
///
|
||||
/// This is the CID format used in CorpusEntry and test evidence fields
|
||||
/// to indicate the hash was computed using git's blob algorithm.
|
||||
pub fn git_blob_cid(content: &[u8]) -> String {
|
||||
format!("gitsha1:{}", git_blob_hash(content))
|
||||
/// Convenience wrapper that delegates to [`git_blob_hash_hex`] and returns its `String` result, kept for callers of the previous `String`-returning API.
|
||||
pub fn git_blob_hash_string(content: &[u8]) -> String {
|
||||
git_blob_hash_hex(content)
|
||||
}
|
||||
|
||||
/// Compute the git blob hash of `content` and return it as raw bytes (20 bytes).
|
||||
///
|
||||
/// Always returns a `[u8; 20]`; the return type is not an `Option`, so there is no `None` case.
|
||||
pub fn git_blob_hash_bytes(content: &[u8]) -> [u8; 20] {
|
||||
let header = format!("blob {}\0", content.len());
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(header.as_bytes());
|
||||
hasher.update(content);
|
||||
hasher.finalize().into()
|
||||
git_blob_hash(content)
|
||||
}
|
||||
|
||||
/// Parse a CID string and extract the raw hash bytes for BPF map keys.
|
||||
|
|
@ -53,9 +26,9 @@ pub fn git_blob_hash_bytes(content: &[u8]) -> [u8; 20] {
|
|||
/// Supports both legacy `sha256:{hex}` and new `gitsha1:{hex}` formats.
|
||||
/// Returns the first 16 bytes of the hash for use as a BPF map key prefix.
|
||||
///
|
||||
/// - `gitsha1:{40 hex chars}` → first 16 bytes of the SHA-1 hash
|
||||
/// - `sha256:{64 hex chars}` → first 16 bytes of the ASCII prefix (legacy compat)
|
||||
/// - anything else → first 16 bytes of the string (best effort)
|
||||
/// - `gitsha1:{40 hex chars}` -> first 16 bytes of the SHA-1 hash
|
||||
/// - `sha256:{64 hex chars}` -> first 16 bytes of the ASCII prefix (legacy compat)
|
||||
/// - anything else -> first 16 bytes of the string (best effort)
|
||||
pub fn bpf_key_from_cid(cid: &str) -> [u8; 16] {
|
||||
let mut key = [0u8; 16];
|
||||
if let Some(hex_str) = cid.strip_prefix("gitsha1:") {
|
||||
|
|
@ -82,14 +55,13 @@ mod tests {
|
|||
#[test]
|
||||
fn test_git_blob_hash_hello_world() {
|
||||
// Must match: echo -n "hello world" | git hash-object --stdin
|
||||
let hash = git_blob_hash(b"hello world");
|
||||
let hash = git_blob_hash_hex(b"hello world");
|
||||
assert_eq!(hash, "95d09f2b10159347eece71399a7e2e907ea3df4f");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_git_blob_hash_empty() {
|
||||
// echo -n "" | git hash-object --stdin
|
||||
let hash = git_blob_hash(b"");
|
||||
let hash = git_blob_hash_hex(b"");
|
||||
assert_eq!(hash, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
|
||||
}
|
||||
|
||||
|
|
@ -111,11 +83,9 @@ mod tests {
|
|||
fn test_bpf_key_from_gitsha1_cid() {
|
||||
let cid = "gitsha1:95d09f2b10159347eece71399a7e2e907ea3df4f";
|
||||
let key = bpf_key_from_cid(cid);
|
||||
// First 16 bytes of the decoded SHA-1
|
||||
assert_eq!(key.len(), 16);
|
||||
let expected = hex::decode("95d09f2b10159347eece71399a7e2e90").unwrap();
|
||||
assert_eq!(&key[..], &expected[..]);
|
||||
// Must NOT contain ASCII label bytes
|
||||
assert!(key[0] != b's' && key[0] != b'g');
|
||||
}
|
||||
|
||||
|
|
@ -123,7 +93,6 @@ mod tests {
|
|||
fn test_bpf_key_from_legacy_sha256_cid() {
|
||||
let cid = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
|
||||
let key = bpf_key_from_cid(cid);
|
||||
// Legacy: first 16 bytes of the ASCII string "sha256:abcdef..."
|
||||
assert_eq!(&key[..7], b"sha256:");
|
||||
assert_eq!(key[7], b'a');
|
||||
}
|
||||
|
|
@ -137,18 +106,4 @@ mod tests {
|
|||
assert_eq!(key2.len(), 16);
|
||||
assert_eq!(key3.len(), 16);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_git_blob_hash_is_40_hex_chars() {
|
||||
let hash = git_blob_hash(b"any content here");
|
||||
assert_eq!(hash.len(), 40);
|
||||
assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_git_blob_hash_deterministic() {
|
||||
let h1 = git_blob_hash(b"test data");
|
||||
let h2 = git_blob_hash(b"test data");
|
||||
assert_eq!(h1, h2);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue