// Package governance provides a gRPC client for the Guildhouse GovernanceService
// and CeremonyService, used by SPIRE plugins to participate in governed mutations.
package governance

import (
	"context"
	"crypto/sha256"
	"crypto/tls"
	"crypto/x509"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"log"
	"os"
	"time"

	pb "github.com/guildhouse-cooperative/guildhouse-spire-plugins/gen/quartermaster/v1"
	"google.golang.org/grpc"
	"google.golang.org/grpc/backoff"
	"google.golang.org/grpc/connectivity"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/credentials/insecure"
)

// Config holds governance client configuration.
type Config struct {
	// GovernanceAddr is the gRPC address of the GovernanceService.
	// NewClient rejects an empty value.
	GovernanceAddr string
	// CeremonyAddr is the gRPC address of the CeremonyService.
	// NewClient does not open a connection to this address.
	CeremonyAddr string
	// NotaryAddr is the gRPC address of the NotaryService.
	// When empty, NewClient dials GovernanceAddr for the notary instead.
	NotaryAddr string

	// TLS configuration — REQUIRED for production.
	// Uses SPIFFE-aware mTLS: the plugin's own SVID authenticates
	// to Quartermaster services.
	// mTLS is only used when all three paths below are non-empty.
	TLSCertPath string // Path to X.509 SVID certificate
	TLSKeyPath  string // Path to SVID private key
	TLSCAPath   string // Path to trust bundle (CA certificates)
	TLSRequired bool   // If true, NewClient fails without TLS config
}

// IntentResult holds the result of a CreateIntent call.
// All fields are copied from the CreateIntent response.
type IntentResult struct {
	IntentID   string
	CeremonyID string // non-empty if ceremony required
	Denied     bool
	Error      string
}

// RedeemResult holds the result of a RedeemIntent call.
// SatHash, SatBytes and ExpiresAt are left at their zero values when the
// response carries no SAT.
type RedeemResult struct {
	Success   bool
	SatHash   []byte
	SatBytes  []byte    // raw SAT bytes for downstream verification
	ExpiresAt time.Time // SAT expiry — consumers MUST check before use
	Status    string
	Error     string
}

// IsExpired returns true if the SAT has expired.
// A zero ExpiresAt (no expiry recorded) always reports false; otherwise the
// expiry is compared against the current wall-clock time.
func (r *RedeemResult) IsExpired() bool {
	return !r.ExpiresAt.IsZero() && time.Now().After(r.ExpiresAt)
}

// CredentialEvent describes a credential lifecycle event for merkle anchoring.
// The CredentialFingerprint field binds the merkle leaf to a specific credential,
// preventing proof replay across certificates (S-03).
type CredentialEvent struct {
	EventType             string // "issue", "rotate", "revoke"
	IntentID              string // governance intent UUID
	CredentialFingerprint string // SHA-256 of certificate public key bytes, hex-encoded
	SpiffeID              string // written to the envelope as "spiffe_id"
	TenantID              string // written to the envelope as "tenant_id"; also passed as the cluster ID on submission
	CertSerialNumber      uint64 // written to the envelope only when greater than zero
	IssuedAt              time.Time // not part of the envelope built by NotarizeCredentialEvent
	ExpiresAt             time.Time // not part of the envelope built by NotarizeCredentialEvent
}

// CredentialVerification holds the parameters for verifying a credential's
// governance provenance via the NotaryService.
type CredentialVerification struct {
	IntentID             string // from governance-intent extension
	CertificatePublicKey []byte // raw public key bytes from the certificate
}

// VerificationResult holds the result of a credential governance verification.
type VerificationResult struct {
	Governed         bool   // true if the credential has valid governance provenance
	AnchorID         string // merkle anchor ID; VerifyCredentialGovernance leaves this empty
	FingerprintMatch bool   // true if merkle leaf's fingerprint matches the cert
	Error            string
}

// Client wraps gRPC clients for GovernanceService, CeremonyService, and NotaryService.
// It currently holds connections and stubs for the governance and notary
// services only; there is no CeremonyService stub among its fields.
type Client struct {
	config       Config                       // configuration the client was built from
	govConn      *grpc.ClientConn             // connection backing govClient
	notaryConn   *grpc.ClientConn             // connection backing notaryClient
	govClient    pb.GovernanceServiceClient   // stub used by CreateIntent / RedeemIntent
	notaryClient pb.QuartermasterNotaryClient // stub used by CreateAnchor / VerifyInclusion calls
}

// NewClient creates a governance client with gRPC connections.
//
// The returned client is lazy-connect: grpc.DialContext is non-blocking, so
// NewClient succeeds even when Quartermaster is unreachable. Connection
// attempts happen in the background with the exponential backoff configured
// in buildDialOptions. RPCs return codes.Unavailable until QM comes up; the
// caller is expected to log-and-continue rather than crash the SPIRE plugin.
func NewClient(cfg Config) (*Client, error) { if cfg.GovernanceAddr == "" { return nil, fmt.Errorf("governance: governance address is required") } if cfg.TLSRequired { if cfg.TLSCertPath == "" || cfg.TLSKeyPath == "" || cfg.TLSCAPath == "" { return nil, fmt.Errorf("governance: TLS is required but cert/key/ca paths are not configured") } } dialOpts, err := buildDialOptions(cfg) if err != nil { return nil, fmt.Errorf("governance: build dial options: %w", err) } // Non-blocking dial: returns a ClientConn in IDLE state. Connection // attempts are driven in the background by grpc-go's reconnect loop, // using the backoff config set in buildDialOptions. govConn, err := grpc.DialContext(context.Background(), cfg.GovernanceAddr, dialOpts...) if err != nil { return nil, fmt.Errorf("governance: dial %s: %w", cfg.GovernanceAddr, err) } c := &Client{ config: cfg, govConn: govConn, govClient: pb.NewGovernanceServiceClient(govConn), } // NotaryService: defaults to the governance address if not set separately. notaryAddr := cfg.NotaryAddr if notaryAddr == "" { notaryAddr = cfg.GovernanceAddr } notaryConn, err := grpc.DialContext(context.Background(), notaryAddr, dialOpts...) if err != nil { govConn.Close() return nil, fmt.Errorf("governance: dial notary %s: %w", notaryAddr, err) } c.notaryConn = notaryConn c.notaryClient = pb.NewQuartermasterNotaryClient(notaryConn) // Trigger background connection attempts immediately so state transitions // log from plugin startup rather than waiting for first RPC. govConn.Connect() if notaryConn != govConn { notaryConn.Connect() } // Log connection-state transitions so operators see when QM becomes // reachable. One WARN-style line on first unreachable observation, // one INFO on Ready. go watchConnState("governance", cfg.GovernanceAddr, govConn) if notaryConn != govConn { go watchConnState("notary", notaryAddr, notaryConn) } return c, nil } // Close shuts down all gRPC connections. 
func (c *Client) Close() error { var firstErr error if c.govConn != nil { if err := c.govConn.Close(); err != nil && firstErr == nil { firstErr = err } } if c.notaryConn != nil { if err := c.notaryConn.Close(); err != nil && firstErr == nil { firstErr = err } } return firstErr } // CreateIntent creates a MutationIntent for a credential operation. func (c *Client) CreateIntent(ctx context.Context, registryType, verb, artifactScope, tenantID string) (*IntentResult, error) { resp, err := c.govClient.CreateIntent(ctx, &pb.CreateIntentRequest{ RegistryType: registryType, Verb: verb, ArtifactScope: artifactScope, TenantId: tenantID, }) if err != nil { return nil, fmt.Errorf("governance: CreateIntent RPC: %w", err) } return &IntentResult{ IntentID: resp.IntentId, CeremonyID: resp.CeremonyId, Denied: resp.Denied, Error: resp.Error, }, nil } // RedeemIntent redeems a MutationIntent to obtain a SAT. func (c *Client) RedeemIntent(ctx context.Context, intentID string) (*RedeemResult, error) { resp, err := c.govClient.RedeemIntent(ctx, &pb.RedeemIntentRequest{ IntentId: intentID, }) if err != nil { return nil, fmt.Errorf("governance: RedeemIntent RPC: %w", err) } result := &RedeemResult{ Success: resp.Success, Status: resp.Status, Error: resp.Error, } if resp.Sat != nil { result.SatHash = resp.Sat.SatHash result.SatBytes = resp.Sat.SatBytes if resp.Sat.ExpiresAt != nil { result.ExpiresAt = resp.Sat.ExpiresAt.AsTime() } } return result, nil } // CreateCeremony creates a governance ceremony. func (c *Client) CreateCeremony(ctx context.Context, ceremonyType, intentID string, requiredApprovals uint32) (string, error) { // CeremonyService is not yet defined in proto — use GovernanceService intent // with ceremony_id from the response as a proxy. return "", fmt.Errorf("governance: CreateCeremony requires CeremonyService proto (not yet generated)") } // SubmitMerkleLeaf submits a credential event as a merkle leaf to the NotaryService. 
func (c *Client) SubmitMerkleLeaf(ctx context.Context, clusterID string, leaf []byte) (string, error) { resp, err := c.notaryClient.CreateAnchor(ctx, &pb.CreateAnchorRequest{ ClusterId: clusterID, Leaves: [][]byte{leaf}, }) if err != nil { return "", fmt.Errorf("governance: SubmitMerkleLeaf RPC: %w", err) } return resp.AnchorId, nil } // NotarizeCredentialEvent sends a credential lifecycle event to the governance // plane for merkle anchoring. The event MUST include a CredentialFingerprint // to bind the merkle leaf to the specific certificate (S-03 fix). func (c *Client) NotarizeCredentialEvent(ctx context.Context, event CredentialEvent) error { if event.CredentialFingerprint == "" { return fmt.Errorf("governance: credential_fingerprint is required for notarization") } if event.IntentID == "" { return fmt.Errorf("governance: intent_id is required for notarization") } if event.EventType == "" { return fmt.Errorf("governance: event_type is required for notarization") } // Construct MutationEnvelope payload (JCS-canonicalized via json.Marshal sorted keys). envelope := map[string]interface{}{ "credential_fingerprint": event.CredentialFingerprint, "event_type": event.EventType, "intent_id": event.IntentID, "spiffe_id": event.SpiffeID, "tenant_id": event.TenantID, } if event.CertSerialNumber > 0 { envelope["cert_serial_number"] = event.CertSerialNumber } envelopeBytes, err := json.Marshal(envelope) if err != nil { return fmt.Errorf("governance: marshal envelope: %w", err) } // Domain-separated SHA-256: "guildhouse.credential.v1:" prefix. h := sha256.New() h.Write([]byte("guildhouse.credential.v1:")) h.Write(envelopeBytes) leaf := h.Sum(nil) _, err = c.SubmitMerkleLeaf(ctx, event.TenantID, leaf) return err } // VerifyCredentialGovernance checks that a credential's governance provenance // is valid by verifying the merkle proof binds to this specific credential. 
func (c *Client) VerifyCredentialGovernance(ctx context.Context, v CredentialVerification) (*VerificationResult, error) { if v.IntentID == "" { return &VerificationResult{Governed: false, Error: "no governance intent"}, nil } if len(v.CertificatePublicKey) == 0 { return nil, fmt.Errorf("governance: certificate public key is required for verification") } // Compute fingerprint of the certificate's public key. certHash := sha256.Sum256(v.CertificatePublicKey) expectedFingerprint := hex.EncodeToString(certHash[:]) // Construct the same leaf that was submitted during notarization. envelope := map[string]interface{}{ "credential_fingerprint": expectedFingerprint, "intent_id": v.IntentID, } envelopeBytes, _ := json.Marshal(envelope) h := sha256.New() h.Write([]byte("guildhouse.credential.v1:")) h.Write(envelopeBytes) leaf := h.Sum(nil) // Verify inclusion in the merkle tree via NotaryService. resp, err := c.notaryClient.VerifyInclusion(ctx, &pb.VerifyInclusionRequest{ Leaf: leaf, }) if err != nil { return nil, fmt.Errorf("governance: VerifyInclusion RPC: %w", err) } return &VerificationResult{ Governed: resp.Valid, FingerprintMatch: resp.Valid, // If inclusion is valid, the fingerprint matched. }, nil } // buildDialOptions creates gRPC dial options from the config (mTLS or insecure). // // All returned option sets include an exponential reconnect-backoff so that // when Quartermaster is temporarily unreachable, grpc-go retries connection // attempts in the background without blocking plugin startup or spawning // per-RPC retry goroutines. 
func buildDialOptions(cfg Config) ([]grpc.DialOption, error) { connectParams := grpc.WithConnectParams(grpc.ConnectParams{ Backoff: backoff.Config{ BaseDelay: 1 * time.Second, Multiplier: 1.5, Jitter: 0.2, MaxDelay: 30 * time.Second, }, MinConnectTimeout: 20 * time.Second, }) if cfg.TLSCertPath != "" && cfg.TLSKeyPath != "" && cfg.TLSCAPath != "" { cert, err := tls.LoadX509KeyPair(cfg.TLSCertPath, cfg.TLSKeyPath) if err != nil { return nil, fmt.Errorf("load TLS keypair: %w", err) } caCert, err := os.ReadFile(cfg.TLSCAPath) if err != nil { return nil, fmt.Errorf("read CA cert: %w", err) } caPool := x509.NewCertPool() if !caPool.AppendCertsFromPEM(caCert) { return nil, fmt.Errorf("failed to append CA certificate") } tlsCfg := &tls.Config{ Certificates: []tls.Certificate{cert}, RootCAs: caPool, MinVersion: tls.VersionTLS13, } return []grpc.DialOption{ grpc.WithTransportCredentials(credentials.NewTLS(tlsCfg)), connectParams, }, nil } if cfg.TLSRequired { return nil, fmt.Errorf("TLS is required but no certificates configured") } return []grpc.DialOption{ grpc.WithTransportCredentials(insecure.NewCredentials()), connectParams, }, nil } // watchConnState logs gRPC connection-state transitions for a ClientConn. // Runs until the ClientConn is closed (WaitForStateChange returns false // when the conn reaches SHUTDOWN). Logs WARN-style once when the conn is // not yet reachable and INFO when it becomes Ready, so operators see at // startup whether Quartermaster is responding. 
func watchConnState(name, addr string, conn *grpc.ClientConn) { loggedUnreachable := false state := conn.GetState() for { switch state { case connectivity.Ready: log.Printf("governance: %s client connected to %s", name, addr) loggedUnreachable = false case connectivity.TransientFailure, connectivity.Idle, connectivity.Connecting: if !loggedUnreachable { log.Printf("governance: %s client cannot reach %s yet; retrying in background", name, addr) loggedUnreachable = true } } if !conn.WaitForStateChange(context.Background(), state) { return // conn shut down } state = conn.GetState() } }