Skip to main content

aios_core/
trace_engine.rs

1//! Trace 引擎 — 确定性回放验证
2//!
3//! 记录 `GoldenTrace` 并在回放时验证脱敏 / 策略 / 执行的确定性。
4//!
5//! 设计取舍:
6//! - 脱敏校验完全在 `aios-core` 内完成 (sanitizer 是 `PrivacyAirGap` 的
7//!   trait 对象),所以 `validate_sanitization` 不需要外部依赖。
8//! - 策略与执行的"实际值"必须由调用方驱动 (`aios-cli` 或 daemon 已经
9//!   持有 router/policy/executor),否则 `aios-core` 就要反向依赖
10//!   `aios-agent`。`TraceValidator::validate` 因此接收调用方已经计算好的
11//!   `actual_intents` 与 `actual_executed`,引擎只负责按"语义"逐条比对
12//!   并填出 `ReplayResult`。
13//!
14//! 语义比对刻意忽略易变字段 (uuids、wall-clock 时间),所以回放只验证
15//! pipeline 的可观测结果,不验证 token-by-token 的字节一致。如果想锁
16//! 字节一致,请用 `ReplaySummary.audit_hash`。
17
18use aios_spec::traits::{PrivacySanitizer, TraceValidator};
19use aios_spec::{
20    ExecutedAction, GoldenTrace, Intent, IntentBatch, ReplayResult, SanitizedEvent, SuggestedAction,
21};
22
23/// 默认 Trace 引擎
24pub struct DefaultTraceEngine {
25    sanitizer: Box<dyn PrivacySanitizer + Send + Sync>,
26}
27
28impl DefaultTraceEngine {
29    pub fn new(sanitizer: impl PrivacySanitizer + Send + Sync + 'static) -> Self {
30        Self {
31            sanitizer: Box::new(sanitizer),
32        }
33    }
34
35    /// 仅校验脱敏。`policy_match` 与 `execution_match` 显式置为 `false`,
36    /// 且对应的 divergence 列表为空 —— 含义是"这一层未被检查",而不是
37    /// "检查过且失败"。两层语义分离:
38    ///
39    /// - match flag 回答"该层是否通过校验"。
40    /// - divergence 列表回答"如果失败了,是哪里失败"。
41    ///
42    /// 调用方通过 `result.all_match()` 自然区分;想做端到端校验请改用
43    /// [`TraceValidator::validate`],它会把三层都填出来。
44    pub fn validate_sanitization(&self, golden: &GoldenTrace) -> ReplayResult {
45        let sanitization_divergences = self.sanitization_divergences(golden);
46        ReplayResult {
47            trace_id: golden.trace_id.clone(),
48            sanitization_match: sanitization_divergences.is_empty(),
49            sanitization_divergences,
50            policy_match: false,
51            policy_divergences: vec![],
52            execution_match: false,
53            execution_divergences: vec![],
54        }
55    }
56
57    fn sanitization_divergences(&self, golden: &GoldenTrace) -> Vec<usize> {
58        let actual_sanitized: Vec<SanitizedEvent> = golden
59            .raw_events
60            .iter()
61            .map(|raw| self.sanitizer.sanitize(raw.clone()))
62            .collect();
63
64        let mut divergences = Vec::new();
65        for (i, (actual, expected)) in actual_sanitized
66            .iter()
67            .zip(golden.expected_sanitized.iter())
68            .enumerate()
69        {
70            if !sanitized_eq(actual, expected) {
71                divergences.push(i);
72            }
73        }
74        // 长度不一致时,把多余/缺失的索引也算进去。
75        let actual_len = actual_sanitized.len();
76        let expected_len = golden.expected_sanitized.len();
77        for i in actual_len.min(expected_len)..actual_len.max(expected_len) {
78            divergences.push(i);
79        }
80        divergences
81    }
82}
83
84impl TraceValidator for DefaultTraceEngine {
85    fn validate(
86        &self,
87        golden: &GoldenTrace,
88        actual_intents: &IntentBatch,
89        actual_executed: &[ExecutedAction],
90    ) -> ReplayResult {
91        let sanitization_divergences = self.sanitization_divergences(golden);
92        let policy_divergences = intent_divergences(&golden.expected_intents, actual_intents);
93        let execution_divergences =
94            execution_divergences(&golden.expected_actions, actual_executed);
95
96        ReplayResult {
97            trace_id: golden.trace_id.clone(),
98            sanitization_match: sanitization_divergences.is_empty(),
99            sanitization_divergences,
100            policy_match: policy_divergences.is_empty(),
101            policy_divergences,
102            execution_match: execution_divergences.is_empty(),
103            execution_divergences,
104        }
105    }
106}
107
108// ============================================================
109// 语义比较 — 忽略易变字段
110// ============================================================
111
112/// 比较两个 SanitizedEvent 的语义内容是否一致。
113///
114/// event_id 和 timestamp_ms 不在比较范围内 (它们因生成时间不同而变化)。
115fn sanitized_eq(a: &SanitizedEvent, b: &SanitizedEvent) -> bool {
116    a.event_type == b.event_type
117        && a.source_tier == b.source_tier
118        && a.app_package == b.app_package
119        && a.uid == b.uid
120}
121
122/// 比较两个 IntentBatch 的语义内容:window_id / generated_at_ms / intent_id
123/// 因为是 uuid/时间戳被刻意忽略。其它所有字段(包括 `rationale_tags`)都
124/// 参与比较。
125fn intent_divergences(expected: &IntentBatch, actual: &IntentBatch) -> Vec<String> {
126    let mut divergences = Vec::new();
127
128    if expected.model != actual.model {
129        divergences.push(format!(
130            "model mismatch: expected={:?} actual={:?}",
131            expected.model, actual.model
132        ));
133    }
134    if expected.intents.len() != actual.intents.len() {
135        divergences.push(format!(
136            "intent count mismatch: expected={} actual={}",
137            expected.intents.len(),
138            actual.intents.len()
139        ));
140        // 长度不同时继续按最小公共前缀逐条比对,便于定位首个差异。
141    }
142    let pairs = expected
143        .intents
144        .iter()
145        .zip(actual.intents.iter())
146        .enumerate();
147    for (i, (e, a)) in pairs {
148        if let Some(reason) = intent_diff(e, a) {
149            divergences.push(format!("intent[{i}]: {reason}"));
150        }
151    }
152    divergences
153}
154
155fn intent_diff(expected: &Intent, actual: &Intent) -> Option<String> {
156    if expected.intent_type != actual.intent_type {
157        return Some(format!(
158            "intent_type: expected={:?} actual={:?}",
159            expected.intent_type, actual.intent_type
160        ));
161    }
162    if expected.risk_level != actual.risk_level {
163        return Some(format!(
164            "risk_level: expected={:?} actual={:?}",
165            expected.risk_level, actual.risk_level
166        ));
167    }
168    if (expected.confidence - actual.confidence).abs() > f32::EPSILON {
169        return Some(format!(
170            "confidence: expected={} actual={}",
171            expected.confidence, actual.confidence
172        ));
173    }
174    if expected.rationale_tags != actual.rationale_tags {
175        return Some(format!(
176            "rationale_tags: expected={:?} actual={:?}",
177            expected.rationale_tags, actual.rationale_tags
178        ));
179    }
180    if expected.suggested_actions.len() != actual.suggested_actions.len() {
181        return Some(format!(
182            "suggested_actions count: expected={} actual={}",
183            expected.suggested_actions.len(),
184            actual.suggested_actions.len()
185        ));
186    }
187    for (j, (e_act, a_act)) in expected
188        .suggested_actions
189        .iter()
190        .zip(actual.suggested_actions.iter())
191        .enumerate()
192    {
193        if !suggested_eq(e_act, a_act) {
194            return Some(format!(
195                "suggested_actions[{j}]: expected={:?} actual={:?}",
196                e_act, a_act
197            ));
198        }
199    }
200    None
201}
202
203fn suggested_eq(a: &SuggestedAction, b: &SuggestedAction) -> bool {
204    a.action_type == b.action_type && a.target == b.target && a.urgency == b.urgency
205}
206
207fn execution_divergences(expected: &[ExecutedAction], actual: &[ExecutedAction]) -> Vec<usize> {
208    let mut divergences = Vec::new();
209    for (i, (e, a)) in expected.iter().zip(actual.iter()).enumerate() {
210        if !executed_eq(e, a) {
211            divergences.push(i);
212        }
213    }
214    // 长度差异也记入索引。
215    let actual_len = actual.len();
216    let expected_len = expected.len();
217    for i in actual_len.min(expected_len)..actual_len.max(expected_len) {
218        divergences.push(i);
219    }
220    divergences
221}
222
223/// 比较两个 ExecutedAction:忽略 executed_at_ms。
224fn executed_eq(a: &ExecutedAction, b: &ExecutedAction) -> bool {
225    a.action_type == b.action_type
226        && a.target == b.target
227        && a.success == b.success
228        && a.error_reason == b.error_reason
229}