Trace-derived refund and return exception cases, collected across the incident week, used to validate the recovery candidate against realistic edge cases.
Go back to the dataset workbench and compare this evidence set against the rest of your current inventory.
Confirm the active evaluator set is appropriate for the cases represented in this dataset.
Use this dataset as the evidence base for candidate prompt or routing experiments.
Inspect the original runs feeding this dataset to make sure curation still matches the operational problem.
Stable dataset identifier.
Dataset creation time relative to now.
Recorded cases currently attached to this dataset.
Items carrying a source trace ID for evidence inspection.
{
"traceId": "trace_returns_exception_v17_baseline",
"storyLabel": "Late return request handled with clear exception guidance",
"sessionId": "session_returns_exception_week1",
"promptName": "support-reply",
"promptVersion": 17
}{
"expectedAgentId": "Returns Resolution Copilot",
"expectedPromptName": "support-reply",
"expectedPromptVersion": 17,
"expectedStory": "Late return request handled with clear exception guidance",
"expectedOutcome": "Match the seeded production behavior captured by this trace."
}{
"traceId": "trace_returns_exception_v18_regression",
"storyLabel": "Compressed refund rollout denied a valid exception path",
"sessionId": "session_returns_exception_week1",
"promptName": "support-reply",
"promptVersion": 18
}{
"expectedAgentId": "Returns Resolution Copilot",
"expectedPromptName": "support-reply",
"expectedPromptVersion": 18,
"expectedStory": "Compressed refund rollout denied a valid exception path",
"expectedOutcome": "Match the seeded production behavior captured by this trace."
}{
"traceId": "trace_returns_exception_v19_recovery",
"storyLabel": "Recovery candidate restored refund exception handling",
"sessionId": "session_returns_exception_week1",
"promptName": "support-reply",
"promptVersion": 19
}{
"expectedAgentId": "Returns Resolution Copilot",
"expectedPromptName": "support-reply",
"expectedPromptVersion": 19,
"expectedStory": "Recovery candidate restored refund exception handling",
"expectedOutcome": "Match the seeded production behavior captured by this trace."
}{
"traceId": "trace_policy_damage_claim_v4_hallucination",
"storyLabel": "Damaged-item policy checker invented a denial rule",
"sessionId": "session_damage_claim_policy",
"promptName": "refund-policy-check",
"promptVersion": 4
}{
"expectedAgentId": "Policy Grounding Reviewer",
"expectedPromptName": "refund-policy-check",
"expectedPromptVersion": 4,
"expectedStory": "Damaged-item policy checker invented a denial rule",
"expectedOutcome": "Match the seeded production behavior captured by this trace."
}{
"traceId": "trace_refund_clarification_needed_02_011",
"storyLabel": "Refund clarification request avoided a false denial (2.11)",
"sessionId": "session_refund_clarification_02",
"promptName": "refund-policy-check",
"promptVersion": 3
}{
"expectedAgentId": "Returns Resolution Copilot",
"expectedPromptName": "refund-policy-check",
"expectedPromptVersion": 3,
"expectedStory": "Refund clarification request avoided a false denial (2.11)",
"expectedOutcome": "Match the seeded production behavior captured by this trace."
}{
"traceId": "trace_refund_clarification_needed_05_014",
"storyLabel": "Refund clarification request avoided a false denial (5.14)",
"sessionId": "session_refund_clarification_05",
"promptName": "refund-policy-check",
"promptVersion": 3
}{
"expectedAgentId": "Returns Resolution Copilot",
"expectedPromptName": "refund-policy-check",
"expectedPromptVersion": 3,
"expectedStory": "Refund clarification request avoided a false denial (5.14)",
"expectedOutcome": "Match the seeded production behavior captured by this trace."
}{
"traceId": "trace_refund_clarification_needed_07_019",
"storyLabel": "Refund clarification request avoided a false denial (7.19)",
"sessionId": "session_refund_clarification_07",
"promptName": "refund-policy-check",
"promptVersion": 3
}{
"expectedAgentId": "Returns Resolution Copilot",
"expectedPromptName": "refund-policy-check",
"expectedPromptVersion": 3,
"expectedStory": "Refund clarification request avoided a false denial (7.19)",
"expectedOutcome": "Match the seeded production behavior captured by this trace."
}