Blog 8 JSON
failure_modes.json
artifacts/failure_modes.json / 814 B
[
{
"baseline": "open_source",
"label": "Plain Mistral",
"total": 11,
"success": 2,
"agent_error": 0,
"wrong_remediation": 3,
"premature_resolution": 0,
"step_budget_exhausted": 6,
"invalid_action": 0,
"other_failure": 0
},
{
"baseline": "open_source_react",
"label": "ReAct Mistral",
"total": 11,
"success": 2,
"agent_error": 0,
"wrong_remediation": 7,
"premature_resolution": 0,
"step_budget_exhausted": 1,
"invalid_action": 0,
"other_failure": 1
},
{
"baseline": "guided_open_source",
"label": "Guided Mistral",
"total": 11,
"success": 3,
"agent_error": 0,
"wrong_remediation": 7,
"premature_resolution": 0,
"step_budget_exhausted": 1,
"invalid_action": 0,
"other_failure": 0
}
]