mirror of
https://github.com/linshenkx/prompt-optimizer.git
synced 2026-05-07 05:56:49 +08:00
72 lines
2.2 KiB
JSON
72 lines
2.2 KiB
JSON
{
|
|
"generatedAt": "2026-03-22T10:44:18.102Z",
|
|
"rows": [
|
|
{
|
|
"caseId": "live-basic-system-boundary-control",
|
|
"title": "真实模型: basic-system 边界控制改动",
|
|
"kind": "live",
|
|
"score": 75,
|
|
"stopRecommendation": "continue",
|
|
"targetVsBaseline": "improved",
|
|
"targetVsReferenceGap": "minor",
|
|
"expectationMatched": null,
|
|
"expectationTotal": null
|
|
},
|
|
{
|
|
"caseId": "synthetic-medical-latent-trigger-overfit",
|
|
"title": "合成样本: 医疗分诊里的隐性触发过拟合",
|
|
"kind": "synthetic",
|
|
"score": 35,
|
|
"stopRecommendation": "review",
|
|
"targetVsBaseline": "regressed",
|
|
"targetVsReferenceGap": "major",
|
|
"expectationMatched": 3,
|
|
"expectationTotal": 5
|
|
},
|
|
{
|
|
"caseId": "synthetic-ecommerce-schema-no-model-worship",
|
|
"title": "合成样本: 电商抽取里不能因为 teacher 更会写就忽略 schema",
|
|
"kind": "synthetic",
|
|
"score": 40,
|
|
"stopRecommendation": "review",
|
|
"targetVsBaseline": "regressed",
|
|
"targetVsReferenceGap": "minor",
|
|
"expectationMatched": 6,
|
|
"expectationTotal": 6
|
|
},
|
|
{
|
|
"caseId": "synthetic-legal-flat-not-unclear",
|
|
"title": "合成样本: 法务风险摘要应该判 flat 而不是 unclear",
|
|
"kind": "synthetic",
|
|
"score": 50,
|
|
"stopRecommendation": "continue",
|
|
"targetVsBaseline": "flat",
|
|
"targetVsReferenceGap": "none",
|
|
"expectationMatched": 3,
|
|
"expectationTotal": 3
|
|
},
|
|
{
|
|
"caseId": "synthetic-teaching-overfit-regression",
|
|
"title": "合成样本: 教学讲解里的样例口诀导致回退",
|
|
"kind": "synthetic",
|
|
"score": 30,
|
|
"stopRecommendation": "review",
|
|
"targetVsBaseline": "regressed",
|
|
"targetVsReferenceGap": "major",
|
|
"expectationMatched": 6,
|
|
"expectationTotal": 6
|
|
},
|
|
{
|
|
"caseId": "synthetic-hiring-replica-semantic-instability",
|
|
"title": "合成样本: 招聘筛选里 replica 语义不稳定",
|
|
"kind": "synthetic",
|
|
"score": 65,
|
|
"stopRecommendation": "review",
|
|
"targetVsBaseline": "improved",
|
|
"targetVsReferenceGap": "none",
|
|
"expectationMatched": 4,
|
|
"expectationTotal": 4
|
|
}
|
|
]
|
|
}
|