Files
prompt-optimizer/packages/core/tests/integration/template/context-message-optimize.test.ts
linshen e5c3ba29e8 feat(template): 改进消息优化模板,添加角色保持和适度优化原则
- 添加「优化 ≠ 回复」核心原则,确保保持原消息角色
- 添加「适度优化原则」,避免过度复杂化
- 添加变量占位符保留要求
- 优化模板命名,明确适用场景
- 调整模板导入顺序,通用模板优先
- 新增集成测试覆盖关键场景
2025-12-19 11:29:29 +08:00

287 lines
11 KiB
TypeScript
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { describe, it, beforeEach, beforeAll } from 'vitest';
import { PromptService } from '../../../src/services/prompt/service';
import { ModelManager } from '../../../src/services/model/manager';
import { TemplateManager } from '../../../src/services/template/manager';
import { HistoryManager } from '../../../src/services/history/manager';
import { LocalStorageProvider } from '../../../src/services/storage/localStorageProvider';
import { PreferenceService } from '../../../src/services/preference/service';
import { createLLMService } from '../../../src/services/llm/service';
import { createTemplateManager } from '../../../src/services/template/manager';
import { createTemplateLanguageService } from '../../../src/services/template/languageService';
import { createModelManager } from '../../../src/services/model/manager';
import { createHistoryManager } from '../../../src/services/history/manager';
import { TextAdapterRegistry } from '../../../src/services/llm/adapters/registry';
import { TextModelConfig } from '../../../src/services/model/types';
import type { MessageOptimizationRequest, ConversationMessage } from '../../../src/services/prompt/types';
/**
* 高级-多对话模式 优化模板测试(精简版)
*
* 本测试用于验证三个上下文消息优化模板的效果:
* 1. context-message-optimize - 通用消息优化(推荐)
* 2. context-analytical-optimize - 分析型优化(技术场景)
* 3. context-output-format-optimize - 格式化优化(数据场景)
*
* 测试场景4场景 × 3模板 = 12测试
*
* 运行方式:
* pnpm -F @prompt-optimizer/core test -- --run context-message-optimize
*/
// ============================================================================
// 测试用例定义
// ============================================================================
interface TestScenario {
/** 场景名称 */
name: string;
/** 场景描述 */
description: string;
/** 对话消息 */
messages: ConversationMessage[];
/** 要优化的消息ID */
selectedMessageId: string;
/** 评估要点 */
evaluationPoints: string[];
}
/** 要测试的模板列表 */
const TEMPLATES_TO_TEST = [
{ id: 'context-message-optimize', name: '通用消息优化(推荐)' },
{ id: 'context-analytical-optimize', name: '分析型优化(技术场景)' },
{ id: 'context-output-format-optimize', name: '格式化优化(数据场景)' },
];
/** 测试场景数组 - 精简版4场景 × 3模板 = 12测试 */
const TEST_SCENARIOS: TestScenario[] = [
// 场景1: 猫娘风格(有变量)- 风格保持 + 变量保留
{
name: '猫娘风格(有变量)',
description: '测试风格保持和变量保留',
messages: [
{ id: 'cat-1', role: 'user', content: '今天天气怎么样?' },
{ id: 'cat-2', role: 'assistant', content: '喵~今天天气很不错呢,阳光暖暖的,很适合晒太阳喵!主人要出门的话记得带伞哦~(●\'◡\'●)' },
{ id: 'cat-3', role: 'user', content: '那你现在给我{{要完成的指令}}' },
],
selectedMessageId: 'cat-3',
evaluationPoints: [
'是否保持了轻松/可爱风格?',
'是否保留了变量占位符 {{要完成的指令}}',
'是否过度复杂化?',
],
},
// 场景2: 代码审查风格(有变量)- 技术场景
{
name: '代码审查风格(有变量)',
description: '测试技术场景的优化效果',
messages: [
{ id: 'tech-1', role: 'user', content: '帮我看看这段代码有什么问题' },
{ id: 'tech-2', role: 'assistant', content: '好的,请把代码发给我,我来帮你分析。' },
{ id: 'tech-3', role: 'user', content: '```javascript\nfunction add(a, b) { return a + b }\n```\n这个函数需要{{优化方向}}' },
],
selectedMessageId: 'tech-3',
evaluationPoints: [
'代码块是否被正确保留?',
'是否保留了变量占位符 {{优化方向}}',
'分析型模板是否适度添加了技术分析维度?',
],
},
// 场景3: 傲娇风格(无变量)- ⚠️ 角色保持测试
{
name: '傲娇风格(无变量)',
description: '⚠️ 关键测试:用户请求不能变成助手回复',
messages: [
{ id: 'tsun-1', role: 'user', content: '帮我查一下明天的天气' },
{ id: 'tsun-2', role: 'assistant', content: '哼,不是我想帮你查的啦!只是正好闲着没事...明天多云转晴气温18-25度别以为我是特意帮你查的' },
{ id: 'tsun-3', role: 'user', content: '推荐几首歌' },
],
selectedMessageId: 'tsun-3',
evaluationPoints: [
'⚠️【关键】输出是否仍然是用户请求?(不能是傲娇回复)',
'是否保持简洁?',
],
},
// 场景4: 客服场景(无变量)- ⚠️ 角色对立测试
{
name: '客服场景(无变量)',
description: '⚠️ 关键测试:用户投诉不能变成客服回复',
messages: [
{ id: 'cs-1', role: 'user', content: '我的订单三天了还没发货' },
{ id: 'cs-2', role: 'assistant', content: '非常抱歉给您带来不便!我已经为您查询了订单状态,目前显示仓库正在加急处理中。请问您的订单号是多少?我帮您催促一下。' },
{ id: 'cs-3', role: 'user', content: '退款' },
],
selectedMessageId: 'cs-3',
evaluationPoints: [
'⚠️【关键】输出是否仍然是用户的退款请求?(不能是客服回复)',
'可以补充退款原因,但角色仍是顾客',
],
},
];
// ============================================================================
// 测试主体
// ============================================================================
describe('Context Message Optimize Templates - Real API Tests', () => {
// 检查可用的 API 密钥
const hasDeepSeekKey = !!process.env.VITE_DEEPSEEK_API_KEY;
const hasOpenAIKey = !!process.env.VITE_OPENAI_API_KEY;
// 选择可用的模型(优先 DeepSeek chat 模型)
const availableModel = hasDeepSeekKey ? 'deepseek'
: hasOpenAIKey ? 'openai'
: null;
const TEST_TIMEOUT = 120000; // 2分钟超时
let promptService: PromptService;
let modelManager: ModelManager;
let llmService: any;
let templateManager: TemplateManager;
let historyManager: HistoryManager;
let storage: LocalStorageProvider;
let registry: TextAdapterRegistry;
let testModelKey: string;
beforeAll(() => {
console.log('\n📋 环境变量检查:');
console.log(' - DEEPSEEK_API_KEY:', hasDeepSeekKey ? '✓' : '✗');
console.log(' - OPENAI_API_KEY:', hasOpenAIKey ? '✓' : '✗');
console.log(' - 选择的模型:', availableModel || '无可用模型');
console.log(`\n📦 测试场景数量: ${TEST_SCENARIOS.length}`);
console.log(`📦 测试模板数量: ${TEMPLATES_TO_TEST.length}`);
if (!availableModel) {
console.log('\n⚠ 跳过测试:未设置任何 API 密钥环境变量');
}
});
beforeEach(async () => {
// 初始化存储
storage = new LocalStorageProvider();
await storage.clearAll();
// 初始化各管理器
registry = new TextAdapterRegistry();
modelManager = createModelManager(storage);
llmService = createLLMService(modelManager);
// 创建 PreferenceService 和语言服务
const preferenceService = new PreferenceService(storage);
const languageService = createTemplateLanguageService(preferenceService);
await languageService.initialize();
await languageService.setLanguage('zh-CN');
templateManager = createTemplateManager(storage, languageService);
historyManager = createHistoryManager(storage, modelManager);
// 初始化服务
promptService = new PromptService(modelManager, llmService, templateManager, historyManager);
// 根据可用的 API 配置模型
if (availableModel === 'deepseek') {
const adapter = registry.getAdapter('deepseek');
const models = adapter.getModels();
const chatModel = models.find(m => m.id === 'deepseek-chat') || models[0];
const modelConfig: TextModelConfig = {
id: 'test-deepseek',
name: 'Test DeepSeek Chat',
enabled: true,
providerMeta: adapter.getProvider(),
modelMeta: chatModel,
connectionConfig: {
apiKey: process.env.VITE_DEEPSEEK_API_KEY!
},
paramOverrides: {}
};
await modelManager.addModel('test-deepseek', modelConfig);
testModelKey = 'test-deepseek';
} else if (availableModel === 'openai') {
const adapter = registry.getAdapter('openai');
const modelConfig: TextModelConfig = {
id: 'test-openai',
name: 'Test OpenAI',
enabled: true,
providerMeta: adapter.getProvider(),
modelMeta: adapter.getModels()[0],
connectionConfig: {
apiKey: process.env.VITE_OPENAI_API_KEY!,
baseURL: process.env.VITE_OPENAI_BASE_URL
},
paramOverrides: {}
};
await modelManager.addModel('test-openai', modelConfig);
testModelKey = 'test-openai';
}
});
/**
* 格式化输出优化结果
*/
const printOptimizationResult = (
scenarioName: string,
templateName: string,
originalContent: string,
optimizedContent: string,
evaluationPoints: string[]
) => {
console.log('\n' + '='.repeat(80));
console.log(`🎭 场景: ${scenarioName}`);
console.log(`📝 模板: ${templateName}`);
console.log('='.repeat(80));
console.log('\n📌 原始消息:');
console.log(' ' + originalContent.split('\n').join('\n '));
console.log('\n✨ 优化结果:');
console.log(' ' + optimizedContent.split('\n').join('\n '));
console.log('\n' + '-'.repeat(80));
console.log('💡 评估要点:');
evaluationPoints.forEach((point, index) => {
console.log(` ${index + 1}. ${point}`);
});
console.log('='.repeat(80) + '\n');
};
// 自动遍历所有场景和模板生成测试
TEST_SCENARIOS.forEach((scenario) => {
describe(`场景: ${scenario.name}`, () => {
TEMPLATES_TO_TEST.forEach((template) => {
it.runIf(!!availableModel)(
`${template.name} - ${scenario.description}`,
async () => {
const request: MessageOptimizationRequest = {
selectedMessageId: scenario.selectedMessageId,
messages: scenario.messages,
modelKey: testModelKey,
templateId: template.id
};
const result = await promptService.optimizeMessage(request);
const originalMessage = scenario.messages.find(
m => m.id === scenario.selectedMessageId
)!;
printOptimizationResult(
scenario.name,
template.name,
originalMessage.content,
result,
scenario.evaluationPoints
);
},
TEST_TIMEOUT
);
});
});
});
// 跳过测试的占位
it.skipIf(!availableModel)('跳过测试 - 未设置 API 密钥', () => {
console.log('⚠️ 请设置以下环境变量之一来运行测试:');
console.log(' - VITE_DEEPSEEK_API_KEY');
console.log(' - VITE_OPENAI_API_KEY');
});
});