mirror of
https://github.com/linshenkx/prompt-optimizer.git
synced 2026-05-13 09:20:28 +08:00
- 添加「优化 ≠ 回复」核心原则,确保保持原消息角色 - 添加「适度优化原则」,避免过度复杂化 - 添加变量占位符保留要求 - 优化模板命名,明确适用场景 - 调整模板导入顺序,通用模板优先 - 新增集成测试覆盖关键场景
287 lines
11 KiB
TypeScript
287 lines
11 KiB
TypeScript
import { describe, it, beforeEach, beforeAll } from 'vitest';
|
||
import { PromptService } from '../../../src/services/prompt/service';
|
||
import { ModelManager } from '../../../src/services/model/manager';
|
||
import { TemplateManager } from '../../../src/services/template/manager';
|
||
import { HistoryManager } from '../../../src/services/history/manager';
|
||
import { LocalStorageProvider } from '../../../src/services/storage/localStorageProvider';
|
||
import { PreferenceService } from '../../../src/services/preference/service';
|
||
import { createLLMService } from '../../../src/services/llm/service';
|
||
import { createTemplateManager } from '../../../src/services/template/manager';
|
||
import { createTemplateLanguageService } from '../../../src/services/template/languageService';
|
||
import { createModelManager } from '../../../src/services/model/manager';
|
||
import { createHistoryManager } from '../../../src/services/history/manager';
|
||
import { TextAdapterRegistry } from '../../../src/services/llm/adapters/registry';
|
||
import { TextModelConfig } from '../../../src/services/model/types';
|
||
import type { MessageOptimizationRequest, ConversationMessage } from '../../../src/services/prompt/types';
|
||
|
||
/**
|
||
* 高级-多对话模式 优化模板测试(精简版)
|
||
*
|
||
* 本测试用于验证三个上下文消息优化模板的效果:
|
||
* 1. context-message-optimize - 通用消息优化(推荐)
|
||
* 2. context-analytical-optimize - 分析型优化(技术场景)
|
||
* 3. context-output-format-optimize - 格式化优化(数据场景)
|
||
*
|
||
* 测试场景:4场景 × 3模板 = 12测试
|
||
*
|
||
* 运行方式:
|
||
* pnpm -F @prompt-optimizer/core test -- --run context-message-optimize
|
||
*/
|
||
|
||
// ============================================================================
|
||
// 测试用例定义
|
||
// ============================================================================
|
||
|
||
interface TestScenario {
|
||
/** 场景名称 */
|
||
name: string;
|
||
/** 场景描述 */
|
||
description: string;
|
||
/** 对话消息 */
|
||
messages: ConversationMessage[];
|
||
/** 要优化的消息ID */
|
||
selectedMessageId: string;
|
||
/** 评估要点 */
|
||
evaluationPoints: string[];
|
||
}
|
||
|
||
/** 要测试的模板列表 */
|
||
const TEMPLATES_TO_TEST = [
|
||
{ id: 'context-message-optimize', name: '通用消息优化(推荐)' },
|
||
{ id: 'context-analytical-optimize', name: '分析型优化(技术场景)' },
|
||
{ id: 'context-output-format-optimize', name: '格式化优化(数据场景)' },
|
||
];
|
||
|
||
/** 测试场景数组 - 精简版(4场景 × 3模板 = 12测试) */
|
||
const TEST_SCENARIOS: TestScenario[] = [
|
||
// 场景1: 猫娘风格(有变量)- 风格保持 + 变量保留
|
||
{
|
||
name: '猫娘风格(有变量)',
|
||
description: '测试风格保持和变量保留',
|
||
messages: [
|
||
{ id: 'cat-1', role: 'user', content: '今天天气怎么样?' },
|
||
{ id: 'cat-2', role: 'assistant', content: '喵~今天天气很不错呢,阳光暖暖的,很适合晒太阳喵!主人要出门的话记得带伞哦~(●\'◡\'●)' },
|
||
{ id: 'cat-3', role: 'user', content: '那你现在给我{{要完成的指令}}' },
|
||
],
|
||
selectedMessageId: 'cat-3',
|
||
evaluationPoints: [
|
||
'是否保持了轻松/可爱风格?',
|
||
'是否保留了变量占位符 {{要完成的指令}}?',
|
||
'是否过度复杂化?',
|
||
],
|
||
},
|
||
|
||
// 场景2: 代码审查风格(有变量)- 技术场景
|
||
{
|
||
name: '代码审查风格(有变量)',
|
||
description: '测试技术场景的优化效果',
|
||
messages: [
|
||
{ id: 'tech-1', role: 'user', content: '帮我看看这段代码有什么问题' },
|
||
{ id: 'tech-2', role: 'assistant', content: '好的,请把代码发给我,我来帮你分析。' },
|
||
{ id: 'tech-3', role: 'user', content: '```javascript\nfunction add(a, b) { return a + b }\n```\n这个函数需要{{优化方向}}' },
|
||
],
|
||
selectedMessageId: 'tech-3',
|
||
evaluationPoints: [
|
||
'代码块是否被正确保留?',
|
||
'是否保留了变量占位符 {{优化方向}}?',
|
||
'分析型模板是否适度添加了技术分析维度?',
|
||
],
|
||
},
|
||
|
||
// 场景3: 傲娇风格(无变量)- ⚠️ 角色保持测试
|
||
{
|
||
name: '傲娇风格(无变量)',
|
||
description: '⚠️ 关键测试:用户请求不能变成助手回复',
|
||
messages: [
|
||
{ id: 'tsun-1', role: 'user', content: '帮我查一下明天的天气' },
|
||
{ id: 'tsun-2', role: 'assistant', content: '哼,不是我想帮你查的啦!只是正好闲着没事...明天多云转晴,气温18-25度,别以为我是特意帮你查的!' },
|
||
{ id: 'tsun-3', role: 'user', content: '推荐几首歌' },
|
||
],
|
||
selectedMessageId: 'tsun-3',
|
||
evaluationPoints: [
|
||
'⚠️【关键】输出是否仍然是用户请求?(不能是傲娇回复)',
|
||
'是否保持简洁?',
|
||
],
|
||
},
|
||
|
||
// 场景4: 客服场景(无变量)- ⚠️ 角色对立测试
|
||
{
|
||
name: '客服场景(无变量)',
|
||
description: '⚠️ 关键测试:用户投诉不能变成客服回复',
|
||
messages: [
|
||
{ id: 'cs-1', role: 'user', content: '我的订单三天了还没发货' },
|
||
{ id: 'cs-2', role: 'assistant', content: '非常抱歉给您带来不便!我已经为您查询了订单状态,目前显示仓库正在加急处理中。请问您的订单号是多少?我帮您催促一下。' },
|
||
{ id: 'cs-3', role: 'user', content: '退款' },
|
||
],
|
||
selectedMessageId: 'cs-3',
|
||
evaluationPoints: [
|
||
'⚠️【关键】输出是否仍然是用户的退款请求?(不能是客服回复)',
|
||
'可以补充退款原因,但角色仍是顾客',
|
||
],
|
||
},
|
||
];
|
||
|
||
// ============================================================================
|
||
// 测试主体
|
||
// ============================================================================
|
||
|
||
describe('Context Message Optimize Templates - Real API Tests', () => {
|
||
// 检查可用的 API 密钥
|
||
const hasDeepSeekKey = !!process.env.VITE_DEEPSEEK_API_KEY;
|
||
const hasOpenAIKey = !!process.env.VITE_OPENAI_API_KEY;
|
||
|
||
// 选择可用的模型(优先 DeepSeek chat 模型)
|
||
const availableModel = hasDeepSeekKey ? 'deepseek'
|
||
: hasOpenAIKey ? 'openai'
|
||
: null;
|
||
|
||
const TEST_TIMEOUT = 120000; // 2分钟超时
|
||
|
||
let promptService: PromptService;
|
||
let modelManager: ModelManager;
|
||
let llmService: any;
|
||
let templateManager: TemplateManager;
|
||
let historyManager: HistoryManager;
|
||
let storage: LocalStorageProvider;
|
||
let registry: TextAdapterRegistry;
|
||
let testModelKey: string;
|
||
|
||
beforeAll(() => {
|
||
console.log('\n📋 环境变量检查:');
|
||
console.log(' - DEEPSEEK_API_KEY:', hasDeepSeekKey ? '✓' : '✗');
|
||
console.log(' - OPENAI_API_KEY:', hasOpenAIKey ? '✓' : '✗');
|
||
console.log(' - 选择的模型:', availableModel || '无可用模型');
|
||
console.log(`\n📦 测试场景数量: ${TEST_SCENARIOS.length}`);
|
||
console.log(`📦 测试模板数量: ${TEMPLATES_TO_TEST.length}`);
|
||
|
||
if (!availableModel) {
|
||
console.log('\n⚠️ 跳过测试:未设置任何 API 密钥环境变量');
|
||
}
|
||
});
|
||
|
||
beforeEach(async () => {
|
||
// 初始化存储
|
||
storage = new LocalStorageProvider();
|
||
await storage.clearAll();
|
||
|
||
// 初始化各管理器
|
||
registry = new TextAdapterRegistry();
|
||
modelManager = createModelManager(storage);
|
||
llmService = createLLMService(modelManager);
|
||
|
||
// 创建 PreferenceService 和语言服务
|
||
const preferenceService = new PreferenceService(storage);
|
||
const languageService = createTemplateLanguageService(preferenceService);
|
||
await languageService.initialize();
|
||
await languageService.setLanguage('zh-CN');
|
||
|
||
templateManager = createTemplateManager(storage, languageService);
|
||
historyManager = createHistoryManager(storage, modelManager);
|
||
|
||
// 初始化服务
|
||
promptService = new PromptService(modelManager, llmService, templateManager, historyManager);
|
||
|
||
// 根据可用的 API 配置模型
|
||
if (availableModel === 'deepseek') {
|
||
const adapter = registry.getAdapter('deepseek');
|
||
const models = adapter.getModels();
|
||
const chatModel = models.find(m => m.id === 'deepseek-chat') || models[0];
|
||
const modelConfig: TextModelConfig = {
|
||
id: 'test-deepseek',
|
||
name: 'Test DeepSeek Chat',
|
||
enabled: true,
|
||
providerMeta: adapter.getProvider(),
|
||
modelMeta: chatModel,
|
||
connectionConfig: {
|
||
apiKey: process.env.VITE_DEEPSEEK_API_KEY!
|
||
},
|
||
paramOverrides: {}
|
||
};
|
||
await modelManager.addModel('test-deepseek', modelConfig);
|
||
testModelKey = 'test-deepseek';
|
||
} else if (availableModel === 'openai') {
|
||
const adapter = registry.getAdapter('openai');
|
||
const modelConfig: TextModelConfig = {
|
||
id: 'test-openai',
|
||
name: 'Test OpenAI',
|
||
enabled: true,
|
||
providerMeta: adapter.getProvider(),
|
||
modelMeta: adapter.getModels()[0],
|
||
connectionConfig: {
|
||
apiKey: process.env.VITE_OPENAI_API_KEY!,
|
||
baseURL: process.env.VITE_OPENAI_BASE_URL
|
||
},
|
||
paramOverrides: {}
|
||
};
|
||
await modelManager.addModel('test-openai', modelConfig);
|
||
testModelKey = 'test-openai';
|
||
}
|
||
});
|
||
|
||
/**
|
||
* 格式化输出优化结果
|
||
*/
|
||
const printOptimizationResult = (
|
||
scenarioName: string,
|
||
templateName: string,
|
||
originalContent: string,
|
||
optimizedContent: string,
|
||
evaluationPoints: string[]
|
||
) => {
|
||
console.log('\n' + '='.repeat(80));
|
||
console.log(`🎭 场景: ${scenarioName}`);
|
||
console.log(`📝 模板: ${templateName}`);
|
||
console.log('='.repeat(80));
|
||
console.log('\n📌 原始消息:');
|
||
console.log(' ' + originalContent.split('\n').join('\n '));
|
||
console.log('\n✨ 优化结果:');
|
||
console.log(' ' + optimizedContent.split('\n').join('\n '));
|
||
console.log('\n' + '-'.repeat(80));
|
||
console.log('💡 评估要点:');
|
||
evaluationPoints.forEach((point, index) => {
|
||
console.log(` ${index + 1}. ${point}`);
|
||
});
|
||
console.log('='.repeat(80) + '\n');
|
||
};
|
||
|
||
// 自动遍历所有场景和模板生成测试
|
||
TEST_SCENARIOS.forEach((scenario) => {
|
||
describe(`场景: ${scenario.name}`, () => {
|
||
TEMPLATES_TO_TEST.forEach((template) => {
|
||
it.runIf(!!availableModel)(
|
||
`${template.name} - ${scenario.description}`,
|
||
async () => {
|
||
const request: MessageOptimizationRequest = {
|
||
selectedMessageId: scenario.selectedMessageId,
|
||
messages: scenario.messages,
|
||
modelKey: testModelKey,
|
||
templateId: template.id
|
||
};
|
||
|
||
const result = await promptService.optimizeMessage(request);
|
||
const originalMessage = scenario.messages.find(
|
||
m => m.id === scenario.selectedMessageId
|
||
)!;
|
||
|
||
printOptimizationResult(
|
||
scenario.name,
|
||
template.name,
|
||
originalMessage.content,
|
||
result,
|
||
scenario.evaluationPoints
|
||
);
|
||
},
|
||
TEST_TIMEOUT
|
||
);
|
||
});
|
||
});
|
||
});
|
||
|
||
// 跳过测试的占位
|
||
it.skipIf(!availableModel)('跳过测试 - 未设置 API 密钥', () => {
|
||
console.log('⚠️ 请设置以下环境变量之一来运行测试:');
|
||
console.log(' - VITE_DEEPSEEK_API_KEY');
|
||
console.log(' - VITE_OPENAI_API_KEY');
|
||
});
|
||
});
|