From 25e2ae3f20c06e874649f3a0f01008f62b3e6230 Mon Sep 17 00:00:00 2001 From: Michael D'Angelo Date: Sat, 25 Jan 2025 23:33:01 -0800 Subject: [PATCH] feat(integration): add promptfoo LLM testing framework Add promptfoo to the awesome-deepseek-integration library with: - English and Chinese documentation - Basic setup and usage guides - Example configuration for DeepSeek model testing - Integration entry in both README.md and README_cn.md --- README.md | 5 +++ README_cn.md | 7 +++- docs/promptfoo/README.md | 70 +++++++++++++++++++++++++++++++++++++ docs/promptfoo/README_cn.md | 70 +++++++++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 docs/promptfoo/README.md create mode 100644 docs/promptfoo/README_cn.md diff --git a/README.md b/README.md index 4af271f..e3e28a5 100644 --- a/README.md +++ b/README.md @@ -323,4 +323,9 @@ English/[简体中文](https://github.com/deepseek-ai/awesome-deepseek-integrati Geneplore AI Geneplore AI runs one of the largest AI Discord bots, now with Deepseek v3 and R1. + + Icon + promptfoo + Test and evaluate LLM prompts, including DeepSeek models. Compare different LLM providers, catch regressions, and evaluate responses. 
+ \ No newline at end of file diff --git a/README_cn.md b/README_cn.md index 5fb57ba..2765cef 100644 --- a/README_cn.md +++ b/README_cn.md @@ -100,7 +100,7 @@ Icon LiberSonora - LiberSonora,寓意“自由的声音”,是一个 AI 赋能的、强大的、开源有声书工具集,包含智能字幕提取、AI标题生成、多语言翻译等功能,支持 GPU 加速、批量离线处理 + LiberSonora,寓意"自由的声音",是一个 AI 赋能的、强大的、开源有声书工具集,包含智能字幕提取、AI标题生成、多语言翻译等功能,支持 GPU 加速、批量离线处理 Icon @@ -242,4 +242,9 @@ n8n-nodes-deepseek 一个 N8N 的社区节点,支持直接使用 DeepSeek API 集成到工作流中 + + Icon + promptfoo + 测试和评估LLM提示,包括DeepSeek模型。比较不同的LLM提供商,捕获回归,并评估响应。 + diff --git a/docs/promptfoo/README.md b/docs/promptfoo/README.md new file mode 100644 index 0000000..05ead39 --- /dev/null +++ b/docs/promptfoo/README.md @@ -0,0 +1,70 @@ +# promptfoo + +[promptfoo](https://promptfoo.dev) is an open-source framework for testing and evaluating LLM outputs. It helps you compare DeepSeek models with other LLMs (like o1, GPT-4o, Claude 3.5, Llama 3.3, and Gemini) and test LLMs and LLM applications for security vulnerabilities. You can: + +- Run side-by-side comparisons between models +- Check output quality and consistency +- Generate test reports + +## Setup + +1. Install promptfoo: + +```bash +npm install -g promptfoo +# or +brew install promptfoo +``` + +2. Configure API keys: + +```bash +export DEEPSEEK_API_KEY=your_api_key +# Add other API keys as needed +``` + +## Quick Start + +Create a configuration file `promptfooconfig.yaml`: + +```yaml +providers: + - deepseek:deepseek-reasoner # DeepSeek-R1 + - openai:o1 + +prompts: + - 'Solve this step by step: {{math_problem}}' + +tests: + - vars: + math_problem: 'What is the derivative of x^3 + 2x with respect to x?' 
+ assert: + - type: contains + value: '3x^2' # Check for correct answer + - type: llm-rubric + value: 'Response shows clear steps' + - type: cost + threshold: 0.05 # Maximum cost per request +``` + +Run tests: + +```bash +promptfoo eval +``` + +View results in your browser: + +```bash +promptfoo view +``` + +## Example Project + +Check out our [example](https://github.com/promptfoo/promptfoo/tree/main/examples/deepseek-r1-vs-openai-o1) that compares r1 and o1 on MMLU. + +## Resources + +- [Documentation](https://promptfoo.dev/docs/providers/deepseek) +- [GitHub Repository](https://github.com/promptfoo/promptfoo) +- [Community Discord](https://discord.gg/promptfoo) diff --git a/docs/promptfoo/README_cn.md b/docs/promptfoo/README_cn.md new file mode 100644 index 0000000..9ead3ae --- /dev/null +++ b/docs/promptfoo/README_cn.md @@ -0,0 +1,70 @@ +# promptfoo + +[promptfoo](https://promptfoo.dev) 是一个开源框架,用于测试和评估 LLM 输出。它可以帮助您将 DeepSeek 模型与其他 LLM(如 o1、GPT-4o、Claude 3.5、Llama 3.3 和 Gemini)进行比较,并测试 LLM 及其应用的安全漏洞。您可以: + +- 对不同模型进行并排比较 +- 检查输出质量和一致性 +- 生成测试报告 + +## 安装设置 + +1. 安装 promptfoo: + +```bash +npm install -g promptfoo +# 或者使用 brew +brew install promptfoo +``` + +2. 
配置 API 密钥: + +```bash +export DEEPSEEK_API_KEY=your_api_key +# 根据需要添加其他 API 密钥 +``` + +## 快速开始 + +创建配置文件 `promptfooconfig.yaml`: + +```yaml +providers: + - deepseek:deepseek-reasoner # DeepSeek-R1 + - openai:o1 + +prompts: + - '请逐步解决这个问题:{{math_problem}}' + +tests: + - vars: + math_problem: '求 x^3 + 2x 对 x 的导数' + assert: + - type: contains + value: '3x^2' # 检查正确答案 + - type: llm-rubric + value: '回答需要展示清晰的步骤' + - type: cost + threshold: 0.05 # 每次请求的最大成本 +``` + +运行测试: + +```bash +promptfoo eval +``` + +在浏览器中查看结果: + +```bash +promptfoo view +``` + +## 示例项目 + +查看我们的[示例](https://github.com/promptfoo/promptfoo/tree/main/examples/deepseek-r1-vs-openai-o1),展示了 r1 和 o1 在 MMLU 上的比较。 + +## 资源 + +- [文档](https://promptfoo.dev/docs/providers/deepseek) +- [GitHub 仓库](https://github.com/promptfoo/promptfoo) +- [社区 Discord](https://discord.gg/promptfoo)