Testing Your Prompts
Validate prompt quality with automated testing.
Testing During Development
Use Ollama for fast, free local testing:
import { createProvider, ChainOfThought } from "@mzhub/promptc";

// Free local testing
const testProvider = createProvider("ollama", {
  defaultModel: "llama3.2"
});

// Production provider
const prodProvider = createProvider("openai", {
  apiKey: process.env.OPENAI_API_KEY
});

// Use testProvider during development (schema defined elsewhere in your project)
const program = new ChainOfThought(schema, testProvider);

Unit Testing with Vitest
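The tests below import an extractNames helper from src/services/extractor.js, which is not shown in this guide. A minimal sketch of what such a service might look like, assuming a hypothetical schema module that declares a names output field:

src/services/extractor.js
// Sketch of the service used by the tests below. The schema module and its
// shape are assumptions; createProvider, ChainOfThought, and the result shape
// follow the examples in this guide.
import { ChainOfThought, createProvider } from "@mzhub/promptc";
import { nameExtractionSchema } from "./schema.js"; // hypothetical schema module

const provider = createProvider("ollama", { defaultModel: "llama3.2" });
const program = new ChainOfThought(nameExtractionSchema, provider);

export async function extractNames(text) {
  const { result } = await program.run({ text });
  return result.names;
}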
tests/extractor.test.js
import { describe, it, expect } from "vitest";
import { extractNames } from "../src/services/extractor.js";
describe("extractNames", () => {
it("should extract single name", async () => {
const names = await extractNames("Bill Gates is a philanthropist.");
expect(names).toContain("Bill Gates");
});
it("should extract multiple names", async () => {
const names = await extractNames(
"Steve Jobs and Tim Cook led Apple."
);
expect(names).toContain("Steve Jobs");
expect(names).toContain("Tim Cook");
});
it("should return empty array for no names", async () => {
const names = await extractNames("The weather is nice today.");
expect(names).toEqual([]);
});
});API Costs
Running tests against real APIs costs money. Consider mocking for unit tests and using real APIs only for integration tests.
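One simple way to enforce this split is to pick the provider from an environment variable. A minimal sketch, reusing createProvider from the example above; the USE_REAL_API flag and the providerForTests helper are assumptions, not part of the library:

// Hypothetical helper: cheap local provider by default, real API only when
// USE_REAL_API is set (for example, in a CI integration job).
import { createProvider } from "@mzhub/promptc";

export function providerForTests() {
  if (process.env.USE_REAL_API === "true") {
    return createProvider("openai", { apiKey: process.env.OPENAI_API_KEY });
  }
  return createProvider("ollama", { defaultModel: "llama3.2" });
}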
Mocking LLM Responses
tests/mock-provider.js
// Create a mock provider for testing
function createMockProvider(responses) {
  let callIndex = 0;
  return {
    name: "mock",
    defaultModel: "mock-model",
    async complete({ prompt }) {
      const response = responses[callIndex % responses.length];
      callIndex++;
      return {
        content: JSON.stringify(response),
        usage: { inputTokens: 10, outputTokens: 20 }
      };
    }
  };
}

// Usage in tests
const mockProvider = createMockProvider([
  { names: ["Alice", "Bob"] },
  { names: [] },
  { names: ["Charlie"] }
]);

const program = new Predict(schema, mockProvider);
const result = await program.run({ text: "test" });
expect(result.result.names).toEqual(["Alice", "Bob"]);

Eval Sets
Maintain a test dataset to measure prompt quality:
tests/eval-set.json
[
  {
    "input": { "text": "Elon Musk runs Tesla." },
    "expected": { "names": ["Elon Musk"] }
  },
  {
    "input": { "text": "No names here." },
    "expected": { "names": [] }
  },
  {
    "input": { "text": "Meeting with Sarah and John." },
    "expected": { "names": ["Sarah", "John"] }
  }
]

tests/eval.test.js
import { describe, it, expect } from "vitest";
import { arrayOverlap } from "@mzhub/promptc";
import evalSet from "./eval-set.json";
import { extractNames } from "../src/services/extractor.js";

describe("Eval Set", () => {
  const evaluator = arrayOverlap();

  it("should pass evaluation set", async () => {
    let totalScore = 0;

    for (const { input, expected } of evalSet) {
      const result = await extractNames(input.text);
      const score = evaluator(result, expected.names);
      totalScore += score;
    }

    const avgScore = totalScore / evalSet.length;
    console.log(`Average score: ${(avgScore * 100).toFixed(1)}%`);

    // Require at least 80% average
    expect(avgScore).toBeGreaterThan(0.8);
  });
});

Regression Testing
Ensure prompt updates don't break existing functionality:
scripts/regression-test.js
import { readFileSync } from "fs";
import evalSet from "../tests/eval-set.json";
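// evaluatePrompt is not defined here or elsewhere in this guide; below is a
// minimal sketch (an assumption) of what it could look like, reusing the
// arrayOverlap evaluator from the eval tests. loadProgram is a hypothetical
// helper that turns a saved prompt file into a runnable program; adapt it to
// however your project executes stored prompts.
import { arrayOverlap } from "@mzhub/promptc";
import { loadProgram } from "./load-program.js"; // hypothetical helper

async function evaluatePrompt(prompt, dataset) {
  const program = loadProgram(prompt);
  const evaluator = arrayOverlap();
  let total = 0;
  for (const { input, expected } of dataset) {
    const { result } = await program.run(input);
    total += evaluator(result.names, expected.names);
  }
  return total / dataset.length;
}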
async function runRegressionTest() {
  // Load current production prompt
  const currentPrompt = JSON.parse(
    readFileSync("prompts/current.json", "utf-8")
  );

  // Load new candidate prompt
  const newPrompt = JSON.parse(
    readFileSync("prompts/candidate.json", "utf-8")
  );

  const currentScore = await evaluatePrompt(currentPrompt, evalSet);
  const newScore = await evaluatePrompt(newPrompt, evalSet);

  console.log(`Current: ${currentScore.toFixed(2)}`);
  console.log(`New: ${newScore.toFixed(2)}`);

  if (newScore < currentScore * 0.95) {
    console.error("❌ Regression detected! New prompt is >5% worse.");
    process.exit(1);
  }

  console.log("✅ No regression detected.");
}

runRegressionTest();

CI Integration
.github/workflows/test.yml
name: Test Prompts

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install dependencies
        run: npm ci

      # Use mock provider for unit tests
      - name: Unit tests
        run: npm test

      # Use real API for integration tests (on main only)
      - name: Integration tests
        if: github.ref == 'refs/heads/main'
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: npm run test:integration
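The workflow assumes two npm scripts: npm test for the mocked unit tests and npm run test:integration for the API-backed tests. How you split them depends on your project layout; one possible arrangement, assuming unit tests live under tests/unit and integration tests under tests/integration (directory names not used elsewhere in this guide), might be:

package.json (excerpt)
{
  "scripts": {
    "test": "vitest run tests/unit",
    "test:integration": "vitest run tests/integration"
  }
}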