Evaluators API

Built-in functions to score LLM outputs against expected values.

Evaluator Type

type Evaluator<O> = (
  prediction: O,
  groundTruth: O
) => number | Promise<number>;

// Returns a score between 0 (no match) and 1 (perfect match)
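
Any function with this signature can be used wherever the library expects an evaluator. As a minimal illustration (the name and comparison logic below are purely illustrative, not part of the library):

// Illustrative only: case-insensitive string equality as an evaluator
const caseInsensitiveMatch = (
  prediction: string,
  groundTruth: string
): number =>
  prediction.toLowerCase() === groundTruth.toLowerCase() ? 1.0 : 0.0;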

exactMatch()

Returns 1.0 if prediction deeply equals groundTruth, 0.0 otherwise.

function exactMatch<O>(): Evaluator<O>

import { exactMatch } from "@mzhub/promptc";

const evaluator = exactMatch();

evaluator({ a: 1 }, { a: 1 });  // 1.0
evaluator({ a: 1 }, { a: 2 });  // 0.0
evaluator([1, 2], [1, 2]);      // 1.0
evaluator([1, 2], [2, 1]);      // 0.0 (order matters)

partialMatch()

Returns the fraction of object fields whose values match.

function partialMatch<O extends object>(): Evaluator<O>

import { partialMatch } from "@mzhub/promptc";

const evaluator = partialMatch();

evaluator(
  { a: 1, b: 2, c: 3 },
  { a: 1, b: 2, c: 4 }
);  // 0.667 (2 of 3 fields match)

evaluator(
  { a: 1, b: 2 },
  { a: 1, b: 2 }
);  // 1.0

arrayOverlap()

Computes Jaccard similarity (intersection over union) for arrays.

function arrayOverlap<T>(): Evaluator<T[]>

import { arrayOverlap } from "@mzhub/promptc";

const evaluator = arrayOverlap();

evaluator(["a", "b", "c"], ["a", "b", "c"]);  // 1.0
evaluator(["a", "b"], ["b", "c"]);            // 0.33 (1 shared / 3 unique)
evaluator(["a"], []);                         // 0.0
evaluator([], []);                            // 1.0 (both empty)
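
The score corresponds to |A ∩ B| / |A ∪ B| over the distinct values of the two arrays (the examples above suggest duplicates are ignored). A rough sketch of that formula, not the library's actual implementation:

// Illustrative Jaccard similarity over distinct values
function jaccard<T>(prediction: T[], groundTruth: T[]): number {
  const a = new Set(prediction);
  const b = new Set(groundTruth);
  if (a.size === 0 && b.size === 0) return 1.0;  // both empty: perfect match
  const shared = [...a].filter((x) => b.has(x)).length;
  const union = new Set([...a, ...b]).size;
  return shared / union;
}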

llmJudge()

Uses an LLM to score output quality. The evaluator it returns is asynchronous and resolves to a Promise<number>.

function llmJudge<O>(config: LLMJudgeConfig): Evaluator<O>

interface LLMJudgeConfig {
  provider: LLMProvider;
  criteria?: string;  // Custom evaluation criteria
}

import { llmJudge, createProvider } from "@mzhub/promptc";

const provider = createProvider("openai", {
  apiKey: process.env.OPENAI_API_KEY
});

const evaluator = llmJudge({
  provider,
  criteria: "accuracy, completeness, and clarity"
});

// Async - returns Promise<number>
const score = await evaluator(
  { summary: "Generated summary..." },
  { summary: "Expected summary..." }
);  // e.g., 0.85
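
Because the evaluator resolves asynchronously, it is typically awaited inside an evaluation loop. A sketch of averaging its score over a small labeled dataset (the dataset shape here is a placeholder, not a library type):

// Average the judge's score over several labeled examples (illustrative)
const dataset = [
  {
    prediction: { summary: "Generated summary..." },
    groundTruth: { summary: "Expected summary..." }
  }
  // ...more examples
];

let total = 0;
for (const { prediction, groundTruth } of dataset) {
  total += await evaluator(prediction, groundTruth);  // each call invokes the LLM
}
const averageScore = total / dataset.length;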

Creating Custom Evaluators

// Simple custom evaluator: fraction of required keywords found in the prediction text
const containsAllKeywords = (
  prediction: { text?: string },
  groundTruth: { keywords?: string[] }
): number => {
  const keywords = groundTruth.keywords || [];
  const text = (prediction.text || "").toLowerCase();

  if (keywords.length === 0) return 1.0;

  const found = keywords.filter((k) =>
    text.includes(k.toLowerCase())
  ).length;

  return found / keywords.length;
};

// Async custom evaluator: delegate scoring to an external service
const asyncEvaluator = async <O>(prediction: O, groundTruth: O): Promise<number> => {
  const response = await fetch("https://my-eval-api.com/score", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ prediction, groundTruth })
  });
  const { score } = await response.json();
  return score;
};

// Pass the custom evaluator to a compiler
const compiler = new BootstrapFewShot(containsAllKeywords);
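
Because evaluators are plain scoring functions, several of them can be combined into a single weighted score. A sketch of such a composite (the output shape, weights, and the combination itself are illustrative; this helper is not part of the library):

import { partialMatch } from "@mzhub/promptc";

// Illustrative composite: weight field-level overlap against keyword coverage
type Output = { text?: string; keywords?: string[] };  // assumed output shape for this example

const combinedEvaluator = async (
  prediction: Output,
  groundTruth: Output
): Promise<number> => {
  const structural = await partialMatch<Output>()(prediction, groundTruth);
  const keywordCoverage = containsAllKeywords(prediction, groundTruth);
  return 0.7 * structural + 0.3 * keywordCoverage;
};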