Examples
This page contains practical examples for common use cases with the Ashr Labs TypeScript SDK.
Table of Contents
- Basic Setup
- Working with Datasets
- Building Runs with RunBuilder
- VM Stream Logs
- Observability — Production Tracing
- Managing Test Runs
- Submitting Requests
- CI/CD Integration
- Batch Operations
- Monitoring and Reporting
Basic Setup
Quickest Start
import { AshrLabsClient } from "ashr-labs";
// Only need your API key — everything else is automatic
const client = new AshrLabsClient("tp_your_api_key_here");
const datasets = await client.listDatasets();
From Environment Variables
import { AshrLabsClient } from "ashr-labs";
// Reads ASHR_LABS_API_KEY (required) and ASHR_LABS_BASE_URL (optional)
const client = AshrLabsClient.fromEnv();
const datasets = await client.listDatasets();
Explicit Session Info
import { AshrLabsClient } from "ashr-labs";
const client = new AshrLabsClient("tp_your_api_key_here");
// init() is called automatically on first API call, but you can call it explicitly
const session = await client.init();
const user = session.user as Record<string, unknown>;
const tenant = session.tenant as Record<string, unknown>;
console.log(`User: ${user.email}`);
console.log(`Tenant: ${tenant.tenant_name}`);
Working with Datasets
List All Datasets with Pagination
/**
 * Fetch every dataset by walking the paginated listDatasets endpoint.
 * Pages of `pageSize` items are requested until a short page signals the end.
 */
async function getAllDatasets(client: AshrLabsClient) {
  const pageSize = 50;
  const collected: Record<string, unknown>[] = [];
  for (let cursor = 0; ; cursor += pageSize) {
    const page = await client.listDatasets(undefined, pageSize, cursor);
    const items = page.datasets as Record<string, unknown>[];
    for (const item of items) collected.push(item);
    // A page smaller than pageSize means we have reached the last page
    if (items.length < pageSize) return collected;
  }
}
// Usage
const datasets = await getAllDatasets(client);
console.log(`Total datasets: ${datasets.length}`);
Download Dataset Media Files
import { writeFile, mkdir } from "node:fs/promises";
import { join } from "node:path";
/**
 * Download every media file attached to a dataset's actions.
 *
 * Fetches the dataset with signed URLs (valid for 3600 s), downloads each
 * action's file, and writes it to `outputDir` as `<action_id><ext>`.
 *
 * @param client    - API client used to fetch the dataset.
 * @param datasetId - ID of the dataset whose files to download.
 * @param outputDir - Directory to write into (created if missing).
 * @returns Paths of the files written, in download order.
 * @throws Error when a signed-URL download returns a non-2xx status.
 */
async function downloadDatasetFiles(
  client: AshrLabsClient,
  datasetId: number,
  outputDir: string,
) {
  await mkdir(outputDir, { recursive: true });
  // Get dataset with signed URLs
  const dataset = await client.getDataset(datasetId, true, 3600);
  const source = (dataset.dataset_source ?? {}) as Record<string, unknown>;
  const actions = (source.actions ?? []) as Record<string, unknown>[];
  const downloaded: string[] = [];
  for (const action of actions) {
    const url = action.signed_url as string | undefined;
    if (!url) continue; // action has no downloadable media
    const actionId = action.action_id as string;
    // Determine file extension from URL or default to .bin
    let ext = ".bin";
    if (url.includes(".mp3")) ext = ".mp3";
    else if (url.includes(".wav")) ext = ".wav";
    else if (url.includes(".json")) ext = ".json";
    const filename = join(outputDir, `${actionId}${ext}`);
    console.log(`Downloading ${actionId}...`);
    const response = await fetch(url);
    // Fail loudly instead of silently writing an HTML error page to disk
    if (!response.ok) {
      throw new Error(
        `Download failed for ${actionId}: HTTP ${response.status} ${response.statusText}`,
      );
    }
    const buffer = Buffer.from(await response.arrayBuffer());
    await writeFile(filename, buffer);
    downloaded.push(filename);
  }
  return downloaded;
}
// Usage
const files = await downloadDatasetFiles(client, 42, "./downloads");
console.log(`Downloaded ${files.length} files`);
Search Datasets by Name
async function findDatasetsByName(client: AshrLabsClient, searchTerm: string) {
const allDatasets = await getAllDatasets(client);
return allDatasets.filter((d) =>
((d.name as string) ?? "").toLowerCase().includes(searchTerm.toLowerCase()),
);
}
// Usage
const datasets = await findDatasetsByName(client, "audio");
for (const d of datasets) {
console.log(`- ${d.name} (ID: ${d.id})`);
}
Building Runs with RunBuilder
Basic RunBuilder Usage
import { AshrLabsClient, RunBuilder } from "ashr-labs";
const client = new AshrLabsClient("tp_...");
const run = new RunBuilder();
run.start();
const test = run.addTest("bank_analysis");
test.start();
// Record user actions
test.addUserFile(
"datasets/tenant_1/dataset_9/bank_analysis/action_0.pdf",
"User submits bank statement PDF",
);
test.addUserText(
"Please analyze this bank statement and summarize key transactions.",
"User asks for analysis",
);
// Record agent tool calls with expected vs actual
test.addToolCall(
{ name: "extract_pdf_content", arguments: { file_path: "bank_statement.pdf" } },
{ name: "extract_pdf_content", arguments: { file_path: "bank_statement.pdf", pages: "all" } },
"partial",
"Extra 'pages' argument in actual call",
);
test.addToolCall(
{ name: "analyze_transactions", arguments: { account_holder: "Rohan", period: "last_month" } },
{ name: "analyze_transactions", arguments: { account_holder: "Rohan", period: "last_month" } },
"exact",
);
// Record agent text responses
test.addAgentResponse(
{ summary: "Based on the bank statement analysis..." },
{ summary: "After analyzing the bank statement..." },
"similar",
0.89,
"Slightly different wording but same key information",
);
test.complete();
run.complete();
// Deploy to the API
const createdRun = await run.deploy(client, 42);
console.log(`Run #${createdRun.id} created`);
Multiple Tests in a Single Run
import { RunBuilder } from "ashr-labs";
const run = new RunBuilder();
run.start();
// First test
const test1 = run.addTest("pdf_extraction");
test1.start();
test1.addUserFile("data/invoice.pdf", "Upload invoice");
test1.addToolCall(
{ name: "extract_pdf", arguments: { file: "invoice.pdf" } },
{ name: "extract_pdf", arguments: { file: "invoice.pdf" } },
"exact",
);
test1.complete();
// Second test
const test2 = run.addTest("summary_generation");
test2.start();
test2.addUserText("Summarize the invoice", "User request");
test2.addAgentResponse(
{ summary: "Invoice total: $500" },
{ summary: "The invoice totals $500" },
"similar",
0.93,
);
test2.complete();
run.complete();
// Inspect the built result before deploying
const result = run.build();
const metrics = result.aggregate_metrics as Record<string, unknown>;
console.log(`Total tests: ${metrics.total_tests}`);
console.log(`Tests passed: ${metrics.tests_passed}`);
Using build() for Inspection Before Deploy
const run = new RunBuilder();
run.start();
const test = run.addTest("my_test");
test.start();
test.addUserText("Hello", "Greeting");
test.addToolCall(
{ name: "greet", arguments: {} },
{ name: "greet", arguments: { formal: true } },
"partial",
"Extra 'formal' argument",
);
test.complete();
run.complete();
// Inspect the result object
const result = run.build();
console.log(result.aggregate_metrics);
// {
// total_tests: 1,
// tests_passed: 1,
// tests_failed: 0,
// average_similarity_score: null,
// total_tool_call_divergence: 1,
// total_response_divergence: 0,
// }
// Only deploy if satisfied
const metrics = result.aggregate_metrics as Record<string, unknown>;
if ((metrics.tests_passed as number) > 0) {
await run.deploy(client, 42);
}
VM Stream Logs
Attach virtual machine session logs to test results for browser-based or desktop-based agents.
Browser Agent with VM Logs
import { AshrLabsClient, RunBuilder } from "ashr-labs";
const client = new AshrLabsClient("tp_...");
const run = new RunBuilder();
run.start();
const test = run.addTest("checkout_flow");
test.start();
// Record agent actions
test.addToolCall(
{ name: "search_product", arguments: { query: "blue shoes" } },
{ name: "search_product", arguments: { query: "blue shoes size 10" } },
"partial",
"Extra size filter in actual call",
);
test.addAgentResponse(
{ text: "I found the blue shoes. Adding size 10 to cart." },
{ text: "Found them! Adding the blue shoes size 10 to your cart now." },
"similar",
0.87,
);
// Attach the VM session logs from your browser provider
test.setVmStream("browserbase", {
sessionId: "sess_abc123def456",
durationMs: 12000,
logs: [
{ ts: 0, type: "navigation", data: { url: "https://shop.example.com" } },
{ ts: 800, type: "action", data: { action: "click", selector: ".product-card.blue-shoes" } },
{ ts: 1500, type: "navigation", data: { url: "https://shop.example.com/shoes/blue-runner" } },
{ ts: 2200, type: "action", data: { action: "select", selector: "#size-dropdown", value: "10" } },
{ ts: 2800, type: "action", data: { action: "click", selector: "#add-to-cart" } },
{ ts: 3500, type: "network", data: { method: "POST", url: "/api/cart/add", status: 200 } },
],
metadata: {
browser: "chromium",
viewport: { width: 1280, height: 720 },
},
});
test.complete();
run.complete();
// Deploy — the VM logs are included in the run result
const created = await run.deploy(client, 42);
Log Types
Common log entry types your VM provider might emit:
| Type | Description | Example data |
|---|---|---|
| navigation | Page navigation | { url: "https://..." } |
| action | User interaction | { action: "click", selector: "#btn" } |
| network | HTTP request | { method: "POST", url: "/api/...", status: 200 } |
| console | Browser console | { level: "warn", message: "Deprecated API" } |
| error | Error occurred | { message: "Element not found: #submit" } |
| screenshot | Screenshot taken | { s3_key: "vm-streams/.../frame.png" } |
Kernel Browser Session
Use setKernelVm() for Kernel browser sessions — it sets the provider and exposes Kernel-specific metadata as named options fields:
test.setKernelVm("kern_sess_abc123", {
durationMs: 15000,
logs: [
{ ts: 0, type: "navigation", data: { url: "https://app.example.com" } },
{ ts: 1200, type: "action", data: { action: "click", selector: "#login" } },
{ ts: 2500, type: "action", data: { action: "type", selector: "#email", value: "user@example.com" } },
{ ts: 3800, type: "action", data: { action: "click", selector: "#submit" } },
{ ts: 5000, type: "navigation", data: { url: "https://app.example.com/dashboard" } },
{ ts: 8000, type: "screenshot", data: { s3_key: "vm-streams/.../dashboard.png" } },
],
replayId: "replay_abc123",
replayViewUrl: "https://www.kernel.sh/replays/replay_abc123",
stealth: true,
viewport: { width: 1920, height: 1080 },
});
Minimal VM Stream (Logs Only)
You don't need all fields — at minimum just pass the provider and logs:
test.setVmStream("steel", {
logs: [
{ ts: 0, type: "navigation", data: { url: "https://app.example.com" } },
{ ts: 5000, type: "error", data: { message: "Login failed: invalid credentials" } },
],
});
Observability — Production Tracing
Full Agent Trace with wrap()
import { AshrLabsClient } from "ashr-labs";
const client = new AshrLabsClient("tp_...");
await client.trace("support-agent", {
userId: "user_42",
sessionId: "conv_001",
metadata: { version: "1.0" },
tags: ["prod"],
}).wrap(async (trace) => {
// LLM call: classify intent
const gen = trace.generation("classify-intent", {
model: "claude-sonnet-4-6",
input: [{ role: "user", content: "I can't log in" }],
});
const classification = await callLlm(...);
gen.end({
output: { intent: "account_lockout", confidence: 0.95 },
usage: { input_tokens: 45, output_tokens: 18 },
});
// Tool call: lookup account
await trace.span("tool:lookup_account", { input: { user_id: "user_42" } }).wrap(async (s) => {
const result = await lookupAccount("user_42");
s.end({ output: { status: "locked" } });
});
// Tool call: unlock account
await trace.span("tool:unlock_account", { input: { user_id: "user_42" } }).wrap(async (s) => {
await unlockAccount("user_42");
s.end({ output: { success: true } });
});
// Nested: compose response with guardrail
await trace.span("compose-response").wrap(async (compose) => {
const gen2 = compose.generation("generate-reply", { model: "claude-sonnet-4-6" });
const reply = await callLlm(...);
gen2.end({ output: { content: reply }, usage: { input_tokens: 80, output_tokens: 25 } });
compose.event("guardrail:pii-check", { input: { pii_detected: false } });
compose.end({ output: { response: reply } });
});
trace.event("guardrail:toxicity", { input: { toxic: false }, level: "DEFAULT" });
});
// trace.end() called automatically — never rejects
Error Tracking
If wrap() callback throws, the span auto-ends with level: "ERROR":
await client.trace("risky-agent").wrap(async (trace) => {
await trace.span("tool:external_api").wrap(async (s) => {
const response = await callExternalApi(...); // if this throws...
s.end({ output: response });
});
// ...the span auto-ends with level="ERROR" and the error re-throws
});
// The trace still flushes — you can see the error in analytics
Analytics
const analytics = await client.getObservabilityAnalytics(7);
const { overview, tool_performance, model_usage } = analytics;
console.log(`Traces: ${overview.total_traces}`);
console.log(`Tokens: ${overview.total_input_tokens} in / ${overview.total_output_tokens} out`);
console.log(`Error rate: ${overview.error_rate}`);
for (const tool of tool_performance) {
console.log(` ${tool.tool_name}: ${tool.total_calls} calls, ${tool.error_rate} error rate`);
}
for (const model of model_usage) {
console.log(` ${model.model}: ${model.total_calls} calls, ${model.total_tokens} tokens`);
}
// Error log
const errors = await client.getObservabilityErrors({ days: 7, limit: 10 });
for (const t of errors.traces) {
console.log(` ${t.trace_name}: ${t.error_count} errors`);
}
Filtering Traces
// By user
const traces = await client.listObservabilityTraces({ userId: "user_42" });
// By session
const session = await client.listObservabilityTraces({ sessionId: "conv_001" });
// Pagination
const page1 = await client.listObservabilityTraces({ limit: 20, page: 1 });
Managing Test Runs
Create a Comprehensive Test Run
/**
 * Package raw test results into a run payload and submit it.
 * Adds a timestamp plus the Node version/platform for traceability.
 */
async function submitTestResults(
  client: AshrLabsClient,
  datasetId: number,
  testResults: Record<string, unknown>,
  metadata?: Record<string, unknown>,
) {
  const { status, score, metrics, test_cases } = testResults;
  const environment = {
    node_version: process.version,
    platform: process.platform,
  };
  const payload = {
    timestamp: new Date().toISOString(),
    status: status ?? "unknown",
    score,
    metrics: metrics ?? {},
    test_cases: test_cases ?? [],
    metadata: metadata ?? {},
    environment,
  };
  return client.createRun(datasetId, payload);
}
// Usage
const testResults = {
status: "passed",
score: 0.95,
metrics: {
accuracy: 0.98,
precision: 0.96,
recall: 0.94,
f1_score: 0.95,
},
test_cases: [
{ name: "test_audio_quality", passed: true, duration_ms: 150 },
{ name: "test_voice_match", passed: true, duration_ms: 200 },
{ name: "test_latency", passed: true, duration_ms: 50 },
],
};
const run = await submitTestResults(client, 42, testResults, {
version: "1.0.0",
branch: "main",
});
Compare Test Runs
/**
 * Fetch two runs and compare their numeric metrics.
 * Only metrics present in both runs appear in the result; each entry holds
 * both values, the absolute diff, and the percent change rounded to 2 dp.
 */
async function compareRuns(
  client: AshrLabsClient,
  runId1: number,
  runId2: number,
) {
  const first = await client.getRun(runId1);
  const second = await client.getRun(runId2);
  const metricsOf = (run: Record<string, unknown>) =>
    ((run.result as Record<string, unknown>)?.metrics ?? {}) as Record<string, number>;
  const m1 = metricsOf(first);
  const m2 = metricsOf(second);
  const comparison: Record<string, Record<string, number>> = {};
  for (const key of new Set([...Object.keys(m1), ...Object.keys(m2)])) {
    const a = m1[key];
    const b = m2[key];
    // Skip metrics missing from either run
    if (a == null || b == null) continue;
    const diff = b - a;
    const pct = a === 0 ? 0 : (diff / a) * 100;
    comparison[key] = {
      run_1: a,
      run_2: b,
      diff,
      pct_change: Math.round(pct * 100) / 100,
    };
  }
  return comparison;
}
// Usage
const comparison = await compareRuns(client, 100, 101);
for (const [metric, values] of Object.entries(comparison)) {
const sign = values.pct_change >= 0 ? "+" : "";
console.log(`${metric}: ${values.run_1} -> ${values.run_2} (${sign}${values.pct_change}%)`);
}
Get Latest Run for Dataset
/**
 * Return the most recent run for a dataset, or null when none exist.
 */
async function getLatestRun(client: AshrLabsClient, datasetId: number) {
  const { runs } = await client.listRuns(datasetId, undefined, 1);
  const list = (runs ?? []) as Record<string, unknown>[];
  return list.length > 0 ? list[0] : null;
}
// Usage
const latest = await getLatestRun(client, 42);
if (latest) {
const result = latest.result as Record<string, unknown>;
console.log(`Latest run: #${latest.id} - ${result.status}`);
}
Submitting Requests
Audio Generation Request
/**
 * Submit a text-to-speech generation request.
 * The request title is built from the first 30 characters of the text.
 */
async function requestAudioGeneration(
  client: AshrLabsClient,
  text: string,
  voice = "alloy",
  format = "mp3",
) {
  const title = `Audio: ${text.slice(0, 30)}...`;
  const body = {
    agent: { name: "Audio Generator", description: "Generates audio from text" },
    context: { domain: "audio", use_case: "Text to speech" },
    type: "audio_generation",
    text,
    voice,
    format,
    speed: 1.0,
  };
  return client.createRequest(title, body);
}
// Usage
const req = await requestAudioGeneration(client, "Welcome to our testing platform!", "nova");
console.log(`Request #${req.id} submitted`);
Poll for Request Completion
/**
 * Poll a request every 5 seconds until it reaches a terminal state or
 * `timeout` seconds pass.
 *
 * @throws Error when the request fails or the timeout elapses.
 */
async function waitForCompletion(
  client: AshrLabsClient,
  requestId: number,
  timeout = 300,
) {
  const deadline = Date.now() + timeout * 1000;
  while (Date.now() < deadline) {
    const request = await client.getRequest(requestId);
    switch (request.request_status as string) {
      case "completed":
        return request;
      case "failed":
        throw new Error(`Request failed: ${request.error}`);
    }
    // Wait 5 seconds between polls
    await new Promise((resolve) => setTimeout(resolve, 5000));
  }
  throw new Error(`Request ${requestId} did not complete within ${timeout}s`);
}
// Usage
const req = await requestAudioGeneration(client, "Hello");
const completed = await waitForCompletion(client, req.id as number);
console.log(`Request completed: ${JSON.stringify(completed)}`);
CI/CD Integration
GitHub Actions Integration
#!/usr/bin/env -S npx tsx
/**
* CI/CD script for running tests against Ashr Labs.
*/
import { AshrLabsClient, AshrLabsError } from "ashr-labs";
/**
 * CI entry point: run the test suite, submit results as a run, and exit
 * with 0/1 so the CI job reflects the outcome.
 *
 * Requires ASHR_LABS_API_KEY (read by fromEnv) and a numeric
 * ASHR_LABS_DATASET_ID in the environment.
 */
async function main() {
  // Get configuration from environment
  const client = AshrLabsClient.fromEnv(); // reads ASHR_LABS_API_KEY
  // Validate the dataset ID instead of passing NaN to the API
  const datasetId = Number.parseInt(process.env.ASHR_LABS_DATASET_ID ?? "", 10);
  if (Number.isNaN(datasetId)) {
    console.error("ASHR_LABS_DATASET_ID must be set to a numeric dataset ID");
    process.exit(1);
  }
  // Run your tests (placeholder)
  console.log("Running tests...");
  const testResults = runTests(); // Your test function
  // Submit results
  try {
    const run = await client.createRun(datasetId, {
      status: testResults.success ? "passed" : "failed",
      score: testResults.score,
      metrics: testResults.metrics,
      commit: process.env.GITHUB_SHA ?? "unknown",
      branch: process.env.GITHUB_REF_NAME ?? "unknown",
      workflow: process.env.GITHUB_WORKFLOW ?? "unknown",
    });
    console.log(`Results submitted: Run #${run.id}`);
    // Exit with appropriate code so CI marks the job pass/fail
    process.exit(testResults.success ? 0 : 1);
  } catch (e) {
    // Report every failure (API errors and unexpected ones) on stderr
    if (e instanceof AshrLabsError) {
      console.error(`Failed to submit results: ${e}`);
    } else {
      console.error(`Unexpected error: ${String(e)}`);
    }
    process.exit(1);
  }
}
/**
 * Placeholder for your actual test logic — replace with real test
 * execution that reports success, an overall score, and metrics.
 */
function runTests() {
  const metrics = { accuracy: 0.98 };
  return { success: true, score: 0.95, metrics };
}
main();
GitHub Actions Workflow
# .github/workflows/test.yml
name: Run Tests
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: "20"
- name: Install dependencies
run: npm ci
- name: Run tests and submit results
env:
ASHR_LABS_API_KEY: ${{ secrets.ASHR_LABS_API_KEY }}
ASHR_LABS_DATASET_ID: ${{ vars.DATASET_ID }}
run: npx tsx scripts/run_tests.ts
Batch Operations
Batch Create Runs
/**
 * Create many runs with at most `maxConcurrency` requests in flight.
 * Failures are collected alongside their config rather than aborting
 * the whole batch.
 */
async function batchCreateRuns(
  client: AshrLabsClient,
  runConfigs: { datasetId: number; result: Record<string, unknown> }[],
  maxConcurrency = 5,
) {
  const created: Record<string, unknown>[] = [];
  const errors: { config: typeof runConfigs[number]; error: string }[] = [];
  for (let start = 0; start < runConfigs.length; start += maxConcurrency) {
    const chunk = runConfigs.slice(start, start + maxConcurrency);
    const settled = await Promise.allSettled(
      chunk.map(({ datasetId, result }) => client.createRun(datasetId, result)),
    );
    settled.forEach((outcome, idx) => {
      if (outcome.status === "fulfilled") {
        created.push(outcome.value);
      } else {
        errors.push({ config: chunk[idx], error: String(outcome.reason) });
      }
    });
  }
  return { created, errors };
}
// Usage
const configs = [
{ datasetId: 1, result: { score: 0.95 } },
{ datasetId: 2, result: { score: 0.87 } },
{ datasetId: 3, result: { score: 0.92 } },
];
const result = await batchCreateRuns(client, configs);
console.log(`Created ${result.created.length} runs, ${result.errors.length} errors`);
Export All Runs to CSV
import { writeFile } from "node:fs/promises";
/**
 * Export every run visible to the client to a CSV file.
 *
 * Pages through listRuns 100 at a time, then writes one row per run with
 * columns: ID, Dataset, Created At, Status, Score. Fields are escaped per
 * RFC 4180 so commas, quotes, or newlines in values cannot corrupt the file.
 *
 * @returns The number of runs exported.
 */
async function exportRunsToCsv(client: AshrLabsClient, outputFile: string) {
  // Quote a field only when it contains a delimiter, quote, or newline
  const csvField = (value: unknown): string => {
    const text = String(value ?? "");
    return /[",\n\r]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  // Fetch all runs
  const allRuns: Record<string, unknown>[] = [];
  let offset = 0;
  const limit = 100;
  while (true) {
    const response = await client.listRuns(undefined, undefined, limit, offset);
    const runs = (response.runs ?? []) as Record<string, unknown>[];
    allRuns.push(...runs);
    if (runs.length < limit) break;
    offset += limit;
  }
  // Write to CSV
  const rows = ["ID,Dataset,Created At,Status,Score"];
  for (const run of allRuns) {
    const result = (run.result ?? {}) as Record<string, unknown>;
    rows.push(
      [
        run.id,
        run.dataset,
        run.created_at,
        result.status ?? "unknown",
        result.score ?? "N/A",
      ]
        .map(csvField)
        .join(","),
    );
  }
  await writeFile(outputFile, rows.join("\n"));
  return allRuns.length;
}
// Usage
const count = await exportRunsToCsv(client, "runs_export.csv");
console.log(`Exported ${count} runs to runs_export.csv`);
Monitoring and Reporting
Generate Test Report
/**
 * Summarize the last seven days of runs: totals, pass rate, average score,
 * and a per-dataset breakdown.
 */
async function generateWeeklyReport(client: AshrLabsClient) {
  // Get runs from the API (up to the 1000 most recent)
  const response = await client.listRuns(undefined, undefined, 1000);
  const allRuns = (response.runs ?? []) as Record<string, unknown>[];
  // Keep only runs created within the last 7 days
  const cutoff = Date.now() - 7 * 24 * 60 * 60 * 1000;
  const recentRuns = allRuns.filter(
    (r) => new Date(r.created_at as string).getTime() > cutoff,
  );
  // Aggregate stats
  let passed = 0;
  let failed = 0;
  const scores: number[] = [];
  const byDataset = new Map<number, { passed: number; failed: number; scores: number[] }>();
  for (const run of recentRuns) {
    const result = (run.result ?? {}) as Record<string, unknown>;
    const status = result.status as string;
    const score = result.score as number | undefined;
    const datasetId = run.dataset as number;
    let dsStats = byDataset.get(datasetId);
    if (!dsStats) {
      dsStats = { passed: 0, failed: 0, scores: [] };
      byDataset.set(datasetId, dsStats);
    }
    if (status === "passed") {
      passed += 1;
      dsStats.passed += 1;
    } else if (status === "failed") {
      failed += 1;
      dsStats.failed += 1;
    }
    if (score != null) {
      scores.push(score);
      dsStats.scores.push(score);
    }
  }
  const avgScore = scores.length
    ? scores.reduce((sum, s) => sum + s, 0) / scores.length
    : null;
  const passRate = recentRuns.length ? (passed / recentRuns.length) * 100 : 0;
  return { total_runs: recentRuns.length, passed, failed, avgScore, passRate, byDataset };
}
// Usage
const report = await generateWeeklyReport(client);
console.log("Weekly Report");
console.log("=============");
console.log(`Total Runs: ${report.total_runs}`);
console.log(`Pass Rate: ${report.passRate.toFixed(1)}%`);
console.log(`Average Score: ${report.avgScore?.toFixed(3) ?? "N/A"}`);
Monitor API Key Usage
/**
 * Summarize API key health: active/inactive counts, active keys expiring
 * within seven days, and active keys that have never been used.
 */
async function checkApiKeyHealth(client: AshrLabsClient) {
  const keys = await client.listApiKeys(true);
  const sevenDaysFromNow = Date.now() + 7 * 24 * 60 * 60 * 1000;
  const report = {
    total: keys.length,
    active: 0,
    inactive: 0,
    expiringSoon: [] as Record<string, unknown>[],
    neverUsed: [] as Record<string, unknown>[],
  };
  for (const key of keys) {
    if (!key.is_active) {
      report.inactive += 1;
      continue;
    }
    report.active += 1;
    // Flag keys whose expiry falls inside the next week
    if (key.expires_at) {
      const expiresAt = new Date(key.expires_at as string).getTime();
      if (expiresAt < sevenDaysFromNow) report.expiringSoon.push(key);
    }
    // Flag keys that have never made a request
    if (!key.last_used_at) report.neverUsed.push(key);
  }
  return report;
}
// Usage
const health = await checkApiKeyHealth(client);
console.log("API Key Health Report");
console.log(`Active: ${health.active}, Inactive: ${health.inactive}`);
if (health.expiringSoon.length > 0) {
console.log("\nKeys expiring soon:");
for (const key of health.expiringSoon) {
console.log(` - ${key.name} (${key.key_prefix}...)`);
}
}
if (health.neverUsed.length > 0) {
console.log("\nKeys never used:");
for (const key of health.neverUsed) {
console.log(` - ${key.name} (${key.key_prefix}...)`);
}
}