Quick Start Guide
This guide will help you get started with the Ashr Labs SDK in just a few minutes.
For a complete end-to-end walkthrough of testing your agent, see Testing Your Agent.
Step 1: Get Your API Key
Before using the SDK, you need an API key:
- Log in at lab.ashr.io
- Click API Keys in the sidebar
- Click Create New Key
- Give it a name and pick an expiration
- Copy the key (it starts with tp_) — you won't be able to see it again!
Step 2: Initialize the Client
from ashr_labs import AshrLabsClient
# Only need your API key — base_url defaults to production
client = AshrLabsClient(api_key="tp_your_api_key_here")
# Or load from environment variable
# client = AshrLabsClient.from_env() # reads ASHR_LABS_API_KEY
Step 3: List Datasets
# tenant_id is auto-resolved from your API key
datasets = client.list_datasets()
print(f"Found {len(datasets['datasets'])} datasets:")
for d in datasets["datasets"]:
print(f" - {d['name']} (ID: {d['id']})")
Step 4: Get a Dataset
Retrieve a dataset with signed URLs for downloading media files:
dataset = client.get_dataset(
dataset_id=42,
include_signed_urls=True,
url_expires_seconds=3600 # URLs valid for 1 hour
)
print(f"Dataset: {dataset['name']}")
# Access test scenarios
source = dataset["dataset_source"]
for run_id, scenario in source["runs"].items():
print(f" Scenario: {scenario['title']} ({len(scenario['actions'])} actions)")
Step 5: Run an Eval with EvalRunner
The easiest way to test your agent. Any object with respond() and reset() methods works:
from ashr_labs import EvalRunner
# Fetch a dataset and run your agent against it
runner = EvalRunner.from_dataset(client, dataset_id=42)
run = runner.run(my_agent)
# Submit results — grading happens server-side
created = run.deploy(client, dataset_id=42)
# Wait for grading to complete (typically 1-3 minutes)
graded = client.poll_run(created["id"])
metrics = graded["result"]["aggregate_metrics"]
print(f"Passed: {metrics['tests_passed']}/{metrics['total_tests']}")
Your agent just needs these two methods:
class MyAgent:
def respond(self, message: str) -> dict:
# Call your LLM, return {"text": "...", "tool_calls": [...]}
return {"text": "response", "tool_calls": []}
def reset(self) -> None:
# Clear conversation history
pass
See Testing Your Agent for a full walkthrough with a complete agent example.
Step 6: Build and Deploy Manually (Advanced)
For custom eval loops, use RunBuilder directly:
from ashr_labs import RunBuilder
run = RunBuilder()
run.start()
test = run.add_test("bank_analysis")
test.start()
test.add_user_text(
text="Please analyze this bank statement.",
description="User asks for analysis"
)
test.add_tool_call(
expected={"name": "extract_pdf_content", "arguments_json": '{"file": "statement.pdf"}'},
actual={"name": "extract_pdf_content", "arguments_json": '{"file": "statement.pdf", "pages": "all"}'},
match_status="partial",
divergence_notes="Extra 'pages' argument in actual call",
)
test.add_agent_response(
expected_response={"text": "Based on the bank statement analysis..."},
actual_response={"text": "After analyzing the bank statement..."},
match_status="similar",
semantic_similarity=0.89,
)
test.complete()
run.complete()
run.deploy(client, dataset_id=42)
Step 7: Submit a Request
Create a generation request:
request = client.create_request(
request_name="Audio Generation Request",
request={
"text": "Welcome to Ashr Labs!",
"voice": "alloy",
"format": "mp3",
}
)
print(f"Request #{request['id']} created: {request['request_status']}")
Step 8: Organize with Agents (Optional)
Agents group your datasets and configure how they're graded. If you're running evals for the same agent across multiple datasets, create an agent to keep things consistent:
# Create an agent with grading config
agent = client.create_agent(
name="Support Bot",
description="Spanish-language scheduling agent",
config={
"tool_definitions": [
{"name": "fetch_kareo_data", "required": True},
{"name": "end_session", "required": False},
],
"grading_config": {
"tool_strictness": {
"fetch_kareo_data": "required",
"end_session": "optional",
},
},
},
)
# Link your dataset and deploy runs under this agent
client.set_dataset_agent(dataset_id=42, agent_id=agent["id"])
run.deploy(client, dataset_id=42, agent_id=agent["id"])
The grading system uses the agent's config to make smarter decisions — optional tools that the agent handles via text responses are recovered as partial matches instead of failures.
Complete Example
from ashr_labs import AshrLabsClient, EvalRunner, NotFoundError
client = AshrLabsClient(api_key="tp_your_api_key_here")
def main():
# 1. Fetch the dataset and run the eval
try:
runner = EvalRunner.from_dataset(client, dataset_id=42)
except NotFoundError:
print("Dataset not found!")
return
# 2. Run agent against all scenarios
run = runner.run(
my_agent,
on_scenario=lambda sid, s: print(f"Running: {s.get('title', sid)}"),
)
# 3. Submit and wait for grading
created = run.deploy(client, dataset_id=42)
print(f"Run #{created['id']} submitted — waiting for grading...")
graded = client.poll_run(created["id"])
metrics = graded["result"]["aggregate_metrics"]
print(f"Passed: {metrics['tests_passed']}/{metrics['total_tests']}")
print(f"Tool divergences: {metrics.get('total_tool_call_divergence', 0)}")
if __name__ == "__main__":
main()
Next Steps
- Testing Your Agent — full end-to-end integration guide
- VM Integration — browser/desktop agents with VM stream logging
- Authentication — env vars, from_env(), security best practices
- API Reference — complete method documentation (includes Agents, VM Streams)
- Examples — CI/CD, batch operations, reporting