ashr Python SDK

Programmatically run evals, manage datasets, and submit test results for your AI agents.

The ashr SDK provides a simple, Pythonic interface for interacting with the ashr API. Use it to:

Retrieve datasets – Access test datasets and their associated media files
Manage test runs – Create, retrieve, and delete test runs with results
Build runs incrementally – Use RunBuilder to construct run results as your agent executes
Submit requests – Create and track generation requests
Manage API keys – List and revoke API keys for your tenant

Requirements

Python 3.10 or higher
No external dependencies required

Quick Install

pip install ashr-labs

Quick Example

from ashr_labs import AshrLabsClient, RunBuilder

# Initialize the client
client = AshrLabsClient(
    api_key="tp_your_api_key_here",
    base_url="https://api.ashr.io/testing-platform-api"
)

# List your datasets
response = client.list_datasets(tenant_id=1)
for dataset in response["datasets"]:
    print(f"- {dataset['name']} (ID: {dataset['id']})")

# Build a run incrementally as your agent executes
run = RunBuilder()
run.start()

test = run.add_test("bank_analysis")
test.start()
test.add_user_text(text="Analyze this statement", description="User prompt")
test.add_tool_call(
    expected={"tool_name": "extract_pdf", "arguments": {"file": "a.pdf"}},
    actual={"tool_name": "extract_pdf", "arguments": {"file": "a.pdf"}},
    match_status="exact",
)
test.complete()

run.complete()
run.deploy(client, tenant_id=1, dataset_id=42)

Quick Links

Installation

Install the SDK from PyPI or source

Quick Start

Get up and running in minutes

Authentication

API key setup and best practices

API Reference

Full method and type reference

Error Handling

Exception classes and retry patterns

Examples

CI/CD, batch ops, and more

Installation

Install the ashr SDK and get ready to build.

Requirements

Python 3.10 or higher
No external dependencies (uses only Python standard library)

Install from PyPI

pip install ashr-labs

Install from Source

git clone https://github.com/ashr-labs/ashr-labs.git
cd ashr-labs/sdk
pip install -e .

Install with Dev Dependencies

If you want to contribute or run tests:

pip install -e ".[dev]"

This installs additional packages:

pytest – Testing framework
pytest-cov – Code coverage
black – Code formatter
mypy – Type checker
ruff – Linter

Verify Installation

from ashr_labs import AshrLabsClient, __version__

print(f"ashr SDK v{__version__}")

Virtual Environment (Recommended)

# Create virtual environment
python -m venv venv

# Activate it
source venv/bin/activate  # Linux/macOS
# or
venv\Scripts\activate     # Windows

# Install the SDK
pip install ashr-labs

Upgrading

pip install --upgrade ashr-labs

Quick Start Guide

Get started with the ashr SDK in just a few minutes.

Step 1: Get Your API Key

Log in to the ashr web interface
Navigate to Settings > API Keys
Click Create API Key
Give it a name and select the appropriate scopes
Copy the key (it starts with tp_) – you won't be able to see it again!

Step 2: Initialize the Client

from ashr_labs import AshrLabsClient

client = AshrLabsClient(
    api_key="tp_your_api_key_here",
    base_url="https://api.ashr.io/testing-platform-api"
)

Step 3: Initialize Session

Validate your credentials and get user/tenant information:

session = client.init()

print(f"Logged in as: {session['user']['email']}")
print(f"Tenant: {session['tenant']['name']}")
print(f"User ID: {session['user']['id']}")
print(f"Tenant ID: {session['tenant']['id']}")

Step 4: List Datasets

response = client.list_datasets(tenant_id=1)

print(f"Found {len(response['datasets'])} datasets:")
for dataset in response["datasets"]:
    print(f"  - {dataset['name']} (ID: {dataset['id']})")

Step 5: Get a Specific Dataset

dataset = client.get_dataset(
    dataset_id=42,
    include_signed_urls=True,
    url_expires_seconds=3600
)

print(f"Dataset: {dataset['name']}")

source = dataset.get("dataset_source", {})
for action in source.get("actions", []):
    print(f"  Action: {action['action_id']}")
    if "signed_url" in action:
        print(f"    Download URL: {action['signed_url']}")

Step 6: Build and Deploy a Test Run

Use the RunBuilder to incrementally construct run results as your agent executes, then deploy them when ready:

from ashr_labs import RunBuilder

run = RunBuilder()
run.start()

test = run.add_test("bank_analysis")
test.start()

# Record user inputs
test.add_user_file(
    file_path="datasets/tenant_1/dataset_9/bank_analysis/action_0.pdf",
    description="User submits bank statement PDF"
)
test.add_user_text(
    text="Please analyze this bank statement and summarize key transactions.",
    description="User asks for analysis"
)

# Record agent tool calls (expected vs actual)
test.add_tool_call(
    expected={"tool_name": "extract_pdf_content", "arguments": {"file_path": "bank_statement.pdf"}},
    actual={"tool_name": "extract_pdf_content", "arguments": {"file_path": "bank_statement.pdf", "pages": "all"}},
    match_status="partial",
    divergence_notes="Extra 'pages' argument in actual call",
)

# Record agent text responses
test.add_agent_response(
    expected_response={"summary": "Based on the bank statement analysis..."},
    actual_response={"summary": "After analyzing the bank statement..."},
    match_status="similar",
    semantic_similarity=0.89,
    divergence_notes="Slightly different wording but same key information",
)

test.complete()
run.complete()

# Deploy the run to the API
run.deploy(client, tenant_id=1, dataset_id=42)

Step 7: Submit a Request

request = client.create_request(
    tenant_id=1,
    requestor_id=5,
    request_name="Audio Generation Request",
    request={
        "text": "Welcome to the Ashr Labs!",
        "voice": "alloy",
        "format": "mp3",
        "speed": 1.0
    }
)

print(f"Request #{request['id']} created with status: {request['request_status']}")

Complete Example

from ashr_labs import AshrLabsClient, RunBuilder, NotFoundError

client = AshrLabsClient(
    api_key="tp_your_api_key_here",
    base_url="https://api.ashr.io/testing-platform-api"
)

def main():
    """Walk through the full flow: session, dataset lookup, run build, deploy."""
    dataset_id = 42

    # 1. Validate credentials and find out which tenant we belong to
    tenant_id = client.init()['tenant']['id']

    # 2. Look the dataset up; bail out early when it does not exist
    try:
        dataset = client.get_dataset(dataset_id=dataset_id, include_signed_urls=True)
        print(f"Dataset: {dataset['name']}")
    except NotFoundError:
        print("Dataset not found!")
        return

    # 3. Record the results of a single test case
    builder = RunBuilder()
    builder.start()

    case = builder.add_test("test_1")
    case.start()
    case.add_user_text(text="Analyze this data", description="User prompt")
    tool_call = {"tool_name": "analyze", "arguments": {"data": "input"}}
    case.add_tool_call(
        expected=tool_call,
        actual=dict(tool_call),
        match_status="exact",
    )
    case.complete()
    builder.complete()

    # 4. Submit the finished run
    created = builder.deploy(client, tenant_id=tenant_id, dataset_id=dataset_id)
    print(f"Run #{created['id']} created!")

if __name__ == "__main__":
    main()

Authentication

API key setup, security best practices, and key management.

API Key Format

Prefix: tp_ (ashr)
Length: 32+ characters after the prefix
Example: tp_abc123def456ghi789jkl012mno345pq

Creating an API Key

API keys can only be created through the web interface using OAuth authentication:

Log in to the ashr web interface
Navigate to Settings > API Keys
Click Create API Key
Configure your key: name, scopes, and optional expiration date
Click Create
Important: Copy the key immediately – it will only be shown once!

Using the API Key

Basic Usage

from ashr_labs import AshrLabsClient

client = AshrLabsClient(
    api_key="tp_your_api_key_here",
    base_url="https://api.ashr.io/testing-platform-api"
)

Environment Variables (Recommended)

# Set environment variables
export ASHR_LABS_API_KEY="tp_your_api_key_here"
export ASHR_LABS_BASE_URL="https://api.ashr.io/testing-platform-api"

import os
from ashr_labs import AshrLabsClient

client = AshrLabsClient(
    api_key=os.environ["ASHR_LABS_API_KEY"],
    base_url=os.environ["ASHR_LABS_BASE_URL"]
)

Security tip: Never commit API keys to git. Add .env and config.py to your .gitignore.

API Key Permissions

Operation	API Key Access
`get_dataset`	Yes
`list_datasets`	Yes
`create_run`	Yes
`get_run`	Yes
`list_runs`	Yes
`delete_run`	Yes
`create_request`	Yes
`get_request`	Yes
`list_requests`	Yes
`list_api_keys`	Yes
`revoke_api_key`	Yes
`create_api_key`	No (OAuth only)
Tenant management	No (OAuth only)
User management	No (OAuth only)
File uploads	No (OAuth only)

Managing API Keys

List Your API Keys

api_keys = client.list_api_keys()
for key in api_keys:
    print(f"Key: {key['key_prefix']}... | Name: {key['name']}")
    print(f"  Created: {key['created_at']}")
    print(f"  Active: {key['is_active']}")

Revoke an API Key

client.revoke_api_key(api_key_id=123)
print("API key revoked successfully")

Security Best Practices

1. Never Commit API Keys

# .gitignore
.env
config.py
*_secret*
*_key*

2. Use Environment Variables in CI/CD

# GitHub Actions example
jobs:
  test:
    runs-on: ubuntu-latest
    env:
      ASHR_LABS_API_KEY: ${{ secrets.ASHR_LABS_API_KEY }}
    steps:
      - run: python run_tests.py

3. Rotate Keys Regularly

Create new keys periodically and revoke old ones. Set expiration dates when creating keys through the web interface.

Troubleshooting

Invalid API Key

from ashr_labs import AshrLabsClient, AuthenticationError

try:
    client = AshrLabsClient(
        api_key="invalid_key",
        base_url="https://api.ashr.io/testing-platform-api"
    )
except ValueError as e:
    # The constructor rejects malformed keys (e.g. missing the tp_ prefix),
    # so no client object exists on this path.
    print(f"Invalid key format: {e}")
else:
    # If the key format is valid but the key itself is invalid:
    # this branch only runs when `client` was actually created, which avoids
    # a NameError after a failed construction.
    try:
        datasets = client.list_datasets(tenant_id=1)
    except AuthenticationError as e:
        print(f"Authentication failed: {e}")

API Reference

Complete reference for all classes and methods in the ashr SDK.

AshrLabsClient

The main client class for interacting with the ashr API.

Constructor

AshrLabsClient(api_key: str, base_url: str, timeout: int = 30)

Parameter	Type	Required	Default	Description
`api_key`	`str`	Yes	–	Your API key (must start with `tp_`)
`base_url`	`str`	Yes	–	Base URL of the API
`timeout`	`int`	No	`30`	Request timeout in seconds

Session Methods

init()

Initialize a session and validate authentication.

init() -> Session

Returns: Session – Session information containing user and tenant data.

Raises: AuthenticationError if the API key is invalid or expired.

session = client.init()
print(f"User ID: {session['user']['id']}")
print(f"Email: {session['user']['email']}")
print(f"Tenant ID: {session['tenant']['id']}")

Dataset Methods

get_dataset()

get_dataset(dataset_id: int, include_signed_urls: bool = False, url_expires_seconds: int = 3600) -> Dataset

Parameter	Type	Required	Default	Description
`dataset_id`	`int`	Yes	–	The ID of the dataset
`include_signed_urls`	`bool`	No	`False`	Include signed S3 URLs for media
`url_expires_seconds`	`int`	No	`3600`	URL expiration time in seconds

Raises: NotFoundError, AuthorizationError

list_datasets()

list_datasets(tenant_id: int, limit: int = 50, offset: int = 0, include_signed_urls: bool = False, url_expires_seconds: int = 3600) -> dict

Parameter	Type	Required	Default	Description
`tenant_id`	`int`	Yes	–	The tenant ID
`limit`	`int`	No	`50`	Maximum results to return
`offset`	`int`	No	`0`	Number of results to skip
`include_signed_urls`	`bool`	No	`False`	Include signed S3 URLs
`url_expires_seconds`	`int`	No	`3600`	URL expiration time

Returns: dict with keys status and datasets.

Run Methods

create_run()

create_run(tenant_id: int, dataset_id: int, result: dict, runner_id: int | None = None) -> Run

Parameter	Type	Required	Default	Description
`tenant_id`	`int`	Yes	–	The tenant ID
`dataset_id`	`int`	Yes	–	The dataset ID
`result`	`dict`	Yes	–	Run results (metrics, status, etc.)
`runner_id`	`int`	No	`None`	ID of user who ran the test

get_run()

get_run(run_id: int) -> Run

Raises: NotFoundError

list_runs()

list_runs(tenant_id: int | None = None, dataset_id: int | None = None, limit: int = 50, offset: int = 0) -> dict

delete_run()

delete_run(run_id: int) -> dict

Raises: NotFoundError

Request Methods

create_request()

create_request(tenant_id: int, requestor_id: int, request_name: str, request: dict, request_input_schema: dict | None = None) -> Request

Parameter	Type	Required	Default	Description
`tenant_id`	`int`	Yes	–	The tenant ID
`requestor_id`	`int`	Yes	–	ID of requesting user
`request_name`	`str`	Yes	–	Name/title for the request
`request`	`dict`	Yes	–	The request payload
`request_input_schema`	`dict`	No	`None`	JSON schema for validation

get_request()

get_request(request_id: int) -> Request

Raises: NotFoundError

list_requests()

list_requests(tenant_id: int, status: str | None = None, limit: int = 50, offset: int = 0) -> dict

API Key Methods

list_api_keys()

list_api_keys(include_inactive: bool = False) -> list[APIKey]

For security, only the key prefix is returned, not the full key.

revoke_api_key()

revoke_api_key(api_key_id: int) -> dict

Raises: NotFoundError

Utility Methods

health_check()

health_check() -> dict

status = client.health_check()
print(f"API Status: {status['status']}")

RunBuilder

A builder for incrementally constructing run result objects as an agent executes tests. Once complete, the result can be deployed via the client.

Constructor

RunBuilder()

No parameters. Creates a run in "pending" status.

start()

run.start() -> RunBuilder

Mark the run as started. Records the current timestamp. Returns self for chaining.

add_test()

run.add_test(test_id: str) -> TestBuilder

Create and register a new test within this run. Returns a TestBuilder for the individual test.

complete()

run.complete(status: str = "completed") -> RunBuilder

Mark the run as completed. status can be "completed" or "failed".

build()

run.build() -> dict

Serialize the full run result to a dict. Aggregate metrics are computed automatically from action results.

deploy()

run.deploy(client: AshrLabsClient, tenant_id: int, dataset_id: int, runner_id: int | None = None) -> dict

Build the result and submit it as a new run via the API.

TestBuilder

Builds a single test result incrementally. Returned by RunBuilder.add_test().

start()

test.start() -> TestBuilder

add_user_file()

test.add_user_file(file_path: str, description: str, action_index: int | None = None) -> TestBuilder

add_user_text()

test.add_user_text(text: str, description: str, action_index: int | None = None) -> TestBuilder

add_tool_call()

test.add_tool_call(expected: dict, actual: dict, match_status: str, divergence_notes: str | None = None, action_index: int | None = None) -> TestBuilder

Parameter	Type	Description
`expected`	`dict`	Expected tool call (`tool_name`, `arguments`)
`actual`	`dict`	Actual tool call made by the agent
`match_status`	`str`	`"exact"`, `"partial"`, or `"mismatch"`
`divergence_notes`	`str`	Notes explaining the divergence

add_agent_response()

test.add_agent_response(expected_response: dict, actual_response: dict, match_status: str, semantic_similarity: float | None = None, divergence_notes: str | None = None, action_index: int | None = None) -> TestBuilder

Parameter	Type	Description
`expected_response`	`dict`	The expected response content
`actual_response`	`dict`	The actual response from the agent
`match_status`	`str`	`"exact"`, `"similar"`, or `"divergent"`
`semantic_similarity`	`float`	Similarity score (0.0 to 1.0)

complete()

test.complete(status: str = "completed") -> TestBuilder

build()

test.build() -> dict

Returns a dict with test_id, status, action_results, started_at, and completed_at.

Data Types

class User(TypedDict, total=False):
    """User record, as found under the ``user`` key of a ``Session``.

    Declared with ``total=False``, so any key may be absent.
    """
    id: int
    created_at: str
    email: str
    name: str | None
    tenant: int  # presumably the ID of the owning tenant — confirm against the API
    is_active: bool

class Tenant(TypedDict, total=False):
    """Tenant record, as found under the ``tenant`` key of a ``Session``.

    Declared with ``total=False``, so any key may be absent.
    """
    id: int
    created_at: str
    name: str
    is_active: bool

class Session(TypedDict):
    """Result of ``AshrLabsClient.init()``: the authenticated user and tenant.

    Unlike the other types here this is a total TypedDict, so all three
    keys are required.
    """
    status: str
    user: User
    tenant: Tenant

class Dataset(TypedDict, total=False):
    """Dataset record returned by ``get_dataset()``.

    Declared with ``total=False``, so any key may be absent.
    """
    id: int
    created_at: str
    tenant: int
    creator: int
    name: str
    description: str | None
    # The examples in this guide read an "actions" list from this dict; each
    # action carries an "action_id" and, when signed URLs were requested,
    # may carry a "signed_url".
    dataset_source: dict[str, Any]

class Run(TypedDict, total=False):
    """Run record returned by ``create_run()`` and ``get_run()``.

    Declared with ``total=False``, so any key may be absent.
    """
    id: int
    created_at: str
    dataset: int
    tenant: int
    runner: int
    result: dict[str, Any]  # the run results dict submitted via create_run()

class Request(TypedDict, total=False):
    """Request record returned by ``create_request()`` and ``get_request()``.

    Declared with ``total=False``, so any key may be absent.
    """
    id: int
    created_at: str
    requestor_id: int
    requestor_tenant: int
    request_name: str
    request_status: str
    request_input_schema: dict[str, Any] | None
    request: dict[str, Any]  # the request payload passed to create_request()

class APIKey(TypedDict, total=False):
    """API key record; ``list_api_keys()`` returns a list of these.

    Declared with ``total=False``, so any key may be absent. Listings expose
    only ``key_prefix``, never the full key.
    """
    id: int
    key: str  # Only present on creation
    key_prefix: str
    name: str
    scopes: list[str]
    user_id: int
    tenant_id: int
    created_at: str
    last_used_at: str | None  # falsy when the key has never been used
    expires_at: str | None
    is_active: bool

Error Handling

Exception classes and patterns for handling errors gracefully.

Exception Hierarchy

AshrLabsError (base)
├── AuthenticationError (401)
├── AuthorizationError (403)
├── NotFoundError (404)
├── ValidationError (422)
├── RateLimitError (429)
└── ServerError (5xx)

Exception Classes

AshrLabsError

Base exception for all SDK errors.

Attributes:

message (str) – Error message
status_code (int | None) – HTTP status code if applicable
response (dict | None) – Raw response body if available

from ashr_labs import AshrLabsError

try:
    result = client.get_dataset(dataset_id=42)
except AshrLabsError as e:
    print(f"Error: {e.message}")
    print(f"Status Code: {e.status_code}")
    print(f"Response: {e.response}")

AuthenticationError

Raised when API key authentication fails (HTTP 401). Common causes: invalid, expired, or revoked API key.

AuthorizationError

Raised when the API key lacks permission (HTTP 403). Common causes: accessing resources in a different tenant, using API key for OAuth-only endpoints.

NotFoundError

Raised when a requested resource doesn't exist (HTTP 404).

ValidationError

Raised when request validation fails (HTTP 422). Common causes: missing required fields, invalid field types, schema validation failure.

RateLimitError

Raised when rate limits are exceeded (HTTP 429).

ServerError

Raised when the server encounters an internal error (HTTP 5xx).

Best Practices

Catch Specific Exceptions

from ashr_labs import (
    AuthenticationError,
    AuthorizationError,
    NotFoundError,
    ValidationError,
    RateLimitError,
    ServerError,
    AshrLabsError,
)

try:
    dataset = client.get_dataset(dataset_id=42)
except AuthenticationError:
    handle_auth_failure()
except AuthorizationError:
    show_permission_error()
except NotFoundError:
    show_not_found_message()
except ValidationError as e:
    show_validation_errors(e.response)
except RateLimitError:
    schedule_retry()
except ServerError:
    show_temporary_error()
except AshrLabsError as e:
    log_unexpected_error(e)

Retry with Exponential Backoff

import time
from ashr_labs import RateLimitError, ServerError

def robust_request(func, *args, max_retries=3, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying transient API failures.

    Only ``RateLimitError`` and ``ServerError`` are retried; every other
    exception propagates to the caller immediately. The wait between
    attempts is ``2 ** attempt`` seconds (1s, 2s, 4s, ...), and there is no
    wait after the final attempt.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        max_retries: Total number of attempts; must be at least 1.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns on the first successful attempt.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        RateLimitError, ServerError: The error from the last attempt once
            all attempts are exhausted.
    """
    if max_retries < 1:
        # Without this guard the loop never runs and there is no error to raise.
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            return func(*args, **kwargs)
        except (RateLimitError, ServerError):
            if attempt == max_retries - 1:
                raise  # out of attempts: surface the last transient error
            time.sleep(2 ** attempt)

# Usage
dataset = robust_request(client.get_dataset, dataset_id=42)

User-Friendly Error Messages

ERROR_MESSAGES = {
    AuthenticationError: "Your session has expired. Please log in again.",
    AuthorizationError: "You don't have permission to perform this action.",
    NotFoundError: "The requested item could not be found.",
    ValidationError: "Please check your input and try again.",
    RateLimitError: "Too many requests. Please wait a moment and try again.",
    ServerError: "We're experiencing technical difficulties. Please try again later.",
}

def get_user_message(error):
    """Return the friendly message for *error*, or a generic fallback.

    ``ERROR_MESSAGES`` is scanned in insertion order and the first exception
    type that *error* is an instance of wins.
    """
    candidates = (
        text
        for exc_type, text in ERROR_MESSAGES.items()
        if isinstance(error, exc_type)
    )
    return next(candidates, "An unexpected error occurred.")

Debugging Tips

import logging

# Enable debug logging for the SDK
logging.basicConfig(level=logging.DEBUG)

# Check the raw response in exceptions
try:
    result = client.get_dataset(dataset_id=42)
except AshrLabsError as e:
    print(f"Status: {e.status_code}")
    print(f"Message: {e.message}")
    print(f"Raw response: {e.response}")

Examples

Practical examples for common use cases with the ashr SDK.

Basic Setup

Environment-Based Configuration

import os
from ashr_labs import AshrLabsClient

def get_client():
    """Build an ``AshrLabsClient`` from environment variables.

    The API key is read from ``ASHR_LABS_API_KEY``. The base URL is read
    from ``ASHR_LABS_BASE_URL`` (the name used in the Authentication
    section); ``ASHR_LABS_URL`` is still accepted so existing setups keep
    working, and the hosted API URL is the final fallback.

    Returns:
        A configured ``AshrLabsClient``.

    Raises:
        RuntimeError: If ``ASHR_LABS_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("ASHR_LABS_API_KEY")
    if not api_key:
        raise RuntimeError("ASHR_LABS_API_KEY environment variable not set")

    base_url = (
        os.environ.get("ASHR_LABS_BASE_URL")
        or os.environ.get("ASHR_LABS_URL")
        or "https://api.ashr.io/testing-platform-api"
    )

    return AshrLabsClient(api_key=api_key, base_url=base_url)

client = get_client()

Initialize Session

session = client.init()

print(f"User: {session['user']['email']}")
print(f"Tenant: {session['tenant']['name']}")

# Use tenant_id from session for subsequent calls
tenant_id = session['tenant']['id']
datasets = client.list_datasets(tenant_id=tenant_id)

Working with Datasets

List All Datasets with Pagination

def get_all_datasets(client, tenant_id: int):
    """Collect every dataset of a tenant by walking the paginated listing.

    Pages of 50 are requested until one comes back shorter than the page
    size, which marks the end of the listing.
    """
    page_size = 50
    collected = []
    skip = 0

    while True:
        page = client.list_datasets(
            tenant_id=tenant_id, limit=page_size, offset=skip
        )["datasets"]
        collected += page

        if len(page) < page_size:
            return collected
        skip += page_size

datasets = get_all_datasets(client, tenant_id=1)
print(f"Total datasets: {len(datasets)}")

Download Dataset Media Files

import urllib.request
from pathlib import Path

def download_dataset_files(client, dataset_id: int, output_dir: str):
    """Download every signed media file of a dataset into ``output_dir``.

    The dataset is fetched with signed URLs valid for one hour. Each action
    that carries a ``signed_url`` is saved as ``<action_id>.bin``; actions
    without one are skipped.

    Args:
        client: Client used to fetch the dataset.
        dataset_id: ID of the dataset to download.
        output_dir: Target directory; created (with parents) if missing.

    Returns:
        list[Path]: Paths of the files written, in action order. Empty when
        the dataset has no downloadable actions.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    dataset = client.get_dataset(
        dataset_id=dataset_id,
        include_signed_urls=True,
        url_expires_seconds=3600
    )

    downloaded = []
    source = dataset.get("dataset_source", {})
    for action in source.get("actions", []):
        if "signed_url" not in action:
            continue
        filename = output_path / f"{action['action_id']}.bin"
        urllib.request.urlretrieve(action["signed_url"], filename)
        downloaded.append(filename)

    # Callers assign the result (files = download_dataset_files(...)), so
    # hand back what was written instead of None.
    return downloaded

files = download_dataset_files(client, dataset_id=42, output_dir="./downloads")

Building Runs with RunBuilder

Multiple Tests in a Single Run

from ashr_labs import RunBuilder

run = RunBuilder()
run.start()

# First test
test1 = run.add_test("pdf_extraction")
test1.start()
test1.add_user_file(file_path="data/invoice.pdf", description="Upload invoice")
test1.add_tool_call(
    expected={"tool_name": "extract_pdf", "arguments": {"file": "invoice.pdf"}},
    actual={"tool_name": "extract_pdf", "arguments": {"file": "invoice.pdf"}},
    match_status="exact",
)
test1.complete()

# Second test
test2 = run.add_test("summary_generation")
test2.start()
test2.add_user_text(text="Summarize the invoice", description="User request")
test2.add_agent_response(
    expected_response={"summary": "Invoice total: $500"},
    actual_response={"summary": "The invoice totals $500"},
    match_status="similar",
    semantic_similarity=0.93,
)
test2.complete()

run.complete()

# Inspect before deploying
result = run.build()
print(f"Total tests: {result['aggregate_metrics']['total_tests']}")
print(f"Tests passed: {result['aggregate_metrics']['tests_passed']}")

Inspect Before Deploy

result = run.build()
print(result["aggregate_metrics"])
# Example output (illustrative; actual values depend on the tests recorded in the run):
# {'total_tests': 1, 'tests_passed': 1, 'tests_failed': 0,
#  'average_similarity_score': None, 'total_tool_call_divergence': 1,
#  'total_response_divergence': 0}

# Only deploy if satisfied
if result["aggregate_metrics"]["tests_passed"] > 0:
    run.deploy(client, tenant_id=1, dataset_id=42)

CI/CD Integration

GitHub Actions Script

#!/usr/bin/env python3
import os, sys
from ashr_labs import AshrLabsClient, AshrLabsError

def main():
    """Run the test suite and submit the outcome as a run.

    Configuration comes from the environment: ``ASHR_LABS_API_KEY``,
    ``ASHR_LABS_TENANT_ID`` and ``ASHR_LABS_DATASET_ID`` are required. The
    base URL is read from ``ASHR_LABS_BASE_URL`` (the name used in the
    Authentication section), then ``ASHR_LABS_URL``, then the hosted API.

    Exits with status 0 when the tests succeeded and the results were
    submitted, and 1 when the tests failed or the submission failed.
    """
    api_key = os.environ["ASHR_LABS_API_KEY"]
    base_url = (
        os.environ.get("ASHR_LABS_BASE_URL")
        or os.environ.get("ASHR_LABS_URL")
        or "https://api.ashr.io/testing-platform-api"
    )
    tenant_id = int(os.environ["ASHR_LABS_TENANT_ID"])
    dataset_id = int(os.environ["ASHR_LABS_DATASET_ID"])

    client = AshrLabsClient(api_key=api_key, base_url=base_url)

    test_results = run_tests()  # Your test function

    # Keep the try body down to the one call that can raise an SDK error.
    try:
        run = client.create_run(
            tenant_id=tenant_id,
            dataset_id=dataset_id,
            result={
                "status": "passed" if test_results["success"] else "failed",
                "score": test_results["score"],
                "commit": os.environ.get("GITHUB_SHA", "unknown"),
                "branch": os.environ.get("GITHUB_REF_NAME", "unknown"),
            }
        )
    except AshrLabsError as e:
        # Report on stderr so CI logs separate failures from normal output.
        print(f"Failed to submit results: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Results submitted: Run #{run['id']}")
    sys.exit(0 if test_results["success"] else 1)

if __name__ == "__main__":
    main()

GitHub Actions Workflow

# .github/workflows/test.yml
name: Run Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install ashr-labs

      - name: Run tests and submit results
        env:
          ASHR_LABS_API_KEY: ${{ secrets.ASHR_LABS_API_KEY }}
          ASHR_LABS_TENANT_ID: ${{ secrets.TENANT_ID }}
          ASHR_LABS_DATASET_ID: ${{ vars.DATASET_ID }}
        run: python scripts/run_tests.py

Batch Operations

Batch Create Runs

from concurrent.futures import ThreadPoolExecutor, as_completed

def batch_create_runs(client, tenant_id, run_configs, max_workers=5):
    """Create one run per config concurrently on a thread pool.

    Each config must provide ``dataset_id`` and ``result``. Successful runs
    are collected in completion order; a failed creation is recorded as the
    string form of its exception and does not stop the others.

    Returns:
        dict: ``{"created": [...], "errors": [...]}``.
    """
    def submit_one(cfg):
        return client.create_run(
            tenant_id=tenant_id,
            dataset_id=cfg["dataset_id"],
            result=cfg["result"],
        )

    created, failures = [], []

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [pool.submit(submit_one, cfg) for cfg in run_configs]
        for done in as_completed(pending):
            try:
                outcome = done.result()
            except Exception as exc:
                failures.append(str(exc))
            else:
                created.append(outcome)

    return {"created": created, "errors": failures}

Export Runs to CSV

import csv

def export_runs_to_csv(client, tenant_id, output_file, page_size=100):
    """Export all runs of a tenant to a CSV file.

    Runs are fetched page by page until a page comes back shorter than
    ``page_size``. The file gets a header row followed by one row per run;
    a missing status is written as ``"unknown"`` and a missing score as
    ``"N/A"``.

    Args:
        client: Client used to list runs.
        tenant_id: Tenant whose runs are exported.
        output_file: Path of the CSV file to write (overwritten if present).
        page_size: Number of runs requested per API call; must be >= 1.

    Returns:
        int: Number of runs written.

    Raises:
        ValueError: If ``page_size`` is less than 1.
    """
    if page_size < 1:
        # A non-positive page size would make the pagination loop endless.
        raise ValueError("page_size must be at least 1")

    all_runs = []
    offset = 0

    while True:
        response = client.list_runs(
            tenant_id=tenant_id, limit=page_size, offset=offset
        )
        runs = response.get("runs", [])
        all_runs.extend(runs)
        if len(runs) < page_size:
            break
        offset += page_size

    # Pin the encoding so the output does not depend on the platform default.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["ID", "Dataset", "Created At", "Status", "Score"])
        for run in all_runs:
            result = run.get("result") or {}  # tolerate an explicit null result
            writer.writerow([
                run["id"], run["dataset"], run["created_at"],
                result.get("status", "unknown"), result.get("score", "N/A")
            ])

    return len(all_runs)

Monitoring and Reporting

Generate Weekly Report

from collections import defaultdict
from datetime import datetime, timedelta

def generate_weekly_report(client, tenant_id):
    """Summarize the runs a tenant created during the last 7 days.

    Only the first 1000 runs returned by ``list_runs`` are considered.
    ``created_at`` values are parsed as ISO 8601; a trailing ``Z`` is
    accepted, and timestamps without an offset are treated as UTC. All
    comparisons are timezone-aware, so timestamps carrying a non-UTC offset
    are placed correctly relative to the cutoff.

    Args:
        client: Client used to list runs.
        tenant_id: Tenant to report on.

    Returns:
        dict: ``total``, ``passed``, ``failed`` and ``scores`` (list), plus
        ``avg_score`` when at least one run reported a score.
    """
    # Local import keeps this snippet self-contained.
    from datetime import timezone

    response = client.list_runs(tenant_id=tenant_id, limit=1000)
    all_runs = response.get("runs", [])

    cutoff = datetime.now(timezone.utc) - timedelta(days=7)

    def _created_at(run):
        # Return the run's creation time as an aware datetime.
        stamp = datetime.fromisoformat(run["created_at"].replace("Z", "+00:00"))
        if stamp.tzinfo is None:
            stamp = stamp.replace(tzinfo=timezone.utc)
        return stamp

    recent_runs = [r for r in all_runs if _created_at(r) > cutoff]

    stats = {"total": len(recent_runs), "passed": 0, "failed": 0, "scores": []}
    for run in recent_runs:
        result = run.get("result", {})
        status = result.get("status")
        if status == "passed":
            stats["passed"] += 1
        elif status == "failed":
            stats["failed"] += 1
        if result.get("score") is not None:
            stats["scores"].append(result["score"])

    if stats["scores"]:
        stats["avg_score"] = sum(stats["scores"]) / len(stats["scores"])

    return stats

report = generate_weekly_report(client, tenant_id=1)
print(f"Total Runs: {report['total']}")
# A week without runs would otherwise raise ZeroDivisionError here.
if report["total"]:
    print(f"Pass Rate: {report['passed'] / report['total'] * 100:.1f}%")
else:
    print("Pass Rate: n/a (no runs in the last 7 days)")

Monitor API Key Health

from datetime import datetime, timedelta

def check_api_key_health(client):
    """Print a short activity summary of the API keys visible to ``client``.

    Lists keys including inactive ones, prints the active/inactive counts,
    then prints one line for every active key that has no ``last_used_at``.
    """
    active_keys = []
    inactive_count = 0
    for entry in client.list_api_keys(include_inactive=True):
        if entry["is_active"]:
            active_keys.append(entry)
        else:
            inactive_count += 1

    print(f"Active: {len(active_keys)}, Inactive: {inactive_count}")

    never_used = (entry for entry in active_keys if not entry.get("last_used_at"))
    for entry in never_used:
        print(f"  Never used: {entry['name']} ({entry['key_prefix']}...)")

check_api_key_health(client)