Error Handling

ModelRelay returns structured errors in a consistent format across all endpoints. This guide covers the error response format, error codes, retry strategies, and best practices for production applications.

Error Response Format

All API errors return a JSON object with this structure:

{
  "error": "NOT_FOUND",
  "code": "NOT_FOUND",
  "message": "Customer not found",
  "request_id": "req_abc123"
}
Field       Description
error       Error type (matches code)
code        Machine-readable error code for programmatic handling
message     Human-readable error description
request_id  Unique request identifier for debugging
fields      Array of field-level errors (validation errors only)
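
For TypeScript consumers, this body maps onto a simple interface like the sketch below. The field names come from the table above; the SDK's own exported types may differ, so treat this as illustrative.

// Illustrative shape of an API error body (not necessarily the SDK's exported type).
interface FieldError {
  field: string;
  message: string;
}

interface ErrorResponseBody {
  error: string;          // error type (matches code)
  code: string;           // machine-readable error code
  message: string;        // human-readable description
  request_id?: string;    // unique request identifier for debugging
  fields?: FieldError[];  // present on validation errors only
}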

Validation Errors

Validation errors include a fields array with specific field failures:

{
  "error": "VALIDATION_ERROR",
  "code": "VALIDATION_ERROR",
  "message": "Invalid request",
  "fields": [
    { "field": "email", "message": "Invalid email format" },
    { "field": "tier_id", "message": "Tier not found" }
  ]
}
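
A typical way to consume the fields array is to key the messages by field name and surface them next to the offending inputs. A minimal sketch, assuming the SDK's APIError class exposes fields as shown later in this guide:

// Sketch: collect per-field validation messages for display.
if (error instanceof APIError && error.isValidation()) {
  const byField: Record<string, string> = {};
  for (const f of error.fields ?? []) {
    byField[f.field] = f.message;
  }
  console.warn("Validation failed:", byField); // e.g. { email: "Invalid email format" }
}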

HTTP Status Codes

Status  Description                                       Retry?
400     Bad Request - Invalid input or validation error   No
401     Unauthorized - Invalid or missing credentials     No
402     Payment Required - Customer quota exceeded        No
403     Forbidden - Insufficient permissions              No
404     Not Found - Resource doesn’t exist                No
405     Method Not Allowed - HTTP method not supported    No
409     Conflict - Resource already exists                No
429     Too Many Requests - Rate limited                  Yes (with backoff)
500     Internal Server Error - Server-side failure       Yes (with backoff)
502     Bad Gateway - Upstream provider error             Yes (with backoff)
503     Service Unavailable - Temporary overload          Yes (with backoff)
504     Gateway Timeout - Upstream timeout                Yes (with backoff)
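
If you are working with raw HTTP responses rather than the SDKs, the retry column above reduces to a small predicate. A minimal sketch:

// Sketch: decide retryability from the HTTP status alone, per the table above.
function isRetryableStatus(status: number): boolean {
  return status === 429 || [500, 502, 503, 504].includes(status);
}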

Error Codes

General Errors

Code                 HTTP Status  Description
NOT_FOUND            404          Resource not found
VALIDATION_ERROR     400          Request validation failed
INVALID_INPUT        400          Invalid request parameters
UNAUTHORIZED         401          Invalid or missing authentication
FORBIDDEN            403          Insufficient permissions for this operation
CONFLICT             409          Resource conflict (e.g., duplicate key)
RATE_LIMIT           429          Rate limit exceeded
PAYMENT_REQUIRED     402          Customer usage quota exceeded
METHOD_NOT_ALLOWED   405          HTTP method not supported
INTERNAL_ERROR       500          Internal server error
SERVICE_UNAVAILABLE  503          Service temporarily unavailable

Identity/Provisioning Errors

Code                          HTTP Status  Description
IDENTITY_REQUIRED             400          Identity provider and subject required
EMAIL_REQUIRED                400          Email required for auto-provisioning
AUTO_PROVISION_DISABLED       403          Auto-provisioning disabled for project
AUTO_PROVISION_MISCONFIGURED  500          Auto-provision tier not configured correctly

Model Errors

Code                          HTTP Status  Description
MODEL_CAPABILITY_UNSUPPORTED  400          Model doesn’t support requested capability

SDK Error Types

The SDKs provide typed error classes for different failure modes:

TypeScript:

import {
  ModelRelay,
  APIError,
  TransportError,
  ConfigError,
  StreamTimeoutError,
  ErrorCodes,
} from "@modelrelay/sdk";

try {
  const response = await mr.responses.text(
    "claude-sonnet-4-20250514",
    "You are helpful.",
    "Hello!"
  );
} catch (error) {
  if (error instanceof APIError) {
    // Server returned an error response
    console.log("Status:", error.status);
    console.log("Code:", error.code);
    console.log("Message:", error.message);
    console.log("Request ID:", error.requestId);

    // Check specific error types
    if (error.isRateLimit()) {
      console.log("Rate limited, retry later");
    } else if (error.isUnauthorized()) {
      console.log("Invalid API key");
    } else if (error.isValidation()) {
      console.log("Validation errors:", error.fields);
    } else if (error.isNotFound()) {
      console.log("Resource not found");
    }
  } else if (error instanceof TransportError) {
    // Network or connection error
    console.log("Transport error:", error.message);
    console.log("Kind:", error.kind); // "timeout" | "request" | "response"
  } else if (error instanceof StreamTimeoutError) {
    // Streaming timeout
    console.log("Stream timeout:", error.streamKind); // "ttft" | "idle" | "total"
    console.log("Timeout:", error.timeoutMs, "ms");
  } else if (error instanceof ConfigError) {
    // SDK configuration error
    console.log("Config error:", error.message);
  }
}

Go:

import (
    "errors"
    "log"

    sdk "github.com/modelrelay/sdk-go"
)

response, err := client.Responses.Text(ctx, model, system, user)
if err != nil {
    var apiErr sdk.APIError
    if errors.As(err, &apiErr) {
        // Server returned an error response
        log.Printf("Status: %d", apiErr.Status)
        log.Printf("Code: %s", apiErr.Code)
        log.Printf("Message: %s", apiErr.Message)
        log.Printf("Request ID: %s", apiErr.RequestID)

        // Check specific error types
        switch apiErr.Code {
        case sdk.ErrCodeRateLimit:
            log.Println("Rate limited, retry later")
        case sdk.ErrCodeUnauthorized:
            log.Println("Invalid API key")
        case sdk.ErrCodeValidation, sdk.ErrCodeInvalidInput:
            log.Println("Validation errors:", apiErr.Fields)
        case sdk.ErrCodeNotFound:
            log.Println("Resource not found")
        case sdk.ErrCodePaymentRequired:
            log.Println("Customer quota exceeded")
        }
        return
    }

    var transportErr sdk.TransportError
    if errors.As(err, &transportErr) {
        log.Printf("Transport error: %s", transportErr.Message)
        return
    }

    var timeoutErr sdk.StreamTimeoutError
    if errors.As(err, &timeoutErr) {
        log.Printf("Stream %s timeout after %s", timeoutErr.Kind, timeoutErr.Timeout)
        return
    }

    var configErr sdk.ConfigError
    if errors.As(err, &configErr) {
        log.Printf("Config error: %s", configErr.Reason)
        return
    }

    log.Fatal(err)
}

Rust:

use modelrelay::{Client, ResponseBuilder, Error};

let result = ResponseBuilder::new()
    .model("claude-sonnet-4-20250514")
    .system("You are helpful.")
    .user("Hello!")
    .send(&client.responses())
    .await;

match result {
    Ok(response) => println!("{}", response.text()),
    Err(Error::Api(e)) => {
        // Server returned an error response
        eprintln!("Status: {}", e.status);
        eprintln!("Code: {}", e.code);
        eprintln!("Message: {}", e.message);
        if let Some(req_id) = &e.request_id {
            eprintln!("Request ID: {}", req_id);
        }

        // Check specific error types
        match e.code.as_str() {
            "RATE_LIMIT" => eprintln!("Rate limited, retry later"),
            "UNAUTHORIZED" => eprintln!("Invalid API key"),
            "VALIDATION_ERROR" | "INVALID_INPUT" => {
                eprintln!("Validation errors: {:?}", e.fields);
            }
            "NOT_FOUND" => eprintln!("Resource not found"),
            "PAYMENT_REQUIRED" => eprintln!("Customer quota exceeded"),
            _ => {}
        }
    }
    Err(Error::Transport(e)) => {
        // Network or connection error
        eprintln!("Transport error: {}", e.message);
    }
    Err(Error::StreamTimeout(e)) => {
        // Streaming timeout
        eprintln!("Stream {:?} timeout after {:?}", e.kind, e.timeout);
    }
    Err(Error::Config(e)) => {
        // SDK configuration error
        eprintln!("Config error: {}", e.reason);
    }
    Err(e) => return Err(e.into()),
}

Rate Limiting

When you exceed rate limits, the API returns a 429 status with a Retry-After header indicating how many seconds to wait:

HTTP/1.1 429 Too Many Requests
Retry-After: 30
Content-Type: application/json

{
  "error": "RATE_LIMIT",
  "code": "RATE_LIMIT",
  "message": "Rate limit exceeded. Retry after 30 seconds."
}
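
When you call the HTTP API directly, prefer the server's Retry-After hint over a fixed backoff schedule. The sketch below uses the standard Fetch API; whether the SDK error objects expose this header is not covered here, so check your SDK version before relying on it.

// Sketch (Fetch API, not the SDK): honor Retry-After when present,
// otherwise fall back to capped exponential backoff.
async function waitBeforeRetry(response: Response, attempt: number): Promise<void> {
  const header = response.headers.get("Retry-After");
  const seconds = header ? Number(header) : NaN;
  const delayMs = Number.isFinite(seconds)
    ? seconds * 1000
    : Math.min(1000 * 2 ** attempt, 30_000);
  await new Promise((resolve) => setTimeout(resolve, delayMs));
}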

Handling Rate Limits

TypeScript:

async function callWithRetry<T>(
  fn: () => Promise<T>,
  maxRetries = 3
): Promise<T> {
  let lastError: Error | undefined;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error as Error;

      if (error instanceof APIError && error.isRateLimit()) {
        // Exponential backoff: 1s, 2s, 4s, 8s...
        const delay = Math.pow(2, attempt) * 1000;
        console.log(`Rate limited, retrying in ${delay}ms...`);
        await new Promise((resolve) => setTimeout(resolve, delay));
        continue;
      }

      // Don't retry other errors
      throw error;
    }
  }

  throw lastError;
}

// Usage
const response = await callWithRetry(() =>
  mr.responses.text(model, system, user)
);

Go:

func callWithRetry[T any](ctx context.Context, fn func() (T, error), maxRetries int) (T, error) {
    var zero T
    var lastErr error

    for attempt := 0; attempt <= maxRetries; attempt++ {
        result, err := fn()
        if err == nil {
            return result, nil
        }
        lastErr = err

        var apiErr sdk.APIError
        if errors.As(err, &apiErr) && apiErr.IsRateLimit() {
            // Exponential backoff: 1s, 2s, 4s, 8s...
            delay := time.Duration(1<<attempt) * time.Second
            log.Printf("Rate limited, retrying in %s...", delay)

            select {
            case <-ctx.Done():
                return zero, ctx.Err()
            case <-time.After(delay):
                continue
            }
        }

        // Don't retry other errors
        return zero, err
    }

    return zero, lastErr
}

// Usage
response, err := callWithRetry(ctx, func() (*sdk.Response, error) {
    return client.Responses.Text(ctx, model, system, user)
}, 3)

Rust:

use std::time::Duration;
use tokio::time::sleep;
use modelrelay::{Client, ResponseBuilder, Error};

async fn call_with_retry<T, F, Fut>(
    mut f: F,
    max_retries: u32,
) -> Result<T, Error>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, Error>>,
{
    let mut last_error = None;

    for attempt in 0..=max_retries {
        match f().await {
            Ok(result) => return Ok(result),
            Err(Error::Api(ref e)) if e.code == "RATE_LIMIT" => {
                // Exponential backoff: 1s, 2s, 4s, 8s...
                let delay = Duration::from_secs(1 << attempt);
                eprintln!("Rate limited, retrying in {:?}...", delay);
                sleep(delay).await;
                last_error = Some(Error::Api(e.clone()));
            }
            Err(e) => return Err(e),
        }
    }

    Err(last_error.unwrap())
}

// Usage
let response = call_with_retry(
    || async {
        ResponseBuilder::new()
            .model(model)
            .system(system)
            .user(user)
            .send(&client.responses())
            .await
    },
    3,
).await?;

Retry Strategies

Retryable vs Non-Retryable Errors

Retry these errors (with exponential backoff):

  • 429 Rate Limit
  • 500 Internal Server Error
  • 502 Bad Gateway
  • 503 Service Unavailable
  • 504 Gateway Timeout
  • Network timeouts
  • Connection errors

Don’t retry these errors (see the classification sketch after this list):

  • 400 Bad Request (fix your request)
  • 401 Unauthorized (fix credentials)
  • 402 Payment Required (customer quota)
  • 403 Forbidden (permissions issue)
  • 404 Not Found (resource doesn’t exist)
  • 409 Conflict (resolve conflict first)
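
Expressed against the SDK error types from earlier, these rules collapse into a single predicate. A sketch in TypeScript (you may want to be stricter about which TransportError kinds you retry):

// Sketch: classify an error as retryable using the rules above.
function shouldRetry(error: unknown): boolean {
  if (error instanceof APIError) {
    // 429 plus the 5xx statuses listed above are worth retrying with backoff.
    return error.isRateLimit() || [500, 502, 503, 504].includes(error.status);
  }
  if (error instanceof TransportError) {
    // Network timeouts and connection failures are treated as retryable here.
    return true;
  }
  return false;
}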

Exponential Backoff

Use exponential backoff with jitter to avoid thundering herd problems:

function calculateBackoff(attempt: number, baseMs = 1000, maxMs = 30000): number {
  // Exponential: 1s, 2s, 4s, 8s, 16s...
  const exponential = Math.min(baseMs * Math.pow(2, attempt), maxMs);

  // Add jitter: ±25%
  const jitter = exponential * 0.25 * (Math.random() * 2 - 1);

  return Math.round(exponential + jitter);
}

SDK Built-in Retry

The SDKs support automatic retry with configurable policies:

TypeScript:

const mr = ModelRelay.fromSecretKey(apiKey, {
  retry: {
    maxRetries: 3,
    initialDelayMs: 1000,
    maxDelayMs: 30000,
    backoffMultiplier: 2,
  },
});

Go:

client, err := sdk.NewClientFromSecretKey(apiKey,
    sdk.WithRetryConfig(sdk.RetryConfig{
        MaxAttempts:        3,
        InitialDelay:       time.Second,
        MaxDelay:           30 * time.Second,
        BackoffMultiplier:  2.0,
    }),
)

Rust:

use std::time::Duration;
use modelrelay::{Client, RetryConfig};

let client = Client::from_secret_key(api_key)?
    .retry_config(RetryConfig {
        max_attempts: 3,
        initial_delay: Duration::from_secs(1),
        max_delay: Duration::from_secs(30),
        backoff_multiplier: 2.0,
    })
    .build()?;

Timeout Handling

Request Timeouts

Configure request timeouts to prevent hung connections:

TypeScript:

const mr = ModelRelay.fromSecretKey(apiKey, {
  connectTimeoutMs: 5000,  // 5s to establish connection
  requestTimeoutMs: 60000, // 60s for request completion
});

Go:

client, err := sdk.NewClientFromSecretKey(apiKey,
    sdk.WithConnectTimeout(5*time.Second),
    sdk.WithRequestTimeout(60*time.Second),
)

Rust:

use std::time::Duration;
use modelrelay::Client;

let client = Client::from_secret_key(api_key)?
    .connect_timeout(Duration::from_secs(5))   // 5s to establish connection
    .request_timeout(Duration::from_secs(60))  // 60s for request completion
    .build()?;

Stream Timeouts

For streaming responses, configure granular timeouts:

Timeout  Description             Recommended
TTFT     Time to first token     30-60s
Idle     Max gap between tokens  10-30s
Total    Total stream duration   2-5min

TypeScript:

const stream = await mr.responses.stream(req, {
  streamTimeouts: {
    ttftMs: 30000,   // 30s to first token
    idleMs: 15000,   // 15s max between tokens
    totalMs: 120000, // 2min total
  },
});

try {
  for await (const event of stream) {
    // Process events
  }
} catch (error) {
  if (error instanceof StreamTimeoutError) {
    switch (error.streamKind) {
      case "ttft":
        console.log("Model took too long to start responding");
        break;
      case "idle":
        console.log("Stream went silent");
        break;
      case "total":
        console.log("Response exceeded time limit");
        break;
    }
  }
}

Go:

stream, err := client.Responses.Stream(ctx, req,
    sdk.WithStreamTTFTTimeout(30*time.Second),
    sdk.WithStreamIdleTimeout(15*time.Second),
    sdk.WithStreamTotalTimeout(2*time.Minute),
)
if err != nil {
    log.Fatal(err)
}
defer stream.Close()

for {
    event, ok, err := stream.Next()
    if err != nil {
        var timeoutErr sdk.StreamTimeoutError
        if errors.As(err, &timeoutErr) {
            switch timeoutErr.Kind {
            case sdk.StreamTimeoutTTFT:
                log.Println("Model took too long to start responding")
            case sdk.StreamTimeoutIdle:
                log.Println("Stream went silent")
            case sdk.StreamTimeoutTotal:
                log.Println("Response exceeded time limit")
            }
        }
        break
    }
    if !ok {
        break
    }
    // Process event
}

Rust:

use std::time::Duration;
use futures_util::StreamExt;
use modelrelay::{ResponseBuilder, Error, StreamTimeoutKind};

let mut stream = ResponseBuilder::new()
    .model("claude-sonnet-4-20250514")
    .system("You are helpful.")
    .user("Hello!")
    .stream_ttft_timeout(Duration::from_secs(30))   // 30s to first token
    .stream_idle_timeout(Duration::from_secs(15))   // 15s max between tokens
    .stream_total_timeout(Duration::from_secs(120)) // 2min total
    .stream(&client.responses())
    .await?;

while let Some(event) = stream.next().await {
    match event {
        Ok(e) => {
            // Process event
        }
        Err(Error::StreamTimeout(e)) => {
            match e.kind {
                StreamTimeoutKind::TTFT => {
                    eprintln!("Model took too long to start responding");
                }
                StreamTimeoutKind::Idle => {
                    eprintln!("Stream went silent");
                }
                StreamTimeoutKind::Total => {
                    eprintln!("Response exceeded time limit");
                }
            }
            break;
        }
        Err(e) => return Err(e.into()),
    }
}

Best Practices

1. Always Check Error Types

Don’t just check whether a call failed (for example, error != nil in Go). Inspect the error type so you can handle each failure mode appropriately:

if (error instanceof APIError) {
  if (error.isRateLimit()) {
    // Retry with backoff
  } else if (error.isValidation()) {
    // Fix request and retry
  } else if (error.isUnauthorized()) {
    // Re-authenticate
  }
}

2. Log Request IDs

Always log the request_id from error responses. It gives support a way to trace the exact request when you report an issue:

console.error(`Request ${error.requestId} failed: ${error.message}`);

3. Set Appropriate Timeouts

Configure timeouts based on your use case; a sample chat-oriented profile follows the list:

  • Chat applications: Lower TTFT timeout, moderate total timeout
  • Batch processing: Higher timeouts, more retries
  • Real-time systems: Aggressive timeouts, fast failover
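
As an example, an interactive chat application might combine the client options and stream timeouts shown earlier roughly as follows; the values are illustrative, not recommendations:

// Sketch: one possible profile for a chat app, using options documented above.
const mr = ModelRelay.fromSecretKey(apiKey, {
  connectTimeoutMs: 3000,   // fail fast on connection problems
  requestTimeoutMs: 60000,  // allow long generations to complete
  retry: {
    maxRetries: 2,
    initialDelayMs: 500,
    maxDelayMs: 5000,
    backoffMultiplier: 2,
  },
});

const stream = await mr.responses.stream(req, {
  streamTimeouts: {
    ttftMs: 20000,   // users notice a slow start quickly
    idleMs: 10000,
    totalMs: 180000, // 3 minutes total
  },
});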

4. Implement Circuit Breakers

For high-throughput systems, use circuit breakers to prevent cascading failures:

class CircuitBreaker {
  private failures = 0;
  private lastFailure = 0;
  private readonly threshold = 5;
  private readonly resetMs = 30000;

  async call<T>(fn: () => Promise<T>): Promise<T> {
    if (this.isOpen()) {
      throw new Error("Circuit breaker open");
    }

    try {
      const result = await fn();
      this.failures = 0;
      return result;
    } catch (error) {
      this.failures++;
      this.lastFailure = Date.now();
      throw error;
    }
  }

  private isOpen(): boolean {
    if (this.failures < this.threshold) return false;
    if (Date.now() - this.lastFailure > this.resetMs) {
      this.failures = 0;
      return false;
    }
    return true;
  }
}

5. Handle Quota Exhaustion

When customers exceed their quota, provide clear feedback:

if (error instanceof APIError && error.code === ErrorCodes.PAYMENT_REQUIRED) {
  // Show upgrade prompt or usage dashboard
  showUpgradeModal(customer);
}

Next Steps