Error Handling

ModelRelay returns structured errors in a consistent format across all endpoints. This guide covers the error response format, error codes, retry strategies, and best practices for production applications.

Error Response Format

All API errors return a JSON object with this structure:

{
  "error": "NOT_FOUND",
  "code": "NOT_FOUND",
  "message": "Customer not found",
  "request_id": "req_abc123"
}
Field       Description
error       Error type (matches code)
code        Machine-readable error code for programmatic handling
message     Human-readable error description
request_id  Unique request identifier for debugging
fields      Array of field-level errors (validation errors only)
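
For TypeScript consumers, this body maps onto a simple interface like the sketch below. The field names come from the table above; the SDK's own exported types may differ, so treat this as illustrative.

// Illustrative shape of an API error body (not necessarily the SDK's exported type).
interface FieldError {
  field: string;
  message: string;
}

interface ErrorResponseBody {
  error: string;          // error type (matches code)
  code: string;           // machine-readable error code
  message: string;        // human-readable description
  request_id?: string;    // unique request identifier for debugging
  fields?: FieldError[];  // present on validation errors only
}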

Validation Errors

Validation errors include a fields array with specific field failures:

{
  "error": "VALIDATION_ERROR",
  "code": "VALIDATION_ERROR",
  "message": "Invalid request",
  "fields": [
    { "field": "email", "message": "Invalid email format" },
    { "field": "tier_id", "message": "Tier not found" }
  ]
}
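
A typical way to consume the fields array is to key the messages by field name and surface them next to the offending inputs. A minimal sketch, assuming the SDK's APIError class exposes fields as shown later in this guide:

// Sketch: collect per-field validation messages for display.
if (error instanceof APIError && error.isValidation()) {
  const byField: Record<string, string> = {};
  for (const f of error.fields ?? []) {
    byField[f.field] = f.message;
  }
  console.warn("Validation failed:", byField); // e.g. { email: "Invalid email format" }
}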

HTTP Status Codes

Status  Description                                       Retry?
400     Bad Request - Invalid input or validation error   No
401     Unauthorized - Invalid or missing credentials     No
402     Payment Required - Customer quota exceeded        No
403     Forbidden - Insufficient permissions              No
404     Not Found - Resource doesn’t exist                No
405     Method Not Allowed - HTTP method not supported    No
409     Conflict - Resource already exists                No
429     Too Many Requests - Rate limited                  Yes (with backoff)
500     Internal Server Error - Server-side failure       Yes (with backoff)
502     Bad Gateway - Upstream provider error             Yes (with backoff)
503     Service Unavailable - Temporary overload          Yes (with backoff)
504     Gateway Timeout - Upstream timeout                Yes (with backoff)
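
If you are working with raw HTTP responses rather than the SDKs, the retry column above reduces to a small predicate. A minimal sketch:

// Sketch: decide retryability from the HTTP status alone, per the table above.
function isRetryableStatus(status: number): boolean {
  return status === 429 || [500, 502, 503, 504].includes(status);
}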

Error Codes

General Errors

Code                 HTTP Status  Description
NOT_FOUND            404          Resource not found
VALIDATION_ERROR     400          Request validation failed
INVALID_INPUT        400          Invalid request parameters
UNAUTHORIZED         401          Invalid or missing authentication
FORBIDDEN            403          Insufficient permissions for this operation
CONFLICT             409          Resource conflict (e.g., duplicate key)
RATE_LIMIT           429          Rate limit exceeded
PAYMENT_REQUIRED     402          Customer usage quota exceeded
METHOD_NOT_ALLOWED   405          HTTP method not supported
INTERNAL_ERROR       500          Internal server error
SERVICE_UNAVAILABLE  503          Service temporarily unavailable

Identity/Provisioning Errors

Code                          HTTP Status  Description
IDENTITY_REQUIRED             400          Identity provider and subject required
EMAIL_REQUIRED                400          Email required for auto-provisioning
AUTO_PROVISION_DISABLED       403          Auto-provisioning disabled for project
AUTO_PROVISION_MISCONFIGURED  500          Auto-provision tier not configured correctly

Model Errors

Code                          HTTP Status  Description
MODEL_CAPABILITY_UNSUPPORTED  400          Model doesn’t support requested capability

SDK Error Types

The SDKs provide typed error classes for different failure modes:

TypeScript:

import {
  ModelRelay,
  APIError,
  TransportError,
  ConfigError,
  StreamTimeoutError,
  ErrorCodes,
} from "@modelrelay/sdk";

try {
  const response = await mr.responses.text(
    "claude-sonnet-4-20250514",
    "You are helpful.",
    "Hello!"
  );
} catch (error) {
  if (error instanceof APIError) {
    // Server returned an error response
    console.log("Status:", error.status);
    console.log("Code:", error.code);
    console.log("Message:", error.message);
    console.log("Request ID:", error.requestId);

    // Check specific error types
    if (error.isRateLimit()) {
      console.log("Rate limited, retry later");
    } else if (error.isUnauthorized()) {
      console.log("Invalid API key");
    } else if (error.isValidation()) {
      console.log("Validation errors:", error.fields);
    } else if (error.isNotFound()) {
      console.log("Resource not found");
    }
  } else if (error instanceof TransportError) {
    // Network or connection error
    console.log("Transport error:", error.message);
    console.log("Kind:", error.kind); // "timeout" | "request" | "response"
  } else if (error instanceof StreamTimeoutError) {
    // Streaming timeout
    console.log("Stream timeout:", error.streamKind); // "ttft" | "idle" | "total"
    console.log("Timeout:", error.timeoutMs, "ms");
  } else if (error instanceof ConfigError) {
    // SDK configuration error
    console.log("Config error:", error.message);
  }
}

Go:

import (
    "errors"
    "log"

    sdk "github.com/modelrelay/sdk-go"
)

response, err := client.Responses.Text(ctx, model, system, user)
if err != nil {
    var apiErr sdk.APIError
    if errors.As(err, &apiErr) {
        // Server returned an error response
        log.Printf("Status: %d", apiErr.Status)
        log.Printf("Code: %s", apiErr.Code)
        log.Printf("Message: %s", apiErr.Message)
        log.Printf("Request ID: %s", apiErr.RequestID)

        // Check specific error types
        switch apiErr.Code {
        case sdk.ErrCodeRateLimit:
            log.Println("Rate limited, retry later")
        case sdk.ErrCodeUnauthorized:
            log.Println("Invalid API key")
        case sdk.ErrCodeValidation, sdk.ErrCodeInvalidInput:
            log.Println("Validation errors:", apiErr.Fields)
        case sdk.ErrCodeNotFound:
            log.Println("Resource not found")
        case sdk.ErrCodePaymentRequired:
            log.Println("Customer quota exceeded")
        }
        return
    }

    var transportErr sdk.TransportError
    if errors.As(err, &transportErr) {
        log.Printf("Transport error: %s", transportErr.Message)
        return
    }

    var timeoutErr sdk.StreamTimeoutError
    if errors.As(err, &timeoutErr) {
        log.Printf("Stream %s timeout after %s", timeoutErr.Kind, timeoutErr.Timeout)
        return
    }

    var configErr sdk.ConfigError
    if errors.As(err, &configErr) {
        log.Printf("Config error: %s", configErr.Reason)
        return
    }

    log.Fatal(err)
}

Rust:

use modelrelay::{Client, ResponseBuilder, Error};

let result = ResponseBuilder::new()
    .model("claude-sonnet-4-20250514")
    .system("You are helpful.")
    .user("Hello!")
    .send(&client.responses())
    .await;

match result {
    Ok(response) => println!("{}", response.text()),
    Err(Error::Api(e)) => {
        // Server returned an error response
        eprintln!("Status: {}", e.status);
        eprintln!("Code: {}", e.code);
        eprintln!("Message: {}", e.message);
        if let Some(req_id) = &e.request_id {
            eprintln!("Request ID: {}", req_id);
        }

        // Check specific error types
        match e.code.as_str() {
            "RATE_LIMIT" => eprintln!("Rate limited, retry later"),
            "UNAUTHORIZED" => eprintln!("Invalid API key"),
            "VALIDATION_ERROR" | "INVALID_INPUT" => {
                eprintln!("Validation errors: {:?}", e.fields);
            }
            "NOT_FOUND" => eprintln!("Resource not found"),
            "PAYMENT_REQUIRED" => eprintln!("Customer quota exceeded"),
            _ => {}
        }
    }
    Err(Error::Transport(e)) => {
        // Network or connection error
        eprintln!("Transport error: {}", e.message);
    }
    Err(Error::StreamTimeout(e)) => {
        // Streaming timeout
        eprintln!("Stream {:?} timeout after {:?}", e.kind, e.timeout);
    }
    Err(Error::Config(e)) => {
        // SDK configuration error
        eprintln!("Config error: {}", e.reason);
    }
    Err(e) => return Err(e.into()),
}

Rate Limiting

When you exceed rate limits, the API returns a 429 status with a Retry-After header indicating how many seconds to wait:

HTTP/1.1 429 Too Many Requests
Retry-After: 30
Content-Type: application/json

{
  "error": "RATE_LIMIT",
  "code": "RATE_LIMIT",
  "message": "Rate limit exceeded. Retry after 30 seconds."
}
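
When you call the HTTP API directly, prefer the server's Retry-After hint over a fixed backoff schedule. The sketch below uses the standard Fetch API; whether the SDK error objects expose this header is not covered here, so check your SDK version before relying on it.

// Sketch (Fetch API, not the SDK): honor Retry-After when present,
// otherwise fall back to capped exponential backoff.
async function waitBeforeRetry(response: Response, attempt: number): Promise<void> {
  const header = response.headers.get("Retry-After");
  const seconds = header ? Number(header) : NaN;
  const delayMs = Number.isFinite(seconds)
    ? seconds * 1000
    : Math.min(1000 * 2 ** attempt, 30_000);
  await new Promise((resolve) => setTimeout(resolve, delayMs));
}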

Handling Rate Limits

TypeScript:

async function callWithRetry<T>(
  fn: () => Promise<T>,
  maxRetries = 3
): Promise<T> {
  let lastError: Error | undefined;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error as Error;

      if (error instanceof APIError && error.isRateLimit()) {
        // Exponential backoff: 1s, 2s, 4s, 8s...
        const delay = Math.pow(2, attempt) * 1000;
        console.log(`Rate limited, retrying in ${delay}ms...`);
        await new Promise((resolve) => setTimeout(resolve, delay));
        continue;
      }

      // Don't retry other errors
      throw error;
    }
  }

  throw lastError;
}

// Usage
const response = await callWithRetry(() =>
  mr.responses.text(model, system, user)
);

Go:

func callWithRetry[T any](ctx context.Context, fn func() (T, error), maxRetries int) (T, error) {
    var zero T
    var lastErr error

    for attempt := 0; attempt <= maxRetries; attempt++ {
        result, err := fn()
        if err == nil {
            return result, nil
        }
        lastErr = err

        var apiErr sdk.APIError
        if errors.As(err, &apiErr) && apiErr.IsRateLimit() {
            // Exponential backoff: 1s, 2s, 4s, 8s...
            delay := time.Duration(1<<attempt) * time.Second
            log.Printf("Rate limited, retrying in %s...", delay)

            select {
            case <-ctx.Done():
                return zero, ctx.Err()
            case <-time.After(delay):
                continue
            }
        }

        // Don't retry other errors
        return zero, err
    }

    return zero, lastErr
}

// Usage
response, err := callWithRetry(ctx, func() (*sdk.Response, error) {
    return client.Responses.Text(ctx, model, system, user)
}, 3)

Rust:

use std::time::Duration;
use tokio::time::sleep;
use modelrelay::{Client, ResponseBuilder, Error};

async fn call_with_retry<T, F, Fut>(
    mut f: F,
    max_retries: u32,
) -> Result<T, Error>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, Error>>,
{
    let mut last_error = None;

    for attempt in 0..=max_retries {
        match f().await {
            Ok(result) => return Ok(result),
            Err(Error::Api(ref e)) if e.code == "RATE_LIMIT" => {
                // Exponential backoff: 1s, 2s, 4s, 8s...
                let delay = Duration::from_secs(1 << attempt);
                eprintln!("Rate limited, retrying in {:?}...", delay);
                sleep(delay).await;
                last_error = Some(Error::Api(e.clone()));
            }
            Err(e) => return Err(e),
        }
    }

    Err(last_error.unwrap())
}

// Usage
let response = call_with_retry(
    || async {
        ResponseBuilder::new()
            .model(model)
            .system(system)
            .user(user)
            .send(&client.responses())
            .await
    },
    3,
).await?;

Retry Strategies

Retryable vs Non-Retryable Errors

Retry these errors (with exponential backoff):

  • 429 Rate Limit
  • 500 Internal Server Error
  • 502 Bad Gateway
  • 503 Service Unavailable
  • 504 Gateway Timeout
  • Network timeouts
  • Connection errors

Don’t retry these errors (see the classification sketch after this list):

  • 400 Bad Request (fix your request)
  • 401 Unauthorized (fix credentials)
  • 402 Payment Required (customer quota)
  • 403 Forbidden (permissions issue)
  • 404 Not Found (resource doesn’t exist)
  • 409 Conflict (resolve conflict first)
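
Expressed against the SDK error types from earlier, these rules collapse into a single predicate. A sketch in TypeScript (you may want to be stricter about which TransportError kinds you retry):

// Sketch: classify an error as retryable using the rules above.
function shouldRetry(error: unknown): boolean {
  if (error instanceof APIError) {
    // 429 plus the 5xx statuses listed above are worth retrying with backoff.
    return error.isRateLimit() || [500, 502, 503, 504].includes(error.status);
  }
  if (error instanceof TransportError) {
    // Network timeouts and connection failures are treated as retryable here.
    return true;
  }
  return false;
}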

Exponential Backoff

Use exponential backoff with jitter to avoid thundering herd problems:

function calculateBackoff(attempt: number, baseMs = 1000, maxMs = 30000): number {
  // Exponential: 1s, 2s, 4s, 8s, 16s...
  const exponential = Math.min(baseMs * Math.pow(2, attempt), maxMs);

  // Add jitter: ±25%
  const jitter = exponential * 0.25 * (Math.random() * 2 - 1);

  return Math.round(exponential + jitter);
}

SDK Built-in Retry

The SDKs support automatic retry with configurable policies:

TypeScript:

const mr = ModelRelay.fromSecretKey(apiKey, {
  retry: {
    maxRetries: 3,
    initialDelayMs: 1000,
    maxDelayMs: 30000,
    backoffMultiplier: 2,
  },
});

Go:

client, err := sdk.NewClientFromSecretKey(apiKey,
    sdk.WithRetryConfig(sdk.RetryConfig{
        MaxAttempts:        3,
        InitialDelay:       time.Second,
        MaxDelay:           30 * time.Second,
        BackoffMultiplier:  2.0,
    }),
)

Rust:

use std::time::Duration;
use modelrelay::{Client, RetryConfig};

let client = Client::from_secret_key(api_key)?
    .retry_config(RetryConfig {
        max_attempts: 3,
        initial_delay: Duration::from_secs(1),
        max_delay: Duration::from_secs(30),
        backoff_multiplier: 2.0,
    })
    .build()?;

Timeout Handling

Request Timeouts

Configure request timeouts to prevent hung connections:

TypeScript:

const mr = ModelRelay.fromSecretKey(apiKey, {
  connectTimeoutMs: 5000,  // 5s to establish connection
  requestTimeoutMs: 60000, // 60s for request completion
});

Go:

client, err := sdk.NewClientFromSecretKey(apiKey,
    sdk.WithConnectTimeout(5*time.Second),
    sdk.WithRequestTimeout(60*time.Second),
)

Rust:

use std::time::Duration;
use modelrelay::Client;

let client = Client::from_secret_key(api_key)?
    .connect_timeout(Duration::from_secs(5))   // 5s to establish connection
    .request_timeout(Duration::from_secs(60))  // 60s for request completion
    .build()?;

Stream Timeouts

For streaming responses, configure granular timeouts:

Timeout  Description             Recommended
TTFT     Time to first token     30-60s
Idle     Max gap between tokens  10-30s
Total    Total stream duration   2-5min

TypeScript:

const stream = await mr.responses.stream(req, {
  streamTimeouts: {
    ttftMs: 30000,   // 30s to first token
    idleMs: 15000,   // 15s max between tokens
    totalMs: 120000, // 2min total
  },
});

try {
  for await (const event of stream) {
    // Process events
  }
} catch (error) {
  if (error instanceof StreamTimeoutError) {
    switch (error.streamKind) {
      case "ttft":
        console.log("Model took too long to start responding");
        break;
      case "idle":
        console.log("Stream went silent");
        break;
      case "total":
        console.log("Response exceeded time limit");
        break;
    }
  }
}

Go:

stream, err := client.Responses.Stream(ctx, req,
    sdk.WithStreamTTFTTimeout(30*time.Second),
    sdk.WithStreamIdleTimeout(15*time.Second),
    sdk.WithStreamTotalTimeout(2*time.Minute),
)
if err != nil {
    log.Fatal(err)
}
defer stream.Close()

for {
    event, ok, err := stream.Next()
    if err != nil {
        var timeoutErr sdk.StreamTimeoutError
        if errors.As(err, &timeoutErr) {
            switch timeoutErr.Kind {
            case sdk.StreamTimeoutTTFT:
                log.Println("Model took too long to start responding")
            case sdk.StreamTimeoutIdle:
                log.Println("Stream went silent")
            case sdk.StreamTimeoutTotal:
                log.Println("Response exceeded time limit")
            }
        }
        break
    }
    if !ok {
        break
    }
    // Process event
}

Rust:

use std::time::Duration;
use futures_util::StreamExt;
use modelrelay::{ResponseBuilder, Error, StreamTimeoutKind};

let mut stream = ResponseBuilder::new()
    .model("claude-sonnet-4-20250514")
    .system("You are helpful.")
    .user("Hello!")
    .stream_ttft_timeout(Duration::from_secs(30))   // 30s to first token
    .stream_idle_timeout(Duration::from_secs(15))   // 15s max between tokens
    .stream_total_timeout(Duration::from_secs(120)) // 2min total
    .stream(&client.responses())
    .await?;

while let Some(event) = stream.next().await {
    match event {
        Ok(e) => {
            // Process event
        }
        Err(Error::StreamTimeout(e)) => {
            match e.kind {
                StreamTimeoutKind::TTFT => {
                    eprintln!("Model took too long to start responding");
                }
                StreamTimeoutKind::Idle => {
                    eprintln!("Stream went silent");
                }
                StreamTimeoutKind::Total => {
                    eprintln!("Response exceeded time limit");
                }
            }
            break;
        }
        Err(e) => return Err(e.into()),
    }
}

Best Practices

1. Always Check Error Types

Don’t just check whether a call failed (for example, error != nil in Go). Inspect the error type so you can handle each failure mode appropriately:

if (error instanceof APIError) {
  if (error.isRateLimit()) {
    // Retry with backoff
  } else if (error.isValidation()) {
    // Fix request and retry
  } else if (error.isUnauthorized()) {
    // Re-authenticate
  }
}

2. Log Request IDs

Always log the request_id from error responses. It gives support a way to trace the exact request when you report an issue:

console.error(`Request ${error.requestId} failed: ${error.message}`);

3. Set Appropriate Timeouts

Configure timeouts based on your use case; a sample chat-oriented profile follows the list:

  • Chat applications: Lower TTFT timeout, moderate total timeout
  • Batch processing: Higher timeouts, more retries
  • Real-time systems: Aggressive timeouts, fast failover
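
As an example, an interactive chat application might combine the client options and stream timeouts shown earlier roughly as follows; the values are illustrative, not recommendations:

// Sketch: one possible profile for a chat app, using options documented above.
const mr = ModelRelay.fromSecretKey(apiKey, {
  connectTimeoutMs: 3000,   // fail fast on connection problems
  requestTimeoutMs: 60000,  // allow long generations to complete
  retry: {
    maxRetries: 2,
    initialDelayMs: 500,
    maxDelayMs: 5000,
    backoffMultiplier: 2,
  },
});

const stream = await mr.responses.stream(req, {
  streamTimeouts: {
    ttftMs: 20000,   // users notice a slow start quickly
    idleMs: 10000,
    totalMs: 180000, // 3 minutes total
  },
});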

4. Implement Circuit Breakers

For high-throughput systems, use circuit breakers to prevent cascading failures:

class CircuitBreaker {
  private failures = 0;
  private lastFailure = 0;
  private readonly threshold = 5;
  private readonly resetMs = 30000;

  async call<T>(fn: () => Promise<T>): Promise<T> {
    if (this.isOpen()) {
      throw new Error("Circuit breaker open");
    }

    try {
      const result = await fn();
      this.failures = 0;
      return result;
    } catch (error) {
      this.failures++;
      this.lastFailure = Date.now();
      throw error;
    }
  }

  private isOpen(): boolean {
    if (this.failures < this.threshold) return false;
    if (Date.now() - this.lastFailure > this.resetMs) {
      this.failures = 0;
      return false;
    }
    return true;
  }
}

5. Handle Quota Exhaustion

When customers exceed their quota, provide clear feedback:

if (error instanceof APIError && error.code === ErrorCodes.PAYMENT_REQUIRED) {
  // Show upgrade prompt or usage dashboard
  showUpgradeModal(customer);
}

Next Steps