Guides

OpenAI Agents SDK

Define convert_receipt as a function tool in the OpenAI Agents SDK and let your agent parse receipts autonomously — from watching an email inbox to writing expense reports.

Prerequisites

pip install openai requests

You need an OpenAI API key and a ReceiptConverter API key. Get the ReceiptConverter key from your dashboard.

Define the tool

OpenAI tool calling uses function tools whose parameters are described with JSON Schema. Here's the complete tool definition for ReceiptConverter, in the format the Chat Completions API expects:

import json
import os

import requests

# Read the key from the environment so a real secret never has to live in
# source control; the placeholder is only a fallback for quick local edits.
RECEIPT_API_KEY = os.environ.get("RECEIPT_API_KEY", "sk_live_your_key_here")
# Endpoint that receipts are POSTed to.
RECEIPT_API_URL = "https://receiptconverter.com/api/v1/convert"

# Tool definition handed to the model via the `tools` argument.
# The schema declares two optional string arguments; no "required" list is
# given, so the model may send either one.
convert_receipt_tool = dict(
    type="function",
    function=dict(
        name="convert_receipt",
        description=(
            "Parse a receipt or invoice image/PDF into structured JSON. "
            "Returns vendor name, date, total, subtotal, tip, currency, payment method, "
            "expense category, line items, and taxes. "
            "Accepts a public URL or a local file path."
        ),
        parameters=dict(
            type="object",
            properties=dict(
                url=dict(
                    type="string",
                    description="Public URL of the receipt image or PDF",
                ),
                file_path=dict(
                    type="string",
                    description="Absolute path to a local receipt file",
                ),
            ),
        ),
    ),
)

# Tool implementation — called when the agent invokes it
def run_convert_receipt(url: str = None, file_path: str = None) -> dict:
    """Send one receipt to the ReceiptConverter API and return its parsed data.

    ``url`` takes precedence when both arguments are given. Every failure is
    reported as ``{"error": "..."}`` instead of being raised, so the result can
    always be JSON-serialized and handed back to the model as a tool message.

    Args:
        url: Public URL of the receipt image or PDF.
        file_path: Path to a local receipt file, uploaded as multipart form data.

    Returns:
        The ``data`` member of the API's JSON response on success, otherwise a
        dict with an ``error`` key describing what went wrong.
    """
    # Validate first so a bad call never touches the network.
    if not url and not file_path:
        return {"error": "Provide either url or file_path"}

    headers = {"Authorization": f"Bearer {RECEIPT_API_KEY}"}

    try:
        if url:
            resp = requests.post(
                RECEIPT_API_URL,
                headers={**headers, "Content-Type": "application/json"},
                json={"url": url},
                timeout=60,
            )
        else:
            with open(file_path, "rb") as f:
                resp = requests.post(
                    RECEIPT_API_URL,
                    headers=headers,
                    files={"file": f},
                    timeout=60,
                )
    # RequestException derives from OSError, so it has to be matched first.
    except requests.RequestException as exc:
        return {"error": f"Request failed: {exc}"}
    except OSError as exc:
        return {"error": f"Could not read file {file_path}: {exc}"}

    if resp.status_code == 429:
        return {"error": "Rate limit reached. Upgrade at https://receiptconverter.com/pricing"}

    try:
        payload = resp.json()
    except ValueError:
        # Body is not JSON (for example an HTML error page from a proxy).
        payload = None

    if not resp.ok:
        detail = payload.get("error", resp.reason) if isinstance(payload, dict) else resp.reason
        return {"error": f"API error {resp.status_code}: {detail}"}

    if not isinstance(payload, dict) or "data" not in payload:
        return {"error": "Unexpected API response: missing 'data'"}

    return payload["data"]

Basic agent loop

Connect the tool to the OpenAI chat completions API with tool calling enabled:

from openai import OpenAI

# Module-level client used by run_agent. It is constructed with no arguments,
# so credentials come from the SDK's defaults (presumably the OPENAI_API_KEY
# environment variable — confirm for your setup).
client = OpenAI()

def run_agent(user_message: str, max_turns: int = 10) -> str:
    """Run a tool-calling conversation until the model gives a final answer.

    Each turn sends the running message list to the model. If the reply
    requests tool calls, each one is executed with ``run_convert_receipt`` and
    its JSON-encoded result is appended as a ``tool`` message before the next
    turn.

    Args:
        user_message: The user's request, including any receipt URL or path.
        max_turns: Upper bound on model round-trips, so a model that keeps
            requesting tools cannot loop forever.

    Returns:
        The text of the model's final message (empty string if it has none).

    Raises:
        RuntimeError: If the model is still requesting tools after
            ``max_turns`` round-trips.
    """
    messages = [{"role": "user", "content": user_message}]

    for _ in range(max_turns):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=[convert_receipt_tool],
            tool_choice="auto",
        )
        message = response.choices[0].message

        # A reply without tool calls is terminal whatever the finish_reason
        # ("stop", "length", "content_filter", ...). Looping again would just
        # resend the same messages.
        if not message.tool_calls:
            return message.content or ""

        messages.append(message)

        for tool_call in message.tool_calls:
            try:
                args = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError:
                args = None

            if isinstance(args, dict):
                # Pass only the declared parameters so an unexpected key from
                # the model cannot raise TypeError.
                result = run_convert_receipt(
                    url=args.get("url"),
                    file_path=args.get("file_path"),
                )
            else:
                result = {"error": "Tool arguments must be a JSON object"}

            # Every tool_call_id needs a matching tool message, even on error.
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })

    raise RuntimeError(f"Agent did not finish within {max_turns} turns")

# Example usage: the receipt URL is embedded in the prompt text and the model
# is expected to pass it to convert_receipt. Running this calls run_agent,
# which issues live API requests.
answer = run_agent(
    "Parse this receipt and tell me the vendor, total, and all line items: "
    "https://example.com/receipt.jpg"
)
print(answer)

Full example: expense categorization agent

A practical agent that parses a batch of receipts, categorizes them, and produces a structured expense report:

import json
from openai import OpenAI
from pathlib import Path

# Module-level client used by parse_expense_batch. It is constructed with no
# arguments, so credentials come from the SDK's defaults (presumably the
# OPENAI_API_KEY environment variable — confirm for your setup).
client = OpenAI()

# System message for the expense agent: it tells the model to call
# convert_receipt for every source and to reply with a JSON-only report, which
# parse_expense_batch then tries to decode with json.loads.
SYSTEM_PROMPT = """You are an expense management assistant.
When given receipt URLs or file paths, use the convert_receipt tool to parse each one.
After parsing all receipts, produce a JSON expense report with:
- Total spend by category
- A list of all receipts with: vendor, date, amount, category
- Any receipts that failed to parse (with reason)
Return only valid JSON."""

def parse_expense_batch(receipt_sources: list[str], max_turns: int = 10) -> dict:
    """Parse a list of receipt URLs/paths and return a categorized expense report.

    The sources are listed in a single user message. The model is then driven
    through tool-calling turns, each requested ``convert_receipt`` call being
    executed with ``run_convert_receipt``, until it produces a final message.

    Args:
        receipt_sources: Receipt URLs and/or local file paths.
        max_turns: Upper bound on model round-trips, so a model that keeps
            requesting tools cannot loop forever.

    Returns:
        The decoded JSON report, or ``{"raw": <model text>}`` when the final
        message is not valid JSON.

    Raises:
        RuntimeError: If the model is still requesting tools after
            ``max_turns`` round-trips.
    """
    sources_text = "\n".join(f"- {s}" for s in receipt_sources)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Parse these receipts and create an expense report:\n{sources_text}"},
    ]

    for _ in range(max_turns):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=[convert_receipt_tool],
            tool_choice="auto",
        )
        message = response.choices[0].message

        # A reply without tool calls is terminal whatever the finish_reason.
        # Looping again would just resend the same messages.
        if not message.tool_calls:
            try:
                return json.loads(message.content)
            except (TypeError, json.JSONDecodeError):
                # TypeError covers a final message with no text content.
                return {"raw": message.content}

        messages.append(message)

        for tool_call in message.tool_calls:
            try:
                args = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError:
                args = None

            if isinstance(args, dict):
                # Pass only the declared parameters so an unexpected key from
                # the model cannot raise TypeError.
                result = run_convert_receipt(
                    url=args.get("url"),
                    file_path=args.get("file_path"),
                )
            else:
                result = {"error": "Tool arguments must be a JSON object"}

            # Every tool_call_id needs a matching tool message, even on error.
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })

    raise RuntimeError(f"Expense agent did not finish within {max_turns} turns")

# Run it: a batch mixing three remote URLs with one local file path. Calling
# parse_expense_batch issues live API requests; the resulting report is
# pretty-printed as JSON.
receipts = [
    "https://example.com/starbucks.jpg",
    "https://example.com/uber.jpg",
    "https://example.com/hotel.pdf",
    "/Users/me/Downloads/dinner.jpg",
]

report = parse_expense_batch(receipts)
print(json.dumps(report, indent=2))

Error handling & retries

import time

def run_convert_receipt_with_retry(
    url: str = None,
    file_path: str = None,
    max_retries: int = 3,
) -> dict:
    """Call ``run_convert_receipt``, retrying only on rate-limit errors.

    Args:
        url: Public URL of the receipt image or PDF.
        file_path: Path to a local receipt file.
        max_retries: Maximum number of attempts in total.

    Returns:
        The parsed receipt on success. On failure, a dict with an ``error``
        key: the underlying error for non-retryable failures, a
        scanned-PDF hint with ``retryable: False``, or
        ``"Max retries exceeded"`` once every attempt was rate limited.
    """
    for attempt in range(max_retries):
        result = run_convert_receipt(url=url, file_path=file_path)

        if "error" not in result:
            return result

        # Coerce to text so the substring checks below cannot raise on a
        # non-string error value.
        error = str(result["error"])

        # Rate limit — wait and retry
        if "Rate limit" in error:
            # Exponential backoff (1s, 2s, ...). Skipped after the final
            # attempt, where sleeping would only delay the failure.
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
            continue

        # Scanned PDF — can't retry, return error
        if "scanned_pdf" in error:
            return {"error": "Scanned PDF — convert to JPG first", "retryable": False}

        # Other errors — don't retry
        return result

    return {"error": "Max retries exceeded"}

Related: LangChain guide · Python guide · API reference · MCP server