Guides
OpenAI Agents SDK
Define convert_receipt as a function tool in the OpenAI Agents SDK and let your agent parse receipts autonomously — from watching an email inbox to writing expense reports.
Prerequisites
pip install openai requests
You need an OpenAI API key and a ReceiptConverter API key. Get the ReceiptConverter key from your dashboard.
Define the tool
The OpenAI Agents SDK uses function tools defined with JSON Schema. Here's the complete tool definition for ReceiptConverter:
import json
import requests
# Placeholder credential: replace with a real ReceiptConverter key. Prefer loading
# it from an environment variable or secret store over hard-coding it in source.
RECEIPT_API_KEY = "sk_live_your_key_here"
# Endpoint that run_convert_receipt POSTs receipts to.
RECEIPT_API_URL = "https://receiptconverter.com/api/v1/convert"
# Function-tool schema handed to the model through the `tools` request parameter.
# Neither argument is listed as required, so the implementation decides which
# one to use.
convert_receipt_tool = dict(
    type="function",
    function=dict(
        name="convert_receipt",
        description="".join([
            "Parse a receipt or invoice image/PDF into structured JSON. ",
            "Returns vendor name, date, total, subtotal, tip, currency, ",
            "payment method, expense category, line items, and taxes. ",
            "Accepts a public URL or a local file path.",
        ]),
        parameters=dict(
            type="object",
            properties=dict(
                url=dict(
                    type="string",
                    description="Public URL of the receipt image or PDF",
                ),
                file_path=dict(
                    type="string",
                    description="Absolute path to a local receipt file",
                ),
            ),
        ),
    ),
)
# Tool implementation — called when the agent invokes it
def run_convert_receipt(url: str = None, file_path: str = None) -> dict:
    """Send a receipt to the ReceiptConverter API and return the parsed data.

    One source is used per call; ``url`` takes precedence when both are given.
    Failures are reported as ``{"error": "..."}`` dicts instead of being
    raised, so the result can always be serialized and handed back to the
    model as a tool response.

    Args:
        url: Public URL of the receipt image or PDF (sent as a JSON body).
        file_path: Path to a local receipt file (sent as a multipart upload).

    Returns:
        The ``data`` member of the API's JSON response on success, otherwise
        a dict with an ``error`` message.
    """
    # Validate first so a bad call never touches the network or filesystem.
    if not url and not file_path:
        return {"error": "Provide either url or file_path"}

    headers = {"Authorization": f"Bearer {RECEIPT_API_KEY}"}
    try:
        if url:
            resp = requests.post(
                RECEIPT_API_URL,
                headers={**headers, "Content-Type": "application/json"},
                json={"url": url},
                timeout=60,
            )
        else:
            # NOTE(review): file_path may come from model-generated arguments;
            # restrict it to an allowed directory if inputs are untrusted.
            # No explicit Content-Type here: requests has to generate the
            # multipart boundary itself.
            with open(file_path, "rb") as f:
                resp = requests.post(
                    RECEIPT_API_URL,
                    headers=headers,
                    files={"file": f},
                    timeout=60,
                )
    except requests.RequestException as exc:
        # Must precede OSError: RequestException is itself an OSError subclass.
        return {"error": f"Request failed: {exc}"}
    except OSError as exc:
        return {"error": f"Cannot read receipt file: {exc}"}

    if resp.status_code == 429:
        return {"error": "Rate limit reached. Upgrade at https://receiptconverter.com/pricing"}

    # Error pages from proxies/gateways are often not JSON, so decode
    # defensively instead of letting resp.json() raise.
    try:
        payload = resp.json()
    except ValueError:
        payload = None

    if not resp.ok:
        detail = payload.get("error") if isinstance(payload, dict) else None
        return {"error": f"API error {resp.status_code}: {detail or resp.reason}"}

    if not isinstance(payload, dict) or "data" not in payload:
        return {"error": "Unexpected API response: missing 'data'"}
    return payload["data"]


# Basic agent loop
Connect the tool to the OpenAI chat completions API with tool calling enabled:
from openai import OpenAI
client = OpenAI()
def run_agent(user_message: str, max_turns: int = 10) -> str:
    """Run a tool-calling conversation until the model gives a final answer.

    Each turn sends the running message list to the chat completions API with
    ``convert_receipt_tool`` enabled. When the model requests tool calls they
    are executed with ``run_convert_receipt`` and the results are appended as
    ``tool`` messages; any turn without tool calls is treated as final.

    Args:
        user_message: The user's request, sent as the first message.
        max_turns: Upper bound on model round-trips, so a model that keeps
            requesting tools cannot loop (and bill) forever.

    Returns:
        The text of the model's final message ("" if it has no content).

    Raises:
        RuntimeError: If no final answer is produced within ``max_turns``.
    """
    messages = [{"role": "user", "content": user_message}]

    for _ in range(max_turns):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=[convert_receipt_tool],
            tool_choice="auto",
        )
        message = response.choices[0].message

        # Done — return final answer. Keying on tool_calls rather than
        # finish_reason also terminates on "length" / "content_filter",
        # which would otherwise repeat the same request indefinitely.
        if not message.tool_calls:
            return message.content or ""

        # Agent wants to call a tool
        messages.append(message)
        for tool_call in message.tool_calls:
            try:
                args = json.loads(tool_call.function.arguments or "{}")
                result = run_convert_receipt(**args)
            except (json.JSONDecodeError, TypeError) as exc:
                # Malformed or unexpected arguments: report back to the model
                # so it can correct itself instead of crashing the loop.
                result = {"error": f"Invalid tool arguments: {exc}"}
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })

    raise RuntimeError(f"Agent did not finish within {max_turns} turns")
# Example usage: ask the agent about a single hosted receipt image.
answer = run_agent(
    " ".join((
        "Parse this receipt and tell me the vendor, total, and all line items:",
        "https://example.com/receipt.jpg",
    ))
)
print(answer)


# Full example: expense categorization agent
A practical agent that parses a batch of receipts, categorizes them, and produces a structured expense report:
import json
from openai import OpenAI
from pathlib import Path
client = OpenAI()
# System message for the expense agent: tells the model to call the
# convert_receipt tool for every source and to answer with JSON only.
SYSTEM_PROMPT = "\n".join((
    "You are an expense management assistant.",
    "When given receipt URLs or file paths, use the convert_receipt tool to parse each one.",
    "After parsing all receipts, produce a JSON expense report with:",
    "- Total spend by category",
    "- A list of all receipts with: vendor, date, amount, category",
    "- Any receipts that failed to parse (with reason)",
    "Return only valid JSON.",
))
def parse_expense_batch(receipt_sources: list[str], max_turns: int = None) -> dict:
    """Parse a list of receipt URLs/paths and return a categorized expense report.

    The sources are listed in a single user message and the model is left to
    call ``convert_receipt`` for each one. Tool results are fed back until the
    model replies without requesting a tool; that reply is parsed as JSON.

    Args:
        receipt_sources: Receipt URLs and/or local file paths.
        max_turns: Upper bound on model round-trips. Defaults to a budget
            derived from the batch size, since the model may parse only one
            receipt per turn.

    Returns:
        The decoded JSON report, or ``{"raw": <content>}`` when the final
        message is not valid JSON (including when it has no content).

    Raises:
        RuntimeError: If no final answer is produced within ``max_turns``.
    """
    sources_text = "\n".join(f"- {s}" for s in receipt_sources)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Parse these receipts and create an expense report:\n{sources_text}"},
    ]

    if max_turns is None:
        # Worst case is one receipt per turn plus the final report turn;
        # leave headroom for the model retrying a failed parse.
        max_turns = 2 * len(receipt_sources) + 5

    for _ in range(max_turns):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=[convert_receipt_tool],
            tool_choice="auto",
        )
        message = response.choices[0].message

        # A turn without tool calls is final. Keying on tool_calls rather than
        # finish_reason also terminates on "length" / "content_filter", which
        # would otherwise repeat the same request indefinitely.
        if not message.tool_calls:
            try:
                return json.loads(message.content)
            except (json.JSONDecodeError, TypeError):
                # TypeError covers a final message whose content is None.
                return {"raw": message.content}

        messages.append(message)
        for tool_call in message.tool_calls:
            try:
                args = json.loads(tool_call.function.arguments or "{}")
                result = run_convert_receipt(**args)
            except (json.JSONDecodeError, TypeError) as exc:
                # Malformed or unexpected arguments: report back to the model
                # so the failure can be listed in the report.
                result = {"error": f"Invalid tool arguments: {exc}"}
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })

    raise RuntimeError(f"Expense agent did not finish within {max_turns} turns")
# Run it: three hosted receipts plus one local file.
receipts = [
    *(
        f"https://example.com/{name}"
        for name in ("starbucks.jpg", "uber.jpg", "hotel.pdf")
    ),
    "/Users/me/Downloads/dinner.jpg",
]
report = parse_expense_batch(receipts)
print(json.dumps(report, indent=2))


# Error handling & retries
import time
def run_convert_receipt_with_retry(
    url: str = None,
    file_path: str = None,
    max_retries: int = 3,
) -> dict:
    """Call ``run_convert_receipt``, retrying only when it is rate limited.

    Args:
        url: Public URL of the receipt, forwarded unchanged.
        file_path: Local receipt path, forwarded unchanged.
        max_retries: Maximum number of attempts in total.

    Returns:
        The parsed receipt on success. On failure, a dict with an ``error``
        message: the original error for non-retryable failures, a
        ``retryable: False`` hint for scanned PDFs, or
        ``"Max retries exceeded"`` once every attempt was rate limited.
    """
    for attempt in range(max_retries):
        result = run_convert_receipt(url=url, file_path=file_path)
        if "error" not in result:
            return result

        error = result["error"]

        # Rate limit — wait and retry
        if "Rate limit" in error:
            # No backoff after the final attempt: nothing follows it, so
            # sleeping would only delay the failure.
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s, ...
            continue

        # Scanned PDF — can't retry, return error
        if "scanned_pdf" in error:
            return {"error": "Scanned PDF — convert to JPG first", "retryable": False}

        # Other errors — don't retry
        return result

    return {"error": "Max retries exceeded"}


# Related: LangChain guide · Python guide · API reference · MCP server