AI Cost Optimization: Managing API Expenses in Production
AI Costs Add Up Fast
AI API calls are priced per token. Without careful management, costs can spiral quickly. A chatbot handling thousands of conversations or a content system generating millions of descriptions needs cost optimization built in.
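To see how quickly this adds up, here is a back-of-the-envelope calculation. The per-million-token rates below are placeholders, not real pricing; substitute your provider's current rate card.

// Illustrative only: $inputRate and $outputRate are placeholder prices per 1M tokens.
$inputRate = 0.50;   // USD per 1M prompt tokens (placeholder)
$outputRate = 1.50;  // USD per 1M completion tokens (placeholder)

$promptTokens = 800;      // average prompt size
$completionTokens = 400;  // average response size
$requestsPerDay = 50000;

$costPerRequest = ($promptTokens / 1_000_000) * $inputRate
    + ($completionTokens / 1_000_000) * $outputRate;

// At these placeholder rates: ~$0.001 per request, ~$50/day, ~$1,500/month.
echo number_format($costPerRequest * $requestsPerDay, 2);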
Model Selection Strategy
class ModelSelector
{
    // Route cheap, high-volume tasks to smaller models; reserve the larger model for hard problems.
    public function select(string $taskType): string
    {
        return match ($taskType) {
            'simple_classification' => 'gpt-4o-mini',
            'code_generation' => 'gpt-4o',
            'content_creation' => 'gpt-4o-mini',
            'complex_reasoning' => 'gpt-4o',
            'embedding' => 'text-embedding-3-small',
            default => 'gpt-4o-mini',
        };
    }
}
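The biggest single lever is simply not sending easy tasks to the most expensive model. A call site might look like the sketch below, where $ai is an injected AI client (an assumption, not part of the class above):

$selector = new ModelSelector();

// Cheap model for a routine task; the expensive model is reserved for hard ones.
$model = $selector->select('simple_classification');

$result = $ai->generate($prompt, ['model' => $model]);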
Caching Aggressively
class CachedAIService
{
    // $this->ai is an injected client that wraps the underlying AI provider SDK.
    public function generate(string $prompt, array $options = []): string
    {
        $cacheKey = $this->buildCacheKey($prompt, $options);

        return Cache::remember($cacheKey, $this->getTTL($options), function () use ($prompt, $options) {
            return $this->ai->generate($prompt, $options);
        });
    }

    private function buildCacheKey(string $prompt, array $options): string
    {
        // Identical prompt + model + temperature => identical cache entry.
        return 'ai_' . md5(serialize([
            'prompt' => $prompt,
            'model' => $options['model'] ?? 'default',
            'temperature' => $options['temperature'] ?? 0.7,
        ]));
    }
}
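The getTTL() helper referenced above is not shown in the class. A minimal sketch, assuming you want near-deterministic output cached longer than creative output, could look like this:

// Hypothetical helper, not part of the original class.
private function getTTL(array $options): int
{
    $temperature = (float) ($options['temperature'] ?? 0.7);

    // Near-deterministic prompts are the most likely to repeat verbatim, so cache them longer.
    return $temperature <= 0.1 ? 60 * 60 * 24 : 60 * 60;
}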
Token Budgeting
class TokenBudget
{
    public function check(string $userId, int $estimatedTokens): bool
    {
        // Usage is tracked per user per calendar month.
        $used = Cache::get("tokens_used_{$userId}_" . now()->format('Y-m'), 0);
        $limit = $this->getUserLimit($userId);

        return ($used + $estimatedTokens) <= $limit;
    }

    public function record(string $userId, int $tokensUsed): void
    {
        $key = "tokens_used_{$userId}_" . now()->format('Y-m');

        Cache::increment($key, $tokensUsed);
    }
}
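Wiring the budget into the request path is straightforward. The sketch below assumes a TokenBudget instance ($budget), an AI client ($ai), and a per-plan getUserLimit() lookup that you define yourself; the 429 response is also an assumption.

// Rough 4-chars-per-token estimate (see Prompt Optimization below).
$estimated = (int) ceil(strlen($prompt) / 4);

if (! $budget->check($userId, $estimated)) {
    abort(429, 'Monthly AI token budget exceeded');
}

$response = $ai->generate($prompt);

// Record the real usage reported by the provider, not the estimate.
$budget->record($userId, $response->promptTokens + $response->completionTokens);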
Prompt Optimization
class PromptOptimizer
{
    public function optimize(string $prompt): string
    {
        // Remove redundant whitespace
        $prompt = preg_replace('/\s+/', ' ', $prompt);

        // Use shorter instruction variants
        $prompt = str_replace(
            'Please provide a detailed response that includes',
            'Include:',
            $prompt
        );

        return trim($prompt);
    }

    public function estimateTokens(string $text): int
    {
        // Rough estimate: 1 token ≈ 4 characters for English
        return (int) ceil(strlen($text) / 4);
    }
}
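The 4-characters-per-token rule is only an approximation; for exact counts use your provider's tokenizer. In practice the optimizer runs just before the request goes out, and the savings compound at volume:

$optimizer = new PromptOptimizer();

$optimized = $optimizer->optimize($rawPrompt);

// Compare the rough token estimate before and after trimming.
$saved = $optimizer->estimateTokens($rawPrompt) - $optimizer->estimateTokens($optimized);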
Batching Requests
class BatchProcessor
{
    public function processBatch(array $prompts): array
    {
        // Combine into single request where possible
        $combined = implode("\n---\n", array_map(
            fn ($p, $i) => "Item {$i}:\n{$p}",
            $prompts,
            array_keys($prompts)
        ));

        $response = $this->ai->generate("Process each item:\n{$combined}");

        return $this->parseMultipleResponses($response);
    }
}
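parseMultipleResponses() is not shown above. A minimal sketch, assuming the model is instructed to echo the same "Item N:" / "---" structure back, could split on the delimiter; in production you would also validate that the number of parts matches the number of prompts.

// Hypothetical implementation: relies on the model returning answers
// separated by the same "---" delimiter used in the combined prompt.
private function parseMultipleResponses(string $response): array
{
    $parts = preg_split('/\n-{3,}\n/', $response);

    // Strip the "Item N:" prefixes and surrounding whitespace.
    return array_map(
        fn (string $part) => trim(preg_replace('/^Item \d+:\s*/', '', trim($part))),
        $parts
    );
}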
Usage Monitoring
class UsageMonitor
{
    public function record(AIRequest $request, AIResponse $response): void
    {
        AIUsage::create([
            'model' => $request->model,
            'prompt_tokens' => $response->promptTokens,
            'completion_tokens' => $response->completionTokens,
            'cost' => $this->calculateCost($request->model, $response),
            'endpoint' => $request->endpoint,
        ]);
    }

    public function getDailyReport(): array
    {
        return AIUsage::whereDate('created_at', today())
            ->selectRaw('model, SUM(cost) as total_cost, COUNT(*) as requests')
            ->groupBy('model')
            ->get()
            ->toArray();
    }
}
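calculateCost() is referenced but not shown. A sketch could map each model to per-million-token rates; the figures below are illustrative and change frequently, so always check your provider's current pricing page.

// Illustrative rates in USD per 1M tokens -- verify against current provider pricing.
private const PRICING = [
    'gpt-4o'                 => ['input' => 2.50, 'output' => 10.00],
    'gpt-4o-mini'            => ['input' => 0.15, 'output' => 0.60],
    'text-embedding-3-small' => ['input' => 0.02, 'output' => 0.00],
];

private function calculateCost(string $model, AIResponse $response): float
{
    $rates = self::PRICING[$model] ?? self::PRICING['gpt-4o-mini'];

    return ($response->promptTokens / 1_000_000) * $rates['input']
        + ($response->completionTokens / 1_000_000) * $rates['output'];
}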
Conclusion
AI cost optimization requires a multi-pronged approach: model selection, caching, token budgets, prompt optimization, request batching, and monitoring. Build these practices into your AI architecture from the start.