SYS://VISION.ACTIVE
VIEWPORT.01
LAT 28.0222° N
SIGNAL.NOMINAL
VISION Loading
Back to Blog

AI Cost Optimization: Managing API Expenses in Production

Shane Barron

Shane Barron

Laravel Developer & AI Integration Specialist

AI Costs Add Up Fast

AI API calls are priced per token. Without careful management, costs can spiral quickly. A chatbot handling thousands of conversations or a content system generating millions of descriptions needs cost optimization built in.

Model Selection Strategy

/**
 * Chooses which model name to use for a given kind of task.
 */
class ModelSelector
{
    /** Model returned for any task type that has no entry in the table. */
    private const FALLBACK_MODEL = 'gpt-4o-mini';

    /** @var array<string, string> Task type => model name. */
    private const MODEL_BY_TASK = [
        'simple_classification' => 'gpt-4o-mini',
        'code_generation' => 'gpt-4o',
        'content_creation' => 'gpt-4o-mini',
        'complex_reasoning' => 'gpt-4o',
        'embedding' => 'text-embedding-3-small',
    ];

    /**
     * Look up the model for a task type.
     *
     * @param string $taskType One of the keys of MODEL_BY_TASK; anything else
     *                         resolves to the fallback model.
     * @return string The model name.
     */
    public function select(string $taskType): string
    {
        return self::MODEL_BY_TASK[$taskType] ?? self::FALLBACK_MODEL;
    }
}

Caching Aggressively

/**
 * Caches AI responses so that an identical request is only sent once.
 *
 * NOTE(review): `$this->ai` and `getTTL()` are used here but are not defined
 * in this snippet; they are presumably supplied by the surrounding
 * application code — confirm before use.
 */
class CachedAIService
{
    /**
     * Return the cached response for this prompt/options pair, generating
     * and storing it on a cache miss.
     *
     * @param string $prompt  Prompt text sent to the AI client.
     * @param array  $options Request options, passed through to the client
     *                        unchanged and also used to build the cache key.
     * @return string The generated (or cached) response.
     */
    public function generate(string $prompt, array $options = []): string
    {
        $cacheKey = $this->buildCacheKey($prompt, $options);

        return Cache::remember(
            $cacheKey,
            $this->getTTL($options),
            fn () => $this->ai->generate($prompt, $options),
        );
    }

    /**
     * Build a cache key covering the prompt and every request option.
     *
     * All options take part in the hash, so two requests that differ only in
     * e.g. `max_tokens` no longer share a cached response. Options are sorted
     * by key so that argument order does not change the key, and a missing or
     * null `model` / `temperature` hashes the same as the explicit defaults.
     *
     * Option values must be serializable (scalars and arrays are fine).
     *
     * @param string $prompt  Prompt text.
     * @param array  $options Request options.
     * @return string Key of the form `ai_<md5>`.
     */
    private function buildCacheKey(string $prompt, array $options): string
    {
        $options['model'] ??= 'default';
        $options['temperature'] ??= 0.7;
        ksort($options);

        return 'ai_' . md5(serialize([
            'prompt' => $prompt,
            'options' => $options,
        ]));
    }
}

Token Budgeting

/**
 * Tracks per-user token consumption in the cache, bucketed by calendar month.
 *
 * NOTE(review): `getUserLimit()` is called here but is not defined in this
 * snippet; it is presumably provided elsewhere — confirm.
 */
class TokenBudget
{
    /**
     * Tell whether a request of the estimated size fits in the user's
     * remaining budget for the current month.
     *
     * @param string $userId          User whose budget is checked.
     * @param int    $estimatedTokens Tokens the upcoming request is expected to use.
     * @return bool True when usage so far plus the estimate does not exceed the limit.
     */
    public function check(string $userId, int $estimatedTokens): bool
    {
        $used = Cache::get($this->usageKey($userId), 0);
        $limit = $this->getUserLimit($userId);

        return ($used + $estimatedTokens) <= $limit;
    }

    /**
     * Add consumed tokens to the user's counter for the current month.
     *
     * @param string $userId     User who consumed the tokens.
     * @param int    $tokensUsed Number of tokens to add.
     */
    public function record(string $userId, int $tokensUsed): void
    {
        $key = $this->usageKey($userId);

        // Some cache stores refuse to increment a key that does not exist yet,
        // so seed the counter first. add() is a no-op when the key is already
        // present. The expiry (one day past month end) lets old monthly
        // counters drop out of the cache instead of accumulating forever.
        Cache::add($key, 0, now()->endOfMonth()->addDay());
        Cache::increment($key, $tokensUsed);
    }

    /**
     * Cache key of the user's counter for the current calendar month.
     */
    private function usageKey(string $userId): string
    {
        return "tokens_used_{$userId}_" . now()->format('Y-m');
    }
}

Prompt Optimization

/**
 * Shrinks prompts before they are sent and gives a rough token estimate.
 */
class PromptOptimizer
{
    /**
     * Collapse whitespace runs and swap a known verbose instruction for a
     * shorter one.
     *
     * @param string $prompt Raw prompt text.
     * @return string The shortened prompt, trimmed at both ends.
     */
    public function optimize(string $prompt): string
    {
        // Remove redundant whitespace. The pattern runs in UTF-8 mode so that
        // multi-byte characters are never split and Unicode whitespace is
        // collapsed too. preg_replace() returns null on failure (e.g. input
        // that is not valid UTF-8); in that case retry in byte mode, and as a
        // last resort keep the prompt as it was rather than losing it.
        $prompt = preg_replace('/\s+/u', ' ', $prompt)
            ?? preg_replace('/\s+/', ' ', $prompt)
            ?? $prompt;

        // Use shorter instruction variants
        $prompt = str_replace(
            'Please provide a detailed response that includes',
            'Include:',
            $prompt
        );

        return trim($prompt);
    }

    /**
     * Estimate how many tokens a text will consume.
     *
     * @param string $text Text to measure.
     * @return int Estimated token count, rounded up; 0 for an empty string.
     */
    public function estimateTokens(string $text): int
    {
        // Rough estimate: 1 token ≈ 4 characters for English.
        // strlen() counts bytes, so multi-byte characters weigh more than one.
        return (int) ceil(strlen($text) / 4);
    }
}

Batching Requests

/**
 * Sends several prompts to the AI client as one combined request.
 *
 * NOTE(review): `$this->ai` and `parseMultipleResponses()` are used here but
 * are not defined in this snippet; they are presumably provided elsewhere —
 * confirm.
 */
class BatchProcessor
{
    /**
     * Process a set of prompts with a single AI call.
     *
     * @param array $prompts Prompt strings; each array key is used as the
     *                       item label in the combined request.
     * @return array Whatever parseMultipleResponses() extracts from the
     *               reply, or an empty array when no prompts were given.
     */
    public function processBatch(array $prompts): array
    {
        // Nothing to process: skip the request instead of paying for a call
        // that carries only the instruction header.
        if ($prompts === []) {
            return [];
        }

        // Combine into single request where possible
        $combined = implode("\n---\n", array_map(
            static fn ($p, $i) => "Item {$i}:\n{$p}",
            $prompts,
            array_keys($prompts)
        ));

        $response = $this->ai->generate("Process each item:\n{$combined}");

        return $this->parseMultipleResponses($response);
    }
}

Usage Monitoring

/**
 * Stores one usage row per AI call and summarises the current day's rows.
 *
 * NOTE(review): `calculateCost()` is called here but is not defined in this
 * snippet; it is presumably provided elsewhere — confirm.
 */
class UsageMonitor
{
    /**
     * Persist the usage figures of a single request/response pair.
     *
     * @param AIRequest  $request  Supplies the model and endpoint.
     * @param AIResponse $response Supplies the prompt and completion token counts.
     */
    public function record(AIRequest $request, AIResponse $response): void
    {
        $model = $request->model;

        $row = [
            'model' => $model,
            'prompt_tokens' => $response->promptTokens,
            'completion_tokens' => $response->completionTokens,
            'cost' => $this->calculateCost($model, $response),
            'endpoint' => $request->endpoint,
        ];

        AIUsage::create($row);
    }

    /**
     * Summarise today's usage rows per model.
     *
     * @return array One entry per model carrying `model`, `total_cost` and `requests`.
     */
    public function getDailyReport(): array
    {
        $todaysUsage = AIUsage::whereDate('created_at', today());

        $perModel = $todaysUsage
            ->selectRaw('model, SUM(cost) as total_cost, COUNT(*) as requests')
            ->groupBy('model');

        $rows = $perModel->get();

        return $rows->toArray();
    }
}

Conclusion

AI cost optimization requires a multi-pronged approach: model selection, caching, token budgets, prompt optimization, request batching, and usage monitoring. Build these practices into your AI architecture from the start.

Share this article
Shane Barron

Shane Barron

Strategic Technology Architect with 40 years of experience building production systems. Specializing in Laravel, AI integration, and enterprise architecture.

Need Help With Your Project?

I respond to all inquiries within 24 hours. Let's discuss how I can help build your production-ready system.

Get In Touch