Java9R: Spring AI Token Usage and Cost Tracking — Monitor and Control AI API Spend

Spring AI Token Usage and Cost Tracking — Monitor and Control AI API Spend

AI API costs are driven entirely by token usage. Without tracking, a single misbehaving feature can generate unexpected bills. This tutorial builds a complete token tracking system that records usage per user, per feature, and per model — and provides a dashboard endpoint and budget alerts.

Understanding Token Costs

OpenAI GPT-4o (June 2025 pricing):
  Input  : $2.50 per 1M tokens
  Output : $10.00 per 1M tokens

GPT-4o-mini:
  Input  : $0.15 per 1M tokens
  Output : $0.60 per 1M tokens

Claude Sonnet 4.5:
  Input  : $3.00 per 1M tokens
  Output : $15.00 per 1M tokens

Example calculation:
  1 question = 100 input + 300 output tokens = 400 tokens
  GPT-4o-mini: (100 × 0.15 + 300 × 0.60) / 1,000,000 = $0.000195 per call
  1,000 calls/day = $0.195/day = ~$5.85/month

Token Usage Entity

/**
 * JPA entity mapped to the {@code ai_token_usage} table. Each instance carries the
 * token counts and the computed USD cost of one AI call, tagged with the user,
 * feature and model it is attributed to.
 */
@Entity
@Table(name = "ai_token_usage")
public class AiTokenUsage {

    // Primary key; the value is generated rather than assigned by application code.
    @Id @GeneratedValue
    private Long id;

    // Attribution dimensions used for grouping in usage reports.
    private String  userId;
    private String  feature;      // "chat", "rag", "classify"
    private String  model;
    // Token counts for the call: prompt (input) side and generated (output) side.
    private long    inputTokens;
    private long    outputTokens;
    // Cost of the call in USD.
    // NOTE(review): double is inexact for money; consider BigDecimal if exact totals matter.
    private double  costUsd;
    // When the usage was recorded.
    // NOTE(review): LocalDateTime carries no zone — presumably server-local time; confirm.
    private LocalDateTime timestamp;

    // getters, setters...
}

Cost Calculation Service

/**
 * Converts token counts into an estimated USD cost using a static, per-model price table.
 *
 * <p>Models that are {@code null} or absent from the table are priced at zero, so callers
 * never fail because of an unrecognised model name. Note that this also means spend on
 * unlisted models is silently under-reported.
 */
@Component
public class TokenCostCalculator {

    /** Number of tokens that each price in {@link #PRICING} is quoted for. */
    private static final double TOKENS_PER_PRICE_UNIT = 1_000_000.0;

    /** Shared {input, output} price pair for unpriced models; never mutated. */
    private static final double[] UNPRICED = {0.0, 0.0};

    // Cost per 1M tokens (in USD), stored as {input, output}.
    // NOTE(review): these are a point-in-time snapshot. The claude-opus-4-5 and
    // claude-haiku-4-5 figures look like older-generation list prices — confirm
    // against the provider's current pricing page before relying on them.
    private static final Map<String, double[]> PRICING = Map.of(
        "gpt-4o",          new double[]{2.50, 10.00},
        "gpt-4o-mini",     new double[]{0.15,  0.60},
        "claude-opus-4-5", new double[]{15.00, 75.00},
        "claude-sonnet-4-5", new double[]{3.00, 15.00},
        "claude-haiku-4-5",  new double[]{0.25,  1.25}
    );

    /**
     * Estimates the cost of a single call.
     *
     * @param model        model identifier used as the key into the price table; may be
     *                     {@code null} or unknown, in which case the cost is {@code 0.0}
     * @param inputTokens  number of prompt (input) tokens
     * @param outputTokens number of generated (output) tokens
     * @return estimated cost in USD
     */
    public double calculateCost(String model, long inputTokens, long outputTokens) {
        // Map.of(...) maps throw NullPointerException when queried with a null key,
        // so the null case has to be handled before touching PRICING.
        double[] prices = (model == null) ? UNPRICED : PRICING.getOrDefault(model, UNPRICED);
        return (inputTokens * prices[0] + outputTokens * prices[1]) / TOKENS_PER_PRICE_UNIT;
    }
}

Token Tracking Advisor

import org.springframework.ai.chat.client.advisor.api.*;

/**
 * Around-advisor that stores one {@link AiTokenUsage} row for each ChatClient call
 * whose response carries usage metadata.
 *
 * <p>The {@code model}, {@code userId} and {@code feature} values are read from the
 * advisor context of the request; when a key is absent the defaults
 * {@code "unknown"}, {@code "anonymous"} and {@code "general"} are used.
 *
 * <p>Usage recording is best-effort: a failure while building or saving the record is
 * logged and never prevents the model response from reaching the caller.
 */
@Component
public class TokenTrackingAdvisor implements CallAroundAdvisor {

    private static final System.Logger LOG =
            System.getLogger(TokenTrackingAdvisor.class.getName());

    private final AiTokenUsageRepository repository;
    private final TokenCostCalculator    calculator;

    public TokenTrackingAdvisor(AiTokenUsageRepository repository,
                                 TokenCostCalculator calculator) {
        this.repository = repository;
        this.calculator = calculator;
    }

    /**
     * Delegates to the rest of the chain, then records the token usage of the response.
     *
     * @param request the advised request; its advisor context supplies the attribution values
     * @param chain   the remaining advisor chain
     * @return the response produced by the chain, unchanged
     */
    @Override
    public AdvisedResponse aroundCall(AdvisedRequest request, CallAroundAdvisorChain chain) {
        AdvisedResponse response = chain.nextAroundCall(request);

        try {
            recordUsage(request, response.response());
        } catch (RuntimeException e) {
            // The model call has already completed (and been billed); losing the answer
            // because bookkeeping failed would be worse than losing one usage row.
            LOG.log(System.Logger.Level.WARNING, "Failed to record AI token usage", e);
        }

        return response;
    }

    /** Builds and saves a usage record, or does nothing when no usage metadata is present. */
    private void recordUsage(AdvisedRequest request, ChatResponse chatResponse) {
        if (chatResponse == null || chatResponse.getMetadata() == null) {
            return;
        }
        Usage usage = chatResponse.getMetadata().getUsage();
        if (usage == null) {
            return;
        }

        String model   = (String) request.adviseContext().getOrDefault("model", "unknown");
        String userId  = (String) request.adviseContext().getOrDefault("userId", "anonymous");
        String feature = (String) request.adviseContext().getOrDefault("feature", "general");

        long inputTokens  = tokenCount(usage.getPromptTokens());
        long outputTokens = tokenCount(usage.getGenerationTokens());

        AiTokenUsage record = new AiTokenUsage();
        record.setUserId(userId);
        record.setFeature(feature);
        record.setModel(model);
        record.setInputTokens(inputTokens);
        record.setOutputTokens(outputTokens);
        record.setCostUsd(calculator.calculateCost(model, inputTokens, outputTokens));
        record.setTimestamp(LocalDateTime.now());

        repository.save(record);
    }

    /**
     * Converts a possibly-null boxed token count to a primitive, treating null as zero.
     * Accepting {@link Number} keeps this independent of whether the usage accessors
     * return {@code Long}, {@code Integer} or a primitive.
     */
    private static long tokenCount(Number count) {
        return (count == null) ? 0L : count.longValue();
    }

    @Override
    public int getOrder() { return Ordered.LOWEST_PRECEDENCE; }

    @Override
    public String getName() { return "TokenTrackingAdvisor"; }
}

Usage-Aware AI Service

/**
 * Thin service around a {@link ChatClient} that has the token tracking advisor
 * registered as a default advisor, so every question asked through it is tagged
 * with the user, feature and model for usage recording.
 */
@Service
public class TrackedAiService {

    // Advisor-context keys and the model label attached to each request.
    private static final String USER_ID_KEY = "userId";
    private static final String FEATURE_KEY = "feature";
    private static final String MODEL_KEY   = "model";
    private static final String MODEL_NAME  = "gpt-4o-mini";

    private final ChatClient chatClient;

    /**
     * @param builder         builder used to create the client
     * @param trackingAdvisor advisor installed as a default on the built client
     */
    public TrackedAiService(ChatClient.Builder builder,
                             TokenTrackingAdvisor trackingAdvisor) {
        ChatClient.Builder withTracking = builder.defaultAdvisors(trackingAdvisor);
        this.chatClient = withTracking.build();
    }

    /**
     * Sends a user question to the model and returns the text of the reply.
     *
     * @param userId   identifier passed to advisors under the "userId" key
     * @param feature  feature label passed to advisors under the "feature" key
     * @param question the user message
     * @return the response content
     */
    public String ask(String userId, String feature, String question) {
        return chatClient.prompt()
                .user(question)
                .advisors(spec -> spec
                        .param(USER_ID_KEY, userId)
                        .param(FEATURE_KEY, feature)
                        .param(MODEL_KEY,   MODEL_NAME))
                .call()
                .content();
    }
}

Usage Dashboard Controller

/**
 * Admin-only REST endpoint that summarises AI token usage and cost.
 */
@RestController
@RequestMapping("/admin/ai-usage")
@PreAuthorize("hasRole('ADMIN')")
public class AiUsageDashboardController {

    private final AiTokenUsageRepository repository;

    /**
     * The final field must be assigned in a constructor: without one the class does not
     * compile and the repository cannot be injected.
     *
     * @param repository source of the recorded usage rows
     */
    public AiUsageDashboardController(AiTokenUsageRepository repository) {
        this.repository = repository;
    }

    /**
     * Aggregates the usage rows recorded during the last 30 days.
     *
     * @return a map holding the period label, call count, total input/output tokens,
     *         the formatted total cost, and cost broken down by user and by feature
     */
    @GetMapping("/summary")
    public Map<String, Object> getSummary() {
        LocalDateTime since = LocalDateTime.now().minusDays(30);
        // NOTE(review): every row in the window is loaded into memory; for large
        // tables an aggregate query in the repository would be cheaper.
        List<AiTokenUsage> usages = repository.findByTimestampAfter(since);

        long totalInput    = usages.stream().mapToLong(AiTokenUsage::getInputTokens).sum();
        long totalOutput   = usages.stream().mapToLong(AiTokenUsage::getOutputTokens).sum();
        double totalCost   = usages.stream().mapToDouble(AiTokenUsage::getCostUsd).sum();

        Map<String, Double> costByUser = usages.stream()
                .collect(Collectors.groupingBy(AiTokenUsage::getUserId,
                        Collectors.summingDouble(AiTokenUsage::getCostUsd)));

        Map<String, Double> costByFeature = usages.stream()
                .collect(Collectors.groupingBy(AiTokenUsage::getFeature,
                        Collectors.summingDouble(AiTokenUsage::getCostUsd)));

        // Locale.ROOT keeps the decimal separator a '.' regardless of the server's
        // default locale, so the API output is stable across deployments.
        String formattedTotal = String.format(java.util.Locale.ROOT, "$%.4f", totalCost);

        return Map.of(
                "period",          "last 30 days",
                "totalCalls",      usages.size(),
                "totalInputTokens",  totalInput,
                "totalOutputTokens", totalOutput,
                "totalCostUsd",    formattedTotal,
                "costByUser",      costByUser,
                "costByFeature",   costByFeature
        );
    }
}

Output

GET /admin/ai-usage/summary

{
  "period": "last 30 days",
  "totalCalls": 2847,
  "totalInputTokens": 892430,
  "totalOutputTokens": 1247890,
  "totalCostUsd": "$1.2847",
  "costByUser": {
    "user-001": 0.342,
    "user-042": 0.178,
    "user-099": 0.091
  },
  "costByFeature": {
    "chat":     0.612,
    "rag":      0.491,
    "classify": 0.181
  }
}

Budget Alert Service

/**
 * Periodically compares today's accumulated AI spend with a fixed daily budget and
 * prints a warning at 80% of the budget and an alert once the budget is exceeded.
 *
 * <p>NOTE(review): the schedule only fires if scheduling is enabled in the application
 * (e.g. via {@code @EnableScheduling}) — confirm in the application configuration.
 */
@Service
public class BudgetAlertService {

    private static final double DAILY_BUDGET_USD = 10.0;

    /** Fraction of the daily budget at which the early warning is emitted. */
    private static final double WARNING_FRACTION = 0.8;

    private final AiTokenUsageRepository repository;

    /**
     * The final field must be assigned in a constructor: without one the class does not
     * compile and the repository cannot be injected.
     *
     * @param repository source of the aggregated cost figures
     */
    public BudgetAlertService(AiTokenUsageRepository repository) {
        this.repository = repository;
    }

    /**
     * Checks the spend accumulated since the start of the current day.
     *
     * <p>{@code @Scheduled} is a method-level annotation, so it is declared here rather
     * than on the class.
     */
    @Scheduled(cron = "0 0 * * * *")  // run every hour
    public void checkBudget() {
        LocalDateTime since = LocalDateTime.now().truncatedTo(ChronoUnit.DAYS);

        // An aggregate over zero rows may come back as null (presumably the case for a
        // SUM query before the first call of the day); treat that as no spend.
        Double summedCost = repository.sumCostSince(since);
        double todayCost = (summedCost == null) ? 0.0 : summedCost;

        if (todayCost > DAILY_BUDGET_USD * WARNING_FRACTION) {
            System.out.printf("WARNING: Daily AI spend at $%.4f (%.0f%% of $%.2f budget)%n",
                    todayCost, (todayCost / DAILY_BUDGET_USD) * 100, DAILY_BUDGET_USD);
            // Send Slack alert, email, or PagerDuty here
        }

        if (todayCost > DAILY_BUDGET_USD) {
            System.out.printf("ALERT: Daily AI budget EXCEEDED! $%.4f spent vs $%.2f limit%n",
                    todayCost, DAILY_BUDGET_USD);
            // Optionally disable AI features via feature flag
        }
    }
}

Key Points

Record token usage per call — chatResponse.getMetadata().getUsage() gives exact input and output token counts
Use a custom CallAroundAdvisor to intercept every ChatClient call and record usage without modifying service code
Track by feature (chat, RAG, classify) to identify which parts of your app drive the most cost
Set up daily budget alerts at an 80% threshold — waiting until 100% is too late if usage spikes
Prefer gpt-4o-mini or claude-haiku-4-5 for classification and extraction — they're 10-50x cheaper than flagship models with comparable accuracy on structured tasks

Spring AI Token Usage and Cost Tracking — Monitor and Control AI API Spend

Spring AI Token Usage and Cost Tracking — Monitor and Control AI API Spend

Understanding Token Costs

Token Usage Entity

Cost Calculation Service

Token Tracking Advisor

Usage-Aware AI Service

Usage Dashboard Controller

Output

Budget Alert Service

Key Points

Comments