Java SpringAI

Spring AI with Spring Cloud — Distribute AI Features Across Microservices

Spring AI with Spring Cloud — Distribute AI Features Across Microservices

In a microservices architecture, AI capabilities should be shared services rather than repeated per service. This tutorial shows how to build a centralized AI microservice discoverable via Spring Cloud Eureka, load-balanced with Spring Cloud LoadBalancer, and called through Spring Cloud OpenFeign — so every service in your ecosystem can use AI without embedding provider configuration everywhere.

Architecture — Centralized AI Service

Eureka Server (service registry)
         │
    ┌────┴────┐
    │         │
[AI Service]  [Other Microservices]
(spring-ai)   (product-service, order-service, notification-service)
    │               │
    │   ←── Feign ──┘
    │
OpenAI / Claude / Gemini

AI Service — Standalone Spring Boot App

<!-- ai-service/pom.xml -->
<!-- Spring MVC + embedded server: needed to serve the @RestController endpoints on server.port -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring AI OpenAI starter: provides the ChatClient.Builder injected into the controller -->
<dependency>
    <groupId>org.springframework.ai</groupId>
    <artifactId>spring-ai-openai-spring-boot-starter</artifactId>
</dependency>
<!-- Eureka client: registers this app with the service registry under spring.application.name -->
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-netflix-eureka-client</artifactId>
</dependency>
# ai-service/application.properties
# Logical service id: other services address this app by this name through Eureka/Feign.
spring.application.name=ai-service
# Provider key is read from the environment so it never lands in source control.
spring.ai.openai.api-key=${OPENAI_API_KEY}
# Pin the chat model explicitly so the "gpt-4o-mini" label returned by /ai/chat
# matches the model that is actually called, independent of the starter's default.
spring.ai.openai.chat.options.model=gpt-4o-mini
eureka.client.service-url.defaultZone=http://localhost:8761/eureka/
server.port=8085
/**
 * Entry point of the centralized AI microservice.
 *
 * <p>Boots the Spring application and, via {@code @EnableDiscoveryClient},
 * opts it into service discovery.
 */
@EnableDiscoveryClient
@SpringBootApplication
public class AiServiceApplication {

    /**
     * Starts the application.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        new SpringApplication(AiServiceApplication.class).run(args);
    }
}

/**
 * REST facade that exposes the shared AI capabilities (chat, classification,
 * summarization) under {@code /ai} for the other microservices.
 *
 * <p>Caller-supplied text is bound to the prompt as a template parameter
 * instead of being concatenated into the prompt string, so braces or quotes
 * inside a message or review cannot be mistaken for template placeholders.
 */
@RestController
@RequestMapping("/ai")
public class AiServiceController {

    /** System prompt used by {@code /ai/chat} when the request does not supply one. */
    private static final String DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant.";

    /**
     * Model label reported by {@code /ai/chat}. This is a fixed string, not a
     * value read from the provider response, so it must be kept in sync with
     * the model the service is configured to call.
     */
    private static final String MODEL_LABEL = "gpt-4o-mini";

    private final ChatClient chatClient;

    /**
     * @param builder builder used to create the single {@link ChatClient} shared by all endpoints
     */
    public AiServiceController(ChatClient.Builder builder) {
        this.chatClient = builder.build();
    }

    /**
     * Free-form chat completion.
     *
     * @param req user message plus an optional system prompt; a {@code null} or
     *            blank system prompt falls back to {@link #DEFAULT_SYSTEM_PROMPT}
     * @return the generated text together with the model label
     * @throws IllegalArgumentException if the user message is {@code null} or blank
     */
    @PostMapping("/chat")
    public AiResponse chat(@RequestBody AiRequest req) {
        String requested = req.systemPrompt();
        String systemPrompt = (requested != null && !requested.isBlank())
                ? requested
                : DEFAULT_SYSTEM_PROMPT;
        String userMessage = requireText(req.userMessage(), "userMessage");

        String response = chatClient.prompt()
                .system(s -> s.text("{instructions}").param("instructions", systemPrompt))
                .user(u -> u.text("{message}").param("message", userMessage))
                .call()
                .content();
        return new AiResponse(response, MODEL_LABEL);
    }

    /**
     * Asks the model to pick one category for the given text.
     *
     * @param req text to classify and the candidate categories; a {@code null}
     *            text or category list is treated as empty
     * @return the trimmed model answer (empty if the model returned nothing)
     *         together with the original input text
     */
    @PostMapping("/classify")
    public ClassificationResponse classify(@RequestBody ClassificationRequest req) {
        String text = req.text() != null ? req.text() : "";
        String options = req.categories() != null ? String.join(", ", req.categories()) : "";

        String raw = chatClient.prompt()
                .user(u -> u.text("Classify: '{text}'. Options: {options}. Output only the category.")
                        .param("text", text)
                        .param("options", options))
                .call()
                .content();

        // content() may yield null when the model produces no text; avoid an NPE on trim().
        String category = raw != null ? raw.trim() : "";
        return new ClassificationResponse(category, req.text());
    }

    /**
     * Summarizes the given text.
     *
     * @param req text to summarize and the requested sentence count; counts
     *            below 1 are raised to 1
     * @return the generated summary
     * @throws IllegalArgumentException if the text is {@code null} or blank
     */
    @PostMapping("/summarize")
    public SummaryResponse summarize(@RequestBody SummaryRequest req) {
        // Never ask the model for zero or a negative number of sentences.
        int sentences = Math.max(1, req.maxSentences());
        String text = requireText(req.text(), "text");

        String summary = chatClient.prompt()
                .system("Summarize in " + sentences + " sentences.")
                .user(u -> u.text("{text}").param("text", text))
                .call()
                .content();
        return new SummaryResponse(summary);
    }

    /** Returns {@code value} unchanged, or fails fast when it is {@code null} or blank. */
    private static String requireText(String value, String field) {
        if (value == null || value.isBlank()) {
            throw new IllegalArgumentException(field + " must not be blank");
        }
        return value;
    }
}

/** Chat request: an optional system prompt (may be {@code null}) and the user message. */
record AiRequest(String systemPrompt, String userMessage) {}

/** Chat result: the generated text and a label naming the model (or fallback) that produced it. */
record AiResponse(String response, String model) {}

/**
 * Classification request: the text to classify and the candidate categories.
 *
 * <p>The category list is normalized on construction: {@code null} becomes an
 * empty list, {@code null} elements are dropped, and the stored list is an
 * unmodifiable copy, so later changes to the caller's list have no effect.
 */
record ClassificationRequest(String text, List<String> categories) {
    ClassificationRequest {
        categories = categories == null
                ? List.of()
                : categories.stream().filter(c -> c != null).toList();
    }
}

/** Classification result: the chosen category and the text that was classified. */
record ClassificationResponse(String category, String input) {}

/** Summarization request: the text to summarize and the requested number of sentences. */
record SummaryRequest(String text, int maxSentences) {}

/** Summarization result. */
record SummaryResponse(String summary) {}

Feign Client — Consume AI Service from Other Microservices

<!-- other-service/pom.xml -->
<!-- Declarative HTTP client used for the AiServiceClient interface -->
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<!-- Client-side load balancing across the registered ai-service instances -->
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-loadbalancer</artifactId>
</dependency>
<!-- Eureka client: lets @FeignClient(name = "ai-service") be resolved through the registry -->
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-netflix-eureka-client</artifactId>
</dependency>
<!-- Circuit breaker implementation: required for Feign fallbacks when
     spring.cloud.openfeign.circuitbreaker.enabled=true -->
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-circuitbreaker-resilience4j</artifactId>
</dependency>
/**
 * Declarative Feign client for the centralized AI service.
 *
 * <p>{@code name} is the logical service id and must match the AI service's
 * {@code spring.application.name}. All endpoints share the {@code /ai} prefix,
 * declared once through {@code path}.
 */
@FeignClient(name = "ai-service", path = "/ai")
public interface AiServiceClient {

    /** Sends a chat request: {@code POST /ai/chat}. */
    @PostMapping("/chat")
    AiResponse chat(@RequestBody AiRequest request);

    /** Classifies a text into one of the supplied categories: {@code POST /ai/classify}. */
    @PostMapping("/classify")
    ClassificationResponse classify(@RequestBody ClassificationRequest request);

    /** Summarizes a text: {@code POST /ai/summarize}. */
    @PostMapping("/summarize")
    SummaryResponse summarize(@RequestBody SummaryRequest request);
}

Using AI in the Product Service

/**
 * Entry point of the product service.
 *
 * <p>{@code @EnableFeignClients} turns on scanning for {@code @FeignClient}
 * interfaces; {@code @EnableDiscoveryClient} opts the app into service discovery.
 */
@EnableDiscoveryClient
@EnableFeignClients
@SpringBootApplication
public class ProductServiceApplication {

    /**
     * Starts the application.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        new SpringApplication(ProductServiceApplication.class).run(args);
    }
}

/**
 * Product-side use cases that delegate AI work to the central AI service
 * through the {@code AiServiceClient} Feign client.
 */
@Service
public class ProductService {

    private final ProductRepository productRepo;
    private final AiServiceClient   aiClient;    // Feign client — calls AI service

    public ProductService(ProductRepository productRepo, AiServiceClient aiClient) {
        this.productRepo = productRepo;
        this.aiClient    = aiClient;
    }

    /**
     * Classifies the sentiment of a customer review.
     *
     * @param reviewText the review to classify
     * @return the {@code ReviewSentiment} constant matching the returned category
     * @throws IllegalArgumentException if the AI service returns no category or one
     *         that does not map to a {@code ReviewSentiment} constant. This includes
     *         the {@code "unknown"} value produced by the Feign fallback, unless the
     *         enum declares {@code UNKNOWN}
     */
    public ReviewSentiment analyzeSentiment(String reviewText) {
        ClassificationResponse response = aiClient.classify(
                new ClassificationRequest(reviewText,
                        List.of("positive", "negative", "neutral"))
        );
        return parseSentiment(response.category());
    }

    /**
     * Generates marketing copy for a product from its raw attributes.
     *
     * @param productName name of the product
     * @param features    feature phrases, joined with commas in the prompt
     * @return the text returned by the AI service
     */
    public String generateDescription(String productName, List<String> features) {
        String prompt = "Generate a compelling product description for '%s' with features: %s"
                .formatted(productName, String.join(", ", features));

        AiResponse response = aiClient.chat(
                new AiRequest("You are a product copywriter.", prompt)
        );
        return response.response();
    }

    /**
     * Maps a free-text category from the model onto a {@code ReviewSentiment} constant.
     * Whitespace and punctuation are removed (models often answer "Positive."), and
     * upper-casing uses {@code Locale.ROOT} so the result does not depend on the JVM's
     * default locale (a Turkish locale would turn "positive" into "POSİTİVE").
     */
    private static ReviewSentiment parseSentiment(String category) {
        if (category == null) {
            throw new IllegalArgumentException("AI service returned no sentiment category");
        }
        String normalized = category.strip()
                .replaceAll("[^\\p{Alpha}_]", "")
                .toUpperCase(java.util.Locale.ROOT);
        try {
            return ReviewSentiment.valueOf(normalized);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException(
                    "Unrecognized sentiment category from AI service: '" + category + "'", e);
        }
    }
}

Circuit Breaker on AI Feign Client

/**
 * Feign client for the AI service with a fallback bean that answers when the
 * call fails or the circuit is open. It declares the same three endpoints as
 * the plain client, so switching to the circuit-breaker variant does not
 * remove {@code summarize}.
 */
@FeignClient(name = "ai-service", fallback = AiServiceFallback.class)
public interface AiServiceClient {
    @PostMapping("/ai/chat")      AiResponse chat(@RequestBody AiRequest r);
    @PostMapping("/ai/classify")  ClassificationResponse classify(@RequestBody ClassificationRequest r);
    @PostMapping("/ai/summarize") SummaryResponse summarize(@RequestBody SummaryRequest r);
}

/**
 * Degraded responses returned in place of real AI output. Each method returns
 * a fixed placeholder and never calls the AI service.
 */
@Component
public class AiServiceFallback implements AiServiceClient {

    private static final String UNAVAILABLE_MESSAGE = "AI service is temporarily unavailable.";

    /** Returns the unavailable notice, labelled with the model name {@code "fallback"}. */
    @Override
    public AiResponse chat(AiRequest request) {
        return new AiResponse(UNAVAILABLE_MESSAGE, "fallback");
    }

    /** Returns the category {@code "unknown"} and echoes the input text. */
    @Override
    public ClassificationResponse classify(ClassificationRequest request) {
        return new ClassificationResponse("unknown", request.text());
    }

    /** Returns the unavailable notice in place of a summary. */
    @Override
    public SummaryResponse summarize(SummaryRequest request) {
        return new SummaryResponse(UNAVAILABLE_MESSAGE);
    }
}
# Enable Resilience4j circuit breaker for Feign, so that failing calls are routed
# to the client's configured fallback.
spring.cloud.openfeign.circuitbreaker.enabled=true
# Failure-rate threshold, in percent, at which the breaker opens.
# NOTE(review): by default OpenFeign names each breaker after the client method
# rather than the service id, so confirm that the "ai-service" instance key is
# actually picked up (or move these settings to
# resilience4j.circuitbreaker.configs.default).
resilience4j.circuitbreaker.instances.ai-service.failure-rate-threshold=50
# How long the breaker stays open before trial calls are allowed again.
resilience4j.circuitbreaker.instances.ai-service.wait-duration-in-open-state=30s

Output

// Eureka dashboard shows:
ai-service      UP (1)   http://localhost:8085
product-service UP (2)   http://localhost:8081, http://localhost:8082
order-service   UP (1)   http://localhost:8083

// product-service calling ai-service via Feign (load balanced):
POST http://ai-service/ai/classify  (Eureka resolves to 8085)
→ {"category": "positive", "input": "Great product, very fast!"}

// When ai-service is down, fallback activates:
→ {"response": "AI service is temporarily unavailable.", "model": "fallback"}

Key Points

  • Centralizing AI configuration in one service means API key rotation, model upgrades, and rate limit adjustments happen in one place
  • Spring Cloud LoadBalancer automatically distributes Feign calls across multiple AI service instances when you scale horizontally
  • Always provide a Feign fallback — AI service downtime should degrade gracefully, not cascade failures to every dependent service
  • Add request/response logging to the Feign client at the AI service boundary to track which services are consuming AI; record token usage inside the AI service itself, because the response DTOs returned to callers do not carry it
  • Use Spring Cloud Config to externalize AI provider settings (API keys, models, timeouts) across environments without redeployment
Topics: Java SpringAI
← Newer Post Older Post →