Spring AI with Spring Cloud — Distribute AI Features Across Microservices
In a microservices architecture, AI capabilities should be shared services rather than repeated per service. This tutorial shows how to build a centralized AI microservice discoverable via Spring Cloud Eureka, load-balanced with Spring Cloud LoadBalancer, and called through Spring Cloud OpenFeign — so every service in your ecosystem can use AI without embedding provider configuration everywhere.
Architecture — Centralized AI Service
            Eureka Server (service registry)
                       │
           ┌───────────┴───────────┐
           │                       │
     [AI Service]         [Other Microservices]
     (spring-ai)          (product-service, order-service, notification-service)
           │                       │
           │ ◄────── Feign ────────┘
           │
  OpenAI / Claude / Gemini
AI Service — Standalone Spring Boot App
<!-- ai-service/pom.xml -->
<!-- Spring AI starter for the OpenAI provider; presumably what supplies the ChatClient.Builder
     injected into the controller (TODO confirm against the Spring AI version in use). -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-openai-spring-boot-starter</artifactId>
</dependency>
<!-- Eureka client starter so this service can register with the registry configured in
     application.properties. No versions are declared here; presumably they are managed by a
     BOM elsewhere in the POM (verify). -->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-netflix-eureka-client</artifactId>
</dependency>
# ai-service/application.properties

# --- Identity & HTTP ---
# Logical service name; Feign clients in other services refer to this value.
spring.application.name=ai-service
server.port=8085

# --- Service discovery ---
# Eureka registry this instance registers with.
eureka.client.service-url.defaultZone=http://localhost:8761/eureka/

# --- AI provider ---
# Key is supplied through the OPENAI_API_KEY placeholder (e.g. an environment variable),
# so the secret is never committed with the configuration.
spring.ai.openai.api-key=${OPENAI_API_KEY}
/**
 * Bootstrap class of the centralized AI microservice.
 *
 * <p>Annotated as a Spring Boot application and as a discovery client so that the
 * service can be located through the service registry.
 */
@EnableDiscoveryClient
@SpringBootApplication
public class AiServiceApplication {

    /**
     * Starts the application.
     *
     * @param args command-line arguments, forwarded unchanged to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(
                AiServiceApplication.class,
                args);
    }
}
/**
 * REST facade over the Spring AI {@code ChatClient}.
 *
 * <p>Exposes three POST endpoints under {@code /ai}: free-form chat, single-label
 * classification and summarization. Each endpoint builds one prompt, performs one
 * blocking model call and wraps the returned text in a response record.
 */
@RestController
@RequestMapping("/ai")
public class AiServiceController {

    /** System prompt used when the caller supplies none (or only whitespace). */
    private static final String DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant.";

    /**
     * Model label echoed back in chat responses.
     * NOTE(review): this is a fixed label, not a value read from the provider response;
     * keep it in sync with the model actually configured for this service.
     */
    private static final String MODEL_LABEL = "gpt-4o-mini";

    private final ChatClient chatClient;

    /**
     * @param builder builder from which the shared {@code ChatClient} is created
     */
    public AiServiceController(ChatClient.Builder builder) {
        this.chatClient = builder.build();
    }

    /**
     * Sends the user's message to the model under the requested system prompt.
     *
     * @param req chat request; a {@code null} or blank system prompt falls back to
     *            {@link #DEFAULT_SYSTEM_PROMPT}
     * @return the model's reply together with the model label
     */
    @PostMapping("/chat")
    public AiResponse chat(@RequestBody AiRequest req) {
        String response = chatClient.prompt()
                .system(effectiveSystemPrompt(req.systemPrompt()))
                .user(req.userMessage())
                .call()
                .content();
        return new AiResponse(response, MODEL_LABEL);
    }

    /**
     * Asks the model to pick exactly one of the supplied categories for the text.
     *
     * @param req text to classify and the candidate categories
     * @return the trimmed model output as the category, plus the original input text
     */
    @PostMapping("/classify")
    public ClassificationResponse classify(@RequestBody ClassificationRequest req) {
        String category = chatClient.prompt()
                .user("Classify: '%s'. Options: %s. Output only the category."
                        .formatted(req.text(), String.join(", ", req.categories())))
                .call()
                .content()
                .trim();
        return new ClassificationResponse(category, req.text());
    }

    /**
     * Summarizes the text in at most the requested number of sentences.
     *
     * @param req text to summarize and the sentence budget; budgets below 1 are raised to 1
     * @return the model-generated summary
     */
    @PostMapping("/summarize")
    public SummaryResponse summarize(@RequestBody SummaryRequest req) {
        // A missing JSON field binds to 0 for an int component; asking the model for
        // "0 sentences" (or a negative count) is never meaningful, so clamp to 1.
        int sentenceBudget = Math.max(1, req.maxSentences());
        String summary = chatClient.prompt()
                .system("Summarize in " + sentenceBudget + " sentences.")
                .user(req.text())
                .call()
                .content();
        return new SummaryResponse(summary);
    }

    /** Returns the caller's system prompt, or the default when it is null or blank. */
    private static String effectiveSystemPrompt(String requested) {
        // A blank prompt passes a plain null check but carries no instruction,
        // so treat it the same as an absent prompt.
        if (requested == null || requested.isBlank()) {
            return DEFAULT_SYSTEM_PROMPT;
        }
        return requested;
    }
}
/** Chat request: a system prompt (may be {@code null}) and the user's message. */
record AiRequest(
        String systemPrompt,
        String userMessage) {
}

/** Chat reply: the generated text and the label of the model that produced it. */
record AiResponse(
        String response,
        String model) {
}

/** Classification request: the text to label and the candidate category names. */
record ClassificationRequest(
        String text,
        List<String> categories) {
}

/** Classification result: the chosen category and the text that was classified. */
record ClassificationResponse(
        String category,
        String input) {
}

/** Summarization request: the text to condense and the sentence budget. */
record SummaryRequest(
        String text,
        int maxSentences) {
}

/** Summarization result. */
record SummaryResponse(
        String summary) {
}
Feign Client — Consume AI Service from Other Microservices
<!-- other-service/pom.xml -->
<!-- Declarative HTTP client used to call the AI service by its registered name. -->
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<!-- Client-side load balancing across the instances returned by service discovery. -->
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-loadbalancer</artifactId>
</dependency>
<!-- Circuit breaker implementation. Required for @FeignClient fallbacks:
     spring.cloud.openfeign.circuitbreaker.enabled=true only takes effect when a
     Spring Cloud CircuitBreaker implementation is on the classpath; without it the
     fallback class is never invoked. -->
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-circuitbreaker-resilience4j</artifactId>
</dependency>
/**
 * Declarative HTTP client for the centralized AI service.
 *
 * <p>{@code name} must equal the AI service's {@code spring.application.name}; the
 * shared {@code /ai} prefix is declared once via {@code path}, so each method only
 * names its own endpoint segment.
 */
@FeignClient(name = "ai-service", path = "/ai")
public interface AiServiceClient {

    /** POST /ai/chat — free-form chat completion. */
    @PostMapping("/chat")
    AiResponse chat(@RequestBody AiRequest request);

    /** POST /ai/classify — choose one category for the given text. */
    @PostMapping("/classify")
    ClassificationResponse classify(@RequestBody ClassificationRequest request);

    /** POST /ai/summarize — summarize the given text. */
    @PostMapping("/summarize")
    SummaryResponse summarize(@RequestBody SummaryRequest request);
}
Using AI in the Product Service
/**
 * Bootstrap class of the product service.
 *
 * <p>Enables Feign client scanning (so declarative clients such as the AI service
 * client are created as beans) and service discovery.
 */
@EnableDiscoveryClient
@EnableFeignClients
@SpringBootApplication
public class ProductServiceApplication {

    /**
     * Starts the application.
     *
     * @param args command-line arguments, forwarded unchanged to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(
                ProductServiceApplication.class,
                args);
    }
}
/**
 * Product-domain operations that delegate their AI work to the remote AI service
 * through the {@code AiServiceClient} Feign client.
 */
@Service
public class ProductService {

    /** Category names offered to the classifier for review sentiment. */
    private static final List<String> SENTIMENT_LABELS =
            List.of("positive", "negative", "neutral");

    private final ProductRepository productRepo;
    private final AiServiceClient aiClient; // Feign client — calls AI service

    public ProductService(ProductRepository productRepo, AiServiceClient aiClient) {
        this.productRepo = productRepo;
        this.aiClient = aiClient;
    }

    /**
     * Classifies the sentiment of a customer review.
     *
     * @param reviewText raw review text
     * @return the {@code ReviewSentiment} constant whose name equals the upper-cased category
     * @throws IllegalArgumentException if the returned category does not match any
     *         {@code ReviewSentiment} constant (for example a placeholder such as
     *         {@code "unknown"}) — presumably {@code ReviewSentiment} is an enum; verify
     */
    public ReviewSentiment analyzeSentiment(String reviewText) {
        ClassificationResponse response = aiClient.classify(
                new ClassificationRequest(reviewText, SENTIMENT_LABELS));
        // Locale.ROOT is essential: default-locale upper-casing is locale-sensitive
        // (under a Turkish locale "positive" becomes "POSİTİVE"), which would make
        // valueOf fail on a perfectly valid category. strip() guards against stray
        // whitespace around the label.
        String constantName = response.category()
                .strip()
                .toUpperCase(java.util.Locale.ROOT);
        return ReviewSentiment.valueOf(constantName);
    }

    /**
     * Generates marketing copy for a product from its name and feature list.
     *
     * @param productName product name, quoted verbatim in the prompt
     * @param features    feature phrases, joined with ", " in the prompt
     * @return the text returned by the AI service
     */
    public String generateDescription(String productName, List<String> features) {
        String prompt = "Generate a compelling product description for '%s' with features: %s"
                .formatted(productName, String.join(", ", features));
        AiResponse response = aiClient.chat(
                new AiRequest("You are a product copywriter.", prompt));
        return response.response();
    }
}
Circuit Breaker on AI Feign Client
/**
 * Feign client for the AI service with a fallback implementation.
 *
 * <p>Declares the same three endpoints as the AI service exposes (chat, classify,
 * summarize) so that switching to the fallback-enabled client does not remove any
 * operation callers may already use.
 */
@FeignClient(name = "ai-service", fallback = AiServiceFallback.class)
public interface AiServiceClient {

    @PostMapping("/ai/chat")
    AiResponse chat(@RequestBody AiRequest r);

    @PostMapping("/ai/classify")
    ClassificationResponse classify(@RequestBody ClassificationRequest r);

    @PostMapping("/ai/summarize")
    SummaryResponse summarize(@RequestBody SummaryRequest r);
}

/**
 * Static responses returned in place of a remote call when the AI service cannot be
 * reached. Every method returns a fixed placeholder and performs no I/O.
 */
@Component
public class AiServiceFallback implements AiServiceClient {

    /** @return a fixed "unavailable" message tagged with the model label {@code "fallback"} */
    @Override
    public AiResponse chat(AiRequest request) {
        return new AiResponse("AI service is temporarily unavailable.", "fallback");
    }

    /**
     * @return category {@code "unknown"} with the original text echoed back; callers that
     *         map the category onto a fixed set of values must account for this value
     */
    @Override
    public ClassificationResponse classify(ClassificationRequest request) {
        return new ClassificationResponse("unknown", request.text());
    }

    /** @return a fixed "unavailable" message in place of a summary */
    @Override
    public SummaryResponse summarize(SummaryRequest request) {
        return new SummaryResponse("AI service is temporarily unavailable.");
    }
}
# Enable Resilience4j circuit breaker for Feign
spring.cloud.openfeign.circuitbreaker.enabled=true

# OpenFeign names each circuit breaker per client method (for example
# AiServiceClient#chat(AiRequest)), so settings keyed by "ai-service" are only picked up
# when circuit-breaker groups are enabled: the group name is the Feign client name.
spring.cloud.openfeign.circuitbreaker.group.enabled=true

# Group-level settings live under "configs.<group>" (not "instances.<id>").
resilience4j.circuitbreaker.configs.ai-service.failure-rate-threshold=50
resilience4j.circuitbreaker.configs.ai-service.wait-duration-in-open-state=30s

# The Resilience4j time limiter defaults to 1s, which is shorter than a typical LLM
# completion and would send slow-but-successful calls to the fallback.
# Tune this value to the latency of your provider and model.
resilience4j.timelimiter.configs.ai-service.timeout-duration=30s
Output
// Eureka dashboard shows:
ai-service UP (1) http://localhost:8085
product-service UP (2) http://localhost:8081, http://localhost:8082
order-service UP (1) http://localhost:8083
// product-service calling ai-service via Feign (load balanced):
POST http://ai-service/ai/classify (Eureka resolves to 8085)
→ {"category": "positive", "input": "Great product, very fast!"}
// When ai-service is down, fallback activates:
→ {"response": "AI service is temporarily unavailable.", "model": "fallback"}
Key Points
- Centralizing AI configuration in one service means API key rotation, model upgrades, and rate limit adjustments happen in one place
- Spring Cloud LoadBalancer automatically distributes Feign calls across multiple AI service instances when you scale horizontally
- Always provide a Feign fallback — AI service downtime should degrade gracefully, not cascade failures to every dependent service
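- Add request/response logging to the Feign client at the AI service boundary to track which services are consuming AI; to track token usage as well, the AI service must include it in its responses, since the response records shown here carry only the generated text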
- Use Spring Cloud Config to externalize AI provider settings (API keys, models, timeouts) across environments without redeployment
Comments