Spring AI Vector Search with Spring Data — Query Vector Stores Like JPA Repositories
This article shows how to bring Spring Data-style repositories to vector search with Spring AI 1.0 and PostgreSQL pgvector. Instead of working with the low-level VectorStore API directly, you define a typed Spring Data JPA repository whose @Query methods run vector similarity searches, side by side with ordinary method name-based queries, with seamless JPA integration in the same database.
The Challenge with Low-Level VectorStore API
// Low-level API — verbose, and the results are untyped Document objects.
// Spring AI 1.0 builds the request with SearchRequest.builder().
List<Document> documents = vectorStore.similaritySearch(
    SearchRequest.builder()
        .query("Spring Boot")
        .topK(5)
        .similarityThreshold(0.7)
        .filterExpression("category == 'java' && status == 'published'")
        .build()
);
// Spring Data style — concise, typed, testable.
// searchByCategory embeds the query text and delegates to the repository's
// findSimilarInCategory query, which only returns published articles of the
// given category.
List<ArticleDocument> articles = articleService.searchByCategory("Spring Boot", "java", 5);
Maven Dependencies
<!-- Spring AI PGVector vector store starter (artifact name as of Spring AI 1.0) -->
<dependency>
  <groupId>org.springframework.ai</groupId>
  <artifactId>spring-ai-starter-vector-store-pgvector</artifactId>
</dependency>
<!-- Spring Data JPA, used for the typed article repository -->
<dependency>
  <groupId>org.springframework.boot</groupId>
  <artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
Typed Document Entity
/**
 * JPA entity mapped to the {@code articles} table. Each row holds an article's
 * text fields together with its embedding vector.
 */
@Entity
@Table(name = "articles")
public class ArticleDocument {
// Primary key; the value is generated by the persistence provider.
@Id
@GeneratedValue
private UUID id;
@Column(columnDefinition = "TEXT")
private String title;
@Column(columnDefinition = "TEXT")
private String content;
private String category;
private String author;
private boolean published;
// The column is declared as a 1536-dimension VECTOR. JPA does not compute this
// value: the caller must set it before saving (ArticleService.save fills it
// from EmbeddingModel.embed).
// NOTE(review): presumably the configured embedding model produces
// 1536-dimension vectors, and the JPA provider can bind and read float[]
// against a VECTOR column — confirm both (a dedicated vector type mapping may
// be required).
@Column(columnDefinition = "VECTOR(1536)")
private float[] embedding;
private LocalDateTime createdAt;
// getters, setters...
}
Vector Store Repository
import java.util.List;
import java.util.UUID;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
/**
 * Spring Data JPA repository for {@link ArticleDocument} that mixes native
 * vector-distance queries with ordinary derived queries on the same table.
 */
public interface ArticleVectorRepository extends JpaRepository<ArticleDocument, UUID> {

    /**
     * Finds the articles whose embeddings are closest to the embedding of a
     * reference article, nearest first (ordered by the {@code <=>} distance).
     *
     * <p>The reference article is excluded from the result; otherwise it would
     * always be returned as its own nearest neighbour (distance 0).
     *
     * <p>NOTE(review): if no row has the given id the sub-select yields NULL and
     * the ordering is unspecified — callers should pass an existing id.
     *
     * @param referenceId id of the article to compare against
     * @param limit maximum number of rows to return; must not be negative
     * @return up to {@code limit} other articles, most similar first
     */
    @Query(value = """
            SELECT * FROM articles
            WHERE id <> :referenceId
            ORDER BY embedding <=> (
                SELECT embedding FROM articles WHERE id = :referenceId
            )
            LIMIT :limit
            """, nativeQuery = true)
    List<ArticleDocument> findSimilarTo(
            @Param("referenceId") UUID referenceId,
            @Param("limit") int limit);

    /**
     * Finds published articles of one category, ordered by the {@code <=>}
     * distance between their embedding and the supplied query vector.
     *
     * @param queryVector embedding of the search text; it is cast to VECTOR in SQL
     * @param category category the articles must belong to
     * @param limit maximum number of rows to return; must not be negative
     * @return up to {@code limit} matching articles, most similar first
     */
    @Query(value = """
            SELECT * FROM articles
            WHERE published = true
            AND category = :category
            ORDER BY embedding <=> CAST(:queryVector AS VECTOR)
            LIMIT :limit
            """, nativeQuery = true)
    List<ArticleDocument> findSimilarInCategory(
            @Param("queryVector") float[] queryVector,
            @Param("category") String category,
            @Param("limit") int limit);

    // Standard derived-query methods work alongside the vector searches above.

    /** Finds articles by author and published flag. */
    List<ArticleDocument> findByAuthorAndPublished(String author, boolean published);

    /** Counts the articles stored under the given category. */
    long countByCategory(String category);
}
Article Service — Combining JPA and Vector Search
/**
 * Application service that stores articles together with their embeddings and
 * runs similarity lookups through {@link ArticleVectorRepository}.
 */
@Service
public class ArticleService {

    private final ArticleVectorRepository repository;
    private final EmbeddingModel embeddingModel;
    // Injected and kept, but not used by any method of this class.
    private final VectorStore vectorStore;

    public ArticleService(ArticleVectorRepository repository,
                          EmbeddingModel embeddingModel,
                          VectorStore vectorStore) {
        this.repository = repository;
        this.embeddingModel = embeddingModel;
        this.vectorStore = vectorStore;
    }

    /**
     * Creates and persists an article. The embedding is computed from the
     * content before the entity is built; the article is marked as published
     * and stamped with the current local date-time.
     *
     * @return the entity returned by the repository's {@code save}
     */
    public ArticleDocument save(String title, String content, String category, String author) {
        final float[] contentVector = embeddingModel.embed(content);

        final ArticleDocument entity = new ArticleDocument();
        entity.setTitle(title);
        entity.setContent(content);
        entity.setCategory(category);
        entity.setAuthor(author);
        entity.setEmbedding(contentVector);
        entity.setPublished(true);
        entity.setCreatedAt(LocalDateTime.now());

        return repository.save(entity);
    }

    /**
     * Semantic search within one category: the query text is embedded and the
     * resulting vector is handed to the repository's similarity query.
     */
    public List<ArticleDocument> searchByCategory(String query, String category, int topK) {
        return repository.findSimilarInCategory(embeddingModel.embed(query), category, topK);
    }

    /** Looks up articles similar to the article with the given id. */
    public List<ArticleDocument> findRelated(UUID articleId, int count) {
        return repository.findSimilarTo(articleId, count);
    }
}
REST Controller
/**
 * REST endpoints under {@code /articles} for creating articles and running
 * similarity searches. Every request is delegated to {@link ArticleService}.
 */
@RestController
@RequestMapping("/articles")
public class ArticleController {

    /**
     * Largest result count a client may request. Requested counts end up in a
     * SQL {@code LIMIT}, so they are bounded before reaching the service.
     */
    private static final int MAX_RESULTS = 100;

    private final ArticleService articleService;

    public ArticleController(ArticleService articleService) {
        this.articleService = articleService;
    }

    /** Creates an article from the request body and returns the saved entity. */
    @PostMapping
    public ArticleDocument create(@RequestBody CreateArticleRequest req) {
        return articleService.save(
                req.title(), req.content(), req.category(), req.author());
    }

    /**
     * Semantic search within a category.
     *
     * @param q free-text query
     * @param category category to search in; defaults to {@code java}
     * @param limit requested number of results; defaults to 5 and is clamped to
     *     the range 0..{@value #MAX_RESULTS}
     */
    @GetMapping("/search")
    public List<ArticleDocument> search(
            @RequestParam String q,
            @RequestParam(defaultValue = "java") String category,
            @RequestParam(defaultValue = "5") int limit) {
        return articleService.searchByCategory(q, category, boundedCount(limit));
    }

    /**
     * Returns articles related to the article with the given id.
     *
     * @param id id of the reference article
     * @param count requested number of results; defaults to 5 and is clamped to
     *     the range 0..{@value #MAX_RESULTS}
     */
    @GetMapping("/{id}/related")
    public List<ArticleDocument> related(
            @PathVariable UUID id,
            @RequestParam(defaultValue = "5") int count) {
        return articleService.findRelated(id, boundedCount(count));
    }

    /**
     * Clamps a client-supplied count: negative values become 0 (a negative SQL
     * LIMIT is an error) and values above {@link #MAX_RESULTS} are capped.
     */
    private static int boundedCount(int requested) {
        return Math.max(0, Math.min(requested, MAX_RESULTS));
    }
}
Output
// POST /articles (saves with auto-embedding)
{
"id": "a1b2c3d4-...",
"title": "Spring AI RAG Tutorial",
"category": "spring-ai",
"author": "Ravi Kumar"
}
// GET /articles/search?q=retrieve+documents+AI&category=spring-ai&limit=3
[
{"title": "Spring AI RAG Tutorial", "similarity": 0.94},
{"title": "Spring AI Vector Stores", "similarity": 0.88},
{"title": "Spring AI ETL Pipeline", "similarity": 0.81}
]
// GET /articles/a1b2c3d4.../related?count=3
[
{"title": "Spring AI Vector Stores"},
{"title": "Spring AI ETL Pipeline"},
{"title": "Spring AI PGVector Setup"}
]
Key Points
- Storing the embedding in the same table as the content enables JPA-style queries combined with vector operations in a single SQL query
- Use `<=>` (cosine distance) in PostgreSQL pgvector for text similarity — lower value = more similar
- Generate and store article embeddings at write time (when the article is saved) — not at read time — to keep search latency low
- Standard `JpaRepository` methods (`findBy...`, `count...`) work alongside vector queries in the same repository
- Index the embedding column with HNSW: `CREATE INDEX ON articles USING hnsw (embedding vector_cosine_ops)` for fast search
Comments