Tutorial: RAG Pipeline Demo (E05)
This tutorial corresponds to the example file examples/E05_rag_pipeline_demo.py.
It demonstrates how to set up and use a complete, local Retrieval Augmented Generation (RAG) pipeline. It shows how to:
- Configure a local RAG pipeline using FeatureSettings (Sentence Transformers for embeddings and FAISS for an in-memory vector store).
- Index documents from a local directory using genie.rag.index_directory().
- Perform a similarity search on the indexed documents using genie.rag.search().
Example Code
examples/E05_rag_pipeline_demo.py
"""Example: RAG Pipeline Demo using Genie Facade (Updated)

This example demonstrates setting up and using a RAG pipeline to index local
text files and perform similarity searches using the Genie facade and
FeatureSettings for simplified configuration.

To Run:
1. Ensure Genie Tooling is installed (`poetry install --all-extras`).
   You'll need dependencies for local RAG, e.g., sentence-transformers,
   faiss-cpu.
2. The script will create dummy data files in examples/data/ if they don't
   exist.
3. Run from the root of the project:
   `poetry run python examples/E05_rag_pipeline_demo.py`

The demo will:
- Initialize the Genie facade using FeatureSettings for RAG components.
- Index documents from the 'examples/data/' directory.
- Perform a search query against the indexed documents.
- Print the search results.
"""
# Standard library
import asyncio
import logging
from pathlib import Path
from typing import Optional

# Genie Tooling (project) imports used by the demo
from genie_tooling.config.features import FeatureSettings
from genie_tooling.config.models import MiddlewareConfig
from genie_tooling.genie import Genie
async def main():
    """Run the local RAG demo end to end.

    Steps, in order:
      1. Create a ``data`` directory next to this script and write three
         small sample text files into it (only the ones that are missing).
      2. Build a ``MiddlewareConfig`` whose ``FeatureSettings`` select the
         ``sentence_transformer`` embedder and the ``faiss`` vector store,
         with the LLM and command processor set to ``"none"``.
      3. Create the Genie facade, index the data directory into a named
         collection, and stop early if the reported status is not
         ``"success"``.
      4. Run one similarity search (``top_k=2``) and print each hit.

    Any exception raised while the facade is in use is printed and logged
    here rather than propagated; the facade is closed in ``finally`` if it
    was created.
    """
    print("--- RAG Pipeline Demo using Genie Facade (FeatureSettings) ---")

    # Sample data lives in a "data" folder beside this script.
    data_dir = Path(__file__).parent / "data"
    data_dir.mkdir(exist_ok=True)

    # File name -> sample content; existing files are never overwritten.
    seed_documents = {
        "doc1.txt": "The quick brown fox jumps over the lazy dog.\nLarge language models are transforming AI.",
        "doc2.txt": "Genie Tooling provides a hyper-pluggable middleware.\nRetrieval Augmented Generation enhances LLM responses.",
        "doc3.txt": "Python is a versatile programming language.\nAsync programming is key for I/O bound tasks.",
    }
    for file_name, text in seed_documents.items():
        doc_path = data_dir / file_name
        if not doc_path.exists():
            doc_path.write_text(text)

    # 1. Configure Middleware using FeatureSettings for RAG
    rag_features = FeatureSettings(
        llm="none",
        command_processor="none",
        rag_embedder="sentence_transformer",
        rag_vector_store="faiss",
    )
    app_config = MiddlewareConfig(features=rag_features)

    # Stays None until creation succeeds so ``finally`` knows whether
    # there is anything to close.
    genie: Optional[Genie] = None
    try:
        print("\nInitializing Genie facade...")
        genie = await Genie.create(config=app_config)
        print("Genie facade initialized.")

        collection_name_for_demo = "my_local_rag_collection_e05"

        print(f"\nIndexing documents from '{data_dir}' into collection '{collection_name_for_demo}'...")
        index_result = await genie.rag.index_directory(
            str(data_dir),
            collection_name=collection_name_for_demo,
        )
        print(f"Indexing result: {index_result}")
        if index_result.get("status") != "success":
            print(f"ERROR: Indexing failed: {index_result.get('message')}")
            return  # the ``finally`` block below still closes the facade

        query = "What is Genie Tooling?"
        print(f"\nPerforming search for query: '{query}' in collection '{collection_name_for_demo}'")
        search_results = await genie.rag.search(
            query,
            collection_name=collection_name_for_demo,
            top_k=2,
        )

        if search_results:
            print("\nSearch Results:")
            for i, result_chunk in enumerate(search_results):
                print(f" --- Result {i+1} (Score: {result_chunk.score:.4f}, ID: {result_chunk.id}) ---")
                print(f" Content: {result_chunk.content[:200]}...")
                print(f" Metadata: {result_chunk.metadata}")
                print(" ------------------------------------")
        else:
            print("No search results found.")
    except Exception as e:
        # Top-level boundary of the demo: report and log, do not re-raise.
        print(f"\nAn unexpected error occurred: {e}")
        logging.exception("Error details:")
    finally:
        if genie:
            print("\n--- Tearing down Genie facade ---")
            await genie.close()
            print("Genie facade teardown complete.")
        # Optional cleanup, intentionally left disabled so the data dir and
        # sample files survive between runs:
        #   - unlink doc1.txt / doc2.txt / doc3.txt in data_dir
        #     (Path.unlink(missing_ok=True)), then data_dir.rmdir() only if
        #     the directory is empty.
        #   - if FAISS index files were persisted to the default location
        #     (Path("./.genie_data/faiss")), remove that tree with
        #     shutil.rmtree(..., ignore_errors=True); this needs
        #     ``import shutil``, which this script does not import.
# Script entry point: configure logging, then drive the async demo.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # Uncomment for verbose library logging:
    # logging.getLogger("genie_tooling").setLevel(logging.DEBUG)
    asyncio.run(main())