Tutorial: Llama.cpp Server E2E (E23)¶
This tutorial corresponds to the example file examples/E23_local_e2e_llama_cpp_server.py.
It provides a comprehensive end-to-end test using the Llama.cpp server provider. It covers most of Genie's features in a single, integrated script, including:

- LLM chat and generation with Pydantic parsing (GBNF).
- RAG indexing and search.
- Custom tool registration and execution.
- Command processing with HITL.
- Prompt management and conversation state.
- Guardrails and token usage tracking.
- A simple ReAct agent loop.
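Before diving into the full script, the core pattern it builds on can be summarized in a few lines: configure the Llama.cpp provider via FeatureSettings, create the Genie facade, chat, and close. The following is a minimal sketch distilled from the example below; the server URL and model alias are placeholder values, so adjust them to your own server.

import asyncio

from genie_tooling.config.features import FeatureSettings
from genie_tooling.config.models import MiddlewareConfig
from genie_tooling.genie import Genie

async def main():
    # Feature-driven configuration: point the "llama_cpp" LLM provider at a running server.
    config = MiddlewareConfig(
        features=FeatureSettings(
            llm="llama_cpp",
            llm_llama_cpp_base_url="http://localhost:8080",  # placeholder: your server URL
            llm_llama_cpp_model_name="mistral:latest",       # placeholder: model alias the server exposes
        )
    )
    genie = await Genie.create(config=config)
    try:
        resp = await genie.llm.chat([{"role": "user", "content": "Say hello."}])
        print(resp["message"]["content"])
    finally:
        await genie.close()  # tear down providers, tracers, etc.

asyncio.run(main())

The full example below follows the same lifecycle, but layers RAG, tools, HITL, guardrails, prompts, conversation state, tracing, and an agent loop on top of it.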
Example Code¶
examples/E23_local_e2e_llama_cpp_server.py¶
""" End-to-End Test for Genie Tooling with Llama.cpp Server Provider
This example demonstrates a comprehensive flow using the Genie facade, targeting a Llama.cpp server (Ollama-compatible API) for LLM operations. It covers LLM chat/generate with Pydantic parsing, RAG, custom tool execution, command processing with HITL, prompt management, conversation state, guardrails, and a simple ReActAgent.
It also demonstrates the use of the @traceable decorator for adding
custom application logic to the observability trace.
Prerequisites:
1. genie-tooling installed (poetry install --all-extras).
2. A Llama.cpp server running and accessible, typically at http://localhost:8080.
The server should be configured with a GBNF-compatible model (e.g., Mistral).
You can set the model alias used by the server via the LLAMA_CPP_MODEL_ALIAS
environment variable (defaults to "mistral:latest" if not set).
Example server command:
./server -m mistral-7b-instruct-v0.2.Q4_K_M.gguf -c 4096 --host 0.0.0.0 --port 8080 --api-key mysecretkey --model-alias mistral:latest --cont-batching --embedding --gbnf-enabled
(Adjust model path and other server parameters as needed. If using an API key,
set LLAMA_CPP_API_KEY="mysecretkey" in your environment for this script.)
"""
import asyncio
import json
import logging
import os
import shutil
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
from genie_tooling import tool
from genie_tooling.agents.react_agent import ReActAgent
from genie_tooling.config.features import FeatureSettings
from genie_tooling.config.models import MiddlewareConfig
from genie_tooling.genie import Genie
from genie_tooling.observability import traceable  # Import the new decorator
from pydantic import BaseModel
from pydantic import Field as PydanticField
# --- Configuration ---
LLAMA_CPP_BASE_URL = os.getenv("LLAMA_CPP_BASE_URL", "http://localhost:8080")
LLAMA_CPP_MODEL_ALIAS = os.getenv("LLAMA_CPP_MODEL_ALIAS", "mistral:latest")  # Model alias server uses
LLAMA_CPP_API_KEY_NAME = "LLAMA_CPP_API_KEY"  # Env var name for the API key if server needs one
# --- Pydantic Model for LLM Output Parsing ---
class ExtractedDetails(BaseModel):
    item_name: str = PydanticField(description="The name of the item.")
    quantity: int = PydanticField(gt=0, description="The quantity of the item.")
    notes: Optional[str] = PydanticField(None, description="Optional notes about the item.")
# --- Helper function decorated with @traceable ---
@traceable
async def _get_size_from_path(file_path: Path, context: Dict[str, Any]) -> int:
    """A helper function to demonstrate custom tracing."""
    # This function's execution will appear as a nested span in the trace.
    # The 'file_path' argument will be automatically added as a span attribute.
    logging.info(f"[_get_size_from_path] Getting size for: {file_path}")
    return file_path.stat().st_size
# --- Custom Tool Definition ---
@tool
async def get_file_metadata(file_path: str, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Retrieves metadata for a specified file within the agent's sandbox.

    Args:
        file_path (str): The relative path to the file within the agent's sandbox.
        context (Dict[str, Any]): The invocation context, used for tracing.

    Returns:
        Dict[str, Any]: A dictionary containing file metadata (name, size, exists) or an error.
    """
    sandbox_base = Path("./e23_agent_sandbox")
    try:
        prospective_path = (sandbox_base / file_path).resolve()
        if not str(prospective_path).startswith(str(sandbox_base.resolve())):
            return {"error": "Path traversal attempt detected.", "path_resolved": str(prospective_path)}
        full_path = prospective_path
    except Exception as e:
        return {"error": f"Path resolution error: {e!s}"}

    if full_path.exists() and full_path.is_file():
        # Call the traceable helper function, passing the context through
        file_size = await _get_size_from_path(file_path=full_path, context=context)
        return {"file_name": full_path.name, "size_bytes": file_size, "exists": True}
    else:
        return {"error": "File not found or is not a file.", "path_checked": str(full_path), "exists": False}
async def run_local_e2e_llama_cpp_server():
    logging.basicConfig(level=logging.INFO)
    # logging.getLogger("genie_tooling").setLevel(logging.DEBUG)
    # logging.getLogger("genie_tooling.llm_providers.impl.llama_cpp_provider").setLevel(logging.DEBUG)

    print(f"--- Genie Tooling Local E2E Test (Llama.cpp Server @ {LLAMA_CPP_BASE_URL} with model {LLAMA_CPP_MODEL_ALIAS}) ---")
    if os.getenv(LLAMA_CPP_API_KEY_NAME):
        print(f"Using API Key from environment variable: {LLAMA_CPP_API_KEY_NAME}")
    else:
        print(f"No API Key ({LLAMA_CPP_API_KEY_NAME}) found in environment. Assuming server does not require one.")

    sandbox_dir = Path("./e23_agent_sandbox")
    rag_data_dir = sandbox_dir / "rag_docs"
    prompt_dir = Path("./e23_prompts")
    for p in [sandbox_dir, rag_data_dir, prompt_dir]:
        if p.exists():
            shutil.rmtree(p)
        p.mkdir(parents=True, exist_ok=True)

    (rag_data_dir / "doc1.txt").write_text("Llama.cpp is a C/C++ port of Llama for fast inference.")
    (rag_data_dir / "doc2.txt").write_text("Genie Tooling uses llama.cpp via its Ollama-compatible API for structured output.")
    (prompt_dir / "greeting_template.j2").write_text(
        '[{"role": "system", "content": "You are {{ bot_name }}."},\n'
        ' {"role": "user", "content": "Hello! My name is {{ user_name }}."}]'
    )
    (sandbox_dir / "testfile.txt").write_text("This is a test file for metadata.")
    (prompt_dir / "react_agent_system_prompt_v1.j2").write_text(
        "You are ReActBot. Your goal is: {{ goal }}.\n"
        "Available tools:\n{{ tool_definitions }}\n"
        "Scratchpad (Thought/Action/Observation cycles):\n{{ scratchpad }}\nThought:"
    )
    app_config = MiddlewareConfig(
        features=FeatureSettings(
            llm="llama_cpp",  # Target Llama.cpp server
            llm_llama_cpp_base_url=LLAMA_CPP_BASE_URL,
            llm_llama_cpp_model_name=LLAMA_CPP_MODEL_ALIAS,
            llm_llama_cpp_api_key_name=LLAMA_CPP_API_KEY_NAME if os.getenv(LLAMA_CPP_API_KEY_NAME) else None,
            command_processor="llm_assisted",
            command_processor_formatter_id_alias="compact_text_formatter",
            tool_lookup="embedding",
            tool_lookup_embedder_id_alias="st_embedder",
            tool_lookup_formatter_id_alias="compact_text_formatter",
            rag_loader="file_system",
            rag_embedder="sentence_transformer",
            rag_vector_store="faiss",
            cache="in-memory",
            observability_tracer="console_tracer",
            hitl_approver="cli_hitl_approver",
            token_usage_recorder="in_memory_token_recorder",
            input_guardrails=["keyword_blocklist_guardrail"],
            prompt_registry="file_system_prompt_registry",
            prompt_template_engine="jinja2_chat_formatter",
            conversation_state_provider="in_memory_convo_provider",
            default_llm_output_parser="pydantic_output_parser"
        ),
        # The @tool decorated `get_file_metadata` is auto-enabled by default.
        # Class-based tools like calculator and sandboxed_fs still need to be enabled.
        tool_configurations={
            "calculator_tool": {},
            "sandboxed_fs_tool_v1": {"sandbox_base_path": str(sandbox_dir.resolve())},
        },
        guardrail_configurations={
            "keyword_blocklist_guardrail_v1": {
                "blocklist": ["super_secret_project_X", "highly_classified_info"],
                "action_on_match": "block"
            }
        },
        prompt_registry_configurations={
            "file_system_prompt_registry_v1": {
                "base_path": str(prompt_dir.resolve()),
                "template_suffix": ".j2"
            }
        },
        observability_tracer_configurations={
            "console_tracer_plugin_v1": {"log_level": "INFO"}
        }
    )
    genie: Optional[Genie] = None
    try:
        print("\n[1] Initializing Genie facade...")
        genie = await Genie.create(config=app_config)
        await genie.register_tool_functions([get_file_metadata])
        print("Genie facade initialized and custom tool registered!")

        print("\n[2] Testing LLM Chat and Generate (with Pydantic parsing)...")
        try:
            chat_resp = await genie.llm.chat([{"role": "user", "content": "Hello Llama.cpp server! Write a short, friendly greeting."}])
            print(f" LLM Chat Response: {chat_resp['message']['content'][:100]}...")

            gen_prompt = "Extract details: Item is 'SuperWidget', quantity is 55, notes: 'Handle with care'."
            gen_resp = await genie.llm.generate(
                prompt=f"You must output ONLY a valid JSON object. {gen_prompt}",
                output_schema=ExtractedDetails,  # For GBNF
                temperature=0.1,
                n_predict=256  # Llama.cpp server needs n_predict for GBNF with /v1/completions
            )
            print(f" LLM Generate (raw text for Pydantic): {gen_resp['text']}")
            parsed_details = await genie.llm.parse_output(gen_resp, schema=ExtractedDetails)
            if isinstance(parsed_details, ExtractedDetails):
                print(f" Parsed Pydantic: Name='{parsed_details.item_name}', Qty='{parsed_details.quantity}', Notes='{parsed_details.notes}'")
                assert parsed_details.item_name == "SuperWidget"
                assert parsed_details.quantity == 55
            else:
                assert False, f"Pydantic parsing failed, got type: {type(parsed_details)}"
        except Exception as e_llm:
            print(f" LLM Error: {e_llm} (Is Llama.cpp server running with model '{LLAMA_CPP_MODEL_ALIAS}' at {LLAMA_CPP_BASE_URL} and GBNF enabled?)")
            raise
print("\n[3] Testing RAG (indexing and search)...")
try:
rag_collection_name = "e23_llama_docs"
index_result = await genie.rag.index_directory(str(rag_data_dir.resolve()), collection_name=rag_collection_name)
print(f" Indexed documents from '{rag_data_dir.resolve()}'. Result: {index_result}")
assert index_result.get("status") == "success"
rag_results = await genie.rag.search("What is Llama.cpp?", collection_name=rag_collection_name, top_k=1)
if rag_results:
print(f" RAG Search Result: '{rag_results[0].content[:100]}...' (Score: {rag_results[0].score:.2f})")
assert "Llama.cpp" in rag_results[0].content
else:
assert False, "RAG search returned no results"
except Exception as e_rag:
print(f" RAG Error: {e_rag}")
raise
print("\n[4] Testing direct custom tool execution (get_file_metadata)...")
try:
metadata_result = await genie.execute_tool("get_file_metadata", file_path="testfile.txt")
print(f" Metadata for 'testfile.txt': {metadata_result}")
assert metadata_result.get("file_name") == "testfile.txt"
assert metadata_result.get("exists") is True
except Exception as e_tool_direct:
print(f" Direct tool execution error: {e_tool_direct}")
raise
print("\n[5] Testing `run_command` (LLM-assisted, HITL)...")
try:
command_text = "What is the size of the file named testfile.txt in the sandbox?"
print(f" Sending command: '{command_text}' (Approval may be requested on CLI)")
command_result = await genie.run_command(command_text)
print(f" `run_command` result: {json.dumps(command_result, indent=2, default=str)}")
assert command_result.get("tool_result", {}).get("size_bytes") is not None or \
command_result.get("hitl_decision", {}).get("status") != "approved", \
f"Tool did not run or HITL was not approved. Result: {command_result}"
except Exception as e_run_cmd:
print(f" `run_command` error: {e_run_cmd}")
raise
print("\n[6] Testing Prompt Management...")
try:
prompt_data = {"bot_name": "E23-Bot", "user_name": "Tester"}
rendered_chat_prompt = await genie.prompts.render_chat_prompt(name="greeting_template", data=prompt_data)
assert rendered_chat_prompt is not None and len(rendered_chat_prompt) == 2
print(f" Rendered chat prompt: {rendered_chat_prompt}")
except Exception as e_prompt:
print(f" Prompt management error: {e_prompt}")
raise
print("\n[7] Testing Conversation State...")
try:
session_id = f"e23_session_{uuid.uuid4().hex[:6]}"
await genie.conversation.add_message(session_id, {"role": "user", "content": "First turn in e23 test."})
await genie.conversation.add_message(session_id, {"role": "assistant", "content": "Acknowledged first turn."})
state = await genie.conversation.load_state(session_id)
assert state is not None and len(state["history"]) == 2
print(f" Conversation history for {session_id} (last 2): {state['history'][-2:]}")
except Exception as e_convo:
print(f" Conversation state error: {e_convo}")
raise
print("\n[8] Testing Input Guardrail...")
try:
blocked_input = "Tell me about super_secret_project_X."
print(f" Sending potentially blocked input: '{blocked_input}'")
await genie.llm.chat([{"role": "user", "content": blocked_input}])
assert False, "Input guardrail did not block as expected."
except PermissionError as e_perm:
print(f" Guardrail test: Input successfully blocked: {e_perm}")
except Exception as e_guard:
print(f" Guardrail test error (possibly unrelated to guardrail): {e_guard}")
raise
print("\n[9] Testing ReActAgent...")
try:
react_agent = ReActAgent(genie=genie, agent_config={"max_iterations": 3})
agent_goal = "What is 15 plus 7 using the calculator?"
print(f" Agent Goal: '{agent_goal}' (Tool use by agent does not trigger interactive HITL here)")
agent_result = await react_agent.run(goal=agent_goal)
print(f" ReActAgent Result Status: {agent_result['status']}")
print(f" ReActAgent Output: {str(agent_result['output'])[:200]}...")
assert agent_result["status"] == "success"
assert "22" in str(agent_result["output"])
except Exception as e_agent:
print(f" ReActAgent Error: {e_agent}")
raise
print("\n[10] Testing Token Usage Summary...")
try:
usage_summary = await genie.usage.get_summary()
print(f" Token Usage: {json.dumps(usage_summary, indent=2)}")
recorder_id = "in_memory_token_usage_recorder_v1"
assert recorder_id in usage_summary, f"Recorder '{recorder_id}' not found in usage summary."
assert usage_summary[recorder_id]["total_records"] > 0
except Exception as e_usage:
print(f" Token usage error: {e_usage}")
raise
print("\n--- E2E Test PASSED ---")
    except Exception as e_main:
        print(f"\nE2E Test FAILED with critical error: {e_main}")
        logging.error("E2E Main Error", exc_info=True)
        raise
    finally:
        if genie:
            print("\n[11] Tearing down Genie facade...")
            await genie.close()
            print("Genie facade torn down.")
        print("\n[12] Cleaning up test files/directories...")
        for p_cleanup in [sandbox_dir, prompt_dir]:
            if p_cleanup.exists():
                shutil.rmtree(p_cleanup, ignore_errors=True)
        print("Cleanup complete.")
if __name__ == "__main__":
    asyncio.run(run_local_e2e_llama_cpp_server())
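Assuming the prerequisites listed in the docstring are met (genie-tooling installed via poetry install --all-extras and a Llama.cpp server reachable at LLAMA_CPP_BASE_URL), the script can be run directly, for example with poetry run python examples/E23_local_e2e_llama_cpp_server.py. Note that step [5] may pause for interactive HITL approval on the command line.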