Tutorial: Token Usage Tracking (E20)
This tutorial corresponds to the example file examples/E20_token_usage_example.py.
It demonstrates how to monitor LLM token consumption for cost and performance analysis. It shows how to:
- Configure a TokenUsageRecorderPlugin (e.g., in_memory_token_recorder).
- Observe how token usage is automatically recorded after genie.llm calls.
- Retrieve and interpret a usage summary using genie.usage.get_summary(), as sketched below.
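In short: enable a recorder via FeatureSettings, make genie.llm calls as usual, then read the summary from genie.usage. The following condensed sketch trims the full example below to those essentials:

import asyncio
from genie_tooling.config.features import FeatureSettings
from genie_tooling.config.models import MiddlewareConfig
from genie_tooling.genie import Genie

async def main():
    config = MiddlewareConfig(
        features=FeatureSettings(
            llm="ollama",
            llm_ollama_model_name="mistral:latest",
            token_usage_recorder="in_memory_token_recorder",
        )
    )
    genie = await Genie.create(config=config)
    try:
        await genie.llm.generate("What is the capital of France?")
        summary = await genie.usage.get_summary()  # usage was recorded automatically
        print(summary)
    finally:
        await genie.close()

asyncio.run(main())

The full example additionally shows the OpenTelemetry metrics variant and prints a per-model breakdown of the summary.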
Example Code
examples/E20_token_usage_example.py
"""
Example: Using Token Usage Tracking (genie.usage)

This example demonstrates how to enable and use the token usage tracking
feature of Genie Tooling. It shows configuration for both the
InMemoryTokenUsageRecorderPlugin and the OpenTelemetryMetricsTokenRecorderPlugin.
To Run with InMemory Recorder:
1. Ensure Genie Tooling is installed (poetry install --all-extras).
2. Ensure Ollama is running and 'mistral:latest' is pulled (or configure a different LLM).
3. Run from the root of the project:
poetry run python examples/E20_token_usage_example.py
To Run with OpenTelemetry Metrics Recorder (e.g., to Prometheus via OTel Collector):
1. Ensure OTel dependencies: poetry install --extras observability
2. Set up an OTel Collector configured to scrape Prometheus metrics and export them
(e.g., to a Prometheus instance).
3. Modify app_config below to use token_usage_recorder="otel_metrics_recorder"
   and ensure observability_tracer="otel_tracer" is also configured for the
   OTel SDK to be initialized.
4. Run the script. Metrics should be available in your Prometheus/Grafana setup.
"""
import asyncio
import json
import logging
from typing import Optional
from genie_tooling.config.features import FeatureSettings
from genie_tooling.config.models import MiddlewareConfig
from genie_tooling.genie import Genie

async def run_token_usage_demo():
    print("--- Token Usage Tracking Example ---")
    logging.basicConfig(level=logging.INFO)
app_config_in_memory = MiddlewareConfig(
features=FeatureSettings(
llm="ollama",
llm_ollama_model_name="mistral:latest",
token_usage_recorder="in_memory_token_recorder"
)
)
app_config_otel_metrics = MiddlewareConfig(
features=FeatureSettings(
llm="ollama",
llm_ollama_model_name="mistral:latest",
token_usage_recorder="otel_metrics_recorder",
observability_tracer="otel_tracer" # OTel SDK needs to be initialized
),
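        # NOTE: the key below is the tracer plugin's full ID ("otel_tracer_plugin_v1"),
        # which presumably corresponds to the "otel_tracer" alias used in FeatureSettings above.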
observability_tracer_configurations={
"otel_tracer_plugin_v1": {
"otel_service_name": "genie-e20-token-metrics-app",
"exporter_type": "console" # Or "otlp_http" to send to a collector
}
}
)
# --- CHOOSE CONFIGURATION TO RUN ---
app_config = app_config_in_memory
# app_config = app_config_otel_metrics # Uncomment to use OTel Metrics
# ------------------------------------
genie: Optional[Genie] = None
try:
print(f"\nInitializing Genie with token usage recorder: {app_config.features.token_usage_recorder}...")
genie = await Genie.create(config=app_config)
print("Genie initialized!")
print("\n--- Making LLM calls ---")
try:
await genie.llm.chat([{"role": "user", "content": "Tell me a short story about a robot."}])
print("First LLM chat call complete.")
await genie.llm.generate("What is the capital of France?")
print("Second LLM generate call complete.")
await genie.llm.chat([{"role": "user", "content": "Another question for the same model."}])
print("Third LLM chat call complete.")
except Exception as e_llm:
print(f"LLM call failed (expected if Ollama not running): {e_llm}")
print("Token usage summary might be empty or incomplete.")
print("\n--- Token Usage Summary ---")
# For in-memory, get_summary returns a dict. For OTel, it returns a status message.
summary = await genie.usage.get_summary()
print(json.dumps(summary, indent=2))
if app_config.features.token_usage_recorder == "in_memory_token_recorder" and summary:
recorder_id = "in_memory_token_usage_recorder_v1"
if recorder_id in summary and isinstance(summary[recorder_id], dict) and isinstance(summary[recorder_id].get("by_model"), dict):
print("\nBreakdown by model (for in_memory_token_recorder):")
for model_name, data in summary[recorder_id]["by_model"].items():
print(f" Model: {model_name}")
print(f" Calls: {data.get('count')}")
print(f" Prompt Tokens: {data.get('prompt')}")
print(f" Completion Tokens: {data.get('completion')}")
print(f" Total Tokens: {data.get('total')}")
elif app_config.features.token_usage_recorder == "otel_metrics_recorder":
print("\nFor 'otel_metrics_recorder', view metrics in your OpenTelemetry backend (e.g., Prometheus/Grafana).")
print("Example Prometheus queries:")
print(" - sum(rate(llm_request_tokens_prompt_total[5m])) by (llm_model_name)")
print(" - sum(rate(llm_request_tokens_completion_total[5m])) by (llm_provider_id)")
except Exception as e:
print(f"\nAn error occurred: {e}")
logging.exception("Token usage demo error details:")
finally:
if genie:
await genie.close()
print("\nGenie torn down.")
if __name__ == "__main__":
    asyncio.run(run_token_usage_demo())
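Once you have the in-memory summary dict, you can post-process it for cost analysis. Below is a minimal sketch, assuming the by_model shape read by the example above; estimate_cost and PRICES_PER_1K are illustrative, not part of Genie Tooling, and the prices are placeholders:

# Hypothetical prices per 1K tokens; substitute your provider's real rates.
PRICES_PER_1K = {"mistral:latest": {"prompt": 0.0005, "completion": 0.0015}}

def estimate_cost(summary: dict, recorder_id: str = "in_memory_token_usage_recorder_v1") -> float:
    """Estimate spend from the per-model token counts in an in-memory summary."""
    total = 0.0
    for model_name, data in summary.get(recorder_id, {}).get("by_model", {}).items():
        prices = PRICES_PER_1K.get(model_name, {"prompt": 0.0, "completion": 0.0})
        total += (data.get("prompt") or 0) / 1000 * prices["prompt"]
        total += (data.get("completion") or 0) / 1000 * prices["completion"]
    return total

With the otel_metrics_recorder, this kind of aggregation would instead be done in your metrics backend, e.g., via the Prometheus queries printed by the example.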