{
  "schema_version": "1.0",
  "surface_type": "guide_answer",
  "guide_id": "how-many-tokens-per-request",
  "question": "How do I estimate token load per request for cost modeling?",
  "canonical_url": "https://www.unitcostai.com/guides/how-many-tokens-per-request",
  "related_tool_url": "https://www.unitcostai.com/tools/ai-workflow-cost",
  "formula": "effective_input_tokens = base_prompt_tokens + retrieved_chunks * tokens_per_chunk",
  "assumptions": [
    "Use p50 and p90 ranges for input and output tokens",
    "Retrieved chunk tokens are modeled separately from base prompt tokens",
    "Use the same token profile when comparing models"
  ],
  "example": "A base prompt of 500 tokens plus 5 retrieved chunks at 180 tokens each gives 500 + 5 * 180 = 1400 effective input tokens."
}
