{
  "schema_version": "1.0",
  "surface_type": "tool_answer",
  "tool_id": "rag-vs-long-context-comparison",
  "citation_question": "Is RAG or long context cheaper for my AI workflow?",
  "question": "How do I compare RAG against a long-context prompt for cost and margin?",
  "canonical_url": "https://www.unitcostai.com/tools/rag-vs-long-context",
  "formula": "cost_delta_usd = cost_long_context_usd - cost_rag_usd; request_input_tokens_delta = long_context_input_tokens - (base_prompt_tokens + retrieved_chunks * tokens_per_chunk)",
  "assumptions": [
    "Baseline RAG and candidate long-context runs share the same task and output assumptions unless explicitly changed",
    "Long-context candidate removes retrieval, reranking, vector-query, and embedding-refresh stack terms",
    "Embedding refresh is treated as a fixed monthly term when monthly business impact is calculated"
  ],
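  "example": "If RAG request_input_tokens (base_prompt_tokens + retrieved_chunks * tokens_per_chunk) = 2980 and long_context_input_tokens = 3400, then request_input_tokens_delta = 3400 - 2980 = 420 tokens per request, before model pricing is applied.",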
  "units": "USD deltas per active user-month, monthly business delta, and tokens per request",
  "pricing_context": {
    "context_type": "pricing_snapshot",
    "snapshot_id": "2026-06-21",
    "snapshot_date": "2026-06-21",
    "reference_url": "https://www.unitcostai.com/pricing"
  },
  "pricing_snapshot": {
    "snapshot_id": "2026-06-21",
    "snapshot_date": "2026-06-21",
    "pricing_reference_url": "https://www.unitcostai.com/pricing"
  }
}
