[{"data":1,"prerenderedAt":1215},["ShallowReactive",2],{"navigation_docs":3,"-context-engineering-token-based":208,"-context-engineering-token-based-surround":1210},[4,33,60,82,109,151],{"title":5,"path":6,"stem":7,"children":8,"icon":32},"OpenSSL","/openssl","1.openssl/1.index",[9,12,17,22,27],{"title":10,"path":6,"stem":7,"icon":11},"Overview","i-lucide-info",{"title":13,"path":14,"stem":15,"icon":16},"Certificates","/openssl/certificates","1.openssl/2.certificates","i-lucide-file-badge",{"title":18,"path":19,"stem":20,"icon":21},"Keys","/openssl/keys","1.openssl/3.keys","i-lucide-key",{"title":23,"path":24,"stem":25,"icon":26},"Encryption","/openssl/encryption","1.openssl/4.encryption","i-lucide-shield",{"title":28,"path":29,"stem":30,"icon":31},"Verification","/openssl/verification","1.openssl/5.verification","i-lucide-check-circle","i-lucide-lock",{"title":34,"path":35,"stem":36,"children":37,"icon":59},"Voice AI","/voice-ai","2.voice-ai/1.index",[38,39,44,49,54],{"title":10,"path":35,"stem":36,"icon":11},{"title":40,"path":41,"stem":42,"icon":43},"Twilio Media Streams","/voice-ai/twilio","2.voice-ai/2.twilio","i-lucide-radio",{"title":45,"path":46,"stem":47,"icon":48},"WebSocket & Streaming","/voice-ai/websocket","2.voice-ai/3.websocket","i-lucide-cable",{"title":50,"path":51,"stem":52,"icon":53},"LLM Integration","/voice-ai/llm","2.voice-ai/4.llm","i-lucide-brain",{"title":55,"path":56,"stem":57,"icon":58},"Speech Services","/voice-ai/speech","2.voice-ai/5.speech","i-lucide-audio-lines","i-lucide-phone",{"title":61,"path":62,"stem":63,"children":64,"icon":81},"Agents","/agents","3.agents/1.index",[65,66,71,76],{"title":10,"path":62,"stem":63,"icon":11},{"title":67,"path":68,"stem":69,"icon":70},"RAG","/agents/rag","3.agents/2.rag","i-lucide-database",{"title":72,"path":73,"stem":74,"icon":75},"ReAct Agents","/agents/react-agents","3.agents/3.react-agents","i-lucide-activity",{"title":77,"path":78,"stem":79,"icon":80},"OpenAI Agent","/agents/openai-agent","3.agents/4.openai-agent","i-lucide-bot","i-lucide-users",{"title":83,"path":84,"stem":85,"children":86,"icon":88},"Context Engineering","/context-engineering","4.context-engineering/1.index",[87,89,94,99,104],{"title":83,"path":84,"stem":85,"icon":88},"i-lucide-brain-circuit",{"title":90,"path":91,"stem":92,"icon":93},"Managing The Context Window","/context-engineering/managing-context-window","4.context-engineering/2.managing-context-window","i-lucide-settings",{"title":95,"path":96,"stem":97,"icon":98},"Sliding Window Strategy","/context-engineering/sliding-window","4.context-engineering/3.sliding-window","i-lucide-arrow-left-right",{"title":100,"path":101,"stem":102,"icon":103},"Token-based Management","/context-engineering/token-based","4.context-engineering/4.token-based","i-lucide-hash",{"title":105,"path":106,"stem":107,"icon":108},"Summarization Techniques","/context-engineering/summarization","4.context-engineering/5.summarization","i-lucide-file-text",{"title":110,"path":111,"stem":112,"children":113,"icon":150},"AI SDK 6","/ai-sdk","5.ai-sdk/1.index",[114,115,120,125,130,135,140,145],{"title":10,"path":111,"stem":112,"icon":11},{"title":116,"path":117,"stem":118,"icon":119},"Installation & Setup","/ai-sdk/installation","5.ai-sdk/2.installation","i-lucide-package",{"title":121,"path":122,"stem":123,"icon":124},"Model Method","/ai-sdk/model","5.ai-sdk/3.model","i-lucide-box",{"title":126,"path":127,"stem":128,"icon":129},"generateText","/ai-sdk/generate-text","5.ai-sdk/4.generate-text","i-lucide-message-square",{"title":131,"path":132,"stem":133,"icon":134},"streamText","/ai-sdk/stream-text","5.ai-sdk/5.stream-text","i-lucide-zap",{"title":136,"path":137,"stem":138,"icon":139},"Structured Output","/ai-sdk/structured-output","5.ai-sdk/6.structured-output","i-lucide-layers",{"title":141,"path":142,"stem":143,"icon":144},"Text Embeddings","/ai-sdk/embeddings","5.ai-sdk/7.embeddings","i-lucide-git-branch",{"title":146,"path":147,"stem":148,"icon":149},"Agent Loop – ToolLoopAgent","/ai-sdk/agent-loop","5.ai-sdk/8.agent-loop","i-lucide-refresh-cw","i-lucide-cpu",{"title":152,"icon":153,"path":154,"stem":155,"children":156,"page":207},"Syntax Reference","i-lucide-book-open","/syntax-reference","999.syntax-reference",[157,162,167,172,177,182,187,192,197,202],{"title":158,"path":159,"stem":160,"icon":161},"Markdown Syntax","/syntax-reference/markdown-syntax","999.syntax-reference/1.markdown-syntax","i-lucide-heading-1",{"title":163,"path":164,"stem":165,"icon":166},"Code Blocks","/syntax-reference/code-blocks","999.syntax-reference/2.code-blocks","i-lucide-code-xml",{"title":168,"path":169,"stem":170,"icon":171},"Introduction","/syntax-reference/introduction","999.syntax-reference/2.introduction","i-lucide-house",{"title":173,"path":174,"stem":175,"icon":176},"Components","/syntax-reference/components","999.syntax-reference/3.components","i-lucide-component",{"title":178,"path":179,"stem":180,"icon":181},"Installation","/syntax-reference/installation","999.syntax-reference/3.installation","i-lucide-download",{"title":183,"path":184,"stem":185,"icon":186},"Images and Embeds","/syntax-reference/images-embeds","999.syntax-reference/4.images-embeds","i-lucide-image",{"title":188,"path":189,"stem":190,"icon":191},"Project Structure","/syntax-reference/project-structure","999.syntax-reference/4.project-structure","i-lucide-folder-tree",{"title":193,"path":194,"stem":195,"icon":196},"Studio module","/syntax-reference/studio","999.syntax-reference/5.studio","i-lucide-mouse-pointer-2",{"title":198,"path":199,"stem":200,"icon":201},"Migration","/syntax-reference/migration","999.syntax-reference/6.migration","i-lucide-replace",{"title":203,"path":204,"stem":205,"icon":206},"Troubleshooting","/syntax-reference/troubleshooting","999.syntax-reference/7.troubleshooting","i-lucide-wrench",false,{"id":209,"title":100,"body":210,"description":1203,"extension":1204,"links":1205,"meta":1206,"navigation":1207,"path":101,"seo":1208,"stem":102,"__hash__":1209},"docs/4.context-engineering/4.token-based.md",{"type":211,"value":212,"toc":1159},"minimark",[213,217,222,226,231,242,246,403,407,413,417,425,431,438,444,451,560,567,573,580,706,710,716,773,779,832,838,909,913,919,922,947,953,956,972,978,981,996,1000,1006,1061,1067,1114,1120,1123,1134,1138,1141,1155],[214,215,100],"h1",{"id":216},"token-based-management",[218,219,221],"h2",{"id":220},"how-dropping-older-messages-based-on-max-tokens-works","How Dropping Older Messages Based on Max Tokens Works",[223,224,225],"p",{},"Token-based context management removes messages when the total token count exceeds a predefined limit. Unlike sliding windows that work with message count, this approach considers the actual length of messages.",[227,228,230],"h3",{"id":229},"token-counting-basics","Token Counting Basics",[232,233,238],"pre",{"className":234,"code":236,"language":237},[235],"language-text","Message: \"Hello, how are you today?\"\nTokens:  [\"Hello\", \",\", \" how\", \" are\", \" you\", \" today\", \"?\"]\nCount: 7 tokens\n\nLonger Message: \"I'm working on implementing a context management system\"\nTokens:  [\"I\", \"'m\", \" working\", \" on\", \" implementing\", \" a\", \" context\", \n         \" management\", \" system\"]\nCount: 9 tokens\n","text",[239,240,236],"code",{"__ignoreMap":241},"",[227,243,245],{"id":244},"implementation-logic","Implementation Logic",[232,247,251],{"className":248,"code":249,"language":250,"meta":241,"style":241},"language-python shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","class TokenBasedManager:\n    def __init__(self, max_tokens=4000):\n        self.max_tokens = max_tokens\n        self.messages = []\n        self.tokenizer = get_tokenizer()  # GPT tokenizer, etc.\n    \n    def add_message(self, role, content):\n        message = {\"role\": role, \"content\": content}\n        self.messages.append(message)\n        self._trim_context()\n    \n    def _trim_context(self):\n        total_tokens = self._count_tokens(self.messages)\n        \n        while total_tokens > self.max_tokens and len(self.messages) > 1:\n            # Remove the oldest non-system message\n            for i, msg in enumerate(self.messages):\n                if msg['role'] != 'system':  # Keep system messages\n                    removed = self.messages.pop(i)\n                    total_tokens -= self._count_tokens([removed])\n                    break\n    \n    def _count_tokens(self, messages):\n        text = \" \".join([msg['content'] for msg in messages])\n        return len(self.tokenizer.encode(text))\n","python",[239,252,253,261,267,273,279,285,291,297,303,309,315,320,326,332,338,344,350,356,362,368,374,380,385,391,397],{"__ignoreMap":241},[254,255,258],"span",{"class":256,"line":257},"line",1,[254,259,260],{},"class TokenBasedManager:\n",[254,262,264],{"class":256,"line":263},2,[254,265,266],{},"    def __init__(self, max_tokens=4000):\n",[254,268,270],{"class":256,"line":269},3,[254,271,272],{},"        self.max_tokens = max_tokens\n",[254,274,276],{"class":256,"line":275},4,[254,277,278],{},"        self.messages = []\n",[254,280,282],{"class":256,"line":281},5,[254,283,284],{},"        self.tokenizer = get_tokenizer()  # GPT tokenizer, etc.\n",[254,286,288],{"class":256,"line":287},6,[254,289,290],{},"    \n",[254,292,294],{"class":256,"line":293},7,[254,295,296],{},"    def add_message(self, role, content):\n",[254,298,300],{"class":256,"line":299},8,[254,301,302],{},"        message = {\"role\": role, \"content\": content}\n",[254,304,306],{"class":256,"line":305},9,[254,307,308],{},"        self.messages.append(message)\n",[254,310,312],{"class":256,"line":311},10,[254,313,314],{},"        self._trim_context()\n",[254,316,318],{"class":256,"line":317},11,[254,319,290],{},[254,321,323],{"class":256,"line":322},12,[254,324,325],{},"    def _trim_context(self):\n",[254,327,329],{"class":256,"line":328},13,[254,330,331],{},"        total_tokens = self._count_tokens(self.messages)\n",[254,333,335],{"class":256,"line":334},14,[254,336,337],{},"        \n",[254,339,341],{"class":256,"line":340},15,[254,342,343],{},"        while total_tokens > self.max_tokens and len(self.messages) > 1:\n",[254,345,347],{"class":256,"line":346},16,[254,348,349],{},"            # Remove the oldest non-system message\n",[254,351,353],{"class":256,"line":352},17,[254,354,355],{},"            for i, msg in enumerate(self.messages):\n",[254,357,359],{"class":256,"line":358},18,[254,360,361],{},"                if msg['role'] != 'system':  # Keep system messages\n",[254,363,365],{"class":256,"line":364},19,[254,366,367],{},"                    removed = self.messages.pop(i)\n",[254,369,371],{"class":256,"line":370},20,[254,372,373],{},"                    total_tokens -= self._count_tokens([removed])\n",[254,375,377],{"class":256,"line":376},21,[254,378,379],{},"                    break\n",[254,381,383],{"class":256,"line":382},22,[254,384,290],{},[254,386,388],{"class":256,"line":387},23,[254,389,390],{},"    def _count_tokens(self, messages):\n",[254,392,394],{"class":256,"line":393},24,[254,395,396],{},"        text = \" \".join([msg['content'] for msg in messages])\n",[254,398,400],{"class":256,"line":399},25,[254,401,402],{},"        return len(self.tokenizer.encode(text))\n",[227,404,406],{"id":405},"visual-representation","Visual Representation",[232,408,411],{"className":409,"code":410,"language":237},[235],"Initial State (Max: 10 tokens):\n┌─────────────────────────────────────┐\n│ [User: \"Hi\"] (2 tokens)             │\n│ [Bot: \"Hello!\"] (3 tokens)          │\n│ [User: \"How are you?\"] (4 tokens)   │\n│ Total: 9/10 tokens ✓                │\n└─────────────────────────────────────┘\n\nAdd Long Message:\n┌─────────────────────────────────────┐\n│ [User: \"Hi\"] (2 tokens)             │\n│ [Bot: \"Hello!\"] (3 tokens)          │\n│ [User: \"How are you?\"] (4 tokens)   │\n│ [Bot: \"I'm implementing context\"] (8)│\n│ Total: 17/10 tokens ✗               │\n└─────────────────────────────────────┘\n\nAfter Trimming:\n┌─────────────────────────────────────┐\n│ [User: \"How are you?\"] (4 tokens)   │\n│ [Bot: \"I'm implementing context\"] (8)│\n│ Total: 12/10 tokens ✗               │\n└─────────────────────────────────────┘\n\nFinal State:\n┌─────────────────────────────────────┐\n│ [Bot: \"I'm implementing context\"] (8)│\n│ Total: 8/10 tokens ✓                │\n└─────────────────────────────────────┘\n",[239,412,410],{"__ignoreMap":241},[218,414,416],{"id":415},"problems-with-token-based-dropping","Problems with Token-based Dropping",[227,418,420,421],{"id":419},"_1-unequal-message-treatment","1. ",[422,423,424],"strong",{},"Unequal Message Treatment",[232,426,429],{"className":427,"code":428,"language":237},[235],"Conversation:\nUser: \"Hi\"                    (2 tokens)\nAssistant: \"Hello!\"            (3 tokens)\nUser: \"Can you help me debug this complex React application that uses hooks for state management and context API for global state?\" (25 tokens)\n\nWhen trimming occurs:\n- Short messages might be kept\n- Long messages get dropped first\n- Important information in long messages is lost\n",[239,430,428],{"__ignoreMap":241},[227,432,434,435],{"id":433},"_2-loss-of-message-coherence","2. ",[422,436,437],{},"Loss of Message Coherence",[232,439,442],{"className":440,"code":441,"language":237},[235],"Original Exchange:\nUser: \"I have three issues with my code:\n1. Memory leak in component A\n2. Performance problem with function B  \n3. UI bug in component C\"\nAssistant: \"Let me help with all three issues...\"\n\nAfter token-based trimming:\nUser: \"UI bug in component C\"  ← Only this part remains\nAssistant: \"What UI bug?\"       ← Lost context of other issues\n",[239,443,441],{"__ignoreMap":241},[227,445,447,448],{"id":446},"_3-system-prompt-vulnerability","3. ",[422,449,450],{},"System Prompt Vulnerability",[232,452,454],{"className":248,"code":453,"language":250,"meta":241,"style":241},"# Problem: System prompts can be accidentally removed\ndef bad_trim_logic(messages, max_tokens):\n    total = count_tokens(messages)\n    while total > max_tokens:\n        messages.pop(0)  # Removes system prompt too!\n        total = count_tokens(messages)\n    return messages\n\n# Better approach:\ndef good_trim_logic(messages, max_tokens):\n    system_msgs = [m for m in messages if m['role'] == 'system']\n    other_msgs = [m for m in messages if m['role'] != 'system']\n    \n    # Only trim non-system messages\n    while count_tokens(system_msgs + other_msgs) > max_tokens:\n        if other_msgs:\n            other_msgs.pop(0)\n        else:\n            break  # Can't trim system messages\n    \n    return system_msgs + other_msgs\n",[239,455,456,461,466,471,476,481,486,491,497,502,507,512,517,521,526,531,536,541,546,551,555],{"__ignoreMap":241},[254,457,458],{"class":256,"line":257},[254,459,460],{},"# Problem: System prompts can be accidentally removed\n",[254,462,463],{"class":256,"line":263},[254,464,465],{},"def bad_trim_logic(messages, max_tokens):\n",[254,467,468],{"class":256,"line":269},[254,469,470],{},"    total = count_tokens(messages)\n",[254,472,473],{"class":256,"line":275},[254,474,475],{},"    while total > max_tokens:\n",[254,477,478],{"class":256,"line":281},[254,479,480],{},"        messages.pop(0)  # Removes system prompt too!\n",[254,482,483],{"class":256,"line":287},[254,484,485],{},"        total = count_tokens(messages)\n",[254,487,488],{"class":256,"line":293},[254,489,490],{},"    return messages\n",[254,492,493],{"class":256,"line":299},[254,494,496],{"emptyLinePlaceholder":495},true,"\n",[254,498,499],{"class":256,"line":305},[254,500,501],{},"# Better approach:\n",[254,503,504],{"class":256,"line":311},[254,505,506],{},"def good_trim_logic(messages, max_tokens):\n",[254,508,509],{"class":256,"line":317},[254,510,511],{},"    system_msgs = [m for m in messages if m['role'] == 'system']\n",[254,513,514],{"class":256,"line":322},[254,515,516],{},"    other_msgs = [m for m in messages if m['role'] != 'system']\n",[254,518,519],{"class":256,"line":328},[254,520,290],{},[254,522,523],{"class":256,"line":334},[254,524,525],{},"    # Only trim non-system messages\n",[254,527,528],{"class":256,"line":340},[254,529,530],{},"    while count_tokens(system_msgs + other_msgs) > max_tokens:\n",[254,532,533],{"class":256,"line":346},[254,534,535],{},"        if other_msgs:\n",[254,537,538],{"class":256,"line":352},[254,539,540],{},"            other_msgs.pop(0)\n",[254,542,543],{"class":256,"line":358},[254,544,545],{},"        else:\n",[254,547,548],{"class":256,"line":364},[254,549,550],{},"            break  # Can't trim system messages\n",[254,552,553],{"class":256,"line":370},[254,554,290],{},[254,556,557],{"class":256,"line":376},[254,558,559],{},"    return system_msgs + other_msgs\n",[227,561,563,564],{"id":562},"_4-inefficient-space-usage","4. ",[422,565,566],{},"Inefficient Space Usage",[232,568,571],{"className":569,"code":570,"language":237},[235],"Scenario: Many short messages vs few long messages\n\nMany Short Messages:\n[Hi] [Hello] [How] [are] [you?] [Great!] [Thanks] [Bye]\nTotal: 8 messages, ~12 tokens\n\nFew Long Messages:  \n[\"I'm working on a complex distributed system...\"] (15 tokens)\n[\"The architecture involves microservices...\"] (18 tokens)\n\nToken-based dropping treats both the same way, but the context\nvalue differs significantly.\n",[239,572,570],{"__ignoreMap":241},[227,574,576,577],{"id":575},"_5-token-counting-overhead","5. ",[422,578,579],{},"Token Counting Overhead",[232,581,583],{"className":248,"code":582,"language":250,"meta":241,"style":241},"# Each trim operation requires:\n# 1. Tokenize all messages\n# 2. Count tokens\n# 3. Remove messages\n# 4. Repeat if still over limit\n\ndef count_tokens(messages):\n    # This is computationally expensive!\n    total = 0\n    for msg in messages:\n        total += len(tokenizer.encode(msg['content']))\n    return total\n\n# Optimization: Cache token counts\nclass OptimizedTokenManager:\n    def __init__(self, max_tokens):\n        self.max_tokens = max_tokens\n        self.messages = []  # [(message, token_count), ...]\n        self.total_tokens = 0\n    \n    def add_message(self, message):\n        token_count = len(tokenizer.encode(message['content']))\n        self.messages.append((message, token_count))\n        self.total_tokens += token_count\n        self._trim()\n",[239,584,585,590,595,600,605,610,614,619,624,629,634,639,644,648,653,658,663,667,672,677,681,686,691,696,701],{"__ignoreMap":241},[254,586,587],{"class":256,"line":257},[254,588,589],{},"# Each trim operation requires:\n",[254,591,592],{"class":256,"line":263},[254,593,594],{},"# 1. Tokenize all messages\n",[254,596,597],{"class":256,"line":269},[254,598,599],{},"# 2. Count tokens\n",[254,601,602],{"class":256,"line":275},[254,603,604],{},"# 3. Remove messages\n",[254,606,607],{"class":256,"line":281},[254,608,609],{},"# 4. Repeat if still over limit\n",[254,611,612],{"class":256,"line":287},[254,613,496],{"emptyLinePlaceholder":495},[254,615,616],{"class":256,"line":293},[254,617,618],{},"def count_tokens(messages):\n",[254,620,621],{"class":256,"line":299},[254,622,623],{},"    # This is computationally expensive!\n",[254,625,626],{"class":256,"line":305},[254,627,628],{},"    total = 0\n",[254,630,631],{"class":256,"line":311},[254,632,633],{},"    for msg in messages:\n",[254,635,636],{"class":256,"line":317},[254,637,638],{},"        total += len(tokenizer.encode(msg['content']))\n",[254,640,641],{"class":256,"line":322},[254,642,643],{},"    return total\n",[254,645,646],{"class":256,"line":328},[254,647,496],{"emptyLinePlaceholder":495},[254,649,650],{"class":256,"line":334},[254,651,652],{},"# Optimization: Cache token counts\n",[254,654,655],{"class":256,"line":340},[254,656,657],{},"class OptimizedTokenManager:\n",[254,659,660],{"class":256,"line":346},[254,661,662],{},"    def __init__(self, max_tokens):\n",[254,664,665],{"class":256,"line":352},[254,666,272],{},[254,668,669],{"class":256,"line":358},[254,670,671],{},"        self.messages = []  # [(message, token_count), ...]\n",[254,673,674],{"class":256,"line":364},[254,675,676],{},"        self.total_tokens = 0\n",[254,678,679],{"class":256,"line":370},[254,680,290],{},[254,682,683],{"class":256,"line":376},[254,684,685],{},"    def add_message(self, message):\n",[254,687,688],{"class":256,"line":382},[254,689,690],{},"        token_count = len(tokenizer.encode(message['content']))\n",[254,692,693],{"class":256,"line":387},[254,694,695],{},"        self.messages.append((message, token_count))\n",[254,697,698],{"class":256,"line":393},[254,699,700],{},"        self.total_tokens += token_count\n",[254,702,703],{"class":256,"line":399},[254,704,705],{},"        self._trim()\n",[218,707,709],{"id":708},"advanced-token-based-strategies","Advanced Token-based Strategies",[227,711,420,713],{"id":712},"_1-smart-token-allocation",[422,714,715],{},"Smart Token Allocation",[232,717,719],{"className":248,"code":718,"language":250,"meta":241,"style":241},"def smart_token_allocation(messages, max_tokens):\n    # Allocate tokens based on message importance\n    system_tokens = 100    # Reserve for system prompt\n    recent_tokens = 500    # Reserve for recent messages\n    historical_tokens = max_tokens - system_tokens - recent_tokens\n    \n    system_msgs = [m for m in messages if m['role'] == 'system']\n    recent_msgs = get_recent_messages(messages, recent_tokens)\n    historical_msgs = select_important_messages(messages, historical_tokens)\n    \n    return system_msgs + recent_msgs + historical_msgs\n",[239,720,721,726,731,736,741,746,750,754,759,764,768],{"__ignoreMap":241},[254,722,723],{"class":256,"line":257},[254,724,725],{},"def smart_token_allocation(messages, max_tokens):\n",[254,727,728],{"class":256,"line":263},[254,729,730],{},"    # Allocate tokens based on message importance\n",[254,732,733],{"class":256,"line":269},[254,734,735],{},"    system_tokens = 100    # Reserve for system prompt\n",[254,737,738],{"class":256,"line":275},[254,739,740],{},"    recent_tokens = 500    # Reserve for recent messages\n",[254,742,743],{"class":256,"line":281},[254,744,745],{},"    historical_tokens = max_tokens - system_tokens - recent_tokens\n",[254,747,748],{"class":256,"line":287},[254,749,290],{},[254,751,752],{"class":256,"line":293},[254,753,511],{},[254,755,756],{"class":256,"line":299},[254,757,758],{},"    recent_msgs = get_recent_messages(messages, recent_tokens)\n",[254,760,761],{"class":256,"line":305},[254,762,763],{},"    historical_msgs = select_important_messages(messages, historical_tokens)\n",[254,765,766],{"class":256,"line":311},[254,767,290],{},[254,769,770],{"class":256,"line":317},[254,771,772],{},"    return system_msgs + recent_msgs + historical_msgs\n",[227,774,434,776],{"id":775},"_2-message-chunking",[422,777,778],{},"Message Chunking",[232,780,782],{"className":248,"code":781,"language":250,"meta":241,"style":241},"def chunk_long_messages(messages, max_chunk_size=100):\n    chunked = []\n    for msg in messages:\n        if count_tokens([msg]) > max_chunk_size:\n            # Split long messages into chunks\n            chunks = split_into_chunks(msg, max_chunk_size)\n            chunked.extend(chunks)\n        else:\n            chunked.append(msg)\n    return chunked\n",[239,783,784,789,794,798,803,808,813,818,822,827],{"__ignoreMap":241},[254,785,786],{"class":256,"line":257},[254,787,788],{},"def chunk_long_messages(messages, max_chunk_size=100):\n",[254,790,791],{"class":256,"line":263},[254,792,793],{},"    chunked = []\n",[254,795,796],{"class":256,"line":269},[254,797,633],{},[254,799,800],{"class":256,"line":275},[254,801,802],{},"        if count_tokens([msg]) > max_chunk_size:\n",[254,804,805],{"class":256,"line":281},[254,806,807],{},"            # Split long messages into chunks\n",[254,809,810],{"class":256,"line":287},[254,811,812],{},"            chunks = split_into_chunks(msg, max_chunk_size)\n",[254,814,815],{"class":256,"line":293},[254,816,817],{},"            chunked.extend(chunks)\n",[254,819,820],{"class":256,"line":299},[254,821,545],{},[254,823,824],{"class":256,"line":305},[254,825,826],{},"            chunked.append(msg)\n",[254,828,829],{"class":256,"line":311},[254,830,831],{},"    return chunked\n",[227,833,447,835],{"id":834},"_3-token-aware-sliding-window",[422,836,837],{},"Token-aware Sliding Window",[232,839,841],{"className":248,"code":840,"language":250,"meta":241,"style":241},"class TokenAwareSlidingWindow:\n    def __init__(self, max_tokens):\n        self.max_tokens = max_tokens\n        self.messages = []\n    \n    def add_message(self, message):\n        self.messages.append(message)\n        \n        # Remove messages until we fit in token limit\n        while self._total_tokens() > self.max_tokens:\n            # Remove oldest non-system message\n            for i, msg in enumerate(self.messages):\n                if msg['role'] != 'system':\n                    self.messages.pop(i)\n                    break\n",[239,842,843,848,852,856,860,864,868,872,876,881,886,891,895,900,905],{"__ignoreMap":241},[254,844,845],{"class":256,"line":257},[254,846,847],{},"class TokenAwareSlidingWindow:\n",[254,849,850],{"class":256,"line":263},[254,851,662],{},[254,853,854],{"class":256,"line":269},[254,855,272],{},[254,857,858],{"class":256,"line":275},[254,859,278],{},[254,861,862],{"class":256,"line":281},[254,863,290],{},[254,865,866],{"class":256,"line":287},[254,867,685],{},[254,869,870],{"class":256,"line":293},[254,871,308],{},[254,873,874],{"class":256,"line":299},[254,875,337],{},[254,877,878],{"class":256,"line":305},[254,879,880],{},"        # Remove messages until we fit in token limit\n",[254,882,883],{"class":256,"line":311},[254,884,885],{},"        while self._total_tokens() > self.max_tokens:\n",[254,887,888],{"class":256,"line":317},[254,889,890],{},"            # Remove oldest non-system message\n",[254,892,893],{"class":256,"line":322},[254,894,355],{},[254,896,897],{"class":256,"line":328},[254,898,899],{},"                if msg['role'] != 'system':\n",[254,901,902],{"class":256,"line":334},[254,903,904],{},"                    self.messages.pop(i)\n",[254,906,907],{"class":256,"line":340},[254,908,379],{},[218,910,912],{"id":911},"when-token-based-management-works-well","When Token-based Management Works Well",[227,914,420,916],{"id":915},"_1-api-cost-optimization",[422,917,918],{},"API Cost Optimization",[223,920,921],{},"When you need precise control over costs:",[232,923,925],{"className":248,"code":924,"language":250,"meta":241,"style":241},"# Estimate API cost based on tokens\ndef estimate_cost(tokens, model=\"gpt-4\"):\n    pricing = {\"gpt-4\": 0.03/1000, \"gpt-3.5\": 0.002/1000}\n    return tokens * pricing[model]\n",[239,926,927,932,937,942],{"__ignoreMap":241},[254,928,929],{"class":256,"line":257},[254,930,931],{},"# Estimate API cost based on tokens\n",[254,933,934],{"class":256,"line":263},[254,935,936],{},"def estimate_cost(tokens, model=\"gpt-4\"):\n",[254,938,939],{"class":256,"line":269},[254,940,941],{},"    pricing = {\"gpt-4\": 0.03/1000, \"gpt-3.5\": 0.002/1000}\n",[254,943,944],{"class":256,"line":275},[254,945,946],{},"    return tokens * pricing[model]\n",[227,948,434,950],{"id":949},"_2-model-specific-constraints",[422,951,952],{},"Model-specific Constraints",[223,954,955],{},"Different models have different token limits:",[957,958,959,963,966,969],"ul",{},[960,961,962],"li",{},"GPT-3.5: 4K tokens",[960,964,965],{},"GPT-4: 8K/32K tokens",[960,967,968],{},"Claude: 100K tokens",[960,970,971],{},"Local models: Varies widely",[227,973,447,975],{"id":974},"_3-performance-requirements",[422,976,977],{},"Performance Requirements",[223,979,980],{},"When processing time is critical:",[232,982,984],{"className":248,"code":983,"language":250,"meta":241,"style":241},"# Larger context = slower processing\nprocessing_time = base_time + (tokens * time_per_token)\n",[239,985,986,991],{"__ignoreMap":241},[254,987,988],{"class":256,"line":257},[254,989,990],{},"# Larger context = slower processing\n",[254,992,993],{"class":256,"line":263},[254,994,995],{},"processing_time = base_time + (tokens * time_per_token)\n",[218,997,999],{"id":998},"best-practices","Best Practices",[227,1001,420,1003],{"id":1002},"_1-always-protect-system-messages",[422,1004,1005],{},"Always Protect System Messages",[232,1007,1009],{"className":248,"code":1008,"language":250,"meta":241,"style":241},"def trim_context(messages, max_tokens):\n    system_msgs = [m for m in messages if m['role'] == 'system']\n    other_msgs = [m for m in messages if m['role'] != 'system']\n    \n    # Only trim non-system messages\n    while count_tokens(system_msgs + other_msgs) > max_tokens:\n        if other_msgs:\n            other_msgs.pop(0)\n        else:\n            break  # Emergency: even system messages too long\n    \n    return system_msgs + other_msgs\n",[239,1010,1011,1016,1020,1024,1028,1032,1036,1040,1044,1048,1053,1057],{"__ignoreMap":241},[254,1012,1013],{"class":256,"line":257},[254,1014,1015],{},"def trim_context(messages, max_tokens):\n",[254,1017,1018],{"class":256,"line":263},[254,1019,511],{},[254,1021,1022],{"class":256,"line":269},[254,1023,516],{},[254,1025,1026],{"class":256,"line":275},[254,1027,290],{},[254,1029,1030],{"class":256,"line":281},[254,1031,525],{},[254,1033,1034],{"class":256,"line":287},[254,1035,530],{},[254,1037,1038],{"class":256,"line":293},[254,1039,535],{},[254,1041,1042],{"class":256,"line":299},[254,1043,540],{},[254,1045,1046],{"class":256,"line":305},[254,1047,545],{},[254,1049,1050],{"class":256,"line":311},[254,1051,1052],{},"            break  # Emergency: even system messages too long\n",[254,1054,1055],{"class":256,"line":317},[254,1056,290],{},[254,1058,1059],{"class":256,"line":322},[254,1060,559],{},[227,1062,434,1064],{"id":1063},"_2-monitor-token-usage",[422,1065,1066],{},"Monitor Token Usage",[232,1068,1070],{"className":248,"code":1069,"language":250,"meta":241,"style":241},"class ContextMonitor:\n    def __init__(self, max_tokens):\n        self.max_tokens = max_tokens\n        self.usage_history = []\n    \n    def log_usage(self, current_tokens):\n        self.usage_history.append(current_tokens)\n        if current_tokens > self.max_tokens * 0.8:\n            print(\"Warning: Approaching token limit!\")\n",[239,1071,1072,1077,1081,1085,1090,1094,1099,1104,1109],{"__ignoreMap":241},[254,1073,1074],{"class":256,"line":257},[254,1075,1076],{},"class ContextMonitor:\n",[254,1078,1079],{"class":256,"line":263},[254,1080,662],{},[254,1082,1083],{"class":256,"line":269},[254,1084,272],{},[254,1086,1087],{"class":256,"line":275},[254,1088,1089],{},"        self.usage_history = []\n",[254,1091,1092],{"class":256,"line":281},[254,1093,290],{},[254,1095,1096],{"class":256,"line":287},[254,1097,1098],{},"    def log_usage(self, current_tokens):\n",[254,1100,1101],{"class":256,"line":293},[254,1102,1103],{},"        self.usage_history.append(current_tokens)\n",[254,1105,1106],{"class":256,"line":299},[254,1107,1108],{},"        if current_tokens > self.max_tokens * 0.8:\n",[254,1110,1111],{"class":256,"line":305},[254,1112,1113],{},"            print(\"Warning: Approaching token limit!\")\n",[227,1115,447,1117],{"id":1116},"_3-combine-with-other-strategies",[422,1118,1119],{},"Combine with Other Strategies",[223,1121,1122],{},"Token-based management works best when combined with:",[957,1124,1125,1128,1131],{},[960,1126,1127],{},"Message prioritization",[960,1129,1130],{},"Summarization",[960,1132,1133],{},"Semantic relevance scoring",[218,1135,1137],{"id":1136},"summary","Summary",[223,1139,1140],{},"Token-based context management provides precise control over context size but can be computationally expensive and may lose important information in long messages. It's most effective when:",[957,1142,1143,1146,1149,1152],{},[960,1144,1145],{},"API costs are a major concern",[960,1147,1148],{},"Model token limits are strict",[960,1150,1151],{},"Processing performance is critical",[960,1153,1154],{},"Combined with intelligent message selection strategies",[1156,1157,1158],"style",{},"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":241,"searchDepth":263,"depth":263,"links":1160},[1161,1166,1178,1186,1194,1202],{"id":220,"depth":263,"text":221,"children":1162},[1163,1164,1165],{"id":229,"depth":269,"text":230},{"id":244,"depth":269,"text":245},{"id":405,"depth":269,"text":406},{"id":415,"depth":263,"text":416,"children":1167},[1168,1170,1172,1174,1176],{"id":419,"depth":269,"text":1169},"1. Unequal Message Treatment",{"id":433,"depth":269,"text":1171},"2. Loss of Message Coherence",{"id":446,"depth":269,"text":1173},"3. System Prompt Vulnerability",{"id":562,"depth":269,"text":1175},"4. Inefficient Space Usage",{"id":575,"depth":269,"text":1177},"5. Token Counting Overhead",{"id":708,"depth":263,"text":709,"children":1179},[1180,1182,1184],{"id":712,"depth":269,"text":1181},"1. Smart Token Allocation",{"id":775,"depth":269,"text":1183},"2. Message Chunking",{"id":834,"depth":269,"text":1185},"3. Token-aware Sliding Window",{"id":911,"depth":263,"text":912,"children":1187},[1188,1190,1192],{"id":915,"depth":269,"text":1189},"1. API Cost Optimization",{"id":949,"depth":269,"text":1191},"2. Model-specific Constraints",{"id":974,"depth":269,"text":1193},"3. Performance Requirements",{"id":998,"depth":263,"text":999,"children":1195},[1196,1198,1200],{"id":1002,"depth":269,"text":1197},"1. Always Protect System Messages",{"id":1063,"depth":269,"text":1199},"2. Monitor Token Usage",{"id":1116,"depth":269,"text":1201},"3. Combine with Other Strategies",{"id":1136,"depth":263,"text":1137},"Dropping older messages based on max_token limits and their challenges.","md",null,{},{"icon":103},{"title":100,"description":1203},"dPUffgMGgXb1qGb3BpAaETs3meBSvydsTKDNZ7uI4Fo",[1211,1213],{"title":95,"path":96,"stem":97,"description":1212,"icon":98,"children":-1},"How sliding context windows work and their limitations.",{"title":105,"path":106,"stem":107,"description":1214,"icon":108,"children":-1},"Compressing older messages before dropping them to preserve context.",1772944811360]