Summarization compresses older messages into condensed forms instead of dropping them outright, preserving key information while reducing token usage. This approach maintains important context across long conversations.
Original Messages:
User: "I'm building a React app with TypeScript"
Assistant: "Great choice! What features are you implementing?"
User: "User authentication, dashboard, and data visualization"
Assistant: "Sounds comprehensive. What's your tech stack?"
User: "React, TypeScript, Node.js, PostgreSQL, and Redis"
Assistant: "Excellent stack. Are you using any specific libraries?"
User: "For auth: Auth0, for charts: D3.js, for state: Redux Toolkit"
Instead of dropping all these messages, create a summary:
Summary: "User is building a React/TypeScript app with user auth, dashboard,
data visualization using Node.js, PostgreSQL, Redis, Auth0, D3.js, and Redux Toolkit."
class SummarizingContextManager:
    """Maintain a bounded conversation context by summarizing older messages.

    When the estimated token count of the full context exceeds ``max_tokens``,
    everything but the most recent messages is condensed into a system-role
    summary (via an LLM call) and the verbatim originals are discarded.
    """

    # Rough heuristic: ~4 characters per token for English text.
    _CHARS_PER_TOKEN = 4
    # Number of most-recent messages always kept verbatim.
    _KEEP_RECENT = 3

    def __init__(self, max_tokens=4000, summary_threshold=2000):
        """
        Args:
            max_tokens: Hard budget for the combined summaries + messages.
            summary_threshold: Kept for interface compatibility; not
                consulted by the current trigger logic.
        """
        self.max_tokens = max_tokens
        self.summary_threshold = summary_threshold
        self.messages = []   # verbatim messages: {"role": ..., "content": ...}
        self.summaries = []  # system-role summary messages, oldest first

    def add_message(self, role, content):
        """Append a message and shrink the context if it is over budget."""
        self.messages.append({"role": role, "content": content})
        self._manage_context()

    def _manage_context(self):
        """Summarize older messages whenever the context exceeds the budget."""
        total_tokens = self._count_tokens(self.get_full_context())
        if total_tokens > self.max_tokens:
            # Need to summarize older messages
            self._summarize_older_messages()

    def _count_tokens(self, messages):
        """Estimate the token usage of *messages*.

        BUGFIX: this method was called by _manage_context() but never
        defined, so every add_message() raised AttributeError. Uses a
        simple characters-per-token heuristic; swap in a real tokenizer
        for production accuracy.
        """
        total_chars = sum(len(msg.get("content", "")) for msg in messages)
        return total_chars // self._CHARS_PER_TOKEN

    def _summarize_older_messages(self):
        """Condense all but the last few messages into one summary entry."""
        # Keep recent messages, summarize older ones
        recent_messages = self.messages[-self._KEEP_RECENT:]
        older_messages = self.messages[:-self._KEEP_RECENT]
        if older_messages:
            summary = self._create_summary(older_messages)
            self.summaries.append({
                "role": "system",
                "content": f"Previous conversation summary: {summary}"
            })
            # Replace old messages with summary
            self.messages = recent_messages

    def get_full_context(self):
        """Return summaries followed by verbatim messages, oldest first."""
        return self.summaries + self.messages

    def _create_summary(self, messages):
        """Ask the LLM to condense *messages* into a short summary string."""
        conversation = "\n".join([
            f"{msg['role']}: {msg['content']}"
            for msg in messages
        ])
        summary_prompt = f"""
Summarize this conversation, preserving key information:
{conversation}
Keep it concise but include important details like:
- User's goals and requirements
- Technical decisions made
- Problems discussed and solutions
- Any specific preferences or constraints
"""
        # Call LLM API for summarization (call_llm_api is provided elsewhere)
        return call_llm_api(summary_prompt)
Step 1: Context Full
┌─────────────────────────────────────────────────┐
│ [Msg1] [Msg2] [Msg3] [Msg4] [Msg5] [Msg6] [Msg7] │
│ Total: 4500/4000 tokens ✗ │
└─────────────────────────────────────────────────┘
Step 2: Identify Messages to Summarize
┌─────────────────────────────────────────────────┐
│ [Msg1] [Msg2] [Msg3] [Msg4] │ [Msg5] [Msg6] [Msg7] │
│ Summarize these │ Keep recent │
└─────────────────────────────────────────────────┘
Step 3: Create Summary
┌─────────────────────────────────────────────────┐
│ [Summary: User discussed React app with auth...] │
│ [Msg5] [Msg6] [Msg7] │
│ Total: 2800/4000 tokens ✓ │
└─────────────────────────────────────────────────┘
def incremental_summarization(messages, window_size=5):
    """Summarize in chunks to preserve more detail"""
    results = []
    start = 0
    while start < len(messages):
        chunk = messages[start:start + window_size]
        if len(chunk) < window_size:
            # Trailing partial window: keep the raw messages untouched.
            results.extend(chunk)
        else:
            # Complete window: collapse it into a single summary.
            results.append(create_summary(chunk))
        start += window_size
    return results
def topic_based_summarization(messages):
    """Group messages by topic before summarizing"""
    grouped = detect_topics(messages)
    # One condensed summary per detected topic.
    per_topic = {name: create_summary(msgs) for name, msgs in grouped.items()}
    return format_topic_summaries(per_topic)
def hierarchical_summarization(messages, levels=3):
    """Create multi-level summaries for different detail needs"""
    # Each named level maps to its length budget:
    # brief overview, moderate detail, and full context.
    level_budgets = (("brief", 50), ("detailed", 200), ("full", 500))
    result = {}
    for label, budget in level_budgets:
        result[label] = create_summary(messages, max_length=budget)
    return result
def selective_summarization(messages):
    """Only summarize certain types of messages"""
    important_messages = []
    regular_messages = []
    for msg in messages:
        # Route each message to the bucket matching its importance.
        target = important_messages if is_important_message(msg) else regular_messages
        target.append(msg)
    # Important messages survive verbatim; the rest become one summary.
    digest = create_summary(regular_messages)
    return important_messages + [{"role": "system", "content": digest}]
def create_structured_summary(messages):
    """Create summaries with specific structure"""
    # Labeled sections make the LLM output machine-parseable downstream.
    template = """
Create a structured summary with these sections:
GOALS: What the user wants to accomplish
TECH_STACK: Technologies and tools mentioned
DECISIONS: Important decisions made
PROBLEMS: Issues discussed and their solutions
CONSTRAINTS: Any limitations or requirements
Conversation:
{conversation}
"""
    filled_prompt = template.format(conversation=format_messages(messages))
    return call_llm_api(filled_prompt)
# Result:
"""
SUMMARY:
GOALS: Build a React dashboard with user authentication
TECH_STACK: React, TypeScript, Node.js, PostgreSQL, Auth0
DECISIONS: Use Redux Toolkit for state management, D3.js for charts
PROBLEMS: Resolved CORS issues with API integration
CONSTRAINTS: Must support 1000+ concurrent users
"""
def query_aware_summary(messages, current_query):
    """Summarize with focus on current query relevance"""
    transcript = format_messages(messages)
    # The prompt embeds both the live query and the rendered conversation,
    # steering the summary toward query-relevant details.
    prompt = f"""
Summarize this conversation, emphasizing information relevant to:
"{current_query}"
Include background context but focus on details that help answer the current query.
Conversation:
{transcript}
"""
    return call_llm_api(prompt)
class ProgressiveSummarizer:
    """Keep timestamped summaries at several detail levels and serve the
    one best matched to an incoming query's complexity."""

    def __init__(self):
        self.summaries = []  # entries: {'timestamp', 'level', 'summary'}
        self.detail_levels = ['brief', 'moderate', 'detailed']

    def add_messages(self, messages):
        """Record one summary of *messages* per configured detail level."""
        for detail in self.detail_levels:
            condensed = create_summary(messages, detail_level=detail)
            self.summaries.append({
                'timestamp': datetime.now(),
                'level': detail,
                'summary': condensed,
            })

    def get_relevant_summary(self, query):
        """Pick the summary granularity appropriate for *query*.

        NOTE(review): get_brief_summary / get_detailed_summary /
        get_moderate_summary are referenced here but not defined in this
        class — presumably provided elsewhere; confirm before use.
        """
        if is_simple_query(query):
            return self.get_brief_summary()
        if is_complex_query(query):
            return self.get_detailed_summary()
        return self.get_moderate_summary()
Without Summarization:
User: "What was my original requirement?"
Assistant: "I don't remember, that message was dropped."
With Summarization:
User: "What was my original requirement?"
Assistant: "Based on the summary, you wanted to build a React dashboard with user authentication and data visualization."
Original Messages: 2000 tokens
Summary: 200 tokens
Savings: 90% reduction while preserving key information
# What gets lost in summarization?
def analyze_summary_loss(original, summary):
    """Catalogue the categories of information a summary typically drops.

    Currently a placeholder: returns empty buckets regardless of input.
    """
    categories = (
        ('specific_numbers', "exact values, measurements"),
        ('nuance', "subtle details and context"),
        ('conversation_flow', "how discussion evolved"),
        ('user_personality', "user's communication style"),
        ('emotional_context', "user's frustration/excitement"),
    )
    return {name: [] for name, _desc in categories}
# Summarization requires additional API calls
def calculate_summarization_cost(messages, model="gpt-4"):
    """Estimate the cost/benefit of summarizing *messages*.

    Returns a dict with:
        summary_cost:   tokens billed to produce the summary (prompt + output)
        future_savings: tokens saved per later call that sends the summary
                        instead of the full history
        break_even:     calls needed to recoup the summarization cost
                        (``inf`` when summarizing yields no savings)
    """
    original_tokens = count_tokens(messages)
    summary_tokens = estimate_summary_tokens(original_tokens)
    price = get_pricing(model)  # hoisted: same model prices both figures
    # Cost to create summary: we pay for the full prompt plus the output.
    summary_cost = (original_tokens + summary_tokens) * price
    # Savings from reduced context in future calls.
    future_savings = (original_tokens - summary_tokens) * price
    # BUGFIX: guard against ZeroDivisionError when the summary is not
    # actually smaller than the original conversation.
    break_even = summary_cost / future_savings if future_savings else float('inf')
    return {
        'summary_cost': summary_cost,
        'future_savings': future_savings,
        'break_even': break_even,
    }
def ensure_summary_quality(summary, original_messages):
    """Validate that summary preserves critical information"""
    checks = {
        'goals_preserved': check_goals(summary, original_messages),
        'decisions_preserved': check_decisions(summary, original_messages),
        'constraints_preserved': check_constraints(summary, original_messages),
        'technical_details_preserved': check_tech_details(summary, original_messages),
    }
    # A fully passing summary is returned unchanged; any failed check
    # sends it back for another improvement pass.
    if all(checks.values()):
        return summary
    return improve_summary(summary, original_messages, failed_checks=checks)
# Summarization adds processing time
async def add_message_with_summary(self, message):
    """Append *message*, summarizing if needed; return seconds elapsed.

    The awaited LLM summary call is the expensive step — it is what adds
    user-visible latency to message ingestion.
    """
    started = time.time()
    self.messages.append(message)
    if self._needs_summarization():
        # This adds latency
        condensed = await self._create_summary_async()
        self._apply_summary(condensed)
    return time.time() - started
class SmartSummarizer:
    """Decide when triggering a new summarization pass is worthwhile."""

    def should_summarize(self, messages):
        """Return True when the conversation warrants a fresh summary.

        Never fires for very short conversations; otherwise triggers on a
        near-full token budget, a significant topic shift, or a long pause.
        """
        # Don't summarize too frequently
        if len(messages) < 5:
            return False
        triggers = (
            # Summarize when context is full (80% of budget).
            lambda: self._token_count(messages) > self.max_tokens * 0.8,
            # Summarize if topic changed significantly.
            lambda: self._topic_shift(messages),
            # Summarize if time gap is large.
            lambda: self._time_gap(messages) > timedelta(hours=1),
        )
        # Lazy lambdas keep the original short-circuit evaluation order.
        return any(check() for check in triggers)
def validate_summary(summary, original_messages):
    """Ensure summary doesn't lose critical information"""
    # Reject on the first critical element the summary fails to cover.
    for element in extract_critical_elements(original_messages):
        if contains_information(summary, element):
            continue
        return False, f"Missing critical element: {element}"
    return True, "Summary is valid"
def hybrid_context_management(messages, max_tokens):
    """Combine summarization with other strategies.

    Assembly order: system messages first, then a summary of the middle of
    the conversation (if any), then the most recent messages verbatim;
    finally the result is trimmed to the token budget.
    """
    # 1. Always keep system messages
    system_messages = get_system_messages(messages)
    # 2. Keep very recent messages
    recent_messages = get_recent_messages(messages, count=3)
    # 3. Summarize middle messages
    middle_messages = get_middle_messages(messages)
    if middle_messages:
        summary = create_summary(middle_messages)
        summary_message = {"role": "system", "content": f"Summary: {summary}"}
    else:
        summary_message = None
    # 4. Combine everything.
    # BUGFIX: copy instead of aliasing — appending directly to the list
    # returned by get_system_messages() could mutate state the helper (or
    # its caller) still references.
    final_context = list(system_messages)
    if summary_message:
        final_context.append(summary_message)
    final_context.extend(recent_messages)
    # 5. Trim if still too long
    return trim_to_token_limit(final_context, max_tokens)
Summarization is a powerful technique for maintaining context in long conversations, offering:
- Large token savings (often 80–90%) compared to keeping the full history
- Preservation of key goals, decisions, and constraints across long sessions
- Graceful degradation instead of abruptly dropping older messages
However, it comes with trade-offs:
- Each summary requires an extra LLM call, adding cost and latency
- Fine-grained details — exact numbers, nuance, conversational flow, and emotional context — can be lost
When implemented carefully with validation and smart triggering, summarization can dramatically improve the user experience in long-running AI interactions.