Introduction: Automated Prompt Optimization
DSPy revolutionizes prompt engineering by treating it as a machine learning optimization problem. Instead of manually iterating on prompts, DSPy automatically discovers effective prompt structures through systematic search and optimization algorithms.
This stage covers DSPy's signature system, automated instruction optimization with MIPROv2, few-shot example selection, and building production optimization pipelines that continuously improve prompt performance.
DSPy Fundamentals and Architecture
DSPy introduces declarative programming for LLMs through "signatures": specifications of input-output behavior that DSPy uses to generate and optimize prompts automatically.
import dspy
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
# Configure DSPy with your LLM.
# NOTE: 'your-api-key' is a placeholder. Supply a real key from an environment
# variable or secret store instead of committing it to source control.
lm = dspy.OpenAI(model='gpt-4', api_key='your-api-key')
# Hand the client to DSPy's settings object so the modules below can use it.
dspy.settings.configure(lm=lm)
class BusinessAnalysisSignature(dspy.Signature):
    """Analyze business data and provide strategic insights with recommendations"""

    # NOTE(review): the docstring above and every ``desc`` string below are
    # runtime text (presumably rendered into the prompt by DSPy), so they are
    # reproduced verbatim. Field declaration order is preserved as well.

    # --- Inputs: what the caller supplies ---
    data_context = dspy.InputField(
        desc="description of the business data being analyzed",
    )
    analysis_focus = dspy.InputField(
        desc="specific area of focus for the analysis",
    )
    stakeholder_audience = dspy.InputField(
        desc="target audience for the analysis",
    )

    # --- Outputs: what the model is asked to produce ---
    key_insights = dspy.OutputField(
        desc="3-5 critical insights from the data analysis",
    )
    strategic_recommendations = dspy.OutputField(
        desc="actionable recommendations with implementation priority",
    )
    risk_assessment = dspy.OutputField(
        desc="potential risks and mitigation strategies",
    )
    confidence_level = dspy.OutputField(
        desc="confidence in analysis (high/medium/low) with reasoning",
    )
class CustomerSupportSignature(dspy.Signature):
    """Classify and route customer support tickets with detailed analysis"""

    # NOTE(review): docstring and ``desc`` strings are runtime text and are
    # kept verbatim; only layout and comments differ from the original.

    # --- Inputs ---
    ticket_content = dspy.InputField(
        desc="full customer support ticket text",
    )
    customer_tier = dspy.InputField(
        desc="customer tier (enterprise/pro/basic)",
    )

    # --- Outputs ---
    issue_category = dspy.OutputField(
        desc="primary issue category",
    )
    urgency_level = dspy.OutputField(
        desc="urgency level: critical/high/medium/low",
    )
    routing_team = dspy.OutputField(
        desc="recommended support team",
    )
    estimated_resolution_time = dspy.OutputField(
        desc="estimated time to resolve",
    )
    escalation_needed = dspy.OutputField(
        desc="whether immediate escalation is required",
    )
class AdvancedBusinessAnalyzer(dspy.Module):
    """Business analysis module that answers via chain-of-thought reasoning.

    Three predictors are constructed over ``BusinessAnalysisSignature`` and
    exposed as attributes, but only ``reasoning_chain`` is invoked by
    ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Plain single-step predictor. Kept as an attribute for callers that
        # want a cheaper direct prediction; ``forward`` does not call it.
        self.analyzer = dspy.Predict(BusinessAnalysisSignature)
        # Chain-of-thought predictor used by ``forward``.
        self.reasoning_chain = dspy.ChainOfThought(BusinessAnalysisSignature)
        # Comparison predictor. Constructed for parity with the original
        # interface; ``forward`` does not call it.
        self.deep_analyzer = dspy.MultiChainComparison(BusinessAnalysisSignature)

    def forward(self, data_context, analysis_focus, stakeholder_audience):
        """Run the chain-of-thought analysis and return its prediction.

        Args:
            data_context: Description of the business data being analyzed.
            analysis_focus: Specific area the analysis should focus on.
            stakeholder_audience: Audience the analysis is written for.

        Returns:
            The prediction produced by ``self.reasoning_chain``.
        """
        # The original implementation also invoked ``self.analyzer`` here and
        # discarded its output, costing an extra LM call per invocation with
        # no effect on the returned value. That call has been removed.
        return self.reasoning_chain(
            data_context=data_context,
            analysis_focus=analysis_focus,
            stakeholder_audience=stakeholder_audience,
        )
class OptimizedSupportClassifier(dspy.Module):
    """Classify a support ticket, then score confidence in that classification."""

    def __init__(self):
        super().__init__()
        # Primary classifier. The original code also built
        # ``dspy.FewShot(CustomerSupportSignature, k=5)``, but DSPy ships no
        # ``FewShot`` module, so construction failed with AttributeError.
        # NOTE(review): few-shot demos are normally attached to a predictor by
        # an optimizer (e.g. BootstrapFewShot) at compile time — confirm
        # against the installed DSPy version.
        self.classifier = dspy.Predict(CustomerSupportSignature)
        # Second-pass predictor that rates the classification.
        self.confidence_assessor = dspy.Predict(
            "ticket_content, classification_result -> confidence_score, quality_indicators"
        )

    def forward(self, ticket_content, customer_tier="basic"):
        """Classify a ticket and attach a confidence assessment.

        Args:
            ticket_content: Full text of the support ticket.
            customer_tier: Customer tier label; defaults to ``"basic"``.

        Returns:
            A ``dspy.Prediction`` with the five classification fields plus
            ``confidence_score`` and ``quality_indicators``.
        """
        classification = self.classifier(
            ticket_content=ticket_content,
            customer_tier=customer_tier,
        )

        # The assessor receives the stringified classification as context.
        confidence = self.confidence_assessor(
            ticket_content=ticket_content,
            classification_result=str(classification),
        )

        return dspy.Prediction(
            issue_category=classification.issue_category,
            urgency_level=classification.urgency_level,
            routing_team=classification.routing_team,
            estimated_resolution_time=classification.estimated_resolution_time,
            escalation_needed=classification.escalation_needed,
            confidence_score=confidence.confidence_score,
            quality_indicators=confidence.quality_indicators,
        )
# Advanced example: Multi-stage analysis pipeline
class ComprehensiveBusinessIntelligence(dspy.Module):
    """Four-stage pipeline: validate data, analyze, add market context, synthesize."""

    def __init__(self):
        super().__init__()

        # Stage 1 - data quality check driven by an inline string signature.
        self.data_validator = dspy.Predict(
            "raw_data_description -> data_quality_assessment, preprocessing_recommendations"
        )

        # Stage 2 - chain-of-thought business analysis.
        self.primary_analyzer = dspy.ChainOfThought(BusinessAnalysisSignature)

        # Stage 3 - positions the analysis against the supplied market context.
        self.competitive_analyzer = dspy.Predict(
            "analysis_results, market_context -> competitive_positioning, market_opportunities"
        )

        # Stage 4 - merges stages 2 and 3 into a summary and action plan.
        self.strategic_synthesizer = dspy.Predict(
            "primary_analysis, competitive_analysis -> executive_summary, action_plan"
        )

    def forward(self, data_context, analysis_focus, stakeholder_audience, market_context=""):
        """Run all four stages in order and bundle their outputs.

        Args:
            data_context: Description of the data; also fed to the validator.
            analysis_focus: Focus area for the primary analysis.
            stakeholder_audience: Audience for the primary analysis.
            market_context: Optional market background for stage 3.

        Returns:
            A ``dspy.Prediction`` holding each stage's raw prediction under
            ``data_quality``, ``primary_insights``, ``competitive_insights``
            and ``strategic_synthesis``.
        """
        validation = self.data_validator(raw_data_description=data_context)

        analysis_inputs = {
            "data_context": data_context,
            "analysis_focus": analysis_focus,
            "stakeholder_audience": stakeholder_audience,
        }
        primary = self.primary_analyzer(**analysis_inputs)

        # Later stages take the earlier predictions in stringified form.
        competitive = self.competitive_analyzer(
            analysis_results=str(primary),
            market_context=market_context,
        )
        synthesis = self.strategic_synthesizer(
            primary_analysis=str(primary),
            competitive_analysis=str(competitive),
        )

        return dspy.Prediction(
            data_quality=validation,
            primary_insights=primary,
            competitive_insights=competitive,
            strategic_synthesis=synthesis,
        )
# Demonstration of basic DSPy usage
def demonstrate_dspy_basics():
    """Run one business analysis and one ticket classification, printing both."""
    print("=== DSPy Basic Demonstration ===")

    # --- Business analysis ---
    business_analyzer = AdvancedBusinessAnalyzer()
    analysis = business_analyzer(
        data_context="Q3 2024 sales data showing 15% growth in enterprise segment, 8% decline in SMB",
        analysis_focus="revenue optimization and market segment strategy",
        stakeholder_audience="C-level executives and board members",
    )

    print("Business Analysis Result:")
    # (label, attribute) pairs are printed in the same order as before.
    analysis_report = (
        ("Key Insights", "key_insights"),
        ("Recommendations", "strategic_recommendations"),
        ("Risk Assessment", "risk_assessment"),
        ("Confidence", "confidence_level"),
    )
    for label, attribute in analysis_report:
        print(f"{label}: {getattr(analysis, attribute)}")

    # --- Support ticket classification ---
    ticket_classifier = OptimizedSupportClassifier()
    ticket_outcome = ticket_classifier(
        ticket_content="Our production API has been returning 500 errors for the past 30 minutes. This is affecting all our enterprise customers and costing us revenue.",
        customer_tier="enterprise",
    )

    print("\nSupport Classification Result:")
    support_report = (
        ("Category", "issue_category"),
        ("Urgency", "urgency_level"),
        ("Team", "routing_team"),
        ("Escalation Needed", "escalation_needed"),
    )
    for label, attribute in support_report:
        print(f"{label}: {getattr(ticket_outcome, attribute)}")
if __name__ == "__main__":
    demonstrate_dspy_basics()

Automated Optimization with MIPROv2
MIPROv2 (Multiprompt Instruction PRoposal Optimizer, version 2) automatically optimizes both the instructions and the few-shot examples of a DSPy program through systematic search and evaluation.
from dspy.teleprompt import MIPROv2, BootstrapFewShot
from dspy.evaluate import Evaluate
import dspy
class DSPyOptimizer:
    """Run and compare DSPy prompt optimizers over fixed data splits.

    The instance holds the train/validation/test example lists and records a
    summary dict for every optimization run in ``optimization_history``.
    """

    # Vocabulary whose presence in the analysis text earns coverage credit.
    _BUSINESS_TERMS = ('revenue', 'growth', 'market', 'strategy', 'competitive', 'roi', 'performance')

    # Ticket keywords considered consistent with each predicted urgency level.
    _URGENCY_KEYWORDS = {
        'critical': ['down', 'outage', 'production', 'emergency', 'urgent'],
        'high': ['error', 'bug', 'issue', 'problem', 'not working'],
        'medium': ['question', 'help', 'how to', 'clarification'],
        'low': ['feature request', 'suggestion', 'enhancement'],
    }

    def __init__(self, training_data, validation_data, test_data):
        """Store the three data splits and start with an empty run history."""
        self.training_data = training_data
        self.validation_data = validation_data
        self.test_data = test_data
        self.optimization_history = []

    @staticmethod
    def _text_field(prediction, name):
        """Return ``prediction.<name>`` as text, or ``""`` if missing or empty."""
        value = getattr(prediction, name, None)
        return str(value) if value else ""

    def create_business_analysis_metric(self, example, prediction, trace=None) -> float:
        """Score a business-analysis prediction between 0.0 and 1.0.

        70% of the score comes from field presence (insights and
        recommendations longer than 50 characters, risk assessment longer than
        30, a confidence level naming high/medium/low) and 30% from how many
        business terms appear in the insights and recommendations.

        Missing or ``None`` fields score zero instead of raising, so a
        malformed prediction is penalized rather than aborting evaluation.

        Args:
            example: The reference example (unused by this heuristic).
            prediction: Object exposing the analysis output fields.
            trace: Accepted for DSPy metric compatibility; unused.

        Returns:
            The weighted score as a float.
        """
        insights = self._text_field(prediction, 'key_insights')
        recommendations = self._text_field(prediction, 'strategic_recommendations')
        risks = self._text_field(prediction, 'risk_assessment')
        confidence = self._text_field(prediction, 'confidence_level').lower()

        content_score = 0
        if len(insights) > 50:
            content_score += 0.3
        if len(recommendations) > 50:
            content_score += 0.3
        if len(risks) > 30:
            content_score += 0.2
        if any(level in confidence for level in ('high', 'medium', 'low')):
            content_score += 0.2

        # Substring match, so "markets" counts toward "market".
        combined_text = f"{insights} {recommendations}".lower()
        matched_terms = sum(1 for term in self._BUSINESS_TERMS if term in combined_text)
        term_coverage = matched_terms / len(self._BUSINESS_TERMS)

        return (content_score * 0.7) + (term_coverage * 0.3)

    def create_support_classification_metric(self, example, prediction, trace=None) -> float:
        """Score a support-ticket classification between 0.0 and 1.0.

        70% of the score is the fraction of required fields that are present
        and non-empty; 30% is an urgency check that awards full credit when
        the predicted urgency matches keywords in the ticket and half credit
        otherwise. A missing or ``None`` urgency gets half credit instead of
        raising.

        Args:
            example: Mapping-like object providing ``get('ticket_content')``.
            prediction: Object exposing the classification output fields.
            trace: Accepted for DSPy metric compatibility; unused.

        Returns:
            The weighted score as a float.
        """
        required_fields = ['issue_category', 'urgency_level', 'routing_team', 'estimated_resolution_time']
        present = sum(1 for field in required_fields if getattr(prediction, field, None))
        field_completeness = present / len(required_fields)

        predicted_urgency = self._text_field(prediction, 'urgency_level').lower()
        ticket_text = str(example.get('ticket_content', '') or '').lower()

        urgency_appropriate = any(
            urgency in predicted_urgency and any(keyword in ticket_text for keyword in keywords)
            for urgency, keywords in self._URGENCY_KEYWORDS.items()
        )
        appropriateness_score = 1.0 if urgency_appropriate else 0.5

        return (field_completeness * 0.7) + (appropriateness_score * 0.3)

    def optimize_with_mipro(self, module_class, metric_func, num_trials=10, max_bootstrapped_demos=4):
        """Optimize a fresh ``module_class()`` with MIPROv2 and score it on the test split.

        Args:
            module_class: Zero-argument callable producing the module to optimize.
            metric_func: Metric callable ``(example, prediction, trace=None)``.
            num_trials: Number of optimization trials to run.
            max_bootstrapped_demos: Cap on bootstrapped demonstrations.

        Returns:
            ``(optimized_module, test_score)``.
        """
        print(f"Starting MIPROv2 optimization with {num_trials} trials...")

        module = module_class()

        optimizer = MIPROv2(
            metric=metric_func,
            init_temperature=1.0,
            max_bootstrapped_demos=max_bootstrapped_demos,
            max_labeled_demos=16,
        )

        # NOTE(review): ``num_trials`` is a ``compile()`` option in the DSPy 2.5
        # API, not a constructor argument (the original passed it to the
        # constructor) — confirm against the installed DSPy version.
        optimized_module = optimizer.compile(
            module,
            trainset=self.training_data,
            valset=self.validation_data[:10],  # Use subset for validation during optimization
            num_trials=num_trials,
        )

        evaluator = Evaluate(
            devset=self.test_data,
            metric=metric_func,
            num_threads=4,
            display_progress=True,
        )
        test_score = evaluator(optimized_module)

        self.optimization_history.append({
            'module_class': module_class.__name__,
            'optimizer': 'MIPROv2',
            'num_trials': num_trials,
            'test_score': test_score,
            'optimized_module': optimized_module,
        })

        print(f"Optimization complete. Test score: {test_score:.3f}")
        return optimized_module, test_score

    def optimize_with_bootstrap(self, module_class, metric_func, max_bootstrapped_demos=8):
        """Optimize a fresh ``module_class()`` with BootstrapFewShot and score it.

        Args:
            module_class: Zero-argument callable producing the module to optimize.
            metric_func: Metric callable ``(example, prediction, trace=None)``.
            max_bootstrapped_demos: Cap on bootstrapped demonstrations.

        Returns:
            ``(optimized_module, test_score)``.
        """
        print("Starting BootstrapFewShot optimization...")

        module = module_class()

        # NOTE(review): the original also passed ``student_settings``, which
        # the BootstrapFewShot constructor does not accept; the student runs
        # under the globally configured LM — confirm against the installed
        # DSPy version.
        optimizer = BootstrapFewShot(
            metric=metric_func,
            max_bootstrapped_demos=max_bootstrapped_demos,
            max_labeled_demos=16,
            teacher_settings={'lm': dspy.OpenAI(model='gpt-4')},
        )

        optimized_module = optimizer.compile(
            module,
            teacher=module,
            trainset=self.training_data,
        )

        evaluator = Evaluate(devset=self.test_data, metric=metric_func)
        test_score = evaluator(optimized_module)

        self.optimization_history.append({
            'module_class': module_class.__name__,
            'optimizer': 'BootstrapFewShot',
            'max_demos': max_bootstrapped_demos,
            'test_score': test_score,
            'optimized_module': optimized_module,
        })

        print(f"Bootstrap optimization complete. Test score: {test_score:.3f}")
        return optimized_module, test_score

    def comparative_optimization(self, module_class, metric_func):
        """Score baseline, MIPROv2 and BootstrapFewShot variants; return the best.

        Args:
            module_class: Zero-argument callable producing the module.
            metric_func: Metric callable used for every evaluation.

        Returns:
            ``(module, score)`` for the highest-scoring variant. On a tie the
            earlier of baseline, MIPROv2, BootstrapFewShot wins.
        """
        print("=== Comparative Optimization Analysis ===")

        # Baseline (no optimization)
        baseline_module = module_class()
        baseline_evaluator = Evaluate(devset=self.test_data, metric=metric_func)
        baseline_score = baseline_evaluator(baseline_module)
        print(f"Baseline score: {baseline_score:.3f}")

        mipro_module, mipro_score = self.optimize_with_mipro(
            module_class, metric_func, num_trials=8
        )
        bootstrap_module, bootstrap_score = self.optimize_with_bootstrap(
            module_class, metric_func
        )

        results = {
            'baseline': {'score': baseline_score, 'improvement': 0.0},
            'mipro': {'score': mipro_score, 'improvement': mipro_score - baseline_score},
            'bootstrap': {'score': bootstrap_score, 'improvement': bootstrap_score - baseline_score},
        }

        print("\n=== Optimization Comparison ===")
        for method, result in results.items():
            print(f"{method}: {result['score']:.3f} (improvement: {result['improvement']:+.3f})")

        candidates = {
            'baseline': (baseline_module, baseline_score),
            'mipro': (mipro_module, mipro_score),
            'bootstrap': (bootstrap_module, bootstrap_score),
        }
        # max() keeps the first maximal key, preserving the original tie-break.
        best_method = max(results, key=lambda k: results[k]['score'])
        return candidates[best_method]
def create_training_data():
    """Build the small labeled example sets used by the optimization demo.

    Each example is marked with its input fields via ``with_inputs`` so that
    DSPy evaluators and optimizers can separate inputs from labels; without
    that marking, ``example.inputs()`` raises.

    Returns:
        ``(business_training_data, support_training_data)`` — two lists of
        ``dspy.Example`` objects.
    """
    business_inputs = ("data_context", "analysis_focus", "stakeholder_audience")
    support_inputs = ("ticket_content", "customer_tier")

    business_records = [
        {
            "data_context": "Q3 revenue of $12M, up 18% YoY, enterprise segment grew 25%, SMB declined 5%",
            "analysis_focus": "segment performance and growth strategy",
            "stakeholder_audience": "executive team",
            "key_insights": "Enterprise segment driving growth with 25% increase, SMB segment requires attention with 5% decline",
            "strategic_recommendations": "Double down on enterprise sales while developing SMB retention program",
            "risk_assessment": "Over-dependence on enterprise segment could create vulnerability",
            "confidence_level": "High - data shows clear trends",
        },
        {
            "data_context": "Customer acquisition cost increased 30%, but lifetime value up 45%",
            "analysis_focus": "profitability and unit economics",
            "stakeholder_audience": "finance and marketing teams",
            "key_insights": "Despite higher acquisition costs, improved unit economics with 45% LTV increase",
            "strategic_recommendations": "Optimize acquisition channels for efficiency while maintaining quality",
            "risk_assessment": "Need to monitor acquisition cost trends to prevent margin erosion",
            "confidence_level": "Medium - need more data on channel performance",
        },
    ]

    support_records = [
        {
            "ticket_content": "Production API returning 500 errors, affecting all customers",
            "customer_tier": "enterprise",
            "issue_category": "technical",
            "urgency_level": "critical",
            "routing_team": "platform engineering",
            "estimated_resolution_time": "30 minutes",
            "escalation_needed": "yes",
        },
        {
            "ticket_content": "How do I reset my password? I can't access my account",
            "customer_tier": "basic",
            "issue_category": "authentication",
            "urgency_level": "medium",
            "routing_team": "customer support",
            "estimated_resolution_time": "15 minutes",
            "escalation_needed": "no",
        },
    ]

    business_training_data = [
        dspy.Example(**record).with_inputs(*business_inputs)
        for record in business_records
    ]
    support_training_data = [
        dspy.Example(**record).with_inputs(*support_inputs)
        for record in support_records
    ]

    return business_training_data, support_training_data
# Demonstration
def demonstrate_dspy_optimization():
    """Compare optimizers on the business-analysis module and show a sample run."""
    business_data, _support_data = create_training_data()
    example_count = len(business_data)

    # Split data (in practice, you'd have much more data).
    # With too few examples the validation and test splits fall back to the
    # first example, which is also part of the training split.
    if example_count >= 8:
        business_train = business_data[:8]
    else:
        business_train = business_data

    if example_count >= 10:
        business_val = business_data[8:10]
    else:
        business_val = business_data[:1]

    if example_count >= 12:
        business_test = business_data[10:]
    else:
        business_test = business_data[:1]

    optimizer = DSPyOptimizer(
        training_data=business_train,
        validation_data=business_val,
        test_data=business_test,
    )

    print("Optimizing Business Analysis Module...")
    comparison = optimizer.comparative_optimization(
        AdvancedBusinessAnalyzer,
        optimizer.create_business_analysis_metric,
    )
    best_business_module, best_business_score = comparison
    print(f"\nBest business analysis score: {best_business_score:.3f}")

    # Exercise the winning module on an unseen scenario.
    scenario = {
        "data_context": "Q4 revenue projection shows 22% growth, driven by new product launch",
        "analysis_focus": "growth sustainability and market expansion",
        "stakeholder_audience": "board of directors",
    }
    test_result = best_business_module(**scenario)

    print("\nOptimized module test result:")
    print(f"Insights: {test_result.key_insights[:100]}...")
    print(f"Recommendations: {test_result.strategic_recommendations[:100]}...")
if __name__ == "__main__":
    demonstrate_dspy_optimization()

Production Integration and Monitoring
Integrating DSPy optimization into production requires careful monitoring, gradual rollout strategies, and continuous performance tracking to ensure optimized prompts maintain their effectiveness over time.
🎯 Stage 5 Preview: Building Your LangSmith Alternative
Next, we'll build a complete monitoring and analytics platform that rivals commercial solutions like LangSmith. It will include:
- Comprehensive trace collection and storage
- Real-time analytics and performance monitoring
- Experiment management and A/B testing
- Cost tracking and optimization insights
- Custom dashboard and alerting systems
