Examples
This page contains practical examples demonstrating various features of LLM Batch Helper.
🎉 New in v0.3.0: All examples use the simplified API - no async/await syntax needed!
Basic Examples
Simple Batch Processing
from llm_batch_helper import LLMConfig, process_prompts_batch
# Create configuration
config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.7,
    max_completion_tokens=100,
)

# Define prompts
prompts = [
    "What is machine learning?",
    "Explain neural networks briefly.",
    "What is deep learning?",
]

# Process prompts - no async/await needed!
results = process_prompts_batch(
    config=config,
    provider="openai",
    prompts=prompts,
    cache_dir="ml_cache",
)

# Display results. `results` is keyed by prompt ID, so print that ID with each
# answer; a fixed placeholder question would label every answer identically.
for prompt_id, response in results.items():
    # Failed requests carry an "error" entry (the same convention the other
    # examples on this page check for); report them instead of indexing
    # 'response_text'.
    if "error" in response:
        print(f"❌ Error for {prompt_id}: {response['error']}")
        continue
    status = "[CACHE]" if response.get("from_cache") else "[GENERATED]"
    print(f"{status} Q: {prompt_id}")
    print(f"A: {response['response_text']}\n")
File-Based Processing
import os
from llm_batch_helper import LLMConfig, process_prompts_batch
# Create configuration
config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.5,
    max_completion_tokens=300,
    system_instruction="You are a helpful assistant providing detailed explanations.",
)

# Process all .txt files in the 'questions' directory
results = process_prompts_batch(
    config=config,
    provider="openai",
    input_dir="questions",  # Directory containing .txt files
    cache_dir="answers_cache",
    desc="Processing question files",
)

# Create answers directory if it doesn't exist
os.makedirs("answers", exist_ok=True)

# Save results to files
for prompt_id, response in results.items():
    if "error" in response:
        print(f"❌ Error for {prompt_id}: {response['error']}")
        continue
    status = "[CACHE]" if response.get("from_cache") else "[GENERATED]"
    # Write as UTF-8 explicitly: model output routinely contains non-ASCII
    # characters, and the platform default encoding may not be able to
    # represent them.
    with open(f"answers/{prompt_id}_answer.txt", "w", encoding="utf-8") as f:
        f.write(response['response_text'])
    print(f"{status} Saved answer for {prompt_id}")
Advanced Examples
Custom Verification
from llm_batch_helper import LLMConfig, process_prompts_batch
def verify_code_response(prompt_id, llm_response_data, original_prompt_text, **kwargs):
    """Verify that code responses contain actual code.

    Args:
        prompt_id: Identifier of the prompt (not inspected here).
        llm_response_data: Mapping holding the response; only the
            "response_text" entry is read.
        original_prompt_text: The prompt that was sent (not inspected here).
        **kwargs: Optional settings. ``min_length`` (default 50) is the
            minimum number of characters the response must have.

    Returns:
        True when the response contains at least one code indicator and is at
        least ``min_length`` characters long, otherwise False.
    """
    # A present-but-None "response_text" would break the substring and length
    # checks below, so treat it the same as a missing entry.
    response_text = llm_response_data.get("response_text") or ""

    # Check for code indicators
    code_indicators = ("def ", "class ", "import ", "```", "function")
    has_code = any(indicator in response_text for indicator in code_indicators)

    # Check minimum length
    min_length = kwargs.get("min_length", 50)
    is_long_enough = len(response_text) >= min_length

    return has_code and is_long_enough
# Configuration that wires in the custom verification callback.
# NOTE(review): `verification_callback_args` is presumably forwarded to the
# callback as keyword arguments (here raising min_length to 100) - confirm
# against the library's API reference.
config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.3,
    max_completion_tokens=500,
    system_instruction="You are a coding assistant. Always provide working code examples.",
    verification_callback=verify_code_response,
    verification_callback_args={"min_length": 100},
    max_retries=3,
)

# Prompts that should each produce a code answer
coding_prompts = [
    "Write a Python function to calculate fibonacci numbers",
    "Create a class for a simple calculator in Python",
    "Write a function to reverse a string in Python",
]

# Process with verification - no async/await needed!
results = process_prompts_batch(
    config=config,
    provider="openai",
    prompts=coding_prompts,
    cache_dir="coding_cache",
)

# Report each result, followed by a separator line
for prompt_id, response in results.items():
    status = "[GENERATED]" if not response.get("from_cache") else "[CACHE]"
    if "error" not in response:
        print(f"✅ {status} Verified code response for {prompt_id}")
        print(response['response_text'])
    else:
        print(f"❌ Failed verification for {prompt_id}: {response['error']}")
    print("-" * 80)
Multi-Provider Comparison
from llm_batch_helper import LLMConfig, process_prompts_batch
# Common prompts for comparison
prompts = [
    "Explain quantum computing in simple terms",
    "What are the benefits of renewable energy?",
    "How does machine learning work?",
]

# OpenAI configuration
openai_config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.7,
    max_completion_tokens=200,
)

# OpenRouter configuration (recommended for variety)
openrouter_config = LLMConfig(
    model_name="anthropic/claude-3-5-sonnet",
    temperature=0.7,
    max_completion_tokens=200,
)

print("🚀 Processing prompts with multiple providers...")

# Process with OpenAI
print("Processing with OpenAI...")
openai_results = process_prompts_batch(
    config=openai_config,
    provider="openai",
    prompts=prompts,
    cache_dir="openai_comparison",
)

# Process with OpenRouter
print("Processing with OpenRouter...")
openrouter_results = process_prompts_batch(
    config=openrouter_config,
    provider="openrouter",
    prompts=prompts,
    cache_dir="openrouter_comparison",
)


def _preview(response, limit=100):
    """Return the first `limit` characters of a response, or its error message."""
    if "error" in response:
        # A failed request has no usable 'response_text'; show the error so
        # the comparison does not abort with a KeyError.
        return f"[error] {response['error']}"
    return f"{response['response_text'][:limit]}..."


# Compare results
print("\n📊 Comparison Results:")
print("=" * 80)

# Responses are paired with prompts by position; zip() stops at the shortest
# sequence, so a provider that returned fewer results simply ends the
# comparison early.
# NOTE(review): this assumes both result dicts preserve the order of `prompts`
# - confirm against the library's documentation.
paired = zip(prompts, openai_results.values(), openrouter_results.values())
for prompt, openai_response, openrouter_response in paired:
    print(f"\n🔍 Prompt: {prompt}")
    print(f"🤖 OpenAI: {_preview(openai_response)}")
    print(f"🧠 OpenRouter: {_preview(openrouter_response)}")
    print("-" * 80)
Large-Scale Processing
import json
from llm_batch_helper import LLMConfig, process_prompts_batch
# Load prompts from JSON file (example format)
# large_dataset.json should contain: [{"prompt": "text1"}, {"prompt": "text2"}, ...]
try:
    # JSON text is UTF-8; read it as such instead of relying on the platform
    # default encoding, which can mis-decode non-ASCII prompts.
    with open("large_dataset.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    prompts = [item["prompt"] for item in data]
except FileNotFoundError:
    # Create example dataset if file doesn't exist
    prompts = [
        f"Generate a creative story about topic {i}" for i in range(1, 51)
    ]
    print("📝 Using example dataset (50 prompts)")

# Configuration for large-scale processing
config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.7,
    max_completion_tokens=150,
    max_concurrent_requests=10,  # Higher concurrency for speed
    max_retries=5,
)

print(f"🚀 Processing {len(prompts)} prompts in batches...")

# Process in batches to manage memory and API limits
batch_size = 20  # Smaller batches for better control
# Ceiling division; computed once because it does not change between batches.
total_batches = (len(prompts) + batch_size - 1) // batch_size
all_results = {}

for batch_num, start in enumerate(range(0, len(prompts), batch_size), start=1):
    batch_prompts = prompts[start:start + batch_size]
    print(f"📦 Processing batch {batch_num}/{total_batches} ({len(batch_prompts)} prompts)")

    # Process current batch - no async/await needed!
    batch_results = process_prompts_batch(
        config=config,
        provider="openai",
        prompts=batch_prompts,
        cache_dir="large_scale_cache",
        desc=f"Batch {batch_num}",
    )
    all_results.update(batch_results)

    # Save intermediate results so finished batches survive a later failure
    with open(f"results_batch_{batch_num}.json", "w", encoding="utf-8") as f:
        json.dump(batch_results, f, indent=2)
    print(f"✅ Batch {batch_num} completed: {len(batch_results)} responses")

# Save final consolidated results
with open("final_results.json", "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2)

# Summary
successful = sum(1 for r in all_results.values() if "error" not in r)
failed = len(all_results) - successful
print("\n📊 Processing Complete!")
print(f"✅ Successful: {successful}")
print(f"❌ Failed: {failed}")
print("📁 Results saved to: final_results.json")
Content Generation Pipeline
from llm_batch_helper import LLMConfig, process_prompts_batch
print("🏗️ Starting Content Generation Pipeline...")

# ---------------------------------------------------------------------------
# Stage 1: ask the model for topic ideas in three subject areas
# ---------------------------------------------------------------------------
print("\n📝 Stage 1: Generating topics...")
topic_config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.8,
    max_completion_tokens=50,
    system_instruction="Generate creative topic ideas.",
)
topic_prompts = [
    "Suggest 3 interesting topics about artificial intelligence",
    "Suggest 3 interesting topics about space exploration",
    "Suggest 3 interesting topics about environmental science",
]

# Generate topics - no async/await needed!
topic_results = process_prompts_batch(
    config=topic_config,
    provider="openai",
    prompts=topic_prompts,
    cache_dir="topics_cache",
)

# ---------------------------------------------------------------------------
# Stage 2: turn every suggested topic into a detailed-content prompt
# ---------------------------------------------------------------------------
print("\n📖 Stage 2: Generating detailed content...")
content_config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.7,
    max_completion_tokens=300,
    system_instruction="Write detailed, informative content about the given topic.",
)

content_prompts = []
for topic_response in topic_results.values():
    if "error" in topic_response:
        continue
    # Simple parsing: every non-blank line is a candidate topic
    stripped = (raw.strip() for raw in topic_response['response_text'].split('\n'))
    candidates = [text for text in stripped if text]
    # Keep at most the first 3 lines per category, skipping "#" heading lines
    content_prompts.extend(
        f"Write a detailed explanation about: {topic}"
        for topic in candidates[:3]
        if not topic.startswith("#")
    )

# Generate detailed content
content_results = process_prompts_batch(
    config=content_config,
    provider="openai",
    prompts=content_prompts,
    cache_dir="content_cache",
    desc="Generating detailed content",
)

# ---------------------------------------------------------------------------
# Stage 3: summarise every successful, non-trivial piece of content
# ---------------------------------------------------------------------------
print("\n📋 Stage 3: Generating summaries...")
summary_config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.3,
    max_completion_tokens=100,
    system_instruction="Create concise summaries of the given content.",
)

# Only content without an error and longer than 50 characters is summarised
content_items = [
    item
    for item in content_results.values()
    if "error" not in item and len(item['response_text']) > 50
]
summary_prompts = [
    f"Summarize this content in 2-3 sentences: {item['response_text']}"
    for item in content_items
]

# Generate summaries
summary_results = process_prompts_batch(
    config=summary_config,
    provider="openai",
    prompts=summary_prompts,
    cache_dir="summary_cache",
    desc="Generating summaries",
)

# ---------------------------------------------------------------------------
# Combine and display results (content and summaries are paired by position)
# ---------------------------------------------------------------------------
print("\n🎯 Pipeline Results:")
print("=" * 60)

summary_list = list(summary_results.values())
for number, (content, summary) in enumerate(zip(content_items, summary_list), start=1):
    if "error" in summary:
        continue
    word_count = len(content['response_text'].split())
    print(f"\n📄 Content {number} ({word_count} words):")
    print(f"📝 Summary: {summary['response_text']}")
    print(f"💾 Full content available in results")
    print("-" * 40)

print(f"\n✅ Pipeline completed! Generated {len(content_items)} pieces of content.")
Error Handling Examples
Robust Error Handling
from llm_batch_helper import LLMConfig, process_prompts_batch
print("🛡️ Testing Robust Error Handling...")

# Create configuration with retry settings
config = LLMConfig(
    model_name="gpt-4o-mini",
    temperature=0.7,
    max_completion_tokens=200,
    max_retries=3,  # Will retry failed requests
)

# Test prompts including some that might cause issues
test_prompts = [
    "What is the capital of France?",  # Valid prompt
    "",  # Empty prompt - might cause issues
    "Explain quantum physics briefly.",  # Valid prompt
    "A" * 8000,  # Very long prompt - might hit token limits
    "What is 2+2?",  # Valid prompt
    {"id": "custom_test", "text": "Dictionary format test"},  # Mixed format
]

print(f"📝 Processing {len(test_prompts)} test prompts...")

try:
    # Process with error handling - no async/await needed!
    results = process_prompts_batch(
        config=config,
        provider="openai",
        prompts=test_prompts,
        cache_dir="error_handling_cache",
        desc="Error Handling Test",
    )

    # Analyze results and handle errors
    print("\n📊 Results Analysis:")
    print("=" * 50)

    successful_responses = 0
    failed_responses = 0
    cached_responses = 0

    for prompt_id, response in results.items():
        # Check response status
        if "error" in response:
            # str() keeps the truncation working even if the error value is
            # not already a string.
            print(f"❌ Error in {prompt_id}: {str(response['error'])[:100]}...")
            failed_responses += 1
            continue

        from_cache = bool(response.get("from_cache"))
        status_icon = "💾" if from_cache else "✅"
        char_count = len(response['response_text'])
        print(f"{status_icon} Success {prompt_id}: {char_count} characters")
        successful_responses += 1
        if from_cache:
            cached_responses += 1

    # Summary statistics
    total_responses = successful_responses + failed_responses
    print("\n📈 Summary:")
    print(f"✅ Successful: {successful_responses}")
    print(f"❌ Failed: {failed_responses}")
    print(f"💾 From cache: {cached_responses}")
    # Guard the ratio: with no results the division would raise
    # ZeroDivisionError, which the handler below would then misreport as a
    # configuration or API key problem.
    if total_responses:
        print(f"📊 Success rate: {successful_responses / total_responses * 100:.1f}%")
    else:
        print("📊 Success rate: n/a (no responses returned)")

except Exception as e:
    # Top-level boundary of the example: report anything unexpected
    print(f"💥 Unexpected error during processing: {e}")
    print("This might indicate a configuration or API key issue.")

print("\n🎯 Error handling test completed!")