urllib.request.OpenerDirector
The OpenerDirector class is a sophisticated URL opener that manages a collection of handlers for different URL schemes and authentication methods. It provides a high-level interface for opening URLs with customizable behavior through handler chaining.
Quick Reference
Module: urllib.request
Type: Class
Purpose: Manage URL opening handlers and provide extensible URL opening functionality
Documentation: urllib.request.OpenerDirector
Constructor
urllib.request.OpenerDirector()
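Creates a new OpenerDirector instance with no handlers installed. Unlike urllib.request.build_opener(), it does not add the default handlers (proxy detection, redirects, HTTP error processing, default error handling), so every handler you need must be added explicitly with add_handler().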
Core Methods
Handler Management
add_handler(handler)
Add a handler to the opener.
import urllib.request
# Create opener and add handlers
opener = urllib.request.OpenerDirector()
opener.add_handler(urllib.request.HTTPHandler())
opener.add_handler(urllib.request.HTTPSHandler())
# Add authentication handler
auth_handler = urllib.request.HTTPBasicAuthHandler()
opener.add_handler(auth_handler)
remove_handler(handler)
Remove a handler from the opener.
# Remove specific handler
opener.remove_handler(auth_handler)
URL Opening
open(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT)
Open a URL using the configured handlers.
# Basic URL opening
response = opener.open('https://httpbin.org/get')
print(response.read().decode())
# POST request with data
data = b'key=value'
response = opener.open('https://httpbin.org/post', data=data)
# With timeout
response = opener.open('https://httpbin.org/delay/2', timeout=5)
error(proto, *args)
Handle errors by calling the appropriate error handler.
# Usually called internally by handlers
# opener.error('http', request, response, code, msg, hdrs)
Advanced Usage
Custom Opener with Authentication
import urllib.error
import urllib.parse
import urllib.request
def create_authenticated_opener(username, password, realm=None, uri=None):
    """Create an opener that answers HTTP Basic authentication challenges.

    Args:
        username: User name sent in response to a 401 challenge.
        password: Password sent in response to a 401 challenge.
        realm: Realm the credentials apply to. ``None`` (the default) makes
            them a catch-all used for any realm the server announces.
        uri: URI, or sequence of URIs, the credentials are valid for.
            Required; there is no "match every URI" value.

    Returns:
        An ``OpenerDirector`` with HTTP/HTTPS support, error processing and
        the Basic-auth handler installed.

    Raises:
        ValueError: If ``uri`` is ``None``.
    """
    if uri is None:
        # add_password() iterates over the URIs, so None would otherwise fail
        # with an unhelpful "NoneType is not iterable" TypeError.
        raise ValueError("uri is required to register credentials")

    # The plain HTTPPasswordMgr only matches the exact realm sent by the
    # server, so credentials stored under realm=None would never be found.
    # The "WithDefaultRealm" variant falls back to the None entry.
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(realm, uri, username, password)
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_mgr)

    opener = urllib.request.OpenerDirector()
    opener.add_handler(urllib.request.HTTPHandler())
    opener.add_handler(urllib.request.HTTPSHandler())
    # HTTPErrorProcessor is what routes a 401 response to
    # auth_handler.http_error_401(); without it the challenge is returned to
    # the caller as an ordinary response and no credentials are ever sent.
    opener.add_handler(urllib.request.HTTPErrorProcessor())
    # Turns errors nobody handled (e.g. rejected credentials) into HTTPError
    # instead of letting open() silently return None.
    opener.add_handler(urllib.request.HTTPDefaultErrorHandler())
    opener.add_handler(auth_handler)
    return opener
# Usage
opener = create_authenticated_opener('user', 'pass',
uri='https://httpbin.org')
response = opener.open('https://httpbin.org/basic-auth/user/pass')
print(response.read().decode())
Proxy Support
def create_proxy_opener(proxy_url):
    """Build an opener that sends HTTP and HTTPS traffic through one proxy.

    Args:
        proxy_url: URL of the proxy used for both the ``http`` and
            ``https`` schemes.

    Returns:
        An ``OpenerDirector`` holding an HTTP handler, an HTTPS handler and
        a ``ProxyHandler`` configured with ``proxy_url``.
    """
    # The same proxy serves both schemes.
    proxy_map = dict.fromkeys(('http', 'https'), proxy_url)
    proxy_support = urllib.request.ProxyHandler(proxy_map)

    director = urllib.request.OpenerDirector()
    for component in (
        urllib.request.HTTPHandler(),
        urllib.request.HTTPSHandler(),
        proxy_support,
    ):
        director.add_handler(component)
    return director
# Usage with proxy
opener = create_proxy_opener('http://proxy.example.com:8080')
response = opener.open('https://httpbin.org/ip')
Cookie Handling
import http.cookiejar
def create_cookie_opener():
    """Build an opener that stores and replays cookies.

    Returns:
        A ``(opener, cookie_jar)`` tuple. The jar is the one used by the
        opener's ``HTTPCookieProcessor``, so cookies set by responses can be
        inspected through it.
    """
    jar = http.cookiejar.CookieJar()

    director = urllib.request.OpenerDirector()
    for component in (
        urllib.request.HTTPHandler(),
        urllib.request.HTTPSHandler(),
        urllib.request.HTTPCookieProcessor(jar),
    ):
        director.add_handler(component)
    return director, jar
# Usage
opener, cookies = create_cookie_opener()
# First request - sets cookies
response = opener.open('https://httpbin.org/cookies/set?session=abc123')
# Second request - sends cookies automatically
response = opener.open('https://httpbin.org/cookies')
print(response.read().decode())
# Access cookies
for cookie in cookies:
print(f"Cookie: {cookie.name}={cookie.value}")
Custom Error Handling
class CustomHTTPErrorHandler(urllib.request.HTTPErrorProcessor):
    """Error processor that swallows 404 and 5xx responses.

    The inherited ``http_response``/``https_response`` hooks pass every
    non-2xx response to the opener's error chain, which ends up in
    ``http_error_default`` below when no more specific handler took it.
    """

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Handle an HTTP error that no ``http_error_<code>`` method handled.

        Args:
            req: The ``Request`` that produced the error.
            fp: File-like object holding the error response body.
            code: HTTP status code.
            msg: Reason phrase sent by the server.
            hdrs: Response headers.

        Returns:
            ``None`` for 404 and 5xx responses, which makes
            ``opener.open()`` return ``None`` for them.

        Raises:
            urllib.error.HTTPError: For every other status code.
        """
        if code == 404:
            print(f"Page not found: {req.full_url}")
        elif code >= 500:
            print(f"Server error {code}: {msg}")
        else:
            # HTTPErrorProcessor has no http_error_default to delegate to;
            # raising HTTPError is what the stock HTTPDefaultErrorHandler does.
            raise urllib.error.HTTPError(req.full_url, code, msg, hdrs, fp)

        # The response is being discarded, so release its connection.
        if fp is not None:
            fp.close()
        return None
def create_custom_error_opener():
    """Build an opener whose HTTP errors go through CustomHTTPErrorHandler.

    Returns:
        An ``OpenerDirector`` with HTTP and HTTPS handlers plus a
        ``CustomHTTPErrorHandler`` instance.
    """
    director = urllib.request.OpenerDirector()
    for component in (
        urllib.request.HTTPHandler(),
        urllib.request.HTTPSHandler(),
        CustomHTTPErrorHandler(),
    ):
        director.add_handler(component)
    return director
# Usage
opener = create_custom_error_opener()
response = opener.open('https://httpbin.org/status/404') # Returns None
Handler Chain Architecture
Understanding Handler Priority
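Handlers are grouped by the protocol methods they define (for example http_open, https_request or http_error_302) and, within each group, sorted by their handler_order attribute (lower values run first; the default is 500):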
# View handler chain
opener = urllib.request.build_opener()
for handler in opener.handlers:
    # Handlers carry no "protocol" attribute; the schemes they serve are
    # implied by method names such as http_open, https_request or
    # http_error_302. These three names match the pattern but are helpers
    # that OpenerDirector.add_handler() ignores as well.
    protocols = set()
    for name in dir(handler):
        if name in ('redirect_request', 'do_open', 'proxy_open'):
            continue
        scheme, _, role = name.partition('_')
        if scheme and (role in ('open', 'request', 'response')
                       or role.startswith('error')):
            protocols.add(scheme)

    print(f"Handler: {handler.__class__.__name__}")
    print(f" Protocol: {', '.join(sorted(protocols)) or 'N/A'}")
    # handler_order is the real sort key (lower runs first, default 500).
    print(f" Priority: {handler.handler_order}")
Common Handler Types
# Essential handlers for most use cases
essential_handlers = [
urllib.request.HTTPHandler(),
urllib.request.HTTPSHandler(),
urllib.request.HTTPRedirectHandler(),
urllib.request.HTTPErrorProcessor(),
]
# Security-focused handlers
security_handlers = [
urllib.request.HTTPSHandler(context=ssl_context),
urllib.request.HTTPBasicAuthHandler(password_mgr),
urllib.request.ProxyHandler(proxy_dict),
]
# Debugging handlers
debug_handlers = [
urllib.request.HTTPHandler(debuglevel=1),
urllib.request.HTTPSHandler(debuglevel=1),
]
Practical Use Cases
1. API Client with Authentication
class APIClient:
    """Simple API client using OpenerDirector.

    The opener is assembled by hand with only HTTP/HTTPS handlers, so HTTP
    error statuses are returned as ordinary responses rather than raised.
    """

    def __init__(self, base_url, api_key=None):
        """Set up the opener.

        Args:
            base_url: Root URL of the API; a trailing slash is stripped.
            api_key: Optional key sent as a Bearer token on every request.
        """
        self.base_url = base_url.rstrip('/')
        self.opener = urllib.request.OpenerDirector()
        # Add basic handlers
        self.opener.add_handler(urllib.request.HTTPHandler())
        self.opener.add_handler(urllib.request.HTTPSHandler())
        # Add API key handler if provided
        if api_key:
            self._add_api_key_handler(api_key)

    def _add_api_key_handler(self, api_key):
        """Install a request pre-processor that adds the Authorization header."""

        class APIKeyHandler(urllib.request.BaseHandler):
            def http_request(self, request):
                request.add_header('Authorization', f'Bearer {api_key}')
                return request

            # Same pre-processing for HTTPS requests.
            https_request = http_request

        self.opener.add_handler(APIKeyHandler())

    def _build_url(self, endpoint):
        """Join ``endpoint`` onto the base URL with exactly one slash."""
        return f"{self.base_url}/{endpoint.lstrip('/')}"

    def get(self, endpoint):
        """Make a GET request and return the decoded response body."""
        # The context manager closes the response (and its socket) even if
        # reading or decoding fails.
        with self.opener.open(self._build_url(endpoint)) as response:
            return response.read().decode()

    def post(self, endpoint, data):
        """Make a POST request and return the decoded response body.

        Args:
            endpoint: Path relative to the base URL.
            data: Request body; ``str`` values are encoded as UTF-8, any
                other value is passed to the opener unchanged.
        """
        data_bytes = data.encode() if isinstance(data, str) else data
        with self.opener.open(self._build_url(endpoint), data=data_bytes) as response:
            return response.read().decode()
# Usage
client = APIClient('https://api.example.com', api_key='secret-key')
result = client.get('/users')
2. Web Scraper with Session Management
import http.cookiejar
import time
class WebScraper:
    """Web scraper with a cookie-backed session and rate limiting."""

    def __init__(self, delay=1.0):
        """Set up the opener.

        Args:
            delay: Minimum number of seconds between two requests.
        """
        self.delay = delay
        # Monotonic timestamp of the previous request; None until the first
        # request so that it is never delayed.
        self.last_request = None
        # Setup cookie jar
        self.cookies = http.cookiejar.CookieJar()
        cookie_handler = urllib.request.HTTPCookieProcessor(self.cookies)
        # Create opener with cookies
        self.opener = urllib.request.OpenerDirector()
        self.opener.add_handler(urllib.request.HTTPHandler())
        self.opener.add_handler(urllib.request.HTTPSHandler())
        self.opener.add_handler(cookie_handler)
        # Add user agent
        self._add_user_agent()

    def _add_user_agent(self):
        """Make every request carry the scraper's User-Agent header."""
        # A pre-processor calling request.add_header('User-Agent', ...) has
        # no effect here: the HTTP handler first copies the opener's default
        # "User-agent: Python-urllib/x.y" into the request's unredirected
        # headers, and those take precedence when the request is sent.
        # Replacing the opener's default headers is the supported way.
        self.opener.addheaders = [
            ('User-Agent', 'Mozilla/5.0 (Python urllib scraper)'),
        ]

    def _rate_limit(self):
        """Sleep as needed so requests are at least ``delay`` seconds apart."""
        if self.last_request is not None:
            # A monotonic clock is immune to wall-clock adjustments, which
            # could otherwise yield a negative elapsed time and a long sleep.
            remaining = self.delay - (time.monotonic() - self.last_request)
            if remaining > 0:
                time.sleep(remaining)
        self.last_request = time.monotonic()

    def fetch(self, url):
        """Fetch ``url`` with rate limiting and return the decoded body."""
        self._rate_limit()
        # Close the response even if reading or decoding fails.
        with self.opener.open(url) as response:
            return response.read().decode()

    def login(self, login_url, username, password):
        """Perform a form-based login and return the decoded response body.

        Posts ``username`` and ``password`` as URL-encoded form fields; any
        cookies set by the response end up in ``self.cookies``.
        """
        # This would need form parsing in real implementation
        login_data = urllib.parse.urlencode({
            'username': username,
            'password': password,
        }).encode()
        # Logging in is a request like any other, so it is rate limited too.
        self._rate_limit()
        with self.opener.open(login_url, data=login_data) as response:
            return response.read().decode()
# Usage
scraper = WebScraper(delay=2.0)
content = scraper.fetch('https://example.com')
3. File Download Manager
import os
import urllib.parse
class DownloadManager:
    """File download manager with resume capability."""

    def __init__(self):
        self.opener = urllib.request.OpenerDirector()
        self.opener.add_handler(urllib.request.HTTPHandler())
        self.opener.add_handler(urllib.request.HTTPSHandler())
        # Add range request handler
        self._add_range_handler()

    def _add_range_handler(self):
        """Add handler for range requests (resume downloads)."""

        class RangeHandler(urllib.request.BaseHandler):
            def __init__(self, start_byte=0):
                self.start_byte = start_byte

            def http_request(self, request):
                # Only ask for a partial body when there is something to skip.
                if self.start_byte > 0:
                    request.add_header('Range', f'bytes={self.start_byte}-')
                return request

            https_request = http_request

        self.range_handler = RangeHandler()
        self.opener.add_handler(self.range_handler)

    def download(self, url, filename, resume=True):
        """Download ``url`` to ``filename``, resuming a partial file if possible.

        Args:
            url: Resource to download.
            filename: Destination path.
            resume: When true and ``filename`` already exists, request only
                the missing bytes and append them.

        Raises:
            urllib.error.HTTPError: If the server answers with an error
                status other than 416.
        """
        # Check if file exists for resume
        start_byte = 0
        if resume and os.path.exists(filename):
            start_byte = os.path.getsize(filename)
        self.range_handler.start_byte = start_byte

        try:
            response = self.opener.open(url)
        except urllib.error.HTTPError as e:
            # Only reachable if self.opener was given an error processor.
            if e.code == 416:  # Range not satisfiable
                print("File already completely downloaded")
                return
            raise

        # The hand-built opener has no HTTPErrorProcessor, so error statuses
        # arrive as normal responses and must be checked here; otherwise the
        # error page would be written into the file.
        status = getattr(response, 'status', None)
        if status is not None and status >= 400 and status != 416:
            # Hand the open response to the exception so callers can read it.
            raise urllib.error.HTTPError(
                url, status, getattr(response, 'reason', ''),
                response.headers, response)

        with response:
            if status == 416:  # Range not satisfiable
                print("File already completely downloaded")
                return

            # Append only when the server really honoured the Range header
            # (206 Partial Content). Any other success status carries the
            # whole resource, and appending it would corrupt the file.
            resuming = start_byte > 0 and status == 206
            if not resuming:
                start_byte = 0
            mode = 'ab' if resuming else 'wb'

            # Content-Length covers only the bytes in this response.
            content_length = response.headers.get('Content-Length')
            total_size = int(content_length) + start_byte if content_length else None

            # Download with progress
            with open(filename, mode) as f:
                downloaded = start_byte
                while True:
                    chunk = response.read(8192)
                    if not chunk:
                        break
                    f.write(chunk)
                    downloaded += len(chunk)
                    # Show progress
                    if total_size:
                        progress = (downloaded / total_size) * 100
                        print(f"\rProgress: {progress:.1f}%", end='')
            print(f"\nDownload completed: {filename}")
# Usage
downloader = DownloadManager()
downloader.download('https://example.com/largefile.zip', 'largefile.zip')
Performance Considerations
Connection Pooling
# OpenerDirector doesn't provide built-in connection pooling
# For high-performance applications, consider using requests library
# or implementing custom connection management
class PooledOpener:
    """Thin wrapper around a default opener, standing in for connection reuse.

    ``max_connections`` is only recorded; nothing in this class enforces it.
    """

    def __init__(self, max_connections=10):
        # urllib has no real connection pool, so this example simply
        # delegates to a single default opener.
        self.max_connections = max_connections
        self.opener = urllib.request.build_opener()

    def open(self, url, **kwargs):
        """Open ``url`` through the wrapped opener, forwarding ``kwargs``."""
        response = self.opener.open(url, **kwargs)
        return response
Memory Usage
# For large files, use streaming
def stream_download(opener, url, chunk_size=8192):
    """Stream a download chunk by chunk to avoid holding it all in memory.

    Args:
        opener: Object with an ``open(url)`` method returning a response
            that can be used as a context manager, such as an
            ``OpenerDirector``.
        url: Resource to download.
        chunk_size: Maximum number of bytes read per chunk.

    Yields:
        Successive non-empty ``bytes`` chunks of the response body.
    """
    # The context manager closes the response when the body is exhausted,
    # when reading fails, or when the caller abandons the generator early.
    with opener.open(url) as response:
        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                break
            yield chunk
Debugging and Troubleshooting
Enable Debug Output
# Enable debugging for HTTP handlers
opener = urllib.request.OpenerDirector()
opener.add_handler(urllib.request.HTTPHandler(debuglevel=1))
opener.add_handler(urllib.request.HTTPSHandler(debuglevel=1))
# This will print detailed HTTP communication
response = opener.open('https://httpbin.org/get')
Inspect Handler Chain
def _handler_protocols(handler):
    """Return the sorted schemes a handler serves, derived from its method names."""
    schemes = set()
    for name in dir(handler):
        # These match the naming pattern but are helpers, not protocol
        # methods; OpenerDirector.add_handler() skips them as well.
        if name in ('redirect_request', 'do_open', 'proxy_open'):
            continue
        scheme, _, role = name.partition('_')
        if scheme and (role in ('open', 'request', 'response')
                       or role.startswith('error')):
            schemes.add(scheme)
    return sorted(schemes)


def inspect_opener(opener):
    """Print the handlers of ``opener`` with their protocols and sort order.

    Args:
        opener: An ``OpenerDirector``, or any object with a ``handlers``
            list.
    """
    print("Handler Chain:")
    for i, handler in enumerate(opener.handlers):
        print(f" {i}: {handler.__class__.__name__}")
        # Handlers have no "protocol" or "priority" attributes. Protocols
        # are implied by method names (http_open, https_request, ...) and
        # the ordering key is handler_order (lower runs first).
        protocols = _handler_protocols(handler)
        if protocols:
            print(f" Protocol: {', '.join(protocols)}")
        if hasattr(handler, 'handler_order'):
            print(f" Priority: {handler.handler_order}")
inspect_opener(urllib.request.build_opener())
Related Components
- urllib.request.urlopen - High-level interface using default opener
- urllib.request.Request - Request objects used with OpenerDirector
- urllib.error - Exception classes for error handling
- urllib.parse - URL parsing utilities
Additional Resources
- urllib.request documentation
- urllib.request.OpenerDirector API
- HTTP authentication with urllib
- Proxy support in urllib
The OpenerDirector class provides the foundation for creating sophisticated HTTP clients with custom behavior, authentication, and error handling. It's particularly useful when you need more control than the basic urlopen() function provides.