urllib.error Module
The urllib.error module defines the exception classes raised by urllib.request for URL-related errors. Understanding these exceptions is crucial for robust error handling in HTTP requests and URL operations.
Exception Hierarchy
Exception
└── OSError
    └── URLError
        ├── HTTPError
        └── ContentTooShortError
Exception Classes Overview
| Exception | Base Class | Description | When Raised |
|---|---|---|---|
| URLError | OSError | General URL-related errors | Network issues, DNS failures, connection problems |
| HTTPError | URLError | HTTP-specific errors | HTTP status codes 400-599 |
| ContentTooShortError | URLError | Incomplete downloads | urlretrieve() received fewer bytes than the Content-Length header announced |
URLError Class
The base exception for URL-related errors. Raised when there are problems with the URL request that are not HTTP-specific.
URLError Attributes
| Attribute | Type | Description | Example |
|---|---|---|---|
| reason | str or Exception | Error description or underlying exception | 'Name or service not known' |
Common URLError Scenarios
import urllib.request
import urllib.error
def demonstrate_url_errors():
    """Open several deliberately broken URLs and print how each one fails.

    For every URL the function prints either a success line, the
    ``URLError`` reason (and the type of that reason), or the text of any
    other exception. Nothing is returned.
    """
    url_error_cases = [
        'http://nonexistent-domain-12345.com',  # DNS failure
        'http://192.168.1.999',                 # Invalid IP
        'http://localhost:99999',               # Invalid port (above 65535)
        'ftp://unreachable-ftp.com',            # Protocol/network issue
    ]

    for url in url_error_cases:
        try:
            # The body is never read, so use the context manager purely to
            # guarantee the response (and its socket) is closed.
            with urllib.request.urlopen(url, timeout=5):
                print(f"✓ Success: {url}")
        except urllib.error.URLError as e:
            print(f"✗ URLError for {url}")
            print(f" Reason: {e.reason}")
            # ``reason`` may be a plain string or a wrapped exception object.
            print(f" Reason type: {type(e.reason)}")
        except Exception as e:
            # Failures that urllib does not wrap in URLError land here.
            print(f"✗ Other error for {url}: {e}")
        print()
# demonstrate_url_errors()
HTTPError Class
Specialized exception for HTTP protocol errors. Inherits from URLError and provides additional HTTP-specific information.
HTTPError Attributes
| Attribute | Type | Description | Example |
|---|---|---|---|
| code | int | HTTP status code | 404, 500, 403 |
| reason | str | HTTP reason phrase | 'Not Found', 'Internal Server Error' |
| headers | HTTPMessage | Response headers | Access via e.headers['Content-Type'] |
| url | str | URL that caused the error | Original request URL |
HTTPError Methods
| Method | Description | Return Type | Example |
|---|---|---|---|
| read() | Read error response body | bytes | error_body = e.read() |
| getcode() | Get HTTP status code | int | status = e.getcode() |
| geturl() | Get final URL (after redirects) | str | final_url = e.geturl() |
| info() | Get response headers | HTTPMessage | headers = e.info() |
HTTP Status Code Categories
import urllib.request
import urllib.error
def categorize_http_error(error_code):
    """Map an HTTP status code to a coarse error category.

    Returns "Client Error" for codes in 400-499, "Server Error" for codes
    in 500-599, and "Unknown Error" for anything else.
    """
    labelled_ranges = (
        (range(400, 500), "Client Error"),
        (range(500, 600), "Server Error"),
    )
    # First matching range wins; the ranges do not overlap.
    matches = (label for span, label in labelled_ranges if error_code in span)
    return next(matches, "Unknown Error")
def demonstrate_http_errors():
    """Request URLs that answer with HTTP error statuses and print the details.

    For each ``HTTPError`` the status, reason, URL, category, headers and the
    first 100 characters of the body are printed. Non-HTTP ``URLError``
    failures are printed with their reason. Nothing is returned.
    """
    # URLs that return different HTTP errors
    test_urls = [
        'https://httpbin.org/status/400',  # Bad Request
        'https://httpbin.org/status/401',  # Unauthorized
        'https://httpbin.org/status/403',  # Forbidden
        'https://httpbin.org/status/404',  # Not Found
        'https://httpbin.org/status/429',  # Too Many Requests
        'https://httpbin.org/status/500',  # Internal Server Error
        'https://httpbin.org/status/502',  # Bad Gateway
        'https://httpbin.org/status/503',  # Service Unavailable
    ]

    for url in test_urls:
        try:
            # Close the response even on the (unexpected) success path.
            with urllib.request.urlopen(url):
                print(f"✓ Unexpected success: {url}")
        except urllib.error.HTTPError as e:
            print(f"✗ HTTP {e.code}: {e.reason}")
            print(f" URL: {e.url}")
            print(f" Category: {categorize_http_error(e.code)}")
            print(f" Headers: {dict(e.headers)}")
            # Read error response body if available. Reading is best-effort,
            # but must not swallow KeyboardInterrupt/SystemExit the way a
            # bare ``except:`` would.
            try:
                error_body = e.read().decode('utf-8')
                if error_body:
                    print(f" Body: {error_body[:100]}...")
            except Exception:
                print(" Body: Unable to read")
        except urllib.error.URLError as e:
            # Must come after HTTPError, which is a subclass of URLError.
            print(f"✗ URL Error: {e.reason}")
        print()
# demonstrate_http_errors()
ContentTooShortError Class
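Raised when a download ends with fewer bytes than expected — for example, urllib.request.urlretrieve() raises it when the amount of data received is smaller than the Content-Length header announced.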
ContentTooShortError Attributes
| Attribute | Type | Description |
|---|---|---|
| content | bytes | Partial content that was downloaded |
| reason | str | Error description |
Handling Download Interruptions
import urllib.request
import urllib.error
def safe_download(url, expected_size=None):
    """Download *url* and report short or failed downloads.

    Args:
        url: URL (or ``Request``) passed to ``urllib.request.urlopen``.
        expected_size: Optional minimum number of bytes. When given and the
            body is shorter, the download is treated as incomplete.

    Returns:
        The downloaded bytes. If the body is shorter than ``expected_size``
        the partial bytes are still returned (after printing a notice).
        ``None`` is returned on ``HTTPError`` or ``URLError``.
    """
    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(url) as response:
            content = response.read()

        if expected_size and len(content) < expected_size:
            raise urllib.error.ContentTooShortError(
                f"Downloaded {len(content)} bytes, expected {expected_size}",
                content
            )
        return content
    except urllib.error.ContentTooShortError as e:
        print(f"Download incomplete: {e.reason}")
        print(f"Partial content size: {len(e.content)} bytes")
        return e.content  # Return partial content
    except urllib.error.HTTPError as e:
        print(f"HTTP Error {e.code}: {e.reason}")
        return None
    except urllib.error.URLError as e:
        print(f"URL Error: {e.reason}")
        return None
# Usage
# content = safe_download('https://httpbin.org/bytes/1000', expected_size=1000)
Primary Use Cases
1. Robust HTTP Client with Error Handling
import urllib.request
import urllib.error
import json
import time
import logging
class RobustHTTPClient:
    """HTTP client that retries transient failures with exponential backoff.

    Every outcome is reported as a dict with a ``'success'`` flag instead of
    an exception, so callers can branch on the result.
    """

    def __init__(self, max_retries=3, timeout=30):
        """Store retry/timeout settings.

        Args:
            max_retries: Number of retries after the first attempt.
            timeout: Timeout in seconds passed to ``urlopen``.
        """
        self.max_retries = max_retries
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)

    def make_request(self, url, method='GET', data=None, headers=None, retry_on_errors=None):
        """Make HTTP request with comprehensive error handling.

        Args:
            url: Target URL.
            method: HTTP method name.
            data: Optional request body (bytes).
            headers: Optional dict of request headers.
            retry_on_errors: HTTP status codes worth retrying; defaults to
                500, 502, 503 and 504.

        Returns:
            On success: ``{'success': True, 'status_code', 'headers', 'data',
            'url'}``. On failure: a dict with ``'success': False``,
            ``'error_type'``, ``'attempt'`` and error-specific details. If
            the loop performs no attempt at all (negative ``max_retries``)
            ``{'success': False, 'error_type': 'MaxRetriesExceeded'}``.
        """
        if retry_on_errors is None:
            retry_on_errors = [500, 502, 503, 504]  # Server errors worth retrying
        if headers is None:
            headers = {}

        for attempt in range(self.max_retries + 1):
            retries_left = attempt < self.max_retries
            wait_time = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s, ...
            try:
                req = urllib.request.Request(url, data=data, headers=headers, method=method)
                with urllib.request.urlopen(req, timeout=self.timeout) as response:
                    return {
                        'success': True,
                        'status_code': response.getcode(),
                        'headers': dict(response.headers),
                        'data': response.read(),
                        'url': response.geturl()
                    }
            except urllib.error.HTTPError as e:
                error_info = {
                    'success': False,
                    'error_type': 'HTTPError',
                    'status_code': e.code,
                    'reason': e.reason,
                    'url': e.url,
                    'headers': dict(e.headers),
                    'attempt': attempt + 1
                }
                # Reading the error body is best-effort, but must not trap
                # KeyboardInterrupt/SystemExit like a bare ``except:`` does.
                try:
                    error_info['error_body'] = e.read().decode('utf-8')
                except Exception:
                    error_info['error_body'] = None

                if e.code in retry_on_errors and retries_left:
                    self.logger.warning("HTTP %s error, retrying in %ss...", e.code, wait_time)
                    time.sleep(wait_time)
                    continue
                self.logger.error("HTTP %s error, no more retries", e.code)
                return error_info
            except urllib.error.URLError as e:
                error_info = {
                    'success': False,
                    'error_type': 'URLError',
                    'reason': str(e.reason),
                    'attempt': attempt + 1
                }
                # Retry only network errors that look transient.
                if retries_left and self._should_retry_url_error(e):
                    self.logger.warning("Network error, retrying in %ss...", wait_time)
                    time.sleep(wait_time)
                    continue
                self.logger.error("Network error, no more retries: %s", e.reason)
                return error_info
            except Exception as e:
                error_info = {
                    'success': False,
                    'error_type': type(e).__name__,
                    'reason': str(e),
                    'attempt': attempt + 1
                }
                if retries_left:
                    self.logger.warning("Unexpected error, retrying in %ss...", wait_time)
                    time.sleep(wait_time)
                    continue
                self.logger.error("Unexpected error, no more retries: %s", e)
                return error_info

        # Only reached when the loop body never ran.
        return {'success': False, 'error_type': 'MaxRetriesExceeded'}

    def _should_retry_url_error(self, error):
        """Determine if URL error is worth retrying.

        Accepts both wrapped exception objects and plain-string reasons.
        The phrase list includes the wording the OS/socket layer actually
        produces ("timed out", "Network is unreachable"), which the shorter
        forms "timeout" / "network unreachable" do not match.
        """
        cause = error.reason
        if isinstance(cause, (TimeoutError, ConnectionResetError, ConnectionRefusedError)):
            return True

        reason = str(cause).lower()
        retry_reasons = (
            'timeout', 'timed out', 'connection reset', 'connection refused',
            'temporary failure', 'network unreachable', 'network is unreachable',
        )
        return any(retry_reason in reason for retry_reason in retry_reasons)
# Usage
client = RobustHTTPClient(max_retries=3, timeout=10)

# One request per outcome the client distinguishes.
test_requests = [
    ('https://httpbin.org/get', 'GET'),
    ('https://httpbin.org/status/500', 'GET'),  # Server error - will retry
    ('https://httpbin.org/status/404', 'GET'),  # Client error - won't retry
    ('https://nonexistent-domain.com', 'GET'),  # Network error
]

for url, method in test_requests:
    result = client.make_request(url, method=method)
    if not result['success']:
        # Not every failure dict is guaranteed a 'reason', hence .get().
        print(f"✗ {method} {url}: {result['error_type']} - {result.get('reason', 'Unknown')}")
        continue
    print(f"✓ {method} {url}: HTTP {result['status_code']}")
2. API Response Validator
import urllib.request
import urllib.error
import json
class APIResponseValidator:
    """Fetch a JSON API endpoint and turn every outcome into a result dict.

    Successful, well-formed responses yield ``{'valid': True, ...}``; every
    failure yields ``{'valid': False, 'message': ..., ...}``, usually with a
    list of troubleshooting hints.
    """

    def __init__(self):
        """Register the per-status-code HTTP error handlers."""
        self.error_handlers = {
            400: self._handle_bad_request,
            401: self._handle_unauthorized,
            403: self._handle_forbidden,
            404: self._handle_not_found,
            429: self._handle_rate_limit,
            500: self._handle_server_error,
        }

    def validate_api_response(self, url, expected_fields=None):
        """Validate API response with detailed error analysis.

        Args:
            url: Endpoint to fetch.
            expected_fields: Optional iterable of keys the top-level JSON
                object must contain.

        Returns:
            A dict with ``'valid'`` set to True or False. Every failure dict
            carries a human-readable ``'message'``.
        """
        try:
            with urllib.request.urlopen(url) as response:
                data = json.loads(response.read().decode('utf-8'))
                status_code = response.getcode()
                headers = dict(response.headers)
        except urllib.error.HTTPError as e:
            # Use specific handler for status code
            handler = self.error_handlers.get(e.code, self._handle_generic_http_error)
            return handler(e)
        except urllib.error.URLError as e:
            return {
                'valid': False,
                'error_type': 'NetworkError',
                'message': f"Network error: {e.reason}",
                'troubleshooting': [
                    'Check internet connection',
                    'Verify API endpoint URL',
                    'Check DNS resolution'
                ]
            }
        except ValueError as e:
            # Covers json.JSONDecodeError and UnicodeDecodeError, both of
            # which are ValueError subclasses.
            return {
                'valid': False,
                'error_type': 'InvalidJSON',
                'message': f"Invalid JSON response: {e}",
                'troubleshooting': [
                    'API may be returning HTML error page',
                    'Check API documentation for response format',
                    'Verify Content-Type header'
                ]
            }

        # Validate expected fields
        if expected_fields:
            if not isinstance(data, dict):
                # A JSON array/scalar has no keys to check against.
                problem = f"Expected a JSON object, got {type(data).__name__}"
                return {
                    'valid': False,
                    'error_type': 'UnexpectedStructure',
                    'error': problem,
                    'message': problem,
                    'data': data
                }
            missing_fields = set(expected_fields) - set(data)
            if missing_fields:
                names = ', '.join(sorted(map(str, missing_fields)))
                return {
                    'valid': False,
                    'error_type': 'MissingFields',
                    'error': 'Missing required fields',
                    'message': f"Missing required fields: {names}",
                    'missing_fields': list(missing_fields),
                    'data': data
                }

        return {
            'valid': True,
            'status_code': status_code,
            'data': data,
            'headers': headers
        }

    @staticmethod
    def _read_error_body(error):
        """Return the decoded error body, or None if it cannot be read."""
        try:
            return error.read().decode('utf-8')
        except Exception:
            return None

    def _handle_bad_request(self, error):
        """Handle 400 Bad Request errors.

        The body is read once. If it is not valid JSON the generic handler
        is used and given the text already read, because the stream cannot
        be read a second time.
        """
        raw_body = self._read_error_body(error)
        try:
            error_body = json.loads(raw_body)
        except (TypeError, ValueError):
            # TypeError: body unreadable (None); ValueError: not JSON.
            return self._handle_generic_http_error(error, error_body=raw_body)
        return {
            'valid': False,
            'error_type': 'BadRequest',
            'status_code': 400,
            'message': 'Invalid request parameters',
            'details': error_body,
            'troubleshooting': [
                'Check request parameters',
                'Verify parameter types and formats',
                'Review API documentation'
            ]
        }

    def _handle_unauthorized(self, error):
        """Handle 401 Unauthorized errors."""
        return {
            'valid': False,
            'error_type': 'Unauthorized',
            'status_code': 401,
            'message': 'Authentication required or invalid',
            'troubleshooting': [
                'Check API key or token',
                'Verify authentication header format',
                'Check if credentials have expired'
            ]
        }

    def _handle_forbidden(self, error):
        """Handle 403 Forbidden errors."""
        return {
            'valid': False,
            'error_type': 'Forbidden',
            'status_code': 403,
            'message': 'Access denied - insufficient permissions',
            'troubleshooting': [
                'Check user permissions',
                'Verify API key has required scopes',
                'Contact API provider for access'
            ]
        }

    def _handle_not_found(self, error):
        """Handle 404 Not Found errors."""
        return {
            'valid': False,
            'error_type': 'NotFound',
            'status_code': 404,
            'message': 'Resource not found',
            'troubleshooting': [
                'Check URL endpoint',
                'Verify resource ID',
                'Check API version'
            ]
        }

    def _handle_rate_limit(self, error):
        """Handle 429 Too Many Requests errors.

        The ``Retry-After`` header value, when present, is passed through
        unchanged in ``'retry_after'`` and quoted in the first hint.
        """
        retry_after = error.headers.get('Retry-After')
        return {
            'valid': False,
            'error_type': 'RateLimited',
            'status_code': 429,
            'message': 'Rate limit exceeded',
            'retry_after': retry_after,
            'troubleshooting': [
                f'Wait {retry_after} seconds before retrying' if retry_after else 'Implement exponential backoff',
                'Reduce request frequency',
                'Consider API rate limit policies'
            ]
        }

    def _handle_server_error(self, error):
        """Handle 500 Internal Server Error."""
        return {
            'valid': False,
            'error_type': 'ServerError',
            'status_code': 500,
            'message': 'Server internal error',
            'troubleshooting': [
                'Retry the request after a delay',
                'Check API status page',
                'Contact API support if persistent'
            ]
        }

    def _handle_generic_http_error(self, error, error_body=None):
        """Handle other HTTP errors.

        Args:
            error: The ``HTTPError`` being reported.
            error_body: Body text already read by the caller; when omitted
                the body is read from ``error``.
        """
        if error_body is None:
            error_body = self._read_error_body(error)
        return {
            'valid': False,
            'error_type': 'HTTPError',
            'status_code': error.code,
            'reason': error.reason,
            'message': f"HTTP {error.code}: {error.reason}",
            'error_body': error_body,
            'headers': dict(error.headers)
        }
# Usage
validator = APIResponseValidator()

# Test different API scenarios
test_apis = [
    ('https://jsonplaceholder.typicode.com/posts/1', ['userId', 'id', 'title', 'body']),
    ('https://httpbin.org/status/404', None),
    ('https://httpbin.org/status/429', None),
]

for url, expected_fields in test_apis:
    result = validator.validate_api_response(url, expected_fields)
    if result['valid']:
        print(f"✓ Valid API response from {url}")
    else:
        # Some failure results (e.g. missing fields) describe the problem
        # under 'error' rather than 'message', so fall back instead of
        # raising KeyError.
        problem = result.get('message') or result.get('error', 'Unknown error')
        print(f"✗ API validation failed: {problem}")
        if 'troubleshooting' in result:
            print(" Troubleshooting steps:")
            for step in result['troubleshooting']:
                print(f" - {step}")
    print()
3. Error Recovery and Fallback System
import urllib.request
import urllib.error
import json
from typing import List, Dict, Any, Optional
class ErrorRecoverySystem:
    """Fetch JSON with a chain of fallbacks: primary, cache, fallback APIs, default.

    Successful results from live sources are stored in an in-memory cache so
    that a later failure of the same ``cache_key`` can be served from it.
    """

    def __init__(self):
        """Register the strategy callables and create an empty cache."""
        self.fallback_strategies = {
            'primary': self._try_primary_source,
            'cache': self._try_cache,
            'fallback_api': self._try_fallback_api,
            'default': self._use_default_data
        }
        self.cache = {}

    def fetch_with_recovery(self, primary_url: str, fallback_urls: Optional[List[str]] = None,
                            cache_key: Optional[str] = None) -> Dict[str, Any]:
        """Fetch data with multiple fallback strategies.

        Strategies run in order: primary, cache (only if ``cache_key`` is
        already cached), fallback APIs (only if URLs were given), default.

        Args:
            primary_url: Preferred data source.
            fallback_urls: Optional alternative endpoints.
            cache_key: Optional key under which live results are cached.

        Returns:
            The first successful strategy's result dict, or a failure dict
            naming the strategies tried.
        """
        strategies = ['primary']
        if cache_key and cache_key in self.cache:
            strategies.append('cache')
        if fallback_urls:
            strategies.append('fallback_api')
        strategies.append('default')

        last_error = None
        for strategy in strategies:
            try:
                result = self.fallback_strategies[strategy](
                    primary_url, fallback_urls, cache_key
                )
            except Exception as e:
                last_error = e
                continue
            if result['success']:
                # Cache only data that came from a live source. Writing
                # 'default' placeholder data (or re-writing cached data)
                # would make later calls serve the placeholder as if it
                # were a real cached response.
                if cache_key and result.get('source') not in ('cache', 'default'):
                    self.cache[cache_key] = result['data']
                return result

        return {
            'success': False,
            'error': 'All recovery strategies failed',
            'last_error': str(last_error),
            'strategies_tried': strategies
        }

    def _try_primary_source(self, primary_url: str, fallback_urls: Optional[List[str]],
                            cache_key: Optional[str]) -> Dict[str, Any]:
        """Try the primary data source.

        Raises:
            Exception: On any HTTP or network error, chained to the original
                urllib exception.
        """
        try:
            with urllib.request.urlopen(primary_url, timeout=10) as response:
                data = json.loads(response.read().decode('utf-8'))
                return {
                    'success': True,
                    'data': data,
                    'source': 'primary',
                    'status_code': response.getcode()
                }
        except urllib.error.HTTPError as e:
            if e.code in [429, 503]:  # Rate limit or service unavailable
                raise Exception(f"Primary source temporarily unavailable: {e.code}") from e
            raise Exception(f"Primary source error: {e.code} {e.reason}") from e
        except urllib.error.URLError as e:
            raise Exception(f"Primary source network error: {e.reason}") from e

    def _try_cache(self, primary_url: str, fallback_urls: Optional[List[str]],
                   cache_key: Optional[str]) -> Dict[str, Any]:
        """Use cached data if available.

        Raises:
            Exception: If nothing is cached under ``cache_key``.
        """
        if cache_key and cache_key in self.cache:
            return {
                'success': True,
                'data': self.cache[cache_key],
                'source': 'cache',
                'warning': 'Using cached data due to primary source failure'
            }
        raise Exception("No cached data available")

    def _try_fallback_api(self, primary_url: str, fallback_urls: Optional[List[str]],
                          cache_key: Optional[str]) -> Dict[str, Any]:
        """Try fallback API endpoints in order and return the first success.

        Raises:
            Exception: If no URLs were given or every fallback failed.
        """
        if not fallback_urls:
            raise Exception("No fallback URLs provided")

        last_error = None
        for position, fallback_url in enumerate(fallback_urls, start=1):
            try:
                with urllib.request.urlopen(fallback_url, timeout=10) as response:
                    data = json.loads(response.read().decode('utf-8'))
                    return {
                        'success': True,
                        'data': data,
                        'source': f'fallback_{position}',
                        'status_code': response.getcode()
                    }
            except (urllib.error.URLError, ValueError) as e:
                # URLError covers HTTPError; ValueError covers undecodable
                # or non-JSON bodies. Either way, move on to the next URL.
                last_error = e

        raise Exception(f"All fallback APIs failed. Last error: {last_error}") from last_error

    def _use_default_data(self, primary_url: str, fallback_urls: Optional[List[str]],
                          cache_key: Optional[str]) -> Dict[str, Any]:
        """Return default data as last resort."""
        default_data = {
            'message': 'Default data - all sources unavailable',
            'timestamp': '2024-01-01T00:00:00Z',
            'status': 'degraded'
        }
        return {
            'success': True,
            'data': default_data,
            'source': 'default',
            'warning': 'All data sources failed, using default data'
        }
# Usage example
recovery_system = ErrorRecoverySystem()

# Each scenario targets a different link in the recovery chain.
test_scenarios = [
    dict(
        name='Working API',
        primary='https://jsonplaceholder.typicode.com/users/1',
        fallbacks=['https://jsonplaceholder.typicode.com/users/2'],
        cache_key='user_data',
    ),
    dict(
        name='Primary fails, fallback works',
        primary='https://httpbin.org/status/500',
        fallbacks=['https://jsonplaceholder.typicode.com/users/1'],
        cache_key='backup_user',
    ),
    dict(
        name='All APIs fail, use cache',
        primary='https://httpbin.org/status/500',
        fallbacks=['https://httpbin.org/status/503'],
        cache_key='user_data',  # Should use cached data from first test
    ),
    dict(
        name='Everything fails, use default',
        primary='https://nonexistent-api.com',
        fallbacks=['https://another-fake-api.com'],
        cache_key='no_cache',
    ),
]

for scenario in test_scenarios:
    print(f"\nTesting: {scenario['name']}")
    result = recovery_system.fetch_with_recovery(
        scenario['primary'],
        scenario['fallbacks'],
        scenario['cache_key']
    )
    if not result['success']:
        print(f"✗ Failed: {result['error']}")
        continue
    print(f"✓ Success from {result['source']}")
    if 'warning' in result:
        print(f" Warning: {result['warning']}")
Error Handling Best Practices
Exception Hierarchy Handling
import urllib.request
import urllib.error
def handle_urllib_errors_properly(url):
    """Demonstrate proper exception handling order.

    ``HTTPError`` is caught before its base class ``URLError``, with a
    generic handler last.

    Returns:
        The response body as bytes, or ``None`` if any error occurred.
    """
    try:
        # Close the response as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            return response.read()
    except urllib.error.HTTPError as e:
        # Handle HTTP errors first (more specific)
        print(f"HTTP Error {e.code}: {e.reason}")
        # Can still read error response
        try:
            error_content = e.read().decode('utf-8')
            print(f"Error response: {error_content}")
        except Exception as read_error:
            # Best-effort only, but say so instead of failing silently.
            print(f"Error response unavailable: {read_error}")
        return None
    except urllib.error.URLError as e:
        # Handle general URL errors (less specific)
        print(f"URL Error: {e.reason}")
        return None
    except Exception as e:
        # Handle any other unexpected errors
        print(f"Unexpected error: {e}")
        return None
# Wrong order - HTTPError won't be caught specifically
def wrong_exception_order(url):
    """Anti-pattern: the base class URLError is caught before HTTPError.

    Because HTTPError inherits from URLError, the first handler absorbs
    HTTP errors as well and the second handler is dead code.
    """
    try:
        body = urllib.request.urlopen(url).read()
    except urllib.error.URLError as e:  # Also matches every HTTPError
        print(f"URL Error: {e.reason}")
        return None
    except urllib.error.HTTPError as e:  # Unreachable: already handled above
        print(f"HTTP Error: {e.code}")
        return None
    return body
Comprehensive Error Information Extraction
import urllib.request
import urllib.error
def extract_comprehensive_error_info(url):
    """Extract all available error information.

    Returns:
        ``{'success': True, 'data': <bytes>}`` on success. On failure a dict
        with ``'success': False`` and ``'error_type'``, plus status, reason,
        URL, headers and body for ``HTTPError``; reason and reason type for
        ``URLError``; or a message for any other exception.
    """
    try:
        # Close the response once the body has been read.
        with urllib.request.urlopen(url) as response:
            return {'success': True, 'data': response.read()}
    except urllib.error.HTTPError as e:
        error_info = {
            'success': False,
            'error_type': 'HTTPError',
            'status_code': e.code,
            'reason': e.reason,
            'url': e.url,
            'headers': dict(e.headers),
        }
        # Try to read error response body
        try:
            error_info['response_body'] = e.read().decode('utf-8')
        except Exception:
            error_info['response_body'] = None
        return error_info
    except urllib.error.URLError as e:
        # ``reason`` may be an exception object, so report both its text
        # and its type name.
        return {
            'success': False,
            'error_type': 'URLError',
            'reason': str(e.reason),
            'reason_type': type(e.reason).__name__
        }
    except Exception as e:
        return {
            'success': False,
            'error_type': type(e).__name__,
            'message': str(e)
        }
# Test comprehensive error extraction
import json  # this example's own import block does not bring json into scope

test_url = 'https://httpbin.org/status/404'
result = extract_comprehensive_error_info(test_url)
# The failure dict for an HTTP error holds only strings, ints, bools and a
# plain dict of headers, so it can be serialized directly.
print(json.dumps(result, indent=2))
When to Use urllib.error
Ideal Use Cases
- Robust HTTP client error handling
- API response validation and recovery
- Network operation monitoring
- Automated retry logic
- Error logging and diagnostics
- Building resilient web scrapers
When NOT to Use urllib.error
- Simple requests without error handling → Use `requests` with default behavior
- Complex authentication flows → Use specialized libraries
- Async operations → Use `aiohttp` exceptions
Related Modules
- urllib.request - HTTP requests that raise these exceptions
- http.client - Lower-level HTTP operations
- socket - Network socket errors