Skip to main content

urllib.error Module

The urllib.error module defines the exception classes raised by urllib.request for URL-related errors. Understanding these exceptions is crucial for robust error handling in HTTP requests and URL operations.

Exception Hierarchy

Exception
└── OSError
    └── URLError
        ├── HTTPError
        └── ContentTooShortError

Exception Classes Overview

| Exception | Base Class | Description | When Raised |
| --- | --- | --- | --- |
| URLError | OSError | General URL-related errors | Network issues, DNS failures, connection problems |
| HTTPError | URLError | HTTP-specific errors | HTTP status codes 400-599 |
| ContentTooShortError | URLError | Incomplete downloads | Download interrupted or corrupted |

URLError Class

The base exception for URL-related errors. Raised when there are problems with the URL request that are not HTTP-specific.

URLError Attributes

| Attribute | Type | Description | Example |
| --- | --- | --- | --- |
| reason | str or Exception | Error description or underlying exception | 'Name or service not known' |

Common URLError Scenarios

import urllib.request
import urllib.error

def demonstrate_url_errors(urls=None):
    """Try to open each URL and print what kind of error, if any, it raises.

    Args:
        urls: Optional iterable of URL strings to probe. When omitted, a
            built-in list of deliberately problematic URLs is used.

    Returns:
        None. All results are written to standard output.
    """
    if urls is None:
        urls = [
            'http://nonexistent-domain-12345.com',  # DNS failure
            'http://192.168.1.999',  # Invalid IP
            'http://localhost:99999',  # Connection refused
            'ftp://unreachable-ftp.com',  # Protocol/network issue
        ]

    for url in urls:
        try:
            # The body is never read, so use a context manager to make sure
            # the connection is closed instead of leaking it.
            with urllib.request.urlopen(url, timeout=5):
                print(f"✓ Success: {url}")
        except urllib.error.URLError as e:
            print(f"✗ URLError for {url}")
            print(f" Reason: {e.reason}")
            print(f" Reason type: {type(e.reason)}")
        except Exception as e:
            # Anything urllib does not wrap in URLError, e.g. the ValueError
            # raised for a URL without a scheme.
            print(f"✗ Other error for {url}: {e}")
        print()

# demonstrate_url_errors()

HTTPError Class

Specialized exception for HTTP protocol errors. Inherits from URLError and provides additional HTTP-specific information.

HTTPError Attributes

| Attribute | Type | Description | Example |
| --- | --- | --- | --- |
| code | int | HTTP status code | 404, 500, 403 |
| reason | str | HTTP reason phrase | 'Not Found', 'Internal Server Error' |
| headers | HTTPMessage | Response headers | Access via e.headers['Content-Type'] |
| url | str | URL that caused the error | Original request URL |

HTTPError Methods

| Method | Description | Return Type | Example |
| --- | --- | --- | --- |
| read() | Read error response body | bytes | error_body = e.read() |
| getcode() | Get HTTP status code | int | status = e.getcode() |
| geturl() | Get final URL (after redirects) | str | final_url = e.geturl() |
| info() | Get response headers | HTTPMessage | headers = e.info() |

HTTP Status Code Categories

import urllib.request
import urllib.error

def categorize_http_error(error_code):
    """Map an HTTP status code to the family of error it belongs to.

    Returns "Client Error" for codes 400-499, "Server Error" for codes
    500-599, and "Unknown Error" for anything else.
    """
    families = (
        (range(400, 500), "Client Error"),
        (range(500, 600), "Server Error"),
    )
    # First family whose range contains the code wins; otherwise fall back.
    return next(
        (label for span, label in families if error_code in span),
        "Unknown Error",
    )

def demonstrate_http_errors():
    """Request URLs that answer with HTTP error statuses and print the details.

    For every HTTPError the status code, reason, URL, category and headers
    are printed, followed by the first 100 characters of the error body when
    it can be read. Nothing is returned.
    """
    # URLs that return different HTTP errors
    test_urls = [
        'https://httpbin.org/status/400',  # Bad Request
        'https://httpbin.org/status/401',  # Unauthorized
        'https://httpbin.org/status/403',  # Forbidden
        'https://httpbin.org/status/404',  # Not Found
        'https://httpbin.org/status/429',  # Too Many Requests
        'https://httpbin.org/status/500',  # Internal Server Error
        'https://httpbin.org/status/502',  # Bad Gateway
        'https://httpbin.org/status/503',  # Service Unavailable
    ]

    for url in test_urls:
        try:
            # Context manager so an unexpected success does not leak the
            # connection.
            with urllib.request.urlopen(url):
                print(f"✓ Unexpected success: {url}")
        except urllib.error.HTTPError as e:
            print(f"✗ HTTP {e.code}: {e.reason}")
            print(f" URL: {e.url}")
            print(f" Category: {categorize_http_error(e.code)}")
            print(f" Headers: {dict(e.headers)}")

            # Read error response body if available. HTTPError is itself a
            # response object, so `with e` closes it once we are done.
            try:
                with e:
                    error_body = e.read().decode('utf-8')
            except Exception:
                # Best effort only, but do not swallow KeyboardInterrupt or
                # SystemExit the way a bare `except:` would.
                print(" Body: Unable to read")
            else:
                if error_body:
                    print(f" Body: {error_body[:100]}...")
        except urllib.error.URLError as e:
            print(f"✗ URL Error: {e.reason}")
        print()

# demonstrate_http_errors()

ContentTooShortError Class

Raised when a download is interrupted or the content is shorter than expected.

ContentTooShortError Attributes

| Attribute | Type | Description |
| --- | --- | --- |
| content | bytes | Partial content that was downloaded |
| reason | str | Error description |

Handling Download Interruptions

import urllib.request
import urllib.error

def safe_download(url, expected_size=None):
    """Download a URL, reporting short reads and urllib errors.

    Args:
        url: URL to fetch.
        expected_size: Optional minimum number of bytes expected. When given
            and the download is shorter, the download is treated as
            incomplete.

    Returns:
        The downloaded bytes on success; the partial bytes when the download
        was shorter than ``expected_size``; ``None`` on HTTPError or URLError.
    """
    try:
        # Close the connection as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            content = response.read()

        if expected_size and len(content) < expected_size:
            raise urllib.error.ContentTooShortError(
                f"Downloaded {len(content)} bytes, expected {expected_size}",
                content,
            )

        return content

    # ContentTooShortError and HTTPError are both URLError subclasses, so
    # they must be handled before the generic URLError clause.
    except urllib.error.ContentTooShortError as e:
        print(f"Download incomplete: {e.reason}")
        print(f"Partial content size: {len(e.content)} bytes")
        return e.content  # Return partial content

    except urllib.error.HTTPError as e:
        print(f"HTTP Error {e.code}: {e.reason}")
        return None

    except urllib.error.URLError as e:
        print(f"URL Error: {e.reason}")
        return None

# Usage
# content = safe_download('https://httpbin.org/bytes/1000', expected_size=1000)

Primary Use Cases

1. Robust HTTP Client with Error Handling

import urllib.request
import urllib.error
import json
import time
import logging

class RobustHTTPClient:
    """HTTP client that retries failed requests with exponential backoff.

    Attributes:
        max_retries: Number of retries after the first attempt.
        timeout: Timeout in seconds passed to ``urlopen``.
        logger: Logger used for retry warnings and final errors.
    """

    def __init__(self, max_retries=3, timeout=30):
        self.max_retries = max_retries
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)

    def make_request(self, url, method='GET', data=None, headers=None, retry_on_errors=None):
        """Make HTTP request with comprehensive error handling.

        Args:
            url: URL to request.
            method: HTTP method name.
            data: Optional request body passed to ``urllib.request.Request``.
            headers: Optional dict of request headers.
            retry_on_errors: HTTP status codes worth retrying; defaults to
                500, 502, 503 and 504.

        Returns:
            A dict whose ``'success'`` key tells the outcome. On success it
            carries ``status_code``, ``headers``, ``data`` and ``url``; on
            failure it carries ``error_type`` plus details of the last
            attempt. Errors are reported through the dict, not raised.
        """
        if retry_on_errors is None:
            retry_on_errors = [500, 502, 503, 504]  # Server errors worth retrying

        if headers is None:
            headers = {}

        for attempt in range(self.max_retries + 1):
            retries_left = attempt < self.max_retries
            try:
                req = urllib.request.Request(url, data=data, headers=headers, method=method)

                with urllib.request.urlopen(req, timeout=self.timeout) as response:
                    return {
                        'success': True,
                        'status_code': response.getcode(),
                        'headers': dict(response.headers),
                        'data': response.read(),
                        'url': response.geturl(),
                    }

            # HTTPError must come first: it is a subclass of URLError.
            except urllib.error.HTTPError as e:
                error_info = {
                    'success': False,
                    'error_type': 'HTTPError',
                    'status_code': e.code,
                    'reason': e.reason,
                    'url': e.url,
                    'headers': dict(e.headers),
                    'attempt': attempt + 1,
                    'error_body': self._read_error_body(e),
                }

                if e.code in retry_on_errors and retries_left:
                    self._backoff(attempt, f"HTTP {e.code} error")
                    continue
                self.logger.error("HTTP %s error, no more retries", e.code)
                return error_info

            except urllib.error.URLError as e:
                error_info = {
                    'success': False,
                    'error_type': 'URLError',
                    'reason': str(e.reason),
                    'attempt': attempt + 1,
                }

                # Retry on certain network errors
                if retries_left and self._should_retry_url_error(e):
                    self._backoff(attempt, "Network error")
                    continue
                self.logger.error("Network error, no more retries: %s", e.reason)
                return error_info

            except Exception as e:
                error_info = {
                    'success': False,
                    'error_type': type(e).__name__,
                    'reason': str(e),
                    'attempt': attempt + 1,
                }

                if retries_left:
                    self._backoff(attempt, "Unexpected error")
                    continue
                self.logger.error("Unexpected error, no more retries: %s", e)
                return error_info

        # Only reachable when the loop body never ran (negative max_retries).
        return {'success': False, 'error_type': 'MaxRetriesExceeded'}

    def _backoff(self, attempt, what):
        """Log a retry warning and sleep 2**attempt seconds."""
        wait_time = 2 ** attempt  # Exponential backoff
        self.logger.warning("%s, retrying in %ss...", what, wait_time)
        time.sleep(wait_time)

    @staticmethod
    def _read_error_body(error):
        """Return the decoded body of an HTTPError, or None if unreadable.

        The error object is closed afterwards so its connection is released.
        """
        try:
            with error:
                return error.read().decode('utf-8')
        except Exception:
            # Best effort: the status code and headers are still reported.
            return None

    def _should_retry_url_error(self, error):
        """Determine if URL error is worth retrying.

        Returns True when the error's reason is a timeout or its text names a
        known transient network condition.
        """
        if isinstance(error.reason, TimeoutError):
            return True
        reason = str(error.reason).lower()
        retry_reasons = [
            # 'timed out' is the text socket timeouts actually carry;
            # 'network is unreachable' is the OS wording for ENETUNREACH.
            'timeout', 'timed out', 'connection reset', 'connection refused',
            'temporary failure', 'network unreachable', 'network is unreachable',
        ]
        return any(retry_reason in reason for retry_reason in retry_reasons)

# Usage
client = RobustHTTPClient(max_retries=3, timeout=10)

# Each entry pairs a URL with the HTTP method to use for it.
test_requests = [
    ('https://httpbin.org/get', 'GET'),
    ('https://httpbin.org/status/500', 'GET'),  # Server error - will retry
    ('https://httpbin.org/status/404', 'GET'),  # Client error - won't retry
    ('https://nonexistent-domain.com', 'GET'),  # Network error
]

for url, method in test_requests:
    result = client.make_request(url, method=method)
    if not result['success']:
        # Failure dicts may lack 'reason', hence the fallback text.
        print(f"✗ {method} {url}: {result['error_type']} - {result.get('reason', 'Unknown')}")
        continue
    print(f"✓ {method} {url}: HTTP {result['status_code']}")

2. API Response Validator

import urllib.request
import urllib.error
import json

class APIResponseValidator:
    """Fetch a JSON API response and turn failures into descriptive dicts.

    Attributes:
        error_handlers: Maps an HTTP status code to the method that builds
            the failure result for it; other codes use the generic handler.
    """

    def __init__(self):
        self.error_handlers = {
            400: self._handle_bad_request,
            401: self._handle_unauthorized,
            403: self._handle_forbidden,
            404: self._handle_not_found,
            429: self._handle_rate_limit,
            500: self._handle_server_error,
        }

    def validate_api_response(self, url, expected_fields=None):
        """Validate API response with detailed error analysis.

        Args:
            url: URL of the API endpoint.
            expected_fields: Optional iterable of keys the JSON object must
                contain.

        Returns:
            A dict with ``'valid'`` set to True (plus ``status_code``,
            ``data`` and ``headers``) or False. Every failure result carries
            a human-readable ``'message'``.
        """
        try:
            with urllib.request.urlopen(url) as response:
                raw_body = response.read()
                status_code = response.getcode()
                headers = dict(response.headers)
            data = json.loads(raw_body.decode('utf-8'))

        except urllib.error.HTTPError as e:
            # Use specific handler for status code
            handler = self.error_handlers.get(e.code, self._handle_generic_http_error)
            return handler(e)

        except urllib.error.URLError as e:
            return {
                'valid': False,
                'error_type': 'NetworkError',
                'message': f"Network error: {e.reason}",
                'troubleshooting': [
                    'Check internet connection',
                    'Verify API endpoint URL',
                    'Check DNS resolution'
                ]
            }

        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            # A body that is not valid UTF-8 cannot be valid JSON text either.
            return {
                'valid': False,
                'error_type': 'InvalidJSON',
                'message': f"Invalid JSON response: {e}",
                'troubleshooting': [
                    'API may be returning HTML error page',
                    'Check API documentation for response format',
                    'Verify Content-Type header'
                ]
            }

        # Validate expected fields
        if expected_fields:
            if not isinstance(data, dict):
                # Lists, strings and numbers have no named fields to check.
                return {
                    'valid': False,
                    'error_type': 'UnexpectedStructure',
                    'error': 'Response is not a JSON object',
                    'message': 'Response is not a JSON object',
                    'missing_fields': list(expected_fields),
                    'data': data
                }
            # Preserve the caller's field order so the result is deterministic.
            missing_fields = [field for field in expected_fields if field not in data]
            if missing_fields:
                return {
                    'valid': False,
                    'error_type': 'MissingFields',
                    'error': 'Missing required fields',
                    'message': 'Missing required fields: '
                               + ', '.join(str(field) for field in missing_fields),
                    'missing_fields': missing_fields,
                    'data': data
                }

        return {
            'valid': True,
            'status_code': status_code,
            'data': data,
            'headers': headers
        }

    @staticmethod
    def _read_error_body(error):
        """Return the decoded body of an HTTPError, or None if unreadable."""
        try:
            with error:
                return error.read().decode('utf-8')
        except Exception:
            # Best effort: the caller still reports the status code.
            return None

    def _handle_bad_request(self, error):
        """Handle 400 Bad Request errors.

        Falls back to the generic result, keeping the raw body, when the
        error body is not JSON.
        """
        raw_body = self._read_error_body(error)
        try:
            error_body = json.loads(raw_body)
        except (TypeError, ValueError):
            # The body can only be read once, so hand the text over instead
            # of letting the generic handler re-read an exhausted stream.
            return self._handle_generic_http_error(error, error_body=raw_body)
        return {
            'valid': False,
            'error_type': 'BadRequest',
            'status_code': 400,
            'message': 'Invalid request parameters',
            'details': error_body,
            'troubleshooting': [
                'Check request parameters',
                'Verify parameter types and formats',
                'Review API documentation'
            ]
        }

    def _handle_unauthorized(self, error):
        """Handle 401 Unauthorized errors."""
        return {
            'valid': False,
            'error_type': 'Unauthorized',
            'status_code': 401,
            'message': 'Authentication required or invalid',
            'troubleshooting': [
                'Check API key or token',
                'Verify authentication header format',
                'Check if credentials have expired'
            ]
        }

    def _handle_forbidden(self, error):
        """Handle 403 Forbidden errors."""
        return {
            'valid': False,
            'error_type': 'Forbidden',
            'status_code': 403,
            'message': 'Access denied - insufficient permissions',
            'troubleshooting': [
                'Check user permissions',
                'Verify API key has required scopes',
                'Contact API provider for access'
            ]
        }

    def _handle_not_found(self, error):
        """Handle 404 Not Found errors."""
        return {
            'valid': False,
            'error_type': 'NotFound',
            'status_code': 404,
            'message': 'Resource not found',
            'troubleshooting': [
                'Check URL endpoint',
                'Verify resource ID',
                'Check API version'
            ]
        }

    def _handle_rate_limit(self, error):
        """Handle 429 Too Many Requests errors.

        Reports the ``Retry-After`` header value when the server sent one.
        """
        retry_after = error.headers.get('Retry-After')
        return {
            'valid': False,
            'error_type': 'RateLimited',
            'status_code': 429,
            'message': 'Rate limit exceeded',
            'retry_after': retry_after,
            'troubleshooting': [
                f'Wait {retry_after} seconds before retrying' if retry_after else 'Implement exponential backoff',
                'Reduce request frequency',
                'Consider API rate limit policies'
            ]
        }

    def _handle_server_error(self, error):
        """Handle 500 Internal Server Error."""
        return {
            'valid': False,
            'error_type': 'ServerError',
            'status_code': 500,
            'message': 'Server internal error',
            'troubleshooting': [
                'Retry the request after a delay',
                'Check API status page',
                'Contact API support if persistent'
            ]
        }

    def _handle_generic_http_error(self, error, error_body=None):
        """Handle other HTTP errors.

        Args:
            error: The HTTPError being reported.
            error_body: Body text already read by the caller; when None the
                body is read from ``error`` here.
        """
        if error_body is None:
            error_body = self._read_error_body(error)

        return {
            'valid': False,
            'error_type': 'HTTPError',
            'status_code': error.code,
            'reason': error.reason,
            'message': f"HTTP {error.code}: {error.reason}",
            'error_body': error_body,
            'headers': dict(error.headers)
        }

# Usage
validator = APIResponseValidator()

# Test different API scenarios: (URL, fields the JSON object must contain)
test_apis = [
    ('https://jsonplaceholder.typicode.com/posts/1', ['userId', 'id', 'title', 'body']),
    ('https://httpbin.org/status/404', None),
    ('https://httpbin.org/status/429', None),
]

for url, expected_fields in test_apis:
    result = validator.validate_api_response(url, expected_fields)
    if result['valid']:
        print(f"✓ Valid API response from {url}")
    else:
        # A failure result may describe itself under 'error' rather than
        # 'message', so fall back instead of raising KeyError.
        print(f"✗ API validation failed: {result.get('message') or result.get('error', 'Unknown error')}")
        if 'troubleshooting' in result:
            print(" Troubleshooting steps:")
            for step in result['troubleshooting']:
                print(f" - {step}")
    print()

3. Error Recovery and Fallback System

import urllib.request
import urllib.error
import json
from typing import List, Dict, Any, Optional

class ErrorRecoverySystem:
    """Fetch JSON data, falling back to cache, alternate URLs, then defaults.

    Attributes:
        fallback_strategies: Maps a strategy name to the method implementing
            it. Every strategy takes ``(primary_url, fallback_urls,
            cache_key)``.
        cache: In-memory dict of the last live data stored per cache key.
    """

    def __init__(self):
        self.fallback_strategies = {
            'primary': self._try_primary_source,
            'cache': self._try_cache,
            'fallback_api': self._try_fallback_api,
            'default': self._use_default_data
        }
        self.cache = {}

    def fetch_with_recovery(self, primary_url: str, fallback_urls: Optional[List[str]] = None,
                            cache_key: Optional[str] = None) -> Dict[str, Any]:
        """Fetch data with multiple fallback strategies.

        Strategies are tried in this order: primary URL, cached data (only
        when ``cache_key`` is already cached), fallback URLs (only when
        given), and finally built-in default data.

        Args:
            primary_url: Preferred data source.
            fallback_urls: Optional alternate URLs tried in order.
            cache_key: Optional key under which live results are cached and
                looked up.

        Returns:
            The first successful strategy's result dict, or a failure dict
            listing the strategies tried.
        """
        strategies = ['primary']
        if cache_key and cache_key in self.cache:
            strategies.append('cache')
        if fallback_urls:
            strategies.append('fallback_api')
        strategies.append('default')

        last_error = None

        for strategy in strategies:
            try:
                result = self.fallback_strategies[strategy](
                    primary_url, fallback_urls, cache_key
                )
                if result['success']:
                    # Cache only data fetched live. Storing the placeholder
                    # from the 'default' strategy would make later calls
                    # serve it as if it were real cached data.
                    if cache_key and result['source'] not in ('cache', 'default'):
                        self.cache[cache_key] = result['data']
                    return result

            except Exception as e:
                last_error = e
                continue

        return {
            'success': False,
            'error': 'All recovery strategies failed',
            'last_error': str(last_error),
            'strategies_tried': strategies
        }

    def _try_primary_source(self, primary_url: str, fallback_urls: Optional[List[str]],
                            cache_key: Optional[str]) -> Dict[str, Any]:
        """Try the primary data source.

        Raises:
            Exception: When the request fails; the urllib error is chained
                as the cause.
        """
        try:
            with urllib.request.urlopen(primary_url, timeout=10) as response:
                data = json.loads(response.read().decode('utf-8'))
                return {
                    'success': True,
                    'data': data,
                    'source': 'primary',
                    'status_code': response.getcode()
                }

        except urllib.error.HTTPError as e:
            if e.code in [429, 503]:  # Rate limit or service unavailable
                raise Exception(f"Primary source temporarily unavailable: {e.code}") from e
            raise Exception(f"Primary source error: {e.code} {e.reason}") from e

        except urllib.error.URLError as e:
            raise Exception(f"Primary source network error: {e.reason}") from e

    def _try_cache(self, primary_url: str, fallback_urls: Optional[List[str]],
                   cache_key: Optional[str]) -> Dict[str, Any]:
        """Use cached data if available.

        Raises:
            Exception: When nothing is cached under ``cache_key``.
        """
        if cache_key and cache_key in self.cache:
            return {
                'success': True,
                'data': self.cache[cache_key],
                'source': 'cache',
                'warning': 'Using cached data due to primary source failure'
            }
        raise Exception("No cached data available")

    def _try_fallback_api(self, primary_url: str, fallback_urls: Optional[List[str]],
                          cache_key: Optional[str]) -> Dict[str, Any]:
        """Try fallback API endpoints in order and return the first success.

        Raises:
            Exception: When no fallback URLs were given or every one failed.
        """
        if not fallback_urls:
            raise Exception("No fallback URLs provided")

        last_error = None
        for position, fallback_url in enumerate(fallback_urls, start=1):
            try:
                with urllib.request.urlopen(fallback_url, timeout=10) as response:
                    data = json.loads(response.read().decode('utf-8'))
                    return {
                        'success': True,
                        'data': data,
                        'source': f'fallback_{position}',
                        'status_code': response.getcode()
                    }

            except (OSError, ValueError) as e:
                # OSError covers URLError/HTTPError and raw socket failures;
                # ValueError covers undecodable or non-JSON bodies. Either
                # way, move on to the next fallback instead of giving up.
                last_error = e

        raise Exception(f"All fallback APIs failed. Last error: {last_error}") from last_error

    def _use_default_data(self, primary_url: str, fallback_urls: Optional[List[str]],
                          cache_key: Optional[str]) -> Dict[str, Any]:
        """Return default data as last resort."""
        default_data = {
            'message': 'Default data - all sources unavailable',
            'timestamp': '2024-01-01T00:00:00Z',
            'status': 'degraded'
        }

        return {
            'success': True,
            'data': default_data,
            'source': 'default',
            'warning': 'All data sources failed, using default data'
        }

# Usage example
recovery_system = ErrorRecoverySystem()

# One dict per failure scenario; later scenarios rely on state (the cache)
# left behind by earlier ones, so the order matters.
test_scenarios = [
    dict(
        name='Working API',
        primary='https://jsonplaceholder.typicode.com/users/1',
        fallbacks=['https://jsonplaceholder.typicode.com/users/2'],
        cache_key='user_data',
    ),
    dict(
        name='Primary fails, fallback works',
        primary='https://httpbin.org/status/500',
        fallbacks=['https://jsonplaceholder.typicode.com/users/1'],
        cache_key='backup_user',
    ),
    dict(
        name='All APIs fail, use cache',
        primary='https://httpbin.org/status/500',
        fallbacks=['https://httpbin.org/status/503'],
        cache_key='user_data',  # Should use cached data from first test
    ),
    dict(
        name='Everything fails, use default',
        primary='https://nonexistent-api.com',
        fallbacks=['https://another-fake-api.com'],
        cache_key='no_cache',
    ),
]

for scenario in test_scenarios:
    print(f"\nTesting: {scenario['name']}")
    result = recovery_system.fetch_with_recovery(
        scenario['primary'], scenario['fallbacks'], scenario['cache_key']
    )

    if not result['success']:
        print(f"✗ Failed: {result['error']}")
        continue

    print(f"✓ Success from {result['source']}")
    if 'warning' in result:
        print(f" Warning: {result['warning']}")

Error Handling Best Practices

Exception Hierarchy Handling

import urllib.request
import urllib.error

def handle_urllib_errors_properly(url):
    """Demonstrate proper exception handling order.

    The most specific exception (HTTPError) is handled before its base class
    (URLError), which in turn comes before the catch-all.

    Args:
        url: URL to fetch.

    Returns:
        The response body as bytes, or ``None`` when any error occurred.
    """
    try:
        # Close the connection as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            return response.read()

    except urllib.error.HTTPError as e:
        # Handle HTTP errors first (more specific)
        print(f"HTTP Error {e.code}: {e.reason}")

        # The error object is also a response, so its body can still be
        # read; `with e` closes it afterwards.
        try:
            with e:
                error_content = e.read().decode('utf-8')
        except Exception as read_error:
            # Report the failure instead of silently discarding it.
            print(f"Error response unavailable: {read_error}")
        else:
            print(f"Error response: {error_content}")

        return None

    except urllib.error.URLError as e:
        # Handle general URL errors (less specific)
        print(f"URL Error: {e.reason}")
        return None

    except Exception as e:
        # Handle any other unexpected errors
        print(f"Unexpected error: {e}")
        return None

# Wrong order - HTTPError won't be caught specifically
def wrong_exception_order(url):
    """Anti-pattern demo: the handlers are deliberately in the WRONG order.

    HTTPError inherits from URLError, so the first clause handles both and
    the second clause can never run. Returns the body bytes, or ``None``
    after printing the error.
    """
    try:
        return urllib.request.urlopen(url).read()
    except urllib.error.URLError as err:  # Matches HTTPError as well!
        print(f"URL Error: {err.reason}")
    except urllib.error.HTTPError as err:  # Unreachable by design of the demo
        print(f"HTTP Error: {err.code}")
    return None

Comprehensive Error Information Extraction

import urllib.request
import urllib.error

def extract_comprehensive_error_info(url):
    """Extract all available error information.

    Args:
        url: URL to fetch.

    Returns:
        ``{'success': True, 'data': <bytes>}`` on success. Otherwise a dict
        with ``'success': False`` and an ``'error_type'``, plus:
        status code, reason, URL, headers and response body for HTTPError;
        reason and reason type for URLError; a message for anything else.
    """
    try:
        # Close the connection as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            return {'success': True, 'data': response.read()}

    except urllib.error.HTTPError as e:
        error_info = {
            'success': False,
            'error_type': 'HTTPError',
            'status_code': e.code,
            'reason': e.reason,
            'url': e.url,
            'headers': dict(e.headers),
        }

        # Try to read error response body; `with e` closes the underlying
        # response once it has been read.
        try:
            with e:
                error_info['response_body'] = e.read().decode('utf-8')
        except Exception:
            error_info['response_body'] = None

        return error_info

    except urllib.error.URLError as e:
        return {
            'success': False,
            'error_type': 'URLError',
            'reason': str(e.reason),
            'reason_type': type(e.reason).__name__
        }

    except Exception as e:
        return {
            'success': False,
            'error_type': type(e).__name__,
            'message': str(e)
        }

# Test comprehensive error extraction
import json  # used by json.dumps below; this snippet's import list omits it

test_url = 'https://httpbin.org/status/404'
result = extract_comprehensive_error_info(test_url)
print(json.dumps(result, indent=2))

When to Use urllib.error

Ideal Use Cases

  • Robust HTTP client error handling
  • API response validation and recovery
  • Network operation monitoring
  • Automated retry logic
  • Error logging and diagnostics
  • Building resilient web scrapers

When NOT to Use urllib.error

  • Simple requests without error handling → Use requests with default behavior
  • Complex authentication flows → Use specialized libraries
  • Async operations → Use aiohttp exceptions

Related Modules

  • urllib.request - HTTP requests that raise these exceptions
  • http.client - Lower-level HTTP operations
  • socket - Network socket errors

Additional Learning Resources

Official Python Resources

Error Handling Best Practices