Skip to main content

Standard Library

File I/O and pathlib

Reading Files

# Read entire file
with open('file.txt', 'r') as f:
    content = f.read()

# Read line by line (the file object is a lazy line iterator)
with open('file.txt', 'r') as f:
    for line in f:
        print(line.strip())

# Read all lines into list (each entry keeps its trailing newline)
with open('file.txt', 'r') as f:
    lines = f.readlines()

# Read with encoding
with open('file.txt', 'r', encoding='utf-8') as f:
    content = f.read()

# Read binary file
with open('image.jpg', 'rb') as f:
    data = f.read()

# Read CSV-like data (naive split: does not handle quoted fields)
with open('data.csv', 'r') as f:
    for line in f:
        fields = line.strip().split(',')
        print(fields)

Writing Files

# Write to file (overwrite)
with open('output.txt', 'w') as f:
    f.write("Hello, World!")

# Append to file
with open('output.txt', 'a') as f:
    f.write("\nNew line")

# Write multiple lines (writelines adds no newlines itself)
lines = ["Line 1", "Line 2", "Line 3"]
with open('output.txt', 'w') as f:
    f.writelines(line + '\n' for line in lines)

# Write with encoding
with open('output.txt', 'w', encoding='utf-8') as f:
    f.write("Hello, 世界!")

# Write binary file
with open('output.bin', 'wb') as f:
    f.write(b'binary data')

# Write formatted data
data = [{'name': 'Alice', 'age': 30}, {'name': 'Bob', 'age': 25}]
with open('output.txt', 'w') as f:
    for item in data:
        f.write(f"{item['name']},{item['age']}\n")

pathlib - Modern Path Operations

from pathlib import Path

# Create path objects
home_file = Path.home() / 'documents' / 'file.txt'  # "/" joins path segments
path = Path('folder/file.txt')

# Path properties (values shown are for the relative path above)
print(path.name)    # file.txt
print(path.stem)    # file
print(path.suffix)  # .txt
print(path.parent)  # folder
print(path.parts)   # ('folder', 'file.txt')
print(path.anchor)  # '' for a relative path; '/' (Unix) or 'C:\' (Windows) for an absolute one

# Path operations
path.exists()      # Check if exists
path.is_file()     # Check if file
path.is_dir()      # Check if directory
path.is_symlink()  # Check if symlink
path.stat()        # Get file stats

# Directory operations
path.mkdir()               # Create directory
path.mkdir(parents=True)   # Create with parents
path.mkdir(exist_ok=True)  # Don't fail if exists
path.rmdir()               # Remove empty directory

# File operations
path.touch()                  # Create empty file
path.unlink()                 # Delete file
path.rename('new_name.txt')   # Rename file
path.replace('new_name.txt')  # Rename, overwriting an existing target

# Directory listing (path must refer to a directory)
for item in path.iterdir():
    print(item)

# Glob patterns
list(path.glob('*.txt'))  # Files matching pattern
list(path.rglob('*.py'))  # Recursive glob

# Reading/writing with pathlib
path.read_text()           # Read file content
path.read_bytes()          # Read binary content
path.write_text('content')  # Write text
path.write_bytes(b'data')   # Write binary

# Resolve paths
path.resolve()     # Get absolute path
path.expanduser()  # Expand ~ in path

datetime and time

Basic datetime Operations

from datetime import datetime, date, time, timedelta

# Current date/time (naive local time: no timezone attached)
now = datetime.now()
today = date.today()
current_time = datetime.now().time()

# Create specific dates
specific_date = datetime(2023, 12, 25, 15, 30, 0)
birthday = date(1990, 5, 15)
meeting_time = time(14, 30, 0)

# Date components (example values; actual output depends on when this runs)
print(now.year)  # e.g. 2023
print(now.month)  # e.g. 12
print(now.day)  # e.g. 25
print(now.hour)  # e.g. 15
print(now.minute)  # e.g. 30
print(now.second)  # e.g. 0
print(now.weekday())  # 0=Monday, 6=Sunday
print(now.isoweekday())  # 1=Monday, 7=Sunday

Date Arithmetic

from datetime import timedelta

# Create timedelta (each assignment below replaces the previous one)
delta = timedelta(days=7, hours=2, minutes=30)
delta = timedelta(weeks=2)
delta = timedelta(seconds=3600)

# Date arithmetic
tomorrow = today + timedelta(days=1)
last_week = now - timedelta(weeks=1)
next_month = now + timedelta(days=30)  # fixed 30-day offset, not a calendar month

# Calculate differences
diff = datetime(2023, 12, 25) - datetime(2023, 1, 1)
print(diff.days)  # Number of whole days
print(diff.seconds)  # Seconds component only (whole days excluded)
print(diff.total_seconds())  # Entire duration in seconds

# Practical examples
deadline = datetime(2023, 12, 31, 23, 59, 59)
time_left = deadline - now
print(f"Days left: {time_left.days}")

Date Formatting and Parsing

import datetime

# Formatting dates
now = datetime.datetime.now()
formatted = now.strftime("%Y-%m-%d %H:%M:%S")  # e.g. 2023-12-25 15:30:00
formatted = now.strftime("%B %d, %Y")  # e.g. December 25, 2023
formatted = now.strftime("%A, %b %d")  # e.g. Monday, Dec 25

# Common format codes
# %Y - 4-digit year
# %y - 2-digit year
# %m - Month as number
# %B - Full month name
# %b - Abbreviated month name
# %d - Day of month
# %A - Full weekday name
# %a - Abbreviated weekday name
# %H - Hour (24-hour)
# %I - Hour (12-hour)
# %M - Minute
# %S - Second
# %p - AM/PM

# Parsing dates
date_string = "2023-12-25 15:30:00"
parsed = datetime.datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")

# ISO format (includes microseconds when they are non-zero)
iso_string = now.isoformat()  # e.g. 2023-12-25T15:30:00.123456
parsed_iso = datetime.datetime.fromisoformat(iso_string)

# Common parsing patterns: (input text, matching strptime format)
patterns = [
    ("2023-12-25", "%Y-%m-%d"),
    ("25/12/2023", "%d/%m/%Y"),
    ("Dec 25, 2023", "%b %d, %Y"),
    ("Monday, December 25, 2023", "%A, %B %d, %Y"),
]

for date_str, pattern in patterns:
    parsed = datetime.datetime.strptime(date_str, pattern)
    print(f"{date_str} -> {parsed}")

Time Module

import time

# Time operations
current_timestamp = time.time()  # Unix timestamp (seconds since the epoch, float)
time.sleep(2)  # Sleep for 2 seconds

# Time formatting
time_str = time.strftime("%Y-%m-%d %H:%M:%S")  # formats the current local time
local_time = time.localtime()
utc_time = time.gmtime()

# Performance measurement (wall clock; can jump if the system clock is adjusted)
start = time.time()
# ... some operation ...
end = time.time()
duration = end - start

# High-resolution timing (preferred for measuring elapsed intervals)
start = time.perf_counter()
# ... some operation ...
end = time.perf_counter()
duration = end - start

os and sys

os Module - Operating System Interface

import os

# Current directory
cwd = os.getcwd()  # Get current working directory
os.chdir('/path/to/directory')  # Change directory

# Directory operations
os.listdir('.')  # List directory contents
os.makedirs('path/to/dir', exist_ok=True)  # Create directories
os.removedirs('path/to/dir')  # Remove leaf directory, then any emptied parents

# File operations
os.remove('file.txt')  # Delete file
os.rename('old.txt', 'new.txt')  # Rename file
os.chmod('file.txt', 0o755)  # Change permissions

# Path operations
os.path.exists('file.txt')  # Check if exists
os.path.isfile('file.txt')  # Check if file
os.path.isdir('directory')  # Check if directory
os.path.getsize('file.txt')  # Get file size
os.path.getmtime('file.txt')  # Get modification time

# Path manipulation
os.path.join('folder', 'file.txt')  # Join paths
os.path.split('/path/to/file.txt')  # Split path
os.path.dirname('/path/to/file.txt')  # Get directory
os.path.basename('/path/to/file.txt')  # Get filename
os.path.splitext('file.txt')  # Split extension

# Walking directory tree
for root, dirs, files in os.walk('/path'):
    for file in files:
        full_path = os.path.join(root, file)
        print(full_path)

Environment Variables

import os

# Get environment variables (.get returns None, or the given default, when unset)
home = os.environ.get('HOME')
path = os.environ.get('PATH', '')
api_key = os.environ.get('API_KEY', 'default_key')

# Set environment variables (values must be strings)
os.environ['MY_VAR'] = 'my_value'

# Check if variable exists
if 'DEBUG' in os.environ:
    print("Debug mode enabled")

# Common environment variables
print(f"Home: {os.environ.get('HOME')}")
print(f"User: {os.environ.get('USER')}")
print(f"Shell: {os.environ.get('SHELL')}")
print(f"Path: {os.environ.get('PATH')}")

# Environment variable patterns
DATABASE_URL = os.environ.get('DATABASE_URL', 'sqlite:///default.db')
DEBUG = os.environ.get('DEBUG', 'False').lower() == 'true'
# Environment values are always strings, so the default is a string as well.
PORT = int(os.environ.get('PORT', '8000'))

sys Module - System-specific Parameters

import sys

# Command line arguments
script_name = sys.argv[0]
args = sys.argv[1:]

# Python version info
print(sys.version)  # Full version string
print(sys.version_info)  # Version tuple
print(sys.version_info.major)  # Major version

# Platform information
print(sys.platform)  # Platform identifier
print(sys.maxsize)  # Largest container index / Py_ssize_t value (ints themselves are unbounded)

# Module paths
print(sys.path)  # Module search paths
sys.path.append('/custom/path')  # Add custom path

# Standard streams
sys.stdout.write('Hello\n')  # Write to stdout
sys.stderr.write('Error\n')  # Write to stderr
user_input = sys.stdin.readline()  # Read from stdin

# Exit program
# NOTE: sys.exit raises SystemExit, so nothing after the first call below
# runs when this snippet is executed top to bottom.
sys.exit(0)  # Exit with code 0
sys.exit("Error message")  # Print message to stderr, exit with code 1

# Memory usage
import sys
obj = [1, 2, 3, 4, 5]
size = sys.getsizeof(obj)  # Shallow size in bytes (contained objects not included)

json and pickle

JSON Operations

import json

# Python to JSON
data = {
    'name': 'Alice',
    'age': 30,
    'skills': ['Python', 'JavaScript'],
    'is_active': True,
    'salary': None,
}

# Serialize to JSON string
json_string = json.dumps(data)
print(json_string)

# Pretty print JSON
pretty_json = json.dumps(data, indent=2)
print(pretty_json)

# Serialize with custom options
json_string = json.dumps(
    data,
    indent=2,
    sort_keys=True,
    ensure_ascii=False,
)

# JSON to Python
parsed_data = json.loads(json_string)
print(parsed_data['name'])

# File operations
# Write JSON to file
with open('data.json', 'w') as f:
    json.dump(data, f, indent=2)

# Read JSON from file
with open('data.json', 'r') as f:
    loaded_data = json.load(f)

# Handle JSON errors
try:
    invalid_json = '{"name": "Alice", "age":}'
    # Use a separate name so the example never rebinds `data`.
    broken = json.loads(invalid_json)
except json.JSONDecodeError as e:
    print(f"JSON error: {e}")
    print(f"Error at line {e.lineno}, column {e.colno}")
    print(f"Error position: {e.pos}")

# Safe JSON loading with validation
def safe_json_load(json_string, default=None):
    """Parse ``json_string`` and return the result, or ``default`` on failure.

    Args:
        json_string: Text (or bytes) expected to contain a JSON document.
        default: Value returned when parsing fails.

    Returns:
        The decoded Python object, or ``default`` when the input is not valid
        JSON (``json.JSONDecodeError``) or is of an unsupported type such as
        ``None`` or ``int`` (``TypeError``). A message is printed in both
        failure cases.
    """
    try:
        return json.loads(json_string)
    except json.JSONDecodeError as e:
        print(f"Invalid JSON: {e}")
        return default
    except TypeError as e:
        print(f"Type error: {e}")
        return default


# Usage examples
test_cases = [
    '{"name": "Alice", "age": 30}',   # Valid JSON
    '{"name": "Alice", "age":}',      # Invalid JSON
    '{"name": "Alice", "age": 30,}',  # Trailing comma
    None,                             # None value
    42,                               # Wrong type
]

for test_case in test_cases:
    result = safe_json_load(test_case, {"error": "Invalid JSON"})
    print(f"Input: {test_case} -> Output: {result}")

# Safe file operations
def load_json_file(filename, default=None):
    """Load and decode a UTF-8 JSON file, returning ``default`` on any failure.

    Args:
        filename: Path of the file to read.
        default: Value returned when the file cannot be read or decoded.

    Returns:
        The decoded JSON document, or ``default`` if the file is missing,
        unreadable, not valid JSON, or any other error occurs. Each failure
        prints a short diagnostic message instead of raising.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"File not found: {filename}")
        return default
    except json.JSONDecodeError as e:
        print(f"Invalid JSON in {filename}: {e}")
        return default
    except PermissionError:
        print(f"Permission denied: {filename}")
        return default
    except Exception as e:
        # Deliberate best-effort catch-all: this helper never raises.
        print(f"Unexpected error loading {filename}: {e}")
        return default

def save_json_file(data, filename, indent=2):
    """Serialize ``data`` as JSON and write it to ``filename`` (UTF-8).

    The document is serialized in memory before the file is opened, so a
    serialization failure cannot truncate or corrupt an existing file.

    Args:
        data: JSON-serializable object to store.
        filename: Destination path; overwritten on success.
        indent: Indentation passed to the JSON encoder.

    Returns:
        True when the file was written, False when serialization or writing
        failed (a diagnostic message is printed in that case).
    """
    try:
        payload = json.dumps(data, indent=indent, ensure_ascii=False)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(payload)
        return True
    except (TypeError, ValueError) as e:
        print(f"Serialization error: {e}")
        return False
    except PermissionError:
        print(f"Permission denied: {filename}")
        return False
    except Exception as e:
        # Deliberate best-effort catch-all: this helper reports, never raises.
        print(f"Unexpected error saving {filename}: {e}")
        return False

# Usage: round-trip a small document through the two file helpers
data = {"name": "Alice", "age": 30, "skills": ["Python", "JavaScript"]}
if save_json_file(data, "user_data.json"):
    print("Data saved successfully")

# Fall back to an empty dict if the file cannot be loaded
loaded_data = load_json_file("user_data.json", {})
print(f"Loaded data: {loaded_data}")

# JSON validation and schema checking
def validate_user_data(data):
    """Validate the structure of a decoded user record.

    Args:
        data: Object to validate; expected to be a dict with a string
            ``name`` and a non-negative integer ``age``.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` names the first problem
        found, or is ``"Valid data"`` when all checks pass.
    """
    required_fields = ['name', 'age']

    if not isinstance(data, dict):
        return False, "Data must be a dictionary"

    for field in required_fields:
        if field not in data:
            return False, f"Missing required field: {field}"

    if not isinstance(data['name'], str):
        return False, "Name must be a string"

    age = data['age']
    # bool is a subclass of int, so JSON true/false must be rejected explicitly.
    if isinstance(age, bool) or not isinstance(age, int) or age < 0:
        return False, "Age must be a non-negative integer"

    return True, "Valid data"


# Test validation
test_data = [
    {"name": "Alice", "age": 30},
    {"name": "Bob"},                 # Missing age
    {"name": 123, "age": 30},        # Invalid name type
    {"name": "Charlie", "age": -5},  # Invalid age
]

for candidate in test_data:
    is_valid, message = validate_user_data(candidate)
    print(f"Data: {candidate} -> Valid: {is_valid}, Message: {message}")

# Working with nested JSON safely
def safe_get_nested(data, path, default=None):
    """Look up a dot-separated ``path`` inside nested dictionaries.

    Args:
        data: Decoded JSON data (normally a dict of dicts).
        path: Keys joined by ``"."``, e.g. ``"user.profile.name"``.
        default: Value returned when any step of the path is missing.

    Returns:
        The value found at ``path``, or ``default`` if a key is absent, an
        intermediate value is not a dict, or ``path`` is not a string.
    """
    try:
        current = data
        for key in path.split('.'):
            if not (isinstance(current, dict) and key in current):
                return default
            current = current[key]
        return current
    except (AttributeError, TypeError):
        # e.g. path is None / not a string
        return default


# Example nested data
nested_data = {
    "user": {
        "profile": {
            "name": "Alice",
            "settings": {
                "theme": "dark",
                "notifications": True,
            },
        },
    },
}

# Safe access
name = safe_get_nested(nested_data, "user.profile.name", "Unknown")
theme = safe_get_nested(nested_data, "user.profile.settings.theme", "light")
invalid = safe_get_nested(nested_data, "user.profile.invalid.path", "default")

print(f"Name: {name}, Theme: {theme}, Invalid: {invalid}")

Custom JSON Encoding

import json
from datetime import datetime

# Custom encoder for datetime
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that writes ``datetime`` objects as ISO 8601 strings."""

    def default(self, obj):
        """Return an ISO string for datetimes; defer to the base class otherwise."""
        if isinstance(obj, datetime):
            return obj.isoformat()
        # The base implementation raises TypeError for unsupported types.
        return super().default(obj)


# Use custom encoder
data = {
    'name': 'Alice',
    'created': datetime.now(),
}

json_string = json.dumps(data, cls=DateTimeEncoder)


# Custom decoder
def datetime_decoder(dct):
    """``object_hook`` that turns ISO strings back into ``datetime`` objects.

    Only keys named ``created`` or ending in ``_time`` are converted; values
    that are not valid ISO strings are left untouched. The dict is updated in
    place and returned.
    """
    # Iterate over a snapshot so the dict can be updated inside the loop.
    for key, value in list(dct.items()):
        if key.endswith('_time') or key == 'created':
            try:
                dct[key] = datetime.fromisoformat(value)
            except (ValueError, TypeError):
                pass  # not an ISO timestamp: keep the original value
    return dct


parsed_data = json.loads(json_string, object_hook=datetime_decoder)

Pickle - Python Object Serialization

import pickle

# Serialize Python objects
# NOTE: functions are pickled by reference (module + qualified name), so only
# importable, named functions work. A lambda here would make pickle.dumps
# raise pickle.PicklingError.
data = {
    'list': [1, 2, 3],
    'dict': {'key': 'value'},
    'set': {1, 2, 3},
    'tuple': (1, 2, 3),
    'function': len,
}

# Pickle to bytes
pickled_data = pickle.dumps(data)

# Unpickle from bytes
# WARNING: unpickling can execute arbitrary code; only load trusted data.
unpickled_data = pickle.loads(pickled_data)

# File operations
# Save to file
with open('data.pkl', 'wb') as f:
    pickle.dump(data, f)

# Load from file
with open('data.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

# Pickle custom objects
class Person:
    """Simple record type used to demonstrate pickling of class instances."""

    def __init__(self, name, age):
        self.name = name
        self.age = age

    def __str__(self):
        return f"{self.name} ({self.age})"


# Instances are pickled as a reference to their class plus the instance state.
person = Person("Alice", 30)
pickled_person = pickle.dumps(person)
unpickled_person = pickle.loads(pickled_person)

# Pickle protocol versions
# Protocol 0: ASCII, human-readable
# Protocol 1: Binary format
# Protocol 2: More efficient (Python 2.3+)
# Protocol 3: Better support for bytes (Python 3.0+)
# Protocol 4: Large object support (Python 3.4+)
# Protocol 5: Out-of-band data (Python 3.8+)

# HIGHEST_PROTOCOL selects the newest protocol this interpreter supports;
# the result may not be readable by older Python versions.
data = [1, 2, 3, 4, 5]
pickled = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)

Regular Expressions (re)

Basic Pattern Matching

import re

# Basic search (first match anywhere in the string, or None)
text = "The quick brown fox jumps over the lazy dog"
pattern = r"brown"
match = re.search(pattern, text)
if match:
    print(f"Found: {match.group()}")

# Find all matches
numbers = "Phone: 123-456-7890, Fax: 098-765-4321"
phone_pattern = r"\d{3}-\d{3}-\d{4}"
matches = re.findall(phone_pattern, numbers)
print(matches)  # ['123-456-7890', '098-765-4321']

# Match at beginning (re.match only anchors at the start)
if re.match(r"The", text):
    print("Text starts with 'The'")

# Full string match (anchored at both ends)
if re.fullmatch(r"\d+", "12345"):
    print("String is all digits")

Pattern Syntax

# Common patterns: regex -> human-readable description
patterns = {
    r"\d+": "One or more digits",
    r"\w+": "One or more word characters",
    r"\s+": "One or more whitespace",
    r"[a-zA-Z]+": "One or more letters",
    r"[0-9]{3}": "Exactly 3 digits",
    r"colou?r": "Optional character (color or colour)",
    r"cats?": "Optional s (cat or cats)",
    r"^start": "Start of string",
    r"end$": "End of string",
    r"a|b": "Either a or b",
    r"[abc]": "Any of a, b, or c",
    r"[^abc]": "Any character except a, b, or c",
    r".": "Any character except newline",
    r"\.": "Literal dot",
    r"a*": "Zero or more a's",
    r"a+": "One or more a's",
    r"a{2,4}": "Between 2 and 4 a's",
}

# Test patterns: list every sample string each pattern matches somewhere
test_strings = ["cat", "cats", "color", "colour", "123", "abc123"]
for pattern, description in patterns.items():
    print(f"\nPattern: {pattern} ({description})")
    for test in test_strings:
        if re.search(pattern, test):
            print(f" ✓ {test}")

Groups and Capturing

# Basic groups
email_pattern = r"([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})"
email = "user@example.com"
match = re.search(email_pattern, email)
if match:
    print(f"Username: {match.group(1)}")    # user
    print(f"Domain: {match.group(2)}")      # example.com
    print(f"Full email: {match.group(0)}")  # user@example.com

# Named groups
# (variable names avoid `date`/`time`, which are stdlib names imported
# elsewhere in this file)
pattern = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})"
date_text = "2023-12-25"
match = re.search(pattern, date_text)
if match:
    print(f"Year: {match.group('year')}")
    print(f"Month: {match.group('month')}")
    print(f"Day: {match.group('day')}")
    print(f"All groups: {match.groupdict()}")

# Multiple matches with groups
log_pattern = r"(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) \[(\w+)\] (.+)"
log_line = "2023-12-25 14:30:15 [ERROR] Something went wrong"
match = re.search(log_pattern, log_line)
if match:
    log_date, log_time, level, message = match.groups()
    print(f"Date: {log_date}, Time: {log_time}, Level: {level}, Message: {message}")

Search and Replace

# Basic replacement
text = "The quick brown fox"
new_text = re.sub(r"brown", "red", text)
print(new_text)  # "The quick red fox"


# Replace with function
def title_case(match):
    """Replacement callback for re.sub: return the matched text title-cased."""
    return match.group(0).title()


text = "hello world python"
new_text = re.sub(r"\b\w+\b", title_case, text)
print(new_text)  # "Hello World Python"

# Replace with group references (\1, \2 refer to the captured groups)
text = "John Smith, Jane Doe"
new_text = re.sub(r"(\w+) (\w+)", r"\2, \1", text)
print(new_text)  # "Smith, John, Doe, Jane"

# Replace with limit
text = "apple apple apple"
new_text = re.sub(r"apple", "orange", text, count=2)
print(new_text)  # "orange orange apple"

# Case-insensitive replacement
text = "Hello WORLD"
new_text = re.sub(r"hello", "Hi", text, flags=re.IGNORECASE)
print(new_text)  # "Hi WORLD"

Compiled Patterns

# Compile for reuse
email_pattern = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

# Use compiled pattern
# fullmatch() requires the whole string to be an address; match() would also
# accept input with trailing garbage after a valid-looking prefix.
emails = ["user@example.com", "invalid-email", "admin@site.org"]
valid_emails = [email for email in emails if email_pattern.fullmatch(email)]
for email in valid_emails:
    print(f"Valid: {email}")

# Compiled pattern with flags
pattern = re.compile(r"hello", re.IGNORECASE | re.MULTILINE)

Common Regex Patterns

# Validation patterns (illustrative; real-world validation is usually looser
# for e-mail/URLs and needs extra checks such as Luhn for card numbers)
patterns = {
    'email': r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$',
    'phone': r'^\+?1?-?\(?([0-9]{3})\)?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})$',
    'url': r'^https?://[^\s/$.?#].[^\s]*$',
    'ip_address': r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$',
    'credit_card': r'^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|3[0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12})$',
    'ssn': r'^\d{3}-\d{2}-\d{4}$',
    'password': r'^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$',
}

# Test validation
test_data = {
    'email': 'user@example.com',
    'phone': '(555) 123-4567',
    'url': 'https://www.example.com',
    'ip_address': '192.168.1.1',
}

# Only patterns that have a sample value are exercised.
for pattern_name, pattern in patterns.items():
    if pattern_name in test_data:
        test_value = test_data[pattern_name]
        if re.match(pattern, test_value):
            print(f"✓ {pattern_name}: {test_value}")
        else:
            print(f"✗ {pattern_name}: {test_value}")

Collections

defaultdict

from collections import defaultdict

# Basic usage: missing keys are created on first access via the factory
dd = defaultdict(int)  # Default value is 0
dd['key1'] += 1
dd['key2'] += 5
print(dd)  # defaultdict(<class 'int'>, {'key1': 1, 'key2': 5})

# With list
dd_list = defaultdict(list)
dd_list['fruits'].append('apple')
dd_list['fruits'].append('banana')
dd_list['vegetables'].append('carrot')
print(dd_list)  # defaultdict(<class 'list'>, {'fruits': ['apple', 'banana'], 'vegetables': ['carrot']})

# Counting items
text = "hello world"
char_count = defaultdict(int)
for char in text:
    char_count[char] += 1
print(dict(char_count))  # {'h': 1, 'e': 1, 'l': 3, 'o': 2, ' ': 1, 'w': 1, 'r': 1, 'd': 1}

# Grouping items
from collections import defaultdict
students = [
    {'name': 'Alice', 'grade': 'A'},
    {'name': 'Bob', 'grade': 'B'},
    {'name': 'Charlie', 'grade': 'A'},
    {'name': 'David', 'grade': 'B'},
]

grade_groups = defaultdict(list)
for student in students:
    grade_groups[student['grade']].append(student['name'])
print(dict(grade_groups))  # {'A': ['Alice', 'Charlie'], 'B': ['Bob', 'David']}

# Custom default factory
def default_set():
    """Zero-argument factory returning a new empty set for each missing key."""
    return set()


# Any zero-argument callable works as a factory (plain `set` would do too).
dd_set = defaultdict(default_set)
dd_set['group1'].add('item1')
dd_set['group1'].add('item2')

Counter

from collections import Counter

# Count items in iterable
words = ['apple', 'banana', 'apple', 'cherry', 'banana', 'apple']
counter = Counter(words)
print(counter)  # Counter({'apple': 3, 'banana': 2, 'cherry': 1})

# Count characters
text = "hello world"
char_counter = Counter(text)
print(char_counter)  # Counter({'l': 3, 'o': 2, 'h': 1, 'e': 1, ' ': 1, 'w': 1, 'r': 1, 'd': 1})

# Most common items (sorted by count, highest first)
print(counter.most_common())  # [('apple', 3), ('banana', 2), ('cherry', 1)]
print(counter.most_common(2))  # [('apple', 3), ('banana', 2)]

# Counter arithmetic
counter1 = Counter(['a', 'b', 'c', 'a'])
counter2 = Counter(['a', 'b', 'b', 'd'])

# Addition
combined = counter1 + counter2
print(combined)  # Counter({'a': 3, 'b': 3, 'c': 1, 'd': 1})

# Subtraction (only positive counts are kept)
difference = counter1 - counter2
print(difference)  # Counter({'a': 1, 'c': 1})

# Intersection (minimum of each count)
intersection = counter1 & counter2
print(intersection)  # Counter({'a': 1, 'b': 1})

# Union (maximum of each count)
union = counter1 | counter2
print(union)  # Counter({'a': 2, 'b': 2, 'c': 1, 'd': 1})

# Update counter (adds to the existing counts in place)
counter.update(['apple', 'date'])
print(counter)  # Counter({'apple': 4, 'banana': 2, 'cherry': 1, 'date': 1})

# Word frequency analysis
text = "the quick brown fox jumps over the lazy dog the fox"
words = text.split()
word_freq = Counter(words)
print(f"Most common words: {word_freq.most_common(3)}")

namedtuple

from collections import namedtuple

# Create namedtuple class (first argument is the type name, second the field names)
Point = namedtuple('Point', ['x', 'y'])
Person = namedtuple('Person', ['name', 'age', 'email'])

# Create instances
point = Point(3, 4)
person = Person('Alice', 30, 'alice@example.com')

# Access by name
print(point.x)  # 3
print(person.name)  # Alice

# Access by index (like regular tuple)
print(point[0])  # 3
print(person[1])  # 30

# Unpack like tuple
x, y = point
name, age, email = person

# namedtuple methods (underscore-prefixed to avoid clashing with field names)
print(person._fields)  # ('name', 'age', 'email')
print(person._asdict())  # {'name': 'Alice', 'age': 30, 'email': 'alice@example.com'}

# Create new instance with changes (namedtuples are immutable)
new_person = person._replace(age=31)
print(new_person)  # Person(name='Alice', age=31, email='alice@example.com')

# Create from iterable
data = ['Bob', 25, 'bob@example.com']
bob = Person._make(data)
print(bob)  # Person(name='Bob', age=25, email='bob@example.com')

# Use in functions
def calculate_distance(p1, p2):
    """Return the Euclidean distance between two points with ``x``/``y`` attributes."""
    dx = p1.x - p2.x
    dy = p1.y - p2.y
    return (dx**2 + dy**2)**0.5

point1 = Point(0, 0)
point2 = Point(3, 4)
distance = calculate_distance(point1, point2)
print(f"Distance: {distance}")  # Distance: 5.0

# Database record representation
Record = namedtuple('Record', ['id', 'name', 'value', 'timestamp'])
records = [
    Record(1, 'temperature', 23.5, '2023-12-25 10:00:00'),
    Record(2, 'humidity', 65.0, '2023-12-25 10:05:00'),
    Record(3, 'pressure', 1013.25, '2023-12-25 10:10:00'),
]

for record in records:
    print(f"ID: {record.id}, Name: {record.name}, Value: {record.value}")

deque

from collections import deque

# Create deque
dq = deque([1, 2, 3, 4, 5])
print(dq)  # deque([1, 2, 3, 4, 5])

# Add/remove from both ends
dq.appendleft(0)  # Add to left
dq.append(6)  # Add to right
print(dq)  # deque([0, 1, 2, 3, 4, 5, 6])

left_item = dq.popleft()  # Remove from left
right_item = dq.pop()  # Remove from right
print(f"Removed: {left_item}, {right_item}")  # Removed: 0, 6

# Extend from both ends
# extendleft appends one item at a time, so the items end up reversed
dq.extendleft([-2, -1])  # Add multiple to left
dq.extend([7, 8])  # Add multiple to right
print(dq)  # deque([-1, -2, 1, 2, 3, 4, 5, 7, 8])

# Rotate
dq = deque([1, 2, 3, 4, 5])
dq.rotate(2)  # Rotate right by 2
print(dq)  # deque([4, 5, 1, 2, 3])

dq.rotate(-1)  # Rotate left by 1
print(dq)  # deque([5, 1, 2, 3, 4])

# Limited size deque (circular buffer): the oldest item is dropped when full
circular_buffer = deque(maxlen=3)
for i in range(5):
    circular_buffer.append(i)
    print(f"Added {i}: {circular_buffer}")
# Added 0: deque([0], maxlen=3)
# Added 1: deque([0, 1], maxlen=3)
# Added 2: deque([0, 1, 2], maxlen=3)
# Added 3: deque([1, 2, 3], maxlen=3)
# Added 4: deque([2, 3, 4], maxlen=3)

# Use as queue (FIFO): append on the right, pop from the left
queue = deque()
queue.append('task1')
queue.append('task2')
queue.append('task3')

while queue:
    task = queue.popleft()
    print(f"Processing: {task}")

# Use as stack (LIFO): append and pop on the same end
stack = deque()
stack.append('item1')
stack.append('item2')
stack.append('item3')

while stack:
    item = stack.pop()
    print(f"Popped: {item}")

# Recent items tracking
recent_items = deque(maxlen=5)
items = ['item1', 'item2', 'item3', 'item4', 'item5', 'item6', 'item7']
for item in items:
    recent_items.append(item)
print(f"Recent items: {list(recent_items)}")  # Recent items: ['item3', 'item4', 'item5', 'item6', 'item7']

Itertools

Infinite Iterators

import itertools

# count - infinite arithmetic progression (must be stopped explicitly)
for i in itertools.count(10, 2):  # Start at 10, step by 2
    if i > 20:
        break
    print(i)  # 10, 12, 14, 16, 18, 20

# cycle - infinite repetition
colors = ['red', 'green', 'blue']
color_cycle = itertools.cycle(colors)
for i, color in enumerate(color_cycle):
    if i >= 10:
        break
    print(f"{i}: {color}")

# repeat - repeat value
for item in itertools.repeat('hello', 3):
    print(item)  # hello, hello, hello

# Practical use of repeat: supply a constant second argument to map();
# map stops when the shortest input (the list) is exhausted.
squares = list(map(pow, [2, 3, 4], itertools.repeat(2)))  # [4, 9, 16] (squares)

Iterators on Sequences

# accumulate - cumulative results (running sum by default)
data = [1, 2, 3, 4, 5]
cumulative_sum = list(itertools.accumulate(data))
print(cumulative_sum)  # [1, 3, 6, 10, 15]

# Custom accumulation function
cumulative_product = list(itertools.accumulate(data, lambda x, y: x * y))
print(cumulative_product)  # [1, 2, 6, 24, 120]

# chain - flatten multiple iterables
list1 = [1, 2, 3]
list2 = [4, 5, 6]
list3 = [7, 8, 9]
flattened = list(itertools.chain(list1, list2, list3))
print(flattened)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# chain.from_iterable - flatten nested iterables (one level only)
nested = [[1, 2], [3, 4], [5, 6]]
flattened = list(itertools.chain.from_iterable(nested))
print(flattened)  # [1, 2, 3, 4, 5, 6]

# compress - filter by selector
data = ['A', 'B', 'C', 'D', 'E']
selectors = [1, 0, 1, 0, 1]
filtered = list(itertools.compress(data, selectors))
print(filtered)  # ['A', 'C', 'E']

# dropwhile - drop items while predicate is true (everything after is kept)
data = [1, 3, 5, 24, 7, 8, 10]
result = list(itertools.dropwhile(lambda x: x < 10, data))
print(result)  # [24, 7, 8, 10]

# takewhile - take items while predicate is true (stops at first failure)
result = list(itertools.takewhile(lambda x: x < 10, data))
print(result)  # [1, 3, 5]

# filterfalse - opposite of filter
data = [1, 2, 3, 4, 5, 6]
result = list(itertools.filterfalse(lambda x: x % 2 == 0, data))
print(result)  # [1, 3, 5] (odd numbers)

# islice - slice iterator (start, stop, step)
data = range(20)
result = list(itertools.islice(data, 5, 15, 2))
print(result)  # [5, 7, 9, 11, 13]

# starmap - apply function to argument tuples
data = [(2, 5), (3, 2), (10, 3)]
result = list(itertools.starmap(pow, data))
print(result)  # [32, 9, 1000]

# tee - split iterator into multiple independent iterators
data = [1, 2, 3, 4, 5]
iter1, iter2 = itertools.tee(data, 2)
print(list(iter1))  # [1, 2, 3, 4, 5]
print(list(iter2))  # [1, 2, 3, 4, 5]

# zip_longest - zip with padding
list1 = [1, 2, 3]
list2 = ['a', 'b', 'c', 'd', 'e']
result = list(itertools.zip_longest(list1, list2, fillvalue=0))
print(result)  # [(1, 'a'), (2, 'b'), (3, 'c'), (0, 'd'), (0, 'e')]

Combinatorial Iterators

# product - Cartesian product
colors = ['red', 'blue']
sizes = ['S', 'M', 'L']
variants = list(itertools.product(colors, sizes))
print(variants)  # [('red', 'S'), ('red', 'M'), ('red', 'L'), ('blue', 'S'), ('blue', 'M'), ('blue', 'L')]

# Self product (repeat=2 pairs the iterable with itself)
dice_rolls = list(itertools.product(range(1, 7), repeat=2))
print(len(dice_rolls))  # 36 combinations

# permutations - all possible orderings
letters = ['A', 'B', 'C']
perms = list(itertools.permutations(letters))
print(perms)  # [('A', 'B', 'C'), ('A', 'C', 'B'), ('B', 'A', 'C'), ('B', 'C', 'A'), ('C', 'A', 'B'), ('C', 'B', 'A')]

# Permutations of specific length
perms_2 = list(itertools.permutations(letters, 2))
print(perms_2)  # [('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]

# combinations - combinations without repetition (order does not matter)
combos = list(itertools.combinations(letters, 2))
print(combos)  # [('A', 'B'), ('A', 'C'), ('B', 'C')]

# combinations_with_replacement - combinations with repetition
combos_rep = list(itertools.combinations_with_replacement(letters, 2))
print(combos_rep)  # [('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]

Practical Examples

# Grouping consecutive items
from itertools import groupby

# groupby only merges adjacent equal items, so the trailing 1s form a new group
data = [1, 1, 2, 2, 2, 3, 1, 1]
grouped = [(key, list(group)) for key, group in groupby(data)]
print(grouped)  # [(1, [1, 1]), (2, [2, 2, 2]), (3, [3]), (1, [1, 1])]

# Group by property
students = [
{'name': 'Alice', 'grade': 'A'},
{'name': 'Bob', 'grade': 'A'},
{'name': 'Charlie', 'grade': 'B'},
{'name': 'David', 'grade': 'B'},
{'name': 'Eve', 'grade': 'A'}
]

# Sort first, then group (same key for both, otherwise groups are split)
students.sort(key=lambda x: x['grade'])
grouped_students = {grade: list(group) for grade, group in groupby(students, key=lambda x: x['grade'])}
print(grouped_students)

# Pairwise iteration (Python 3.10+)
data = [1, 2, 3, 4, 5]
pairs = list(itertools.pairwise(data))
print(pairs)  # [(1, 2), (2, 3), (3, 4), (4, 5)]

# Sliding window (custom implementation for older Python)
def sliding_window(iterable, n):
    """Return an iterator of overlapping ``n``-tuples over ``iterable``.

    ``sliding_window([1, 2, 3, 4], 3)`` yields ``(1, 2, 3)`` and ``(2, 3, 4)``.
    Nothing is yielded when the input has fewer than ``n`` items.
    """
    iterators = itertools.tee(iterable, n)
    # Advance the i-th copy by i items so that zip lines up consecutive
    # elements; zip stops as soon as the most-advanced copy is exhausted.
    shifted = [itertools.islice(it, offset, None) for offset, it in enumerate(iterators)]
    return zip(*shifted)


data = [1, 2, 3, 4, 5, 6]
windows = list(sliding_window(data, 3))
print(windows)  # [(1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]

# Flatten nested structure
nested = [[1, 2], [3, [4, 5]], [6, 7, [8, 9]]]


def flatten(lst):
    """Yield the non-list items of ``lst`` depth-first, descending into nested lists.

    Only ``list`` instances are expanded; tuples, strings and other iterables
    are yielded as-is.
    """
    for item in lst:
        if isinstance(item, list):
            yield from flatten(item)
        else:
            yield item


flattened = list(flatten(nested))
print(flattened)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Round-robin scheduling
def round_robin(*iterables):
    """Yield one item from each iterable in turn until all are exhausted.

    Exhausted iterables drop out of the rotation; the remaining ones keep
    their relative order.
    """
    iterators = [iter(it) for it in iterables]
    while iterators:
        still_active = []
        for it in iterators:
            try:
                yield next(it)
            except StopIteration:
                continue  # exhausted: leave it out of the next round
            still_active.append(it)
        iterators = still_active


team1 = ['Alice', 'Bob']
team2 = ['Charlie', 'David', 'Eve']
team3 = ['Frank']

schedule = list(round_robin(team1, team2, team3))
print(schedule)  # ['Alice', 'Charlie', 'Frank', 'Bob', 'David', 'Eve']

# Batching data (Python 3.12+ has itertools.batched)
def batched(iterable, n):
    """Yield successive lists of up to ``n`` items from ``iterable``.

    The final batch may be shorter than ``n``. Unlike ``itertools.batched``
    (3.12+), batches are lists rather than tuples.
    """
    it = iter(iterable)
    # islice pulls at most n items per pass; an empty list means exhaustion.
    while batch := list(itertools.islice(it, n)):
        yield batch


# Process data in batches
data = range(1, 21)  # 1 to 20
for batch in batched(data, 5):
    print(f"Processing batch: {batch}")
    # Process each batch...

# Windowed iteration (sliding window)
def windowed(iterable, n):
    """Yield overlapping tuples of ``n`` consecutive items from ``iterable``.

    Nothing is yielded when the input has fewer than ``n`` items.

    Raises:
        ValueError: if ``n`` is smaller than 1 (raised on first iteration,
            since this is a generator).
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    it = iter(iterable)
    window = list(itertools.islice(it, n))
    if len(window) == n:
        yield tuple(window)
    # If the first window was short, `it` is already exhausted and the loop
    # body never runs.
    for x in it:
        window = window[1:] + [x]
        yield tuple(window)


# Example: moving averages
prices = [10, 12, 13, 12, 15, 14, 16, 18, 17, 19]
moving_averages = [sum(window) / len(window) for window in windowed(prices, 3)]
print(f"Moving averages: {moving_averages}")

# Pairwise iteration (available in Python 3.10+)
def pairwise(iterable):
    """Return an iterator over consecutive overlapping pairs: (s0, s1), (s1, s2), ..."""
    first, second = itertools.tee(iterable)
    next(second, None)  # shift the second copy one item ahead
    return zip(first, second)


# Calculate differences
numbers = [1, 4, 9, 16, 25]
differences = [b - a for a, b in pairwise(numbers)]
print(f"Differences: {differences}")  # [3, 5, 7, 9]

# Partition data based on predicate
def partition(predicate, iterable):
    """Split ``iterable`` into two lazy iterators by ``predicate``.

    Returns:
        ``(matching, non_matching)``: items for which ``predicate`` is truthy,
        and items for which it is falsy, each in original order.
    """
    for_true, for_false = itertools.tee(iterable)
    return filter(predicate, for_true), itertools.filterfalse(predicate, for_false)


# Split numbers into even and odd
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
evens, odds = partition(lambda x: x % 2 == 0, numbers)
print(f"Evens: {list(evens)}")  # [2, 4, 6, 8, 10]
print(f"Odds: {list(odds)}")  # [1, 3, 5, 7, 9]

# Find first item matching condition
def first_match(predicate, iterable, default=None):
    """Return the first item for which ``predicate`` is true, else ``default``."""
    matches = filter(predicate, iterable)
    return next(matches, default)


# Find first even number
numbers = [1, 3, 5, 8, 9, 12]
first_even = first_match(lambda x: x % 2 == 0, numbers)
print(f"First even: {first_even}")  # 8

# Consume iterator efficiently
def consume(iterator, n=None):
    """Advance ``iterator`` by ``n`` items, or exhaust it when ``n`` is None.

    Consumed items are discarded and nothing is returned.
    """
    if n is None:
        # A zero-length deque drains the iterator without storing anything.
        collections.deque(iterator, maxlen=0)
    else:
        # The empty slice [n:n] yields nothing but skips n items on the way.
        next(itertools.islice(iterator, n, n), None)

# Skip first n items
def skip(iterable, n):
    """Return a lazy iterator over ``iterable`` with its first ``n`` items dropped."""
    return itertools.islice(iterable, n, None)

# Take first n items
def take(n, iterable):
    """Return a lazy iterator over at most the first ``n`` items of ``iterable``."""
    return itertools.islice(iterable, n)

# Data processing pipeline example
def process_log_data(log_lines):
    """Print per-date totals and ERROR counts for an iterable of log lines.

    Lines that are empty or whitespace-only are ignored.  The first 10
    characters of each line are used as its date key.  ``itertools.groupby``
    only merges adjacent lines, so a date that reappears later in the input is
    reported again as a separate group.
    """
    # Filter out empty lines (str.strip returns '' for blank lines, which is falsy)
    non_empty = filter(str.strip, log_lines)

    # Group by date (assume first 10 chars are date)
    by_date = itertools.groupby(non_empty, key=lambda x: x[:10])

    # Process each day's logs
    for date, logs in by_date:
        # Materialise the group: it is needed for both the count and the scan.
        daily_logs = list(logs)
        error_count = sum(1 for log in daily_logs if 'ERROR' in log)
        print(f"Date: {date}, Total logs: {len(daily_logs)}, Errors: {error_count}")

# Memory-efficient file processing
def process_large_file(filename):
    """Iterate over a text file in chunks of 1000 lines.

    The per-line work is a placeholder.  Only one chunk of lines is held in
    memory at a time because ``batched`` pulls lines lazily from the file.
    """
    with open(filename, 'r') as f:
        # Process in chunks to avoid memory issues
        for chunk in batched(f, 1000):
            # Process each chunk
            for line in chunk:
                # Process line...
                pass

# Cyclic pattern generation
def cyclic_pattern(pattern, length):
    """Return a list of ``length`` items made by repeating ``pattern`` cyclically.

    An empty ``pattern`` produces an empty list.
    """
    repeating = itertools.cycle(pattern)
    return list(itertools.islice(repeating, length))


# Create alternating pattern
pattern = cyclic_pattern(['A', 'B', 'C'], 10)
print(f"Cyclic pattern: {pattern}")  # ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A']

# Frequency analysis
def frequency_analysis(iterable):
    """Print the items of ``iterable`` grouped by how often they occur.

    One line is printed per distinct count, highest count first.
    """
    from collections import Counter

    counter = Counter(iterable)
    # most_common() is already sorted by count, which groupby() relies on
    # to put equal counts next to each other.
    by_freq = itertools.groupby(counter.most_common(), key=lambda x: x[1])
    for freq, items in by_freq:
        items_list = [item[0] for item in items]
        print(f"Frequency {freq}: {items_list}")


# Example usage
text = "hello world hello python world"
frequency_analysis(text.split())

# Interleave multiple iterables
def interleave(*iterables):
    """Yield one item from each iterable in turn until all are exhausted.

    Iterables that run out are dropped; the remaining ones keep alternating.
    """
    active = [iter(it) for it in iterables]
    while active:
        still_active = []
        for it in active:
            try:
                item = next(it)
            except StopIteration:
                # Exhausted: leave it out of the next round.
                continue
            still_active.append(it)
            yield item
        active = still_active


# Interleave sequences
seq1 = [1, 2, 3]
seq2 = ['a', 'b', 'c', 'd']
seq3 = ['x', 'y']
result = list(interleave(seq1, seq2, seq3))
print(f"Interleaved: {result}")  # [1, 'a', 'x', 2, 'b', 'y', 3, 'c', 'd']

# Collapse runs of consecutive duplicates (non-adjacent repeats are kept)
def unique_justseen(iterable, key=None):
    """Return an iterator that keeps the first item of each run of equal items.

    Only *consecutive* duplicates are removed; a value that reappears later is
    yielded again.  ``key``, if given, computes the value used for comparison.
    """
    # groupby() yields (key, group) for every run; each group is non-empty,
    # so taking its first item is always safe.
    return (next(group) for _, group in itertools.groupby(iterable, key))


# Remove consecutive duplicates
data = [1, 1, 2, 2, 2, 3, 3, 1, 1, 4, 4]
unique_consecutive = list(unique_justseen(data))
print(f"Unique consecutive: {unique_consecutive}")  # [1, 2, 3, 1, 4]

# Quantify items matching condition
def quantify(iterable, predicate=bool):
    """Return the sum of ``predicate(item)`` over ``iterable``.

    With a predicate that returns booleans this is the number of matching
    items.  The default predicate counts truthy items.
    """
    return sum(predicate(item) for item in iterable)


# Count even numbers
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
even_count = quantify(numbers, lambda x: x % 2 == 0)
print(f"Even numbers count: {even_count}")  # 5

# Efficient data transformation pipeline
def transform_data(data):
    """Return ``x ** 2 + 10`` for every positive ``x`` in ``data``, as a list.

    The stages are composed lazily, each one consuming the previous one, so
    ``data`` is traversed exactly once and may be a one-shot iterator.
    (``itertools.chain`` would concatenate the stages' outputs rather than
    feed one stage into the next, so it is not used here.)
    """
    # Filter positive numbers
    positives = filter(lambda x: x > 0, data)
    # Square each number
    squared = map(lambda x: x ** 2, positives)
    # Add constant
    shifted = map(lambda x: x + 10, squared)
    return list(shifted)

# Better approach with function composition
def pipeline_transform(data):
    """Return ``x ** 2 + 10`` for every positive ``x`` in ``data``, as a list.

    A single comprehension filters and transforms in one pass.
    """
    return [x ** 2 + 10 for x in data if x > 0]

# Memory-efficient grouping
def group_by_size(iterable, size):
    """Yield consecutive lists of up to ``size`` items from ``iterable``.

    Items are pulled lazily, so only one group is held in memory at a time.
    The final group may be shorter than ``size``.
    """
    it = iter(iterable)
    # An empty slice signals that the source is exhausted.
    while group := list(itertools.islice(it, size)):
        yield group


# Process large dataset in chunks
large_data = range(1000000)
for chunk in group_by_size(large_data, 1000):
    # Process each chunk of 1000 items
    result = sum(chunk)  # Example processing
    print(f"Chunk sum: {result}")
    if result > 500000:  # Stop after certain condition
        break

functools

Basic Utilities

import functools

# partial - partial function application
def multiply(x, y):
    """Return the product of ``x`` and ``y``."""
    return x * y


# partial() pre-fills the first positional argument (x) of multiply.
double = functools.partial(multiply, 2)
print(double(5))  # 10

triple = functools.partial(multiply, 3)
print(triple(4))  # 12

# More complex example
def log_message(level, message, timestamp=None):
    """Format a log line as ``"[timestamp] level: message"``.

    When ``timestamp`` is None the literal text ``"now"`` is used.
    """
    if timestamp is None:
        timestamp = "now"
    return f"[{timestamp}] {level}: {message}"


# Create specialized logging functions
log_error = functools.partial(log_message, "ERROR")
log_info = functools.partial(log_message, "INFO")
log_debug = functools.partial(log_message, "DEBUG", timestamp="2023-12-25")

print(log_error("Something went wrong"))  # [now] ERROR: Something went wrong
print(log_info("System started"))  # [now] INFO: System started
print(log_debug("Debug info"))  # [2023-12-25] DEBUG: Debug info

Caching Decorators

# lru_cache - Least Recently Used cache
@functools.lru_cache(maxsize=128)
def fibonacci(n):
    """Return the n-th Fibonacci number (fibonacci(0) == 0, fibonacci(1) == 1).

    The cache turns the naive exponential recursion into one computation per
    distinct ``n``.
    """
    if n < 2:
        return n
    return fibonacci(n-1) + fibonacci(n-2)


# Test performance
import time
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
result = fibonacci(35)
end = time.perf_counter()
print(f"Fibonacci(35) = {result}, Time: {end - start:.4f}s")

# Cache info
print(fibonacci.cache_info())  # CacheInfo(hits=33, misses=36, maxsize=128, currsize=36)

# Clear cache
fibonacci.cache_clear()

# cache - Unlimited cache (Python 3.9+)
@functools.cache
def expensive_function(x):
    """Return ``x * x`` after a simulated 0.1 s delay; results are cached forever."""
    time.sleep(0.1)  # Simulate expensive operation
    return x * x


# First call is slow
start = time.perf_counter()
result1 = expensive_function(5)
end = time.perf_counter()
print(f"First call: {result1}, Time: {end - start:.4f}s")

# Second call is fast (served from the cache, no sleep)
start = time.perf_counter()
result2 = expensive_function(5)
end = time.perf_counter()
print(f"Second call: {result2}, Time: {end - start:.4f}s")

Function Manipulation

# reduce - apply function cumulatively
from functools import reduce

# reduce() folds the sequence from the left: f(f(f(a, b), c), d) ...
numbers = [1, 2, 3, 4, 5]
sum_result = reduce(lambda total, value: total + value, numbers)
print(sum_result)  # 15

# Find maximum: keep whichever of the running best and the next item is larger
max_result = reduce(lambda best, value: best if best > value else value, numbers)
print(max_result)  # 5

# String concatenation
words = ['hello', 'world', 'python']
sentence = reduce(lambda joined, word: joined + ' ' + word, words)
print(sentence)  # hello world python

# wraps - preserve function metadata
def my_decorator(func):
    """Wrap ``func`` so each call first prints ``Calling <name>``.

    ``functools.wraps`` copies the wrapped function's metadata (``__name__``,
    ``__doc__``, ...) onto the wrapper.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print(f"Calling {func.__name__}")
        return func(*args, **kwargs)
    return wrapper


@my_decorator
def greet(name):
    """Greet someone by name"""
    return f"Hello, {name}!"


print(greet("Alice"))  # Calling greet, Hello, Alice!
print(greet.__name__)  # greet (preserved)
print(greet.__doc__)  # Greet someone by name (preserved)

Advanced Decorators

# singledispatch - function overloading
@functools.singledispatch
def process_data(data):
    """Describe ``data``; the implementation is chosen by the type of ``data``.

    This base implementation handles every type without a registered handler.

    Raises:
        NotImplementedError: always, for unregistered types.
    """
    raise NotImplementedError(f"Unsupported type: {type(data)}")


@process_data.register(str)
def _(data):
    return f"Processing string: {data.upper()}"


@process_data.register(int)
def _(data):
    return f"Processing integer: {data * 2}"


@process_data.register(list)
def _(data):
    return f"Processing list: {len(data)} items"


# Usage
print(process_data("hello"))  # Processing string: HELLO
print(process_data(42))  # Processing integer: 84
print(process_data([1, 2, 3]))  # Processing list: 3 items

# cached_property - cache property results
class DataProcessor:
    """Holds a sequence and exposes a lazily computed, cached doubled copy."""

    def __init__(self, data):
        self._data = data

    @functools.cached_property
    def processed_data(self):
        """Each item of the stored data multiplied by 2.

        Computed on first access and then stored on the instance, so the
        print below runs once per instance.
        """
        print("Processing data...")  # This will only run once
        return [x * 2 for x in self._data]


processor = DataProcessor([1, 2, 3, 4, 5])
print(processor.processed_data)  # Processing data... [2, 4, 6, 8, 10]
print(processor.processed_data)  # [2, 4, 6, 8, 10] (cached)

Practical Examples

# Memoization decorator
def memoize(func):
    """Cache the results of ``func`` keyed by its call arguments.

    The key is built from the argument *values* and their types rather than
    from ``str(args)``: two distinct objects that print the same no longer
    share a cache entry, and keyword order does not matter.  Calls with
    unhashable arguments (e.g. lists) still work but are not cached.
    """
    cache = {}

    def make_key(args, kwargs):
        # Keyword names are unique, so sorting never has to compare values.
        kw_items = tuple(sorted(kwargs.items()))
        # Including the types keeps 1, 1.0 and True apart even though they
        # hash and compare equal.
        arg_types = tuple(type(value) for value in args)
        kw_types = tuple(type(value) for _, value in kw_items)
        return args, kw_items, arg_types, kw_types

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        key = make_key(args, kwargs)
        try:
            cached = key in cache  # hashing the key happens here
        except TypeError:
            # Unhashable argument: compute the result without caching it.
            return func(*args, **kwargs)
        if not cached:
            cache[key] = func(*args, **kwargs)
        return cache[key]
    return wrapper


@memoize
def expensive_calculation(x, y):
    """Return ``x ** y`` after a simulated 0.1 s delay (memoized)."""
    time.sleep(0.1)  # Simulate expensive operation
    return x ** y

# Retry decorator
def retry(max_attempts=3, delay=1):
    """Build a decorator that retries a function when it raises.

    Args:
        max_attempts: total number of calls to make before giving up (>= 1).
        delay: seconds to sleep between failed attempts.

    The decorated function's return value is passed through.  After the final
    failed attempt the last exception is re-raised unchanged.

    Raises:
        ValueError: if ``max_attempts`` is less than 1, since the wrapped
            function would otherwise never be called.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == max_attempts:
                        raise  # out of attempts: surface the last error
                    time.sleep(delay)
        return wrapper
    return decorator


@retry(max_attempts=3, delay=0.5)
def unreliable_function():
    """Fail randomly about 70% of the time to demonstrate ``retry``."""
    import random
    if random.random() < 0.7:
        raise Exception("Random failure")
    return "Success!"

# Timing decorator
def timing_decorator(func):
    """Wrap ``func`` so each successful call prints how long it took.

    The wrapped function's return value is passed through.  Nothing is printed
    when the call raises.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump when the system clock is adjusted.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f"{func.__name__} took {end - start:.4f} seconds")
        return result
    return wrapper


@timing_decorator
def slow_function():
    """Sleep for 0.1 s and return ``"Done"``."""
    time.sleep(0.1)
    return "Done"

# Validation decorator
def validate_types(*types):
    """Build a decorator that type-checks a function's leading arguments.

    ``types[i]`` is the expected type (or tuple of types) of the i-th
    parameter.  Arguments are checked whether they are passed positionally or
    by keyword; parameters beyond ``len(types)`` are not checked.

    Raises (from the decorated function):
        TypeError: ``"Argument <i> must be <type name>"`` for the first
            argument that fails its ``isinstance`` check.
    """
    def decorator(func):
        import inspect

        try:
            parameters = inspect.signature(func).parameters.values()
        except (TypeError, ValueError):
            # No introspectable signature: only positional checks are possible.
            parameters = ()

        # Position -> keyword name for the leading parameters.  Positional-only
        # parameters occupy a slot but can never arrive as a keyword.
        names = []
        for param in parameters:
            if param.kind is param.POSITIONAL_ONLY:
                names.append(None)
            elif param.kind is param.POSITIONAL_OR_KEYWORD:
                names.append(param.name)
            else:
                break  # *args / keyword-only / **kwargs have no fixed position

        def check(index, value, expected_type):
            if not isinstance(value, expected_type):
                # A tuple of types has no __name__; fall back to its repr.
                type_name = getattr(expected_type, '__name__', repr(expected_type))
                raise TypeError(f"Argument {index} must be {type_name}")

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for i, (arg, expected_type) in enumerate(zip(args, types)):
                check(i, arg, expected_type)
            # Parameters not supplied positionally may have come in by keyword.
            for i, (name, expected_type) in enumerate(zip(names, types)):
                if i >= len(args) and name in kwargs:
                    check(i, kwargs[name], expected_type)
            return func(*args, **kwargs)
        return wrapper
    return decorator


@validate_types(str, int)
def create_user(name, age):
    """Return a short description of a user."""
    return f"User: {name}, Age: {age}"


# Usage
print(create_user("Alice", 30))  # User: Alice, Age: 30
# create_user("Alice", "30") # TypeError: Argument 1 must be int

urllib and http.client

urllib.request - URL Opening

import urllib.request
import urllib.parse
import urllib.error

# Basic GET request
# Responses are used as context managers so the underlying connection is
# closed even when reading or decoding fails.
try:
    with urllib.request.urlopen('https://httpbin.org/get') as response:
        data = response.read().decode('utf-8')
    print(data)
except urllib.error.URLError as e:
    print(f"URL Error: {e}")

# Request with headers
request = urllib.request.Request('https://httpbin.org/get')
request.add_header('User-Agent', 'Python Script')
request.add_header('Accept', 'application/json')

try:
    with urllib.request.urlopen(request) as response:
        print(f"Status: {response.getcode()}")
        print(f"Headers: {response.info()}")
        data = response.read().decode('utf-8')
    print(data)
except urllib.error.HTTPError as e:
    # The server answered with an error status.
    print(f"HTTP Error: {e.code} - {e.reason}")
except urllib.error.URLError as e:
    # No usable answer at all (DNS failure, refused connection, ...).
    print(f"URL Error: {e.reason}")

# POST request with data
post_data = {
    'name': 'Alice',
    'age': 30,
    'city': 'New York'
}

# Encode data (passing data= makes urlopen send a POST)
encoded_data = urllib.parse.urlencode(post_data).encode('utf-8')

request = urllib.request.Request('https://httpbin.org/post', data=encoded_data)
request.add_header('Content-Type', 'application/x-www-form-urlencoded')

try:
    with urllib.request.urlopen(request) as response:
        result = response.read().decode('utf-8')
    print(result)
except urllib.error.URLError as e:
    print(f"Error: {e}")

# Download file
def download_file(url, filename):
    """Download ``url`` to the local path ``filename`` and report the outcome.

    Prints a success or failure message; ``urllib.error.URLError`` is caught
    and reported rather than raised.
    """
    try:
        urllib.request.urlretrieve(url, filename)
    except urllib.error.URLError as e:
        print(f"Download failed: {e}")
    else:
        print(f"Downloaded: {filename}")

# download_file('https://example.com/file.txt', 'downloaded_file.txt')

urllib.parse - URL Parsing

import urllib.parse

# Parse URL
url = 'https://example.com:8080/path/to/page?param1=value1&param2=value2#section'
parsed = urllib.parse.urlparse(url)
# Components, in order: https / example.com:8080 / /path/to/page /
# param1=value1&param2=value2 / section
for label, component in (
    ("Scheme", parsed.scheme),
    ("Netloc", parsed.netloc),
    ("Path", parsed.path),
    ("Query", parsed.query),
    ("Fragment", parsed.fragment),
):
    print(f"{label}: {component}")

# Parse query string (every value is a list because keys may repeat)
query_params = urllib.parse.parse_qs(parsed.query)
print(query_params)  # {'param1': ['value1'], 'param2': ['value2']}

# Build URL
base_url = 'https://api.example.com/search'
params = {'q': 'python programming', 'limit': 10, 'offset': 0}
query_string = urllib.parse.urlencode(params)
full_url = f"{base_url}?{query_string}"
print(full_url)  # https://api.example.com/search?q=python+programming&limit=10&offset=0

# URL encoding/decoding
text = "Hello World! @#$%"
encoded = urllib.parse.quote(text)
print(encoded)  # Hello%20World%21%20%40%23%24%25

decoded = urllib.parse.unquote(encoded)
print(decoded)  # Hello World! @#$%

# Join URLs (the trailing slash on the base keeps the 'api/' segment)
base = 'https://example.com/api/'
endpoint = 'users/123'
full_url = urllib.parse.urljoin(base, endpoint)
print(full_url)  # https://example.com/api/users/123

http.client - Low-level HTTP

import http.client
import json

# Basic HTTP connection
conn = http.client.HTTPSConnection("httpbin.org")
try:
    # GET request
    conn.request("GET", "/get")
    response = conn.getresponse()
    print(f"Status: {response.status} {response.reason}")
    print(f"Headers: {response.getheaders()}")
    data = response.read().decode('utf-8')
    print(data)

    # POST request with JSON
    post_data = {
        'name': 'Alice',
        'age': 30
    }
    json_data = json.dumps(post_data)
    headers = {'Content-Type': 'application/json'}

    conn.request("POST", "/post", body=json_data, headers=headers)
    response = conn.getresponse()
    print(f"Status: {response.status}")
    result = response.read().decode('utf-8')
    print(result)
finally:
    # Close the socket even if a request or read above fails.
    conn.close()

# Multiple requests with same connection
conn = http.client.HTTPSConnection("httpbin.org")
try:
    # First request (the response must be read fully before the next request)
    conn.request("GET", "/get?page=1")
    response1 = conn.getresponse()
    data1 = response1.read()

    # Second request
    conn.request("GET", "/get?page=2")
    response2 = conn.getresponse()
    data2 = response2.read()
finally:
    conn.close()

# Handle different response types
def make_request(host, path, method="GET", data=None, headers=None):
    """Send one HTTPS request and summarise the response as a dict.

    Returns:
        On success: ``{'status', 'reason', 'headers', 'data'}`` with the body
        decoded as UTF-8.  On any failure: ``{'error': <message>}``.

    The connection is always closed, including when the request fails.
    """
    if headers is None:
        headers = {}

    conn = None
    try:
        conn = http.client.HTTPSConnection(host)
        conn.request(method, path, body=data, headers=headers)
        response = conn.getresponse()

        return {
            'status': response.status,
            'reason': response.reason,
            'headers': dict(response.getheaders()),
            'data': response.read().decode('utf-8')
        }
    except Exception as e:
        # Boundary handler: callers receive an error dict instead of an exception.
        return {'error': str(e)}
    finally:
        if conn is not None:
            conn.close()

# Usage
result = make_request("httpbin.org", "/get")
# make_request() returns {'error': ...} on failure, which has no 'status' key.
if 'error' in result:
    print(f"Request failed: {result['error']}")
else:
    print(f"Status: {result['status']}")
    print(f"Data: {result['data']}")

Error Handling and Timeouts

import urllib.request
import urllib.error
import socket

# Timeout handling
def fetch_with_timeout(url, timeout=10):
    """Fetch ``url`` and return its body as text, or a message describing the failure.

    Args:
        url: the URL to open.
        timeout: seconds to wait on blocking operations such as connecting.

    Returns:
        The UTF-8 decoded body on success; otherwise ``"Request timed out"``,
        ``"HTTP Error: <code> - <reason>"`` or ``"URL Error: <reason>"``.
    """
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.read().decode('utf-8')
    except socket.timeout:
        # Timeout while reading the response body.
        return "Request timed out"
    except urllib.error.HTTPError as e:
        return f"HTTP Error: {e.code} - {e.reason}"
    except urllib.error.URLError as e:
        # Timeouts while connecting arrive wrapped in URLError.
        if isinstance(e.reason, socket.timeout):
            return "Request timed out"
        return f"URL Error: {e.reason}"

# Test with different scenarios
urls = [
    'https://httpbin.org/get',
    'https://httpbin.org/status/404',
    'https://httpbin.org/delay/15',  # Will timeout
    'https://nonexistent-domain.com'  # Will fail
]

for url in urls:
    print(f"Fetching {url}:")
    result = fetch_with_timeout(url, timeout=5)
    # Only the first 100 characters are shown to keep the output short.
    print(f"Result: {result[:100]}...")
    print()

# Retry mechanism
def fetch_with_retry(url, max_retries=3, delay=1):
    """Fetch ``url`` as text, retrying on any error.

    Args:
        url: the URL to open (each attempt uses a 10 second timeout).
        max_retries: total number of attempts.
        delay: seconds to sleep between failed attempts.

    Returns:
        The UTF-8 decoded body, or ``"Failed after <max_retries> attempts"``
        once every attempt has failed (also when ``max_retries`` is 0).
    """
    import time

    for attempt in range(max_retries):
        try:
            # The context manager closes the response even if reading fails.
            with urllib.request.urlopen(url, timeout=10) as response:
                return response.read().decode('utf-8')
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(delay)
    return f"Failed after {max_retries} attempts"

# Custom opener with cookie support
import http.cookiejar

cookie_jar = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie_jar))

# Set as global opener
urllib.request.install_opener(opener)

# Now all requests will handle cookies automatically
# The first request only exists to receive the cookie; close it right away.
with urllib.request.urlopen('https://httpbin.org/cookies/set/test/value'):
    pass
with urllib.request.urlopen('https://httpbin.org/cookies') as response:
    print(response.read().decode('utf-8'))

sqlite3

Database Connection and Basic Operations

import sqlite3

# Connect to database (creates if doesn't exist)
conn = sqlite3.connect('example.db')
cursor = conn.cursor()

# Create table
cursor.execute('''
    CREATE TABLE IF NOT EXISTS users (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL,
        email TEXT UNIQUE NOT NULL,
        age INTEGER,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
''')

# Insert data
# OR IGNORE skips rows whose email already exists, so the script can be run
# again against the same example.db without hitting the UNIQUE constraint.
cursor.execute('''
    INSERT OR IGNORE INTO users (name, email, age) VALUES (?, ?, ?)
''', ('Alice', 'alice@example.com', 30))

# Insert multiple records
users_data = [
    ('Bob', 'bob@example.com', 25),
    ('Charlie', 'charlie@example.com', 35),
    ('David', 'david@example.com', 28)
]
cursor.executemany('''
    INSERT OR IGNORE INTO users (name, email, age) VALUES (?, ?, ?)
''', users_data)

# Commit changes
conn.commit()

# Query data
cursor.execute('SELECT * FROM users')
all_users = cursor.fetchall()
print("All users:", all_users)

# Query with conditions
cursor.execute('SELECT name, email FROM users WHERE age > ?', (25,))
adult_users = cursor.fetchall()
print("Adult users:", adult_users)

# Fetch one at a time
cursor.execute('SELECT * FROM users ORDER BY age')
while True:
    row = cursor.fetchone()
    if row is None:
        break
    # Columns follow the table definition: row[1] is name, row[3] is age.
    print(f"User: {row[1]}, Age: {row[3]}")

# Close connection
conn.close()

Advanced Database Operations

import sqlite3
from datetime import datetime

# Connect with row factory for dict-like access
conn = sqlite3.connect('example.db')
conn.row_factory = sqlite3.Row  # Enable dict-like access
cursor = conn.cursor()

# Create more complex tables
cursor.execute('''
    CREATE TABLE IF NOT EXISTS products (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL,
        price DECIMAL(10, 2),
        category TEXT,
        stock INTEGER DEFAULT 0,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
''')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS orders (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id INTEGER,
        product_id INTEGER,
        quantity INTEGER,
        total_price DECIMAL(10, 2),
        order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (user_id) REFERENCES users (id),
        FOREIGN KEY (product_id) REFERENCES products (id)
    )
''')

# Insert products
products = [
    ('Laptop', 999.99, 'Electronics', 10),
    ('Mouse', 29.99, 'Electronics', 50),
    ('Keyboard', 79.99, 'Electronics', 30),
    ('Book', 19.99, 'Education', 100)
]

cursor.executemany('''
    INSERT INTO products (name, price, category, stock) VALUES (?, ?, ?, ?)
''', products)

# Complex queries with JOINs
# Both tables have a "name" column, so each one is aliased; otherwise
# row['name'] would be ambiguous.
cursor.execute('''
    SELECT u.name AS user_name, p.name AS product_name, o.quantity, o.total_price
    FROM orders o
    JOIN users u ON o.user_id = u.id
    JOIN products p ON o.product_id = p.id
    WHERE o.order_date >= datetime('now', '-7 days')
''')

recent_orders = cursor.fetchall()
for order in recent_orders:
    print(f"User: {order['user_name']}, Product: {order['product_name']}, Quantity: {order['quantity']}")

# Aggregate functions
cursor.execute('''
    SELECT category, COUNT(*) as count, AVG(price) as avg_price
    FROM products
    GROUP BY category
''')

category_stats = cursor.fetchall()
for stat in category_stats:
    print(f"Category: {stat['category']}, Count: {stat['count']}, Avg Price: {stat['avg_price']:.2f}")

# Update and Delete operations
cursor.execute('''
    UPDATE products SET stock = stock - 1 WHERE name = ?
''', ('Laptop',))

cursor.execute('''
    DELETE FROM products WHERE stock = 0
''')

conn.commit()
conn.close()

Database Context Manager and Error Handling

import sqlite3
from contextlib import contextmanager

@contextmanager
def get_db_connection(db_path):
    """Open a SQLite connection for the duration of a ``with`` block.

    Rows are returned as ``sqlite3.Row`` objects.  If the block raises, pending
    changes are rolled back and the exception propagates; the connection is
    closed in every case.  The caller is responsible for calling ``commit()``.
    """
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    except Exception:
        conn.rollback()
        raise  # bare raise keeps the original traceback
    finally:
        conn.close()


# Database operations class
class UserManager:
    """CRUD helper for a ``users`` table stored in a SQLite file.

    Every method opens its own short-lived connection to ``db_path``.
    """

    # Columns of the users table.  update_user() only accepts these names
    # because column names cannot be bound as SQL parameters.
    _COLUMNS = frozenset({'id', 'username', 'email', 'password_hash', 'created_at'})

    def __init__(self, db_path):
        self.db_path = db_path
        self.init_db()

    def init_db(self):
        """Create the ``users`` table if it does not exist yet."""
        with get_db_connection(self.db_path) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS users (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    username TEXT UNIQUE NOT NULL,
                    email TEXT UNIQUE NOT NULL,
                    password_hash TEXT NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
            conn.commit()

    def create_user(self, username, email, password_hash):
        """Insert a user and return the new row id.

        Raises:
            ValueError: if the username or the email is already taken.
            sqlite3.IntegrityError: for any other constraint violation
                (for example a NULL in a NOT NULL column).
        """
        try:
            with get_db_connection(self.db_path) as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    INSERT INTO users (username, email, password_hash) VALUES (?, ?, ?)
                ''', (username, email, password_hash))
                conn.commit()
                return cursor.lastrowid
        except sqlite3.IntegrityError as e:
            message = str(e)
            # Only a UNIQUE violation means "already exists"; a NOT NULL
            # failure names the same column but is a different problem.
            if 'UNIQUE' in message:
                if 'username' in message:
                    raise ValueError("Username already exists") from e
                if 'email' in message:
                    raise ValueError("Email already exists") from e
            raise

    def get_user(self, username):
        """Return the user with ``username`` as a dict, or None if absent."""
        with get_db_connection(self.db_path) as conn:
            cursor = conn.cursor()
            cursor.execute('SELECT * FROM users WHERE username = ?', (username,))
            user = cursor.fetchone()
            return dict(user) if user else None

    def update_user(self, user_id, **kwargs):
        """Set the given columns on the user with id ``user_id``.

        Returns:
            True if a row was updated; False if no columns were given or no
            such user exists.

        Raises:
            ValueError: if a keyword is not a column of the users table.
        """
        if not kwargs:
            return False

        unknown = sorted(set(kwargs) - self._COLUMNS)
        if unknown:
            raise ValueError(f"Unknown column(s): {', '.join(unknown)}")

        # Build dynamic update query (names are validated above, values are bound)
        set_clause = ', '.join(f"{key} = ?" for key in kwargs)
        query = f"UPDATE users SET {set_clause} WHERE id = ?"
        values = list(kwargs.values()) + [user_id]

        with get_db_connection(self.db_path) as conn:
            cursor = conn.cursor()
            cursor.execute(query, values)
            conn.commit()
            return cursor.rowcount > 0

    def delete_user(self, user_id):
        """Delete the user with id ``user_id``; return True if a row was removed."""
        with get_db_connection(self.db_path) as conn:
            cursor = conn.cursor()
            cursor.execute('DELETE FROM users WHERE id = ?', (user_id,))
            conn.commit()
            return cursor.rowcount > 0

    def list_users(self, limit=10, offset=0):
        """Return up to ``limit`` users as dicts, ordered by id, skipping ``offset``."""
        with get_db_connection(self.db_path) as conn:
            cursor = conn.cursor()
            # ORDER BY makes LIMIT/OFFSET paging deterministic.
            cursor.execute('SELECT * FROM users ORDER BY id LIMIT ? OFFSET ?', (limit, offset))
            return [dict(row) for row in cursor.fetchall()]

# Usage example
user_manager = UserManager('users.db')

# Create users
try:
    user_id = user_manager.create_user('alice', 'alice@example.com', 'hashed_password')
    print(f"Created user with ID: {user_id}")
except ValueError as e:
    print(f"Error creating user: {e}")

# Get user
user = user_manager.get_user('alice')
if user:
    print(f"Found user: {user['username']} ({user['email']})")

    # Update user
    # Use the id from the fetched row: user_id is not assigned when creation
    # fails, which happens on every run after the first.
    user_manager.update_user(user['id'], email='alice.new@example.com')

# List all users
users = user_manager.list_users()
for user in users:
    print(f"User: {user['username']} - {user['email']}")

Advanced Features

import sqlite3
import json
from datetime import datetime

# Custom functions
def json_extract(json_str, path):
    """Look up a dot-separated ``path`` of object keys inside a JSON document.

    Args:
        json_str: JSON text.
        path: keys joined by dots, e.g. ``"author"`` or ``"meta.views"``.

    Returns:
        The value found.  Objects and arrays are returned re-encoded as JSON
        text, because a SQLite user function can only return scalar values.
        None is returned when the JSON is invalid, an argument has the wrong
        type, or any key along the path is missing.
    """
    try:
        data = json.loads(json_str)
        keys = path.split('.')
    except (TypeError, ValueError, AttributeError):
        # Malformed JSON, or a NULL / non-string argument coming from SQL.
        return None
    for key in keys:
        if isinstance(data, dict) and key in data:
            data = data[key]
        else:
            return None
    if isinstance(data, (dict, list)):
        return json.dumps(data)
    return data

# Connect and register custom function
conn = sqlite3.connect(':memory:')
# Expose the Python function to SQL as json_extract(json, path).
conn.create_function('json_extract', 2, json_extract)

# Create table with JSON data
cursor = conn.cursor()
cursor.execute('''
    CREATE TABLE posts (
        id INTEGER PRIMARY KEY,
        title TEXT,
        content TEXT,
        metadata TEXT, -- JSON data
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
''')

# Insert data with JSON
posts_data = [
    ('First Post', 'Hello World', '{"author": "Alice", "tags": ["intro", "hello"], "views": 100}'),
    ('Second Post', 'Python Tutorial', '{"author": "Bob", "tags": ["python", "tutorial"], "views": 250}'),
    ('Third Post', 'Database Tips', '{"author": "Charlie", "tags": ["database", "tips"], "views": 180}')
]

cursor.executemany('''
    INSERT INTO posts (title, content, metadata) VALUES (?, ?, ?)
''', posts_data)

# Query using custom JSON function
cursor.execute('''
    SELECT title, json_extract(metadata, 'author') as author, json_extract(metadata, 'views') as views
    FROM posts
    WHERE json_extract(metadata, 'views') > 150
''')

popular_posts = cursor.fetchall()
for title, author, views in popular_posts:
    print(f"Post: {title}, Author: {author}, Views: {views}")

# Backup and restore
def backup_database(source_conn, backup_path):
    """Copy the whole database behind ``source_conn`` into the file ``backup_path``.

    ``source_conn`` stays open; the connection to the backup file is closed
    before returning.
    """
    # "with sqlite3.connect(...)" only manages the transaction and does NOT
    # close the connection, so it is closed explicitly here.
    backup_conn = sqlite3.connect(backup_path)
    try:
        source_conn.backup(backup_conn)
    finally:
        backup_conn.close()
    print(f"Database backed up to {backup_path}")

def restore_database(backup_path, target_conn):
    """Copy the database stored in ``backup_path`` into ``target_conn``.

    The target's existing content is replaced.  ``target_conn`` stays open;
    the connection to the backup file is closed before returning.
    """
    # "with sqlite3.connect(...)" only manages the transaction and does NOT
    # close the connection, so it is closed explicitly here.
    backup_conn = sqlite3.connect(backup_path)
    try:
        backup_conn.backup(target_conn)
    finally:
        backup_conn.close()
    print(f"Database restored from {backup_path}")

# Usage
backup_database(conn, 'backup.db')

# Pragmas and optimization
# execute() returns the cursor itself, so the fetch can be chained.
table_info = cursor.execute('PRAGMA table_info(posts)').fetchall()
print("Table structure:", table_info)

# Enable WAL mode for better concurrency
# NOTE: this only takes effect on file-backed databases; the ':memory:'
# connection used in this example keeps its in-memory journal.
cursor.execute('PRAGMA journal_mode=WAL')

# Analyze query performance
plan = cursor.execute(
    'EXPLAIN QUERY PLAN SELECT * FROM posts WHERE title LIKE ?', ('%Python%',)
).fetchall()
print("Query plan:", plan)

# Create indexes for better performance
cursor.execute('CREATE INDEX IF NOT EXISTS idx_posts_title ON posts(title)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_posts_created ON posts(created_at)')

conn.commit()
conn.close()

Quick Reference

File Operations

# Read/Write files
with open('file.txt', 'r') as f: content = f.read()
with open('file.txt', 'w') as f: f.write('content')

# Path operations
from pathlib import Path
path = Path('folder/file.txt')
path.exists(), path.is_file(), path.parent, path.name

Date/Time

from datetime import datetime, timedelta
now = datetime.now()
formatted = now.strftime("%Y-%m-%d %H:%M:%S")
future = now + timedelta(days=7)

System Operations

import os
os.getcwd(), os.listdir('.'), os.environ.get('HOME')
os.path.join('folder', 'file.txt'), os.path.exists('file.txt')

JSON/Serialization

import json, pickle
data = json.loads(json_string)
json_string = json.dumps(data, indent=2)
pickled = pickle.dumps(object)

Regular Expressions

import re
re.search(r'pattern', text), re.findall(r'\d+', text)
re.sub(r'old', 'new', text), re.match(r'^pattern', text)

Collections

from collections import defaultdict, Counter, deque
dd = defaultdict(list); counter = Counter(items)
queue = deque(); queue.append(item); queue.popleft()

Itertools

import itertools
itertools.chain(list1, list2), itertools.combinations(items, 2)
itertools.product(a, b), itertools.groupby(data, key_func)

HTTP Requests

import urllib.request
response = urllib.request.urlopen('https://api.com/data')
data = response.read().decode('utf-8')

Database

import sqlite3
conn = sqlite3.connect('db.sqlite')
cursor = conn.cursor()
# "table" is a reserved word in SQL, so a real table name is used here.
cursor.execute('SELECT * FROM users WHERE id = ?', (1,))
conn.commit(); conn.close()

This cheatsheet covers the most essential Python Standard Library modules. Each module has many more features - consult the official documentation for complete details.