Advanced Features
Generators and Iterators
Generator Functions
# Basic generator with yield
def countdown(n):
    """Yield n, n - 1, ... while the value stays above zero.

    Nothing is yielded when n is not positive.
    """
    remaining = n
    while remaining > 0:
        yield remaining
        remaining -= 1
# Usage
for num in countdown(5):
    print(num)  # 5, 4, 3, 2, 1
# The generator remembers its position between next() calls
gen = countdown(3)
print(next(gen), next(gen), next(gen), sep="\n")  # 3, 2, 1 (one per line)
# next(gen)  # StopIteration
Generator Expressions
# Generator expressions are evaluated lazily
squares = (x * x for x in range(10))
evens = (x for x in range(0, 20, 2))
# Memory efficient for large datasets: one item is processed at a time.
# NOTE(review): process_item and huge_dataset are not defined in the visible
# part of this file; they look like placeholders for caller-supplied names.
data = (process_item(item) for item in huge_dataset)
# Materialise into a list only when needed
result = [*squares]
Custom Iterators
class Counter:
    """Iterator over the values from start (inclusive) up to end (exclusive)."""

    def __init__(self, start, end):
        self.start, self.end = start, end

    def __iter__(self):
        # The object is its own iterator, so it can be consumed only once.
        return self

    def __next__(self):
        if not self.start < self.end:
            raise StopIteration
        self.start += 1
        return self.start - 1


# Usage
for num in Counter(1, 5):
    print(num)  # 1, 2, 3, 4
Advanced Generator Techniques
# Generator with send() method
def generator_with_send():
    """Yield twice the most recent non-None value sent in.

    The first value yielded is None; sending None leaves the stored value
    unchanged.
    """
    doubled = None
    while True:
        incoming = yield doubled
        if incoming is None:
            continue
        doubled = incoming * 2


gen = generator_with_send()
next(gen)  # Advance to the first yield so send() can deliver a value
print(gen.send(5))  # 10
print(gen.send(3))  # 6
# Generator delegation with yield from
def sub_generator():
    """Yield 1, 2, 3."""
    for value in (1, 2, 3):
        yield value


def main_generator():
    """Yield everything from sub_generator(), then 4 and 5."""
    yield from sub_generator()
    for value in (4, 5):
        yield value


list(main_generator())  # [1, 2, 3, 4, 5]
Context Managers
Using Context Managers
# Basic with statement
# Reads the whole of file.txt into `content`.
with open('file.txt', 'r') as f:
    content = f.read()
# File automatically closed
# Multiple context managers
# One with statement manages both files; input.txt is copied into output.txt.
with open('input.txt', 'r') as infile, open('output.txt', 'w') as outfile:
    outfile.write(infile.read())
Custom Context Managers (Class-based)
class DatabaseConnection:
    """Context manager that simulates opening and closing a connection.

    Entering prints a message and returns a descriptive string; leaving
    prints a closing message (plus the exception value, if any) and never
    suppresses exceptions.
    """

    def __init__(self, host, port):
        self.host, self.port = host, port
        self.connection = None

    def __enter__(self):
        print(f"Connecting to {self.host}:{self.port}")
        established = f"Connected to {self.host}"
        self.connection = established
        return established

    def __exit__(self, exc_type, exc_val, exc_tb):
        print("Closing connection")
        if exc_type:
            print(f"Exception occurred: {exc_val}")
        # A false return value lets any exception propagate to the caller.
        return False


# Usage
with DatabaseConnection('localhost', 5432) as conn:
    print(f"Using {conn}")
Context Managers with contextlib
from contextlib import contextmanager, closing
import sqlite3
@contextmanager
def database_transaction():
    """Yield a connection to database.db wrapped in a transaction.

    The transaction is committed when the with-body finishes normally and
    rolled back (with the exception re-raised) when it raises. The
    connection is closed in every case.
    """
    # closing() calls conn.close() on exit, whatever happens inside.
    with closing(sqlite3.connect('database.db')) as conn:
        try:
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise
# Usage
with database_transaction() as conn:
    cursor = conn.cursor()
    # The name is passed as a bound parameter, not formatted into the SQL.
    insert_params = ("Alice",)
    cursor.execute("INSERT INTO users (name) VALUES (?)", insert_params)
# Suppress exceptions
import os  # needed for os.remove below; without it the call raises NameError
from contextlib import suppress

# suppress() swallows only the listed exception types, so removing a file
# that does not exist becomes a no-op while other errors still propagate.
with suppress(FileNotFoundError):
    os.remove('nonexistent_file.txt')
Nested Context Managers
from contextlib import ExitStack
def process_files(filenames):
    """Open every path in filenames, print each file's contents, close them all."""
    with ExitStack() as stack:
        handles = []
        for path in filenames:
            # enter_context registers the file so the stack closes it on exit
            handles.append(stack.enter_context(open(path)))
        # All files will be closed automatically
        for handle in handles:
            print(handle.read())
Metaclasses and Class Customization
Basic Metaclass
class SingletonMeta(type):
    """Metaclass that creates at most one instance per class.

    The first call constructs the instance with the given arguments; later
    calls return that same object and ignore their arguments.
    """

    _instances = {}

    def __call__(cls, *args, **kwargs):
        try:
            return cls._instances[cls]
        except KeyError:
            instance = super().__call__(*args, **kwargs)
            cls._instances[cls] = instance
            return instance
class Database(metaclass=SingletonMeta):
    """Example class whose instances are deduplicated by SingletonMeta."""

    def __init__(self):
        self.connection = "Database connection"


# Usage
db1, db2 = Database(), Database()
print(db1 is db2)  # True
Class Decorators
def add_methods(cls):
    """Class decorator that adds to_dict() and from_dict() to cls.

    to_dict() returns a shallow copy of the instance attributes.
    from_dict(data) is a classmethod that calls the class with the items of
    data as keyword arguments.

    Returns:
        The same class object, modified in place.
    """
    def to_dict(self):
        # Copy the attribute dict: returning self.__dict__ itself would let
        # callers change the instance by editing the result.
        return dict(vars(self))

    def from_dict(klass, data):
        return klass(**data)

    cls.to_dict = to_dict
    cls.from_dict = classmethod(from_dict)
    return cls
@add_methods
class Person:
    """Simple record with a name and an age; add_methods supplies the dict helpers."""

    def __init__(self, name, age):
        self.name, self.age = name, age


# Usage
person = Person("Alice", 30)
data = person.to_dict()
# Rebuild an equivalent object from the exported attributes
new_person = Person.from_dict(data)
`__new__` Method
class ImmutablePoint:
    """Point whose coordinates are stored in __new__ and exposed read-only."""

    def __new__(cls, x, y):
        point = super().__new__(cls)
        point._x, point._y = x, y
        return point

    def __init__(self, x, y):
        # Nothing to do: __new__ has already stored both coordinates.
        pass

    @property
    def x(self):
        """Read-only x coordinate."""
        return self._x

    @property
    def y(self):
        """Read-only y coordinate."""
        return self._y


point = ImmutablePoint(3, 4)
print(point.x, point.y)  # 3 4
# point.x = 5  # AttributeError
Dynamic Class Creation
# Using type() to create classes
def init_method(self, name):
    """Initialiser installed as __init__ on the dynamically built class."""
    self.name = name


def greet_method(self):
    """Return a greeting that uses the instance's name."""
    return f"Hello, {self.name}"


# Create class dynamically: type(name, bases, namespace)
Person = type('Person', (), dict(__init__=init_method, greet=greet_method))
person = Person("Alice")
print(person.greet())  # Hello, Alice
Descriptors and Properties
Basic Descriptors
class Descriptor:
    """Data descriptor that keeps one value per owning instance.

    Values are stored in each instance's __dict__, so assigning through one
    instance does not change what another instance reads. Instances that
    have not been assigned (or whose value was deleted) read initial_value.

    Args:
        initial_value: default returned until an instance is assigned.
    """

    def __init__(self, initial_value=None):
        # Shared default only; per-instance values never live here.
        self.value = initial_value
        # Fallback storage key for descriptors attached after class creation;
        # __set_name__ replaces it with the real attribute name.
        self._storage_name = f"_descriptor_{id(self)}"

    def __set_name__(self, owner, name):
        self._storage_name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            # Accessed on the class: expose the descriptor itself.
            return self
        return obj.__dict__.get(self._storage_name, self.value)

    def __set__(self, obj, value):
        obj.__dict__[self._storage_name] = value

    def __delete__(self, obj):
        # Drop the per-instance value so reads fall back to the default.
        obj.__dict__.pop(self._storage_name, None)
class MyClass:
    """Example owner class exposing one Descriptor-managed attribute."""

    attr = Descriptor("default")


obj = MyClass()
print(obj.attr)  # default
# Assignment is routed through Descriptor.__set__
obj.attr = "new value"
print(obj.attr)  # new value
Property Decorators
class Temperature:
    """Temperature stored in Celsius with a derived Fahrenheit view.

    Args:
        celsius: initial temperature in degrees Celsius (default 0).

    Raises:
        ValueError: if a value below -273.15 degrees Celsius is assigned,
            whether through the constructor, `celsius`, or `fahrenheit`.
    """

    def __init__(self, celsius=0):
        # Assign through the property so the constructor is validated like
        # any later assignment; writing _celsius directly skipped the check.
        self.celsius = celsius

    @property
    def celsius(self):
        """Temperature in degrees Celsius."""
        return self._celsius

    @celsius.setter
    def celsius(self, value):
        if value < -273.15:
            raise ValueError("Temperature below absolute zero")
        self._celsius = value

    @property
    def fahrenheit(self):
        """Temperature in degrees Fahrenheit, computed from the Celsius value."""
        return (self._celsius * 9/5) + 32

    @fahrenheit.setter
    def fahrenheit(self, value):
        # Delegate to the celsius setter so the same validation applies.
        self.celsius = (value - 32) * 5/9


# Usage
temp = Temperature(25)
print(temp.fahrenheit)  # 77.0
temp.fahrenheit = 86
print(temp.celsius)  # 30.0
Advanced Descriptors
class ValidatedAttribute:
    """Data descriptor that only accepts values approved by a validator.

    Values are stored per instance in the instance's __dict__ under the
    attribute's own name.

    Args:
        validator: callable taking the candidate value and returning a
            truthy result when the value is acceptable.

    Raises:
        ValueError: on assignment of a value the validator rejects.
        AttributeError: on reading an attribute that was never assigned.
    """

    def __init__(self, validator):
        self.validator = validator
        self.name = None  # filled in by __set_name__

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        try:
            return obj.__dict__[self.name]
        except KeyError:
            # A missing attribute must surface as AttributeError so that
            # hasattr() and getattr(obj, name, default) work as expected.
            raise AttributeError(
                f"{type(obj).__name__!r} object has no attribute {self.name!r}"
            ) from None

    def __set__(self, obj, value):
        if not self.validator(value):
            raise ValueError(f"Invalid value for {self.name}: {value}")
        obj.__dict__[self.name] = value
class Person:
    """Example owner class with a validated name and age."""

    # name must be a non-empty string
    name = ValidatedAttribute(lambda x: isinstance(x, str) and len(x) > 0)
    # age must be an int in the range 0..150
    age = ValidatedAttribute(lambda x: isinstance(x, int) and 0 <= x <= 150)


person = Person()
person.name = "Alice"
person.age = 30
# person.age = -5  # ValueError
Async/Await and Asyncio
Basic Async/Await
import asyncio
async def fetch_data(url):
    """Simulate fetching url: print a notice, wait one second, return a fake payload."""
    print(f"Fetching {url}")
    await asyncio.sleep(1)  # Simulate network delay
    payload = f"Data from {url}"
    return payload
async def main():
    """Fetch two URLs one after the other and print both results."""
    # Sequential execution: the second fetch starts only after the first ends
    results = []
    for url in ("url1", "url2"):
        results.append(await fetch_data(url))
    print(*results)


# Run async function
asyncio.run(main())
Concurrent Execution
async def concurrent_fetch():
    """Fetch three URLs concurrently and return their results as gathered."""
    # Concurrent execution: all three tasks are scheduled before any is awaited
    tasks = [
        asyncio.create_task(fetch_data(url))
        for url in ("url1", "url2", "url3")
    ]
    return await asyncio.gather(*tasks)
# Alternative using asyncio.wait
async def concurrent_with_wait():
    """Fetch url0..url2 concurrently with asyncio.wait.

    Returns:
        The three results, listed in the order the tasks were created.
    """
    # asyncio.wait() needs Task/Future objects: passing bare coroutines was
    # deprecated in Python 3.8 and raises TypeError from 3.11 on.
    tasks = [asyncio.create_task(fetch_data(f"url{i}")) for i in range(3)]
    await asyncio.wait(tasks, return_when=asyncio.ALL_COMPLETED)
    # Read results from the ordered task list; the `done` set returned by
    # wait() has no defined order.
    return [task.result() for task in tasks]
Async Context Managers
class AsyncDatabase:
    """Async context manager that simulates opening and closing a connection."""

    async def __aenter__(self):
        print("Opening database connection")
        await asyncio.sleep(0.1)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        print("Closing database connection")
        await asyncio.sleep(0.1)
        # A false return value lets any exception from the body propagate.
        suppress_exception = False
        return suppress_exception
async def use_database():
    """Open an AsyncDatabase, print a message, and wait half a second inside it."""
    async with AsyncDatabase() as db:
        print("Using database")
        work_seconds = 0.5
        await asyncio.sleep(work_seconds)
Async Iterators
class AsyncCounter:
    """Async iterator over the values from start (inclusive) to end (exclusive)."""

    def __init__(self, start, end):
        self.start, self.end = start, end

    def __aiter__(self):
        # The object is its own async iterator.
        return self

    async def __anext__(self):
        if not self.start < self.end:
            raise StopAsyncIteration
        await asyncio.sleep(0.1)  # Simulate async operation
        self.start += 1
        return self.start - 1
async def async_iteration():
    """Print every value produced by AsyncCounter(1, 5)."""
    counter = AsyncCounter(1, 5)
    async for num in counter:
        print(num)
Coroutines and Async Generators
Async Generators
async def async_range(n):
    """Asynchronously yield 0 .. n - 1, pausing 0.1 seconds before each value."""
    for index in range(n):
        await asyncio.sleep(0.1)
        yield index
async def consume_async_generator():
    """Print each value produced by async_range(5)."""
    values = async_range(5)
    async for value in values:
        print(value)
Producer-Consumer Pattern
import asyncio
from asyncio import Queue
async def producer(queue):
    """Put item_0 .. item_9 on queue, 0.1 seconds apart, then a None sentinel."""
    for index in range(10):
        await asyncio.sleep(0.1)
        label = f"item_{index}"
        await queue.put(label)
    # Sentinel value: tells the consumer that no more items will arrive
    await queue.put(None)
async def consumer(queue):
    """Print items taken from queue until the None sentinel is received."""
    while (item := await queue.get()) is not None:
        print(f"Consumed: {item}")
        await asyncio.sleep(0.2)
async def main():
    """Run producer and consumer concurrently on one shared queue."""
    queue = Queue()
    workers = (producer(queue), consumer(queue))
    await asyncio.gather(*workers)
Async with Timeout
async def slow_operation():
    """Wait five seconds, then return a completion message."""
    await asyncio.sleep(5)
    message = "Finally done!"
    return message
async def with_timeout():
    """Run slow_operation with a two-second limit and print the outcome."""
    try:
        result = await asyncio.wait_for(slow_operation(), timeout=2.0)
    except asyncio.TimeoutError:
        print("Operation timed out")
    else:
        print(result)
Memory Management and Garbage Collection
Memory Usage Tracking
import sys
import gc
from memory_profiler import profile
# Check object size
my_list = [1, 2, 3, 4, 5]
# sys.getsizeof is shallow: it does not follow references into the elements.
print(sys.getsizeof(my_list))  # Size in bytes
# Reference counting
import sys
x = [1, 2, 3]
print(sys.getrefcount(x))  # Number of references
# Garbage collection
print(gc.get_count())  # Current collection counts
gc.collect()  # Force garbage collection
Weak References
import weakref
class ExpensiveObject:
    """Named object that announces its own finalisation."""

    def __init__(self, name):
        self.name = name

    def __del__(self):
        print(f"Deleting {self.name}")


obj = ExpensiveObject("test")
# A weak reference does not keep the object alive
weak_ref = weakref.ref(obj)
print(weak_ref())  # <__main__.ExpensiveObject object>
del obj
# Calling a dead weak reference returns None
print(weak_ref())  # None
Memory Profiling
@profile
def memory_intensive_function():
    """Allocate a large list and a large dict, then delete both.

    Decorated with `profile` (imported from memory_profiler in this file) so
    the allocations and the releases can be observed.
    """
    # Large list
    big_list = [i for i in range(1000000)]
    # Dictionary
    big_dict = {i: i**2 for i in range(100000)}
    # Clean up
    del big_list
    del big_dict
# Run with: python -m memory_profiler script.py
Slots for Memory Optimization
class RegularClass:
    """Plain class: attributes live in a per-instance __dict__."""

    def __init__(self, x, y):
        self.x, self.y = x, y
class SlottedClass:
    """Class restricted to the attributes x and y via __slots__."""

    # Declaring __slots__ removes the per-instance __dict__.
    __slots__ = ['x', 'y']

    def __init__(self, x, y):
        self.x, self.y = x, y
# SlottedClass uses less memory
import sys
regular = RegularClass(1, 2)
slotted = SlottedClass(1, 2)
# sys.getsizeof is shallow, so a fair comparison counts the regular object
# together with its per-instance __dict__; the slotted object has no __dict__.
print(sys.getsizeof(regular) + sys.getsizeof(regular.__dict__))  # Larger
print(sys.getsizeof(slotted))  # Smaller
Python Internals
Bytecode Inspection
import dis
# Small function used as the disassembly subject: returns x plus twice y.
def example_function(x, y):
    return x + y * 2
# Disassemble function
dis.dis(example_function)
# Compile and inspect
# "eval" mode compiles the source string as a single expression.
code = compile("x + y * 2", "<string>", "eval")
dis.dis(code)
GIL (Global Interpreter Lock) Understanding
import threading
import time
# CPU-bound task (affected by GIL)
def cpu_bound_task(*, iterations=100_000_000):
    """Sum the integers 0 .. iterations - 1 in a pure-Python loop.

    Args:
        iterations: number of loop iterations. Keyword-only, so a stray
            positional argument is still rejected and cannot be mistaken
            for a workload size.

    Returns:
        The sum of range(iterations).
    """
    count = 0
    for i in range(iterations):
        count += i
    return count
# I/O-bound task (less affected by GIL)
def io_bound_task():
    """Block for one second to stand in for an I/O wait, then report completion."""
    time.sleep(1)
    outcome = "I/O completed"
    return outcome
# Threading with CPU-bound tasks
def threaded_cpu_test():
    """Run cpu_bound_task in four threads and print the elapsed wall time."""
    start = time.time()
    threads = [threading.Thread(target=cpu_bound_task) for _ in range(4)]
    for worker in threads:
        worker.start()
    # Wait for every thread before reading the clock
    for worker in threads:
        worker.join()
    print(f"Threaded CPU time: {time.time() - start}")
# Use multiprocessing for CPU-bound tasks
from multiprocessing import Pool
def multiprocessing_cpu_test():
    """Run cpu_bound_task in four worker processes and print the elapsed wall time."""
    start = time.time()
    with Pool(4) as pool:
        # cpu_bound_task accepts no positional argument, so
        # pool.map(cpu_bound_task, range(4)) raised TypeError in the workers.
        # Submit four argument-less calls instead.
        pending = [pool.apply_async(cpu_bound_task) for _ in range(4)]
        # get() blocks until the job finishes and re-raises worker errors.
        for job in pending:
            job.get()
    print(f"Multiprocessing CPU time: {time.time() - start}")
Import System
import sys
import importlib
# Module search path
print(sys.path)
# Dynamic import: the module is chosen by name at run time
module_name = "math"
math_module = importlib.import_module(module_name)
root = math_module.sqrt(16)
print(root)
# Reload module (for development)
importlib.reload(math_module)
# Custom import hook
class CustomFinder:
    """Meta path finder that resolves only the module name "custom_module"."""

    def find_spec(self, fullname, path, target=None):
        """Return a spec for "custom_module"; return None for any other name."""
        if fullname != "custom_module":
            # Returning None lets the next finder on sys.meta_path try.
            return None
        # Import the submodule explicitly: a bare `import importlib` does not
        # guarantee that the importlib.machinery attribute is available.
        from importlib.machinery import ModuleSpec
        return ModuleSpec(fullname, CustomLoader())
class CustomLoader:
    """Loader that populates a module with a single custom_function attribute."""

    def create_module(self, spec):
        # Returning None requests the default module creation.
        return None

    def exec_module(self, module):
        module.custom_function = lambda: "Custom module loaded!"
# Install custom finder at the front of sys.meta_path so it is consulted first
finder = CustomFinder()
sys.meta_path.insert(0, finder)
Advanced Decorators and Metaprogramming
Decorators with Arguments
def retry(max_attempts=3, delay=1):
    """Decorator factory that retries a function when it raises.

    Args:
        max_attempts: total number of calls to make before giving up;
            must be at least 1.
        delay: seconds to sleep between failed attempts.

    Returns:
        A decorator. The wrapped function returns the first successful
        result and re-raises the last exception once every attempt failed.

    Raises:
        ValueError: if max_attempts is less than 1 (previously the wrapped
            function was silently never called and None was returned).
    """
    import time
    from functools import wraps

    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    def decorator(func):
        @wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == max_attempts:
                        raise
                    # No sleep after the final attempt: it re-raises above.
                    time.sleep(delay)
        return wrapper
    return decorator
@retry(max_attempts=5, delay=2)
def unreliable_function():
    """Succeed about 30% of the time; otherwise raise so retry() tries again."""
    import random
    if random.random() >= 0.7:
        return "Success!"
    raise Exception("Random failure")
Class-based Decorators
class RateLimiter:
    """Decorator that allows at most max_calls calls per sliding window.

    Args:
        max_calls: number of calls permitted inside one window.
        window: window length in seconds.

    The call history belongs to the RateLimiter instance, so every function
    decorated with the same instance shares one budget.
    """

    def __init__(self, max_calls=10, window=60):
        self.max_calls = max_calls
        self.window = window
        self.calls = []  # monotonic timestamps of the calls still in the window

    def __call__(self, func):
        import time
        from functools import wraps

        @wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            # A monotonic clock keeps the window correct even when the
            # system wall clock is adjusted.
            now = time.monotonic()
            self.calls = [stamp for stamp in self.calls if now - stamp < self.window]
            if len(self.calls) >= self.max_calls:
                raise Exception("Rate limit exceeded")
            self.calls.append(now)
            return func(*args, **kwargs)
        return wrapper
@RateLimiter(max_calls=5, window=10)
def api_call():
    """Return a canned response; limited to five calls per ten-second window."""
    response = "API response"
    return response
Property Factories
def typed_property(expected_type):
    """Build a decorator that turns a method into a type-checked property.

    The value is stored on the instance under "_<method name>". Assigning a
    value that is not an instance of expected_type raises TypeError.
    """
    def property_decorator(func):
        storage = f"_{func.__name__}"

        def fget(self):
            return getattr(self, storage)

        def fset(self, value):
            if isinstance(value, expected_type):
                setattr(self, storage, value)
                return
            raise TypeError(f"Expected {expected_type.__name__}, got {type(value).__name__}")

        return property(fget, fset)
    return property_decorator
class Person:
    """Example class whose name and age are type-checked on assignment."""

    # The method bodies are unused: typed_property only reads each name.
    @typed_property(str)
    def name(self):
        pass

    @typed_property(int)
    def age(self):
        pass


person = Person()
person.name = "Alice"
person.age = 30
# person.age = "thirty"  # TypeError
Metaclass for Automatic Registration
class RegisteredMeta(type):
    """Metaclass that records every class it creates in `registry`, keyed by name."""

    registry = {}

    def __new__(mcs, name, bases, dct):
        created = super().__new__(mcs, name, bases, dct)
        # A later class with the same name replaces the earlier entry.
        mcs.registry[name] = created
        return created
class Plugin(metaclass=RegisteredMeta):
    """Base class; it and each subclass are registered by RegisteredMeta."""


class DatabasePlugin(Plugin):
    """Example plugin exposing connect()."""

    def connect(self):
        return "Database connected"


class CachePlugin(Plugin):
    """Example plugin exposing get(key)."""

    def get(self, key):
        return f"Value for {key}"


# Access all registered plugins
print(RegisteredMeta.registry)
# {'Plugin': <class '__main__.Plugin'>, 'DatabasePlugin': ...}
Functional Programming Concepts
Higher-Order Functions
from functools import partial, reduce
# Partial application
def multiply(x, y):
    """Return the product of x and y."""
    return x * y


# partial() fixes the first positional argument of multiply
double, triple = (partial(multiply, factor) for factor in (2, 3))
print(double(5))  # 10
print(triple(5))  # 15
# Reduce: fold the list into one value from left to right
numbers = [1, 2, 3, 4, 5]
sum_result = reduce(lambda acc, item: acc + item, numbers)
product = reduce(lambda acc, item: acc * item, numbers)
Currying
def curry(func):
    """Return a curried version of func.

    Arguments are accumulated across calls; func runs once the number of
    positional plus keyword arguments reaches func.__code__.co_argcount.
    """
    def curried(*args, **kwargs):
        supplied = len(args) + len(kwargs)
        if supplied < func.__code__.co_argcount:
            # Not enough yet: hand back a collector that merges in more.
            return lambda *more_args, **more_kwargs: curried(*(args + more_args), **{**kwargs, **more_kwargs})
        return func(*args, **kwargs)
    return curried
@curry
def add_three(x, y, z):
    """Return x + y + z; curried, so the arguments may arrive in stages."""
    total = x + y + z
    return total


# Usage: one argument per call
add_1_2 = add_three(1)(2)
result = add_1_2(3)  # 6
# Or partial application: supply the remaining arguments together
add_1 = add_three(1)
result = add_1(2, 3)  # 6
Immutable Data Structures
from collections import namedtuple
from dataclasses import dataclass, replace
# Named tuples (immutable)
Point = namedtuple('Point', 'x y')
p1 = Point(1, 2)
# _replace returns a new tuple; p1 itself is unchanged
p2 = p1._replace(x=3)  # Point(3, 2)


# Frozen dataclasses
@dataclass(frozen=True)
class ImmutablePerson:
    """Person record whose fields cannot be reassigned after creation."""

    name: str
    age: int


person = ImmutablePerson("Alice", 30)
# person.age = 31  # FrozenInstanceError
# replace() builds a modified copy and leaves the original untouched
new_person = replace(person, age=31)
Monads (Simple Implementation)
class Maybe:
    """Container for a value that may be absent (None)."""

    def __init__(self, value):
        self.value = value

    def bind(self, func):
        """Apply func to the value and wrap the result; skip func when the value is None."""
        return Maybe(None) if self.value is None else Maybe(func(self.value))

    def __bool__(self):
        # Truthy whenever a value is present, even a falsy one such as 0.
        return self.value is not None

    def __repr__(self):
        return f"Maybe({self.value})"
# Usage
def safe_divide(x, y):
    """Return x / y, or None when y equals zero."""
    if y == 0:
        return None
    return x / y
def safe_sqrt(x):
    """Return the square root of x, or None when x is not >= 0."""
    if x >= 0:
        return x ** 0.5
    return None
# Chain two steps; a None from either step short-circuits the rest
rooted = Maybe(16).bind(lambda x: safe_sqrt(x))
result = rooted.bind(lambda x: safe_divide(x, 2))
print(result)  # Maybe(2.0)
Performance Optimization Techniques
Caching and Memoization
from functools import lru_cache, cache
import time
@lru_cache(maxsize=128)
def fibonacci(n):
    """Return the n-th Fibonacci number; values of n below 2 are returned as-is."""
    return n if n < 2 else fibonacci(n-1) + fibonacci(n-2)
# Python 3.9+ simple cache
@cache
def expensive_computation(x):
    """Return x squared after a one-second delay; results are cached per argument."""
    time.sleep(1)  # Simulate expensive operation
    squared = x ** 2
    return squared
# Custom cache implementation
class MemoizedFunction:
    """Callable wrapper that caches results by call arguments.

    Args:
        func: the function to memoize. All argument values must be hashable.

    Attributes:
        func: the wrapped function.
        cache: dict of results. Positional-only calls are keyed by the args
            tuple; calls with keyword arguments extend that tuple with a
            private marker followed by the sorted keyword items.
    """

    # Separates positional from keyword parts of a cache key.
    _KWARGS_MARK = object()

    def __init__(self, func):
        from functools import update_wrapper

        # Copy name/docstring first so the attributes set below win if the
        # function's own __dict__ happens to contain the same names.
        update_wrapper(self, func)
        self.func = func
        self.cache = {}

    def __call__(self, *args, **kwargs):
        key = args
        if kwargs:
            # Keyword names are unique strings, so sorting never compares values.
            key += (self._KWARGS_MARK,) + tuple(sorted(kwargs.items()))
        try:
            return self.cache[key]
        except KeyError:
            result = self.cache[key] = self.func(*args, **kwargs)
            return result
@MemoizedFunction
def slow_function(x, y):
    """Return x + y after a one-second delay (memoized by the decorator)."""
    time.sleep(1)
    total = x + y
    return total
Profiling and Optimization
import cProfile
import pstats
from functools import wraps
def profile_function(func):
    """Decorator that profiles each call with cProfile.

    After a successful call, the ten entries with the highest cumulative
    time are printed and the wrapped function's result is returned. If the
    function raises, the exception propagates and no statistics are printed.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        pr = cProfile.Profile()
        pr.enable()
        try:
            result = func(*args, **kwargs)
        finally:
            # Always stop profiling: a profiler left enabled after an
            # exception keeps collecting and can block the next enable().
            pr.disable()
        stats = pstats.Stats(pr)
        stats.sort_stats('cumulative')
        stats.print_stats(10)
        return result
    return wrapper
@profile_function
def computational_task():
    """Return the sum of x**2 for x in range(100000)."""
    total = 0
    for x in range(100000):
        total += x**2
    return total
Efficient Data Processing
# Generator for memory efficiency
def process_large_file(filename):
    """Lazily yield each line of filename, stripped and upper-cased."""
    with open(filename, 'r') as source:
        # The file stays open only while the generator is being consumed.
        yield from (line.strip().upper() for line in source)
# Use itertools for efficient iteration
from itertools import islice, chain, compress
def chunked(iterable, n):
    """Yield lists of up to n consecutive items from iterable.

    The final chunk may be shorter; an exhausted iterable yields nothing.
    """
    source = iter(iterable)
    # Two-argument iter(): call the lambda repeatedly until it returns an
    # empty list, which happens once the source is exhausted.
    yield from iter(lambda: list(islice(source, n)), [])
# Process in chunks of ten items
for chunk in chunked(range(1000), 10):
    pass  # Process chunk
Real-world Examples
Web Scraping with Async
import asyncio
import aiohttp
from bs4 import BeautifulSoup
async def fetch_url(session, url):
    """Request url through session and return the response body as text."""
    async with session.get(url) as response:
        body = await response.text()
    return body
async def scrape_urls(urls):
    """Fetch every URL in urls concurrently over one shared client session.

    Returns the gathered response bodies.
    """
    async with aiohttp.ClientSession() as session:
        pending = []
        for url in urls:
            pending.append(fetch_url(session, url))
        results = await asyncio.gather(*pending)
    return results
# Usage
urls = ['http://example.com', 'http://google.com']
# asyncio.run drives the coroutine to completion and returns its result
scrape_job = scrape_urls(urls)
results = asyncio.run(scrape_job)
Data Pipeline with Generators
def read_csv_lines(filename):
    """Lazily yield each row of the CSV file as a list of field strings.

    Rows are parsed with the csv module, so quoted fields may contain
    commas (a plain split(',') would cut such fields apart). A blank line
    yields an empty list.
    """
    import csv

    # newline='' lets the csv module handle line endings itself, as the
    # csv documentation requires.
    with open(filename, 'r', newline='') as f:
        yield from csv.reader(f)
def filter_valid_records(records):
    """Yield only records that have at least three fields and a non-empty first field."""
    yield from (
        record for record in records
        if len(record) >= 3 and record[0]  # Has ID
    )
def transform_records(records):
    """Yield one dict per record with the keys 'id', 'name' and 'value'.

    'value' is the third field converted to float, or 0.0 when that field
    is empty.
    """
    for record in records:
        raw_value = record[2]
        yield dict(
            id=record[0],
            name=record[1],
            value=float(raw_value) if raw_value else 0.0,
        )
# Pipeline: each stage is a generator, so nothing runs until iteration starts
raw_rows = read_csv_lines('data.csv')
valid_rows = filter_valid_records(raw_rows)
pipeline = transform_records(valid_rows)
# Process one record at a time
for record in pipeline:
    print(record)
Custom ORM-like Implementation
class Model:
    """Base record type whose attributes are supplied as keyword arguments."""

    def __init__(self, **kwargs):
        for attribute, value in kwargs.items():
            setattr(self, attribute, value)

    @classmethod
    def from_dict(cls, data):
        """Create an instance using the items of data as keyword arguments."""
        return cls(**data)

    def to_dict(self):
        """Return the instance attributes whose names do not start with '_'."""
        exported = {}
        for attribute, value in self.__dict__.items():
            if not attribute.startswith('_'):
                exported[attribute] = value
        return exported
class QuerySet:
    """In-memory collection of model instances with simple equality filtering."""

    def __init__(self, model_class, data):
        self.model_class = model_class
        self.data = data

    def filter(self, **kwargs):
        """Return a new QuerySet of the items whose attributes equal every keyword value.

        An attribute missing from an item is compared as None.
        """
        def matches(item):
            return all(getattr(item, key, None) == value for key, value in kwargs.items())

        return QuerySet(self.model_class, [item for item in self.data if matches(item)])

    def first(self):
        """Return the first item, or None when the set is empty."""
        if self.data:
            return self.data[0]
        return None

    def all(self):
        """Return the items as a new list."""
        return list(self.data)
class User(Model):
    """Example model backed by three hard-coded rows."""

    @classmethod
    def objects(cls):
        """Return a QuerySet over the sample users."""
        # Simulate database data
        rows = (
            (1, "Alice", 30),
            (2, "Bob", 25),
            (3, "Charlie", 35),
        )
        data = [cls(id=row_id, name=name, age=age) for row_id, name, age in rows]
        return QuerySet(cls, data)
# Usage
all_users = User.objects()
users = all_users.filter(age=30)
alice = users.first()
print(alice.name)  # Alice
This comprehensive cheatsheet covers the advanced Python features you'll encounter in professional development. Each section includes practical examples and real-world scenarios to help you understand when and how to use these powerful features effectively.