Skip to main content

argparse.FileType

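The FileType class is a factory for creating file-object types that can be passed as the type argument of ArgumentParser.add_argument(). When the argument is parsed, it opens the named file with the requested mode, buffer size, encoding, and error handler, and reports a file that cannot be opened as an ordinary command-line argument error. The returned file object stays open until your code closes it. Note that FileType is deprecated as of Python 3.14; in new code, consider accepting a path string and opening the file yourself after parsing.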

Quick Reference

Module: argparse
Type: Factory Class
Purpose: Create file objects from command-line arguments with automatic error handling
Documentation: argparse.FileType
Common Usage: File input/output arguments

Constructor

argparse.FileType(mode='r', bufsize=-1, encoding=None, errors=None)

Parameters:

  • mode (str): File opening mode ('r', 'w', 'a', 'rb', 'wb', etc.)
  • bufsize (int): Buffer size (-1 for default)
  • encoding (str): Text encoding (None for default)
  • errors (str): How encoding and decoding errors are handled ('strict', 'ignore', 'replace', etc.; None for the default)

Returns: A callable that takes a file name and returns the opened file object. The special name '-' yields sys.stdin for read modes and sys.stdout for write modes.

Basic Usage

Text Files

import argparse

# Declare one readable and one writable text-file argument. FileType opens
# the named file while the arguments are being parsed.
parser = argparse.ArgumentParser()
parser.add_argument('--input', type=argparse.FileType('r'))
parser.add_argument('--output', type=argparse.FileType('w'))

# Usage: python script.py --input data.txt --output result.txt
args = parser.parse_args(['--input', 'data.txt', '--output', 'result.txt'])

# Both files are already open at this point. The with-statement closes them
# when the block ends, even if read() or write() raises.
with args.input as infile, args.output as outfile:
    content = infile.read()
    outfile.write("Processed: " + content)

Binary Files

# Binary modes ('rb' / 'wb') are passed straight through to FileType.
parser = argparse.ArgumentParser()
parser.add_argument('--binary-input', type=argparse.FileType('rb'))
parser.add_argument('--binary-output', type=argparse.FileType('wb'))

args = parser.parse_args(['--binary-input', 'image.jpg',
                          '--binary-output', 'copy.jpg'])

# Copy the binary file. The with-statement closes both handles even if the
# copy fails part-way through.
with args.binary_input as src, args.binary_output as dst:
    data = src.read()
    dst.write(data)

Append Mode

# Mode 'a' opens the log for appending.
parser = argparse.ArgumentParser()
parser.add_argument('--log', type=argparse.FileType('a'))

args = parser.parse_args(['--log', 'application.log'])

# The with-statement closes the log even if the write fails.
with args.log as log:
    log.write("New log entry\n")

Advanced Usage

Encoding and Error Handling

import argparse

parser = argparse.ArgumentParser()

# UTF-8 encoding with error handling: errors='replace' substitutes a
# placeholder for undecodable bytes instead of raising.
parser.add_argument('--input',
                    type=argparse.FileType('r', encoding='utf-8', errors='replace'))

# Handle files with potentially bad encoding
parser.add_argument('--legacy-file',
                    type=argparse.FileType('r', encoding='latin1'))

args = parser.parse_args(['--input', 'unicode.txt',
                          '--legacy-file', 'old_file.txt'])

# Read with the encodings configured above. The with-statement closes both
# files even if one of the reads raises.
with args.input as infile, args.legacy_file as legacy:
    content = infile.read()
    legacy_content = legacy.read()

Standard Input/Output

import sys
import argparse

parser = argparse.ArgumentParser()

# Use '-' to represent stdin/stdout. With these defaults the script acts as
# a filter when no file names are given.
parser.add_argument('--input', type=argparse.FileType('r'), default='-')
parser.add_argument('--output', type=argparse.FileType('w'), default='-')

# Usage examples:
# python script.py                    # stdin -> stdout
# python script.py --input file.txt   # file.txt -> stdout
# echo "data" | python script.py      # stdin -> stdout

args = parser.parse_args()

try:
    # Process data
    for line in args.input:
        processed_line = line.upper()
        args.output.write(processed_line)
finally:
    # Only close real files. The check is by identity ("is not") because
    # the question is whether this is the very same stream object.
    if args.input is not sys.stdin:
        args.input.close()
    if args.output is not sys.stdout:
        args.output.close()

Practical Examples

1. File Processing Script

import argparse
import json
import csv

def create_file_processor():
    """Build the command-line parser for the file processing script.

    Returns:
        argparse.ArgumentParser: A parser with optional UTF-8 JSON/CSV input
        and output file options, plus an append-mode ``--log`` option whose
        default is ``process.log``.
    """
    cli = argparse.ArgumentParser(description="Process various file formats")

    # (flag, open mode, help text), listed in the order they are registered:
    # input files first, then output files.
    file_options = (
        ('--json-input', 'r', 'JSON input file'),
        ('--csv-input', 'r', 'CSV input file'),
        ('--json-output', 'w', 'JSON output file'),
        ('--csv-output', 'w', 'CSV output file'),
    )
    for flag, mode, help_text in file_options:
        opener = argparse.FileType(mode, encoding='utf-8')
        cli.add_argument(flag, type=opener, help=help_text)

    # Log file
    log_opener = argparse.FileType('a', encoding='utf-8')
    cli.add_argument('--log',
                     type=log_opener,
                     default='process.log',
                     help='Log file (default: process.log)')

    return cli

def process_files(args):
    """Process the JSON and/or CSV files carried by the parsed arguments.

    JSON input is loaded, every item gets a ``'processed': True`` entry, and
    the result is written to the JSON output if one was given. CSV input is
    loaded and copied to the CSV output if one was given. Progress is written
    to ``args.log``.

    Args:
        args: Namespace with the open file objects ``json_input``,
            ``csv_input``, ``json_output`` and ``csv_output`` (each may be
            ``None``) and ``log``.

    Raises:
        Exception: Any error raised during processing is written to the log
            and then re-raised.
    """
    import datetime
    import sys

    def now():
        # Take a fresh timestamp for every entry so the completion and error
        # lines carry the time they were written, not the start time.
        return datetime.datetime.now().isoformat()

    # Log start
    args.log.write(f"{now()}: Processing started\n")

    try:
        # Process JSON input
        if args.json_input:
            data = json.load(args.json_input)
            args.log.write(f"Loaded JSON with {len(data)} items\n")

            # Transform data (example)
            processed_data = [{**item, 'processed': True} for item in data]

            # Write JSON output
            if args.json_output:
                json.dump(processed_data, args.json_output, indent=2)
                args.log.write("JSON output written\n")

        # Process CSV input
        if args.csv_input:
            reader = csv.DictReader(args.csv_input)
            rows = list(reader)
            args.log.write(f"Loaded CSV with {len(rows)} rows\n")

            # Write CSV output. The header comes from the reader rather than
            # from the first row, so a file with a header line but no data
            # rows still produces a header.
            if args.csv_output and reader.fieldnames:
                writer = csv.DictWriter(args.csv_output,
                                        fieldnames=reader.fieldnames)
                writer.writeheader()
                writer.writerows(rows)
                args.log.write("CSV output written\n")

        args.log.write(f"{now()}: Processing completed successfully\n")

    except Exception as e:
        args.log.write(f"{now()}: Error - {e}\n")
        raise

    finally:
        # Close all file handles, but leave the interpreter's standard
        # streams open: those are what '-' on the command line maps to.
        for attr_name in ('json_input', 'csv_input',
                          'json_output', 'csv_output', 'log'):
            file_obj = getattr(args, attr_name, None)
            if file_obj is None or file_obj.closed:
                continue
            if file_obj is sys.stdin or file_obj is sys.stdout:
                continue
            file_obj.close()


# Usage
if __name__ == "__main__":
    parser = create_file_processor()
    args = parser.parse_args()
    process_files(args)

2. Configuration File Merger

import argparse
import configparser
import json

def create_config_merger():
    """Build the argument parser for the configuration merger.

    Returns:
        argparse.ArgumentParser: A parser accepting ``--ini-files`` (one or
        more), ``--json-files`` (zero or more), ``--output`` (defaults to
        ``'-'``) and ``--format`` (``json`` or ``ini``, defaults to ``json``).
    """
    merger = argparse.ArgumentParser(
        description="Merge multiple configuration files"
    )

    # Each entry is (flag, add_argument keyword settings), in registration
    # order: the input files first, then the output options.
    option_table = [
        ('--ini-files', dict(type=argparse.FileType('r'),
                             nargs='+',
                             help='INI configuration files to merge')),
        ('--json-files', dict(type=argparse.FileType('r', encoding='utf-8'),
                              nargs='*',
                              help='JSON configuration files to merge')),
        ('--output', dict(type=argparse.FileType('w', encoding='utf-8'),
                          default='-',
                          help='Output file (default: stdout)')),
        ('--format', dict(choices=['json', 'ini'],
                          default='json',
                          help='Output format')),
    ]
    for flag, settings in option_table:
        merger.add_argument(flag, **settings)

    return merger

def _deep_merge(target, source):
    """Merge *source* into *target* in place, recursing into nested dicts."""
    for key, value in source.items():
        if isinstance(value, dict) and isinstance(target.get(key), dict):
            _deep_merge(target[key], value)
        else:
            target[key] = value


def merge_configs(args):
    """Merge the given INI and JSON configuration files and write the result.

    INI files are merged first, section by section; JSON files are then
    deep-merged on top. Later files override earlier ones.

    Args:
        args: Namespace with ``ini_files`` and ``json_files`` (lists of open
            files, or ``None``), ``output`` (open writable file) and
            ``format`` (``'json'`` or ``'ini'``).
    """
    # This snippet's import list has no ``import sys``, so bring it in here
    # for the standard-stream checks in the cleanup below.
    import sys

    merged_config = {}
    ini_files = args.ini_files or []
    json_files = args.json_files or []

    try:
        # Process INI files
        for ini_file in ini_files:
            config = configparser.ConfigParser()
            config.read_file(ini_file)

            # Convert to dict
            for section in config.sections():
                merged_config.setdefault(section, {}).update(dict(config[section]))

        # Process JSON files
        for json_file in json_files:
            _deep_merge(merged_config, json.load(json_file))

        # Output merged configuration
        if args.format == 'json':
            json.dump(merged_config, args.output, indent=2)
        elif args.format == 'ini':
            config = configparser.ConfigParser()
            for section, options in merged_config.items():
                config.add_section(section)
                for option, value in options.items():
                    config.set(section, option, str(value))
            config.write(args.output)

    finally:
        # Close every input and the output, whether or not merging
        # succeeded. The standard streams (used for '-') are left open.
        for stream in (*ini_files, *json_files, args.output):
            if stream is sys.stdin or stream is sys.stdout:
                continue
            stream.close()


# Usage example
if __name__ == "__main__":
    parser = create_config_merger()
    args = parser.parse_args()
    merge_configs(args)

3. Log File Analyzer

import argparse
import re
from datetime import datetime
from collections import defaultdict, Counter

def create_log_analyzer():
    """Build the argument parser for the log analyzer.

    Returns:
        argparse.ArgumentParser: A parser taking one or more positional
        ``log_files``, optional ``--summary`` / ``--detailed`` report files,
        and the ``--level`` / ``--pattern`` filters.
    """
    analyzer = argparse.ArgumentParser(description="Analyze log files")

    # Input log files
    log_reader = argparse.FileType('r', encoding='utf-8', errors='replace')
    analyzer.add_argument('log_files',
                          type=log_reader,
                          nargs='+',
                          help='Log files to analyze')

    # Output reports: one writable UTF-8 file per report kind.
    report_options = (
        ('--summary', 'Summary report output file'),
        ('--detailed', 'Detailed report output file'),
    )
    for flag, description in report_options:
        analyzer.add_argument(flag,
                              type=argparse.FileType('w', encoding='utf-8'),
                              help=description)

    # Filters
    levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
    analyzer.add_argument('--level', choices=levels, help='Filter by log level')
    analyzer.add_argument('--pattern', help='Filter by regex pattern')

    return analyzer

# Compiled once at import time instead of being looked up for every line.
_LEVEL_RE = re.compile(r'(DEBUG|INFO|WARNING|ERROR|CRITICAL)')
_HOUR_RE = re.compile(r'(\d{4}-\d{2}-\d{2} \d{2}):')


def _scan_log(log_file, stats, level_filter, pattern, pattern_regex):
    """Fold every non-blank line of *log_file* into *stats*."""
    for line_num, line in enumerate(log_file, 1):
        line = line.strip()
        if not line:
            continue

        stats['total_lines'] += 1

        # Extract log level
        level_match = _LEVEL_RE.search(line)
        if level_match:
            level = level_match.group(1)

            # Apply level filter: a filtered-out line still counts towards
            # total_lines but contributes nothing else.
            if level_filter and level != level_filter:
                continue

            stats['by_level'][level] += 1

        # Extract timestamp and hour
        time_match = _HOUR_RE.search(line)
        if time_match:
            stats['by_hour'][time_match.group(1)] += 1

        # Apply pattern filter
        if pattern_regex and pattern_regex.search(line):
            stats['patterns'][pattern] += 1

        # Collect errors
        if 'ERROR' in line or 'CRITICAL' in line:
            stats['errors'].append({
                'file': log_file.name,
                'line': line_num,
                'content': line
            })


def _write_summary(report, stats, pattern):
    """Write the summary report; *pattern* is None when no filter was given."""
    report.write("Log Analysis Summary\n")
    report.write("===================\n\n")
    report.write(f"Total lines processed: {stats['total_lines']}\n\n")

    report.write("Log levels:\n")
    for level, count in stats['by_level'].most_common():
        report.write(f" {level}: {count}\n")

    report.write(f"\nTotal errors/critical: {len(stats['errors'])}\n")

    if pattern:
        report.write(f"\nPattern matches: {stats['patterns'][pattern]}\n")


def _write_detailed(report, stats):
    """Write the hourly distribution and the first ten collected errors."""
    report.write("Detailed Log Analysis\n")
    report.write("====================\n\n")

    # Hourly distribution
    report.write("Hourly distribution:\n")
    for hour in sorted(stats['by_hour']):
        report.write(f" {hour}: {stats['by_hour'][hour]}\n")

    # Error details
    errors = stats['errors']
    report.write(f"\nError details ({len(errors)} total):\n")
    for error in errors[:10]:  # Limit to first 10
        report.write(f" {error['file']}:{error['line']}: {error['content']}\n")

    if len(errors) > 10:
        report.write(f" ... and {len(errors) - 10} more errors\n")


def analyze_logs(args):
    """Analyze log files and generate the requested reports.

    Args:
        args: Namespace with ``log_files`` (list of open readable files, each
            with a ``name`` attribute), ``summary`` and ``detailed`` (open
            writable files or ``None``), ``level`` (level name or ``None``)
            and ``pattern`` (regex string or ``None``).

    Raises:
        Exception: Any error raised during analysis is reported on stderr and
            then re-raised.
    """
    import sys

    stats = {
        'total_lines': 0,
        'by_level': Counter(),
        'by_hour': Counter(),
        'errors': [],
        'patterns': defaultdict(int)
    }

    try:
        # Compile the regex pattern if provided. This happens inside the try
        # so an invalid pattern still goes through the cleanup below.
        pattern_regex = None
        if args.pattern:
            pattern_regex = re.compile(args.pattern, re.IGNORECASE)

        # Process each log file
        for log_file in args.log_files:
            _scan_log(log_file, stats, args.level, args.pattern, pattern_regex)

        # Generate summary report
        if args.summary:
            _write_summary(args.summary, stats,
                           args.pattern if pattern_regex else None)

        # Generate detailed report
        if args.detailed:
            _write_detailed(args.detailed, stats)

    except Exception as e:
        print(f"Error during analysis: {e}", file=sys.stderr)
        raise

    finally:
        # Close inputs and reports on success and on failure alike. The
        # standard streams (used for '-') are left open.
        for file_obj in (*args.log_files, args.summary, args.detailed):
            if file_obj is None or file_obj.closed:
                continue
            if file_obj is sys.stdin or file_obj is sys.stdout:
                continue
            file_obj.close()


# Usage example
if __name__ == "__main__":
    parser = create_log_analyzer()
    args = parser.parse_args()
    analyze_logs(args)

Error Handling

Custom FileType with Validation

import argparse
import os

class ValidatedFileType(argparse.FileType):
    """FileType that validates the named path before opening it.

    Args:
        mode: File opening mode, as for ``argparse.FileType``.
        check_exists: When true (the default), an argument opened in a read
            mode must name an existing path.
        max_size: Largest acceptable size in bytes for an existing file, or
            ``None`` for no limit. ``0`` is a real limit.
        **kwargs: Forwarded to ``argparse.FileType``.
    """

    def __init__(self, mode='r', *, check_exists=True, max_size=None, **kwargs):
        self.check_exists = check_exists
        self.max_size = max_size  # bytes
        super().__init__(mode, **kwargs)

    def __call__(self, string):
        """Validate *string* and return the opened file.

        Raises:
            argparse.ArgumentTypeError: If the file is required but missing,
                or is larger than ``max_size``.
        """
        # '-' stands for stdin/stdout rather than a path on disk, so there is
        # nothing to validate for it.
        if string != '-':
            exists = os.path.exists(string)

            # Check if file exists (for reading)
            if 'r' in self._mode and self.check_exists and not exists:
                raise argparse.ArgumentTypeError(f"File does not exist: {string}")

            # Check file size. Compare against None so that max_size=0 is
            # honoured as a limit.
            if self.max_size is not None and exists:
                size = os.path.getsize(string)
                if size > self.max_size:
                    raise argparse.ArgumentTypeError(
                        f"File too large: {size} bytes (max: {self.max_size})"
                    )

        # Call parent implementation
        return super().__call__(string)

# Usage
parser = argparse.ArgumentParser()
parser.add_argument('--input',
                    type=ValidatedFileType('r', max_size=1024 * 1024))  # 1MB max
parser.add_argument('--output',
                    type=ValidatedFileType('w', check_exists=False))

args = parser.parse_args(['--input', 'small_file.txt',
                          '--output', 'output.txt'])

# Validation ran during parse_args() and both files are open now, so they
# must be closed once the script is done with them.
args.input.close()
args.output.close()

Context Manager Integration

import argparse
from contextlib import contextmanager

class ContextFileType(argparse.FileType):
    """FileType variant whose result is wrapped in a FileContext."""

    def __call__(self, string):
        opened = super().__call__(string)
        # Hand back the context-manager wrapper rather than the bare file.
        return FileContext(opened)

class FileContext:
    """Context-manager wrapper around an open file object.

    Entering the context yields the wrapped file; leaving it closes the file
    unless it is one of the interpreter's standard streams. Attribute access
    and iteration are forwarded to the wrapped file.

    Args:
        file_obj: The open file object to wrap.
    """

    def __init__(self, file_obj):
        self.file_obj = file_obj

    def __enter__(self):
        return self.file_obj

    def __exit__(self, exc_type, exc_val, exc_tb):
        import sys

        # The standard streams are what '-' on the command line maps to.
        # Closing them would break later reads and prints, so skip them
        # (and their binary buffers).
        for stream in (sys.stdin, sys.stdout):
            if stream is None:
                continue
            if self.file_obj is stream:
                return
            if self.file_obj is getattr(stream, 'buffer', None):
                return

        if not self.file_obj.closed:
            self.file_obj.close()

    def __iter__(self):
        # Special methods are looked up on the class, so __getattr__ does
        # not cover iteration; forward it explicitly.
        return iter(self.file_obj)

    def __getattr__(self, name):
        # Only reached when normal lookup fails. If 'file_obj' itself is
        # missing (instance not initialised), fail instead of recursing.
        if name == 'file_obj':
            raise AttributeError(name)
        return getattr(self.file_obj, name)

# Usage with context manager
parser = argparse.ArgumentParser()
for flag, mode in (('--input', 'r'), ('--output', 'w')):
    parser.add_argument(flag, type=ContextFileType(mode))

args = parser.parse_args(['--input', 'data.txt', '--output', 'result.txt'])

# Automatic file closing with context managers
with args.input as infile:
    with args.output as outfile:
        data = infile.read()
        outfile.write(data.upper())
# Files are automatically closed here

Performance Considerations

Lazy File Opening

import argparse

class LazyFileType:
    """Argument type that defers opening the file until it is actually used.

    Args:
        mode: Mode later passed to ``open()``.
        **kwargs: Extra keyword arguments later passed to ``open()``.
    """

    def __init__(self, mode='r', **kwargs):
        self.mode = mode
        self.kwargs = kwargs

    def __call__(self, string):
        """Return a LazyFile for *string* without touching the file system."""
        return LazyFile(string, self.mode, **self.kwargs)

    def __repr__(self):
        extras = ''.join(f', {key}={value!r}' for key, value in self.kwargs.items())
        return f"{type(self).__name__}({self.mode!r}{extras})"

class LazyFile:
    """File-like object that opens its file on first use.

    Once ``close()`` has been called the object stays closed: further I/O
    raises ``ValueError`` instead of silently reopening the file, which in
    write mode would truncate what was just written.

    Args:
        filename: Path passed to ``open()`` on first use.
        mode: Mode passed to ``open()``.
        **kwargs: Extra keyword arguments passed to ``open()``.
    """

    def __init__(self, filename, mode, **kwargs):
        self.filename = filename
        self.mode = mode
        self.kwargs = kwargs
        self._file_obj = None
        self._closed = False

    @property
    def closed(self):
        """True once close() has been called; never opens the file."""
        return self._closed

    def _ensure_open(self):
        if self._closed:
            raise ValueError("I/O operation on closed file.")
        if self._file_obj is None:
            self._file_obj = open(self.filename, self.mode, **self.kwargs)

    def read(self, *args, **kwargs):
        self._ensure_open()
        return self._file_obj.read(*args, **kwargs)

    def write(self, *args, **kwargs):
        self._ensure_open()
        return self._file_obj.write(*args, **kwargs)

    def close(self):
        """Close the underlying file if it was opened; safe to call twice."""
        if self._file_obj is not None:
            self._file_obj.close()
            self._file_obj = None
        self._closed = True

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __iter__(self):
        # Special methods are looked up on the class, so __getattr__ does
        # not cover iteration; forward it explicitly.
        self._ensure_open()
        return iter(self._file_obj)

    def __getattr__(self, name):
        # Only reached when normal lookup fails. Private and dunder names are
        # never forwarded: probing for them (e.g. by copy or pickle) must not
        # open the file, and a missing '_file_obj' must not recurse.
        if name.startswith('_'):
            raise AttributeError(name)
        self._ensure_open()
        return getattr(self._file_obj, name)

# Usage - files only opened when actually used
parser = argparse.ArgumentParser()
parser.add_argument('--input', type=LazyFileType('r'))
parser.add_argument('--output', type=LazyFileType('w'))

args = parser.parse_args(['--input', 'data.txt', '--output', 'result.txt'])
# Files not opened yet

try:
    # File opened here on first read
    content = args.input.read()
    # File opened here on first write
    args.output.write(content)
finally:
    # Runs even if the read or write fails. close() is safe on a file that
    # was never opened.
    args.input.close()
    args.output.close()

Testing and Debugging

import unittest
import tempfile
import os
import argparse

class TestFileType(unittest.TestCase):
    """Test argparse FileType functionality against real temporary files."""

    def setUp(self):
        """Create a scratch directory holding one input file."""
        scratch = tempfile.TemporaryDirectory()
        # Registered immediately so the directory is removed even if the
        # rest of setUp fails (tearDown would not run in that case).
        self.addCleanup(scratch.cleanup)

        self.temp_dir = scratch.name
        self.input_file = os.path.join(self.temp_dir, 'input.txt')
        self.output_file = os.path.join(self.temp_dir, 'output.txt')

        # Create input file with test content
        with open(self.input_file, 'w') as f:
            f.write("Test content\n")

    def test_file_reading(self):
        """Test reading files with FileType."""
        parser = argparse.ArgumentParser()
        parser.add_argument('--input', type=argparse.FileType('r'))

        args = parser.parse_args(['--input', self.input_file])
        with args.input as infile:
            content = infile.read()

        self.assertEqual(content, "Test content\n")

    def test_file_writing(self):
        """Test writing files with FileType."""
        parser = argparse.ArgumentParser()
        parser.add_argument('--output', type=argparse.FileType('w'))

        args = parser.parse_args(['--output', self.output_file])
        with args.output as outfile:
            outfile.write("New content")

        # Verify content was written
        with open(self.output_file, 'r') as f:
            content = f.read()

        self.assertEqual(content, "New content")

    def test_nonexistent_file_error(self):
        """Test error handling for nonexistent files."""
        parser = argparse.ArgumentParser()
        parser.add_argument('--input', type=argparse.FileType('r'))

        # A path inside the scratch directory cannot exist by accident,
        # unlike a bare relative name resolved against the working directory.
        missing = os.path.join(self.temp_dir, 'nonexistent.txt')
        with self.assertRaises(SystemExit) as caught:
            parser.parse_args(['--input', missing])

        # argparse exits with status 2 on argument errors.
        self.assertEqual(caught.exception.code, 2)


if __name__ == '__main__':
    unittest.main()

See Also

  • argparse.ArgumentParser - Main parser class that uses FileType
  • argparse.Action - Custom actions for file handling
  • argparse.Namespace - Container for parsed file objects

Additional Resources

The FileType class provides a convenient way to handle file arguments in command-line applications, with automatic error handling and support for various file modes and encodings.