Master reading, writing, and managing files in Python
Working with files is essential for any real-world application. Whether you're processing data, saving user preferences, or generating reports, file operations are fundamental to persistent data storage.
Python makes file handling straightforward and safe with built-in functions and context managers. You'll learn to read, write, and manipulate files with confidence.
# Opening a file manually (you must close it yourself - not recommended)
handle = open('example.txt', 'r')  # 'r' = read mode
text = handle.read()
handle.close()  # Easy to forget; an unclosed handle leaks the file descriptor

# Better: a with-block closes the file for you (recommended)
with open('example.txt', 'r') as handle:
    text = handle.read()
    print(text)
# The file is closed automatically when execution leaves the with-block
# File modes
# 'r' - Read (default) - Error if file doesn't exist
# 'w' - Write - Creates new file or overwrites existing
# 'a' - Append - Adds to end of file
# 'x' - Exclusive create - Error if file exists
# 'r+' - Read and write
# 'b' - Binary mode (e.g., 'rb', 'wb')
# 't' - Text mode (default)
Always open files with the `with` statement. It automatically closes files even if errors occur, preventing resource leaks.
# Slurp the whole file into one string
with open('data.txt', 'r') as fh:
    text = fh.read()
    print(text)
# Read the file into a list of lines (each line keeps its trailing \n)
with open('data.txt', 'r') as fh:
    all_lines = fh.readlines()
    for current in all_lines:
        print(current.strip())  # strip() drops the newline
# Best approach for large files: stream one line at a time
with open('large_file.txt', 'r') as fh:
    for current in fh:  # the file object itself is a line iterator
        print(current.strip())
# Pull individual lines one at a time with readline()
with open('data.txt', 'r') as fh:
    first = fh.readline()
    second = fh.readline()
    print(f"First: {first.strip()}")
    print(f"Second: {second.strip()}")
# Chunked reads keep memory bounded on very large files
with open('huge_file.txt', 'r') as fh:
    # The walrus operator loops until read() returns the empty string (EOF)
    while chunk := fh.read(1024):  # 1 KB per read
        process_chunk(chunk)
def analyze_log_file(filename):
    """Scan a log file, print each ERROR line, and summarize counts.

    Args:
        filename: Path to the log file to analyze.

    Returns:
        Tuple of (error_count, warning_count). Returns (0, 0) when the
        file is missing or unreadable (a message is printed instead).
    """
    error_count = 0
    warning_count = 0
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            # Start at 1 so reported numbers match editor line numbers
            for line_num, line in enumerate(file, 1):
                if 'ERROR' in line:
                    error_count += 1
                    print(f"Line {line_num}: {line.strip()}")
                elif 'WARNING' in line:
                    warning_count += 1
        print("\nSummary:")
        print(f"Errors: {error_count}")
        print(f"Warnings: {warning_count}")
    except FileNotFoundError:
        # Fixed: message placeholder was garbled; report the actual path
        print(f"Error: {filename} not found")
    except PermissionError:
        print(f"Error: No permission to read {filename}")
    return error_count, warning_count

# Usage (missing file is handled gracefully inside the function)
analyze_log_file('app.log')
# 'w' mode truncates: any existing content is replaced
with open('output.txt', 'w') as out:
    out.write("Hello, World!\n")
    out.write("Python is awesome!\n")

# writelines() emits a sequence of strings (it adds no newlines for you)
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open('output.txt', 'w') as out:
    out.writelines(lines)

# 'a' mode appends at the end instead of truncating
with open('output.txt', 'a') as out:
    out.write("This is appended\n")

# print() can target a file through its file= argument
with open('output.txt', 'w') as out:
    print("Hello", "World", file=out)  # print supplies the space and newline
def generate_report(data, filename='report.txt'):
    """Generate a formatted plain-text sales report.

    Args:
        data: Mapping of item name -> numeric amount.
        filename: Destination path (overwritten if it already exists).

    Returns:
        The grand total of all amounts (also written in the footer).
    """
    with open(filename, 'w', encoding='utf-8') as file:
        # Header
        file.write("=" * 50 + "\n")
        file.write("SALES REPORT\n")
        file.write("=" * 50 + "\n\n")
        # Body: left-aligned item name, right-aligned 2-decimal amount
        total = 0
        for item, amount in data.items():
            file.write(f"{item:<30} ${amount:>8.2f}\n")
            total += amount
        # Footer
        file.write("\n" + "-" * 50 + "\n")
        file.write(f"{'TOTAL':<30} ${total:>8.2f}\n")
        file.write("=" * 50 + "\n")
    # Fixed: placeholder was garbled; report the actual destination path
    print(f"Report generated: {filename}")
    return total

# Usage
sales_data = {
    'Product A': 1250.50,
    'Product B': 890.75,
    'Product C': 2100.00
}
generate_report(sales_data)
import os
from pathlib import Path

# --- os.path: the traditional, string-based API ---
cwd = os.getcwd()
joined = os.path.join(cwd, 'data', 'file.txt')
print(f"File path: {joined}")

# Existence check
if os.path.exists('data.txt'):
    print("File exists")

# Size is only meaningful for regular files, so check first
if os.path.isfile('data.txt'):
    nbytes = os.path.getsize('data.txt')
    print(f"File size: {nbytes} bytes")

# Decompose a path into its pieces
directory, filename = os.path.split('/home/user/data.txt')
name, extension = os.path.splitext(filename)

# --- pathlib: the modern, object-oriented API (recommended) ---
path = Path('data') / 'subdir' / 'file.txt'  # '/' joins portably on every OS
print(path)

if path.exists():
    print(f"Size: {path.stat().st_size}")
    print(f"Name: {path.name}")
    print(f"Extension: {path.suffix}")
    print(f"Parent: {path.parent}")

# mkdir -p equivalent: create intermediate dirs, tolerate existing ones
Path('data/output').mkdir(parents=True, exist_ok=True)

# Glob for files matching a pattern
for entry in Path('data').glob('*.txt'):
    print(entry)
import csv

# Row-oriented reading: each row arrives as a list of strings
with open('data.csv', 'r') as fh:
    reader = csv.reader(fh)
    header = next(reader)  # consume the header row before the data rows
    for record in reader:
        print(record)

# Dict-oriented reading: the header row supplies the keys
with open('data.csv', 'r') as fh:
    reader = csv.DictReader(fh)
    for record in reader:
        print(record)
        print(f"Name: {record['name']}, Age: {record['age']}")
# Row-oriented writing; newline='' stops csv from doubling line endings
table = [
    ['Name', 'Age', 'City'],
    ['Alice', 30, 'NYC'],
    ['Bob', 25, 'LA'],
    ['Charlie', 35, 'Chicago'],
]
with open('output.csv', 'w', newline='') as fh:
    csv.writer(fh).writerows(table)

# Dict-oriented writing: fieldnames fixes the column order
records = [
    {'name': 'Alice', 'age': 30, 'city': 'NYC'},
    {'name': 'Bob', 'age': 25, 'city': 'LA'},
]
with open('output.csv', 'w', newline='') as fh:
    writer = csv.DictWriter(fh, fieldnames=['name', 'age', 'city'])
    writer.writeheader()
    writer.writerows(records)
import csv

def process_sales_csv(input_file, output_file):
    """Aggregate sales by region from a CSV and write a summary CSV.

    The input must have 'region' and 'amount' columns. The output has
    columns Region / Total Sales ($X,XXX.XX), sorted by region name.

    Args:
        input_file: Path to the source CSV.
        output_file: Path for the summary CSV (overwritten).

    Returns:
        Dict mapping region -> total sales as a float.
    """
    sales_by_region = {}
    # Read and aggregate; newline='' is the csv-module recommendation
    with open(input_file, 'r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        for row in reader:
            region = row['region']
            amount = float(row['amount'])
            sales_by_region[region] = sales_by_region.get(region, 0) + amount
    # Write summary
    with open(output_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Region', 'Total Sales'])
        for region, total in sorted(sales_by_region.items()):
            writer.writerow([region, f'${total:,.2f}'])
    print(f"Summary written to {output_file}")
    return sales_by_region

# Usage (guarded: importing this module must not require sales.csv)
if __name__ == '__main__':
    process_sales_csv('sales.csv', 'summary.csv')
import json

# json.load parses a file object straight into Python objects
with open('data.json', 'r') as fh:
    payload = json.load(fh)
    print(payload)
# Serialize a Python dict out to a JSON file
data = {
    'name': 'Alice',
    'age': 30,
    'skills': ['Python', 'JavaScript', 'SQL'],
    'active': True
}
with open('output.json', 'w') as fh:
    json.dump(data, fh, indent=4)  # indent=4 pretty-prints the output

# String round-trip: dumps -> str, loads -> Python object
json_string = json.dumps(data, indent=2)
parsed_data = json.loads(json_string)
# Practical example: persisting application configuration as JSON
config = {
    'database': {
        'host': 'localhost',
        'port': 5432,
        'name': 'mydb'
    },
    'api': {
        'key': 'secret123',
        'timeout': 30
    }
}

# Save the config, then reload it to prove the round trip works
with open('config.json', 'w') as fh:
    json.dump(config, fh, indent=4)
with open('config.json', 'r') as fh:
    loaded_config = json.load(fh)
db_host = loaded_config['database']['host']
import json
from datetime import datetime
class UserDataManager:
    """Persist a simple username -> profile mapping in a JSON file."""

    def __init__(self, filename='users.json'):
        # Backing JSON file path; the table is loaded eagerly so reads
        # never touch the disk afterwards.
        self.filename = filename
        self.users = self.load_users()

    def load_users(self):
        """Load users from the JSON file.

        Returns:
            The stored dict, or an empty dict when the file is missing
            or contains invalid JSON.
        """
        try:
            with open(self.filename, 'r', encoding='utf-8') as file:
                return json.load(file)
        except FileNotFoundError:
            return {}
        except json.JSONDecodeError:
            # Corrupt or empty file: start fresh instead of crashing on startup
            return {}

    def save_users(self):
        """Write the in-memory user table back to disk, pretty-printed."""
        with open(self.filename, 'w', encoding='utf-8') as file:
            json.dump(self.users, file, indent=4)

    def add_user(self, username, email):
        """Add (or overwrite) a user record and persist it immediately."""
        self.users[username] = {
            'email': email,
            'created': datetime.now().isoformat(),
            'active': True
        }
        self.save_users()

    def get_user(self, username):
        """Return the user's record dict, or None if unknown."""
        return self.users.get(username)

    def list_users(self):
        """Print 'username: email' for every stored user."""
        for username, data in self.users.items():
            print(f"{username}: {data['email']}")
# Usage: build a manager, register two users, then show the roster
user_store = UserDataManager()
for uname, mail in [('alice', 'alice@example.com'), ('bob', 'bob@example.com')]:
    user_store.add_user(uname, mail)
user_store.list_users()
Always pass encoding='utf-8' when opening text files, so behavior does not depend on the platform's default encoding.
import shutil
from pathlib import Path
from datetime import datetime
def backup_files(source_dir, backup_dir):
    """Copy every regular file in source_dir into a timestamped backup dir.

    Creates backup_dir/backup_YYYYMMDD_HHMMSS and copies each top-level
    file into it with metadata preserved. Subdirectories are not recursed.

    Args:
        source_dir: Directory whose files are backed up.
        backup_dir: Parent directory for the new backup folder.

    Returns:
        Path of the created backup directory.
    """
    source = Path(source_dir)
    # Timestamped folder name so repeated runs never overwrite each other
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup = Path(backup_dir) / f"backup_{timestamp}"
    backup.mkdir(parents=True, exist_ok=True)
    # Copy all top-level files
    file_count = 0
    for file_path in source.glob('*'):
        if file_path.is_file():
            # copy2 preserves timestamps and permissions, unlike copy
            shutil.copy2(file_path, backup / file_path.name)
            file_count += 1
    print(f"✅ Backed up {file_count} files to {backup}")
    return backup

# Usage (guarded: importing this module should not create directories)
if __name__ == '__main__':
    backup_files('data', 'backups')
With file I/O mastered, you can now read and write text files safely with `with`, process CSV and JSON data, manage paths using pathlib, and build practical tools like log analyzers and backup scripts.
Test your understanding of file I/O!