Master reading, writing, and managing files in Python
Working with files is essential for any real-world application. Whether you're processing data, saving user preferences, or generating reports, file operations are fundamental to persistent data storage.
Python makes file handling straightforward and safe with built-in functions and context managers. You'll learn to read, write, and manipulate files with confidence.
# Opening a file manually (you must close it yourself - not recommended)
handle = open('example.txt', 'r')  # 'r' = read mode
text = handle.read()
handle.close()  # Easy to forget; an unclosed handle leaks the file descriptor

# Better: a with-block closes the file for you (recommended)
with open('example.txt', 'r') as handle:
    text = handle.read()
    print(text)
# The file is closed automatically when execution leaves the with-block
# File modes
# 'r' - Read (default) - Error if file doesn't exist
# 'w' - Write - Creates new file or overwrites existing
# 'a' - Append - Adds to end of file
# 'x' - Exclusive create - Error if file exists
# 'r+' - Read and write
# 'b' - Binary mode (e.g., 'rb', 'wb')
# 't' - Text mode (default)
Always open files with the `with` statement. It automatically closes files even if errors occur, preventing resource leaks.
# Slurp the whole file into one string
with open('data.txt', 'r') as fh:
    text = fh.read()
    print(text)
# Read the file into a list of lines (each line keeps its trailing \n)
with open('data.txt', 'r') as fh:
    all_lines = fh.readlines()
    for current in all_lines:
        print(current.strip())  # strip() drops the newline
# Best approach for large files: stream one line at a time
with open('large_file.txt', 'r') as fh:
    for current in fh:  # the file object itself is a line iterator
        print(current.strip())
# Pull individual lines one at a time with readline()
with open('data.txt', 'r') as fh:
    first = fh.readline()
    second = fh.readline()
    print(f"First: {first.strip()}")
    print(f"Second: {second.strip()}")
# Chunked reads keep memory bounded on very large files
with open('huge_file.txt', 'r') as fh:
    # The walrus operator loops until read() returns the empty string (EOF)
    while chunk := fh.read(1024):  # 1 KB per read
        process_chunk(chunk)
def analyze_log_file(filename):
    """Scan a log file, print each ERROR line, and summarize counts.

    Args:
        filename: Path to the log file to analyze.

    Returns:
        Tuple of (error_count, warning_count). Returns (0, 0) when the
        file is missing or unreadable (a message is printed instead).
    """
    error_count = 0
    warning_count = 0
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            # Start at 1 so reported numbers match editor line numbers
            for line_num, line in enumerate(file, 1):
                if 'ERROR' in line:
                    error_count += 1
                    print(f"Line {line_num}: {line.strip()}")
                elif 'WARNING' in line:
                    warning_count += 1
        print("\nSummary:")
        print(f"Errors: {error_count}")
        print(f"Warnings: {warning_count}")
    except FileNotFoundError:
        # Fixed: message placeholder was garbled; report the actual path
        print(f"Error: {filename} not found")
    except PermissionError:
        print(f"Error: No permission to read {filename}")
    return error_count, warning_count

# Usage (missing file is handled gracefully inside the function)
analyze_log_file('app.log')
# 'w' mode truncates: any existing content is replaced
with open('output.txt', 'w') as out:
    out.write("Hello, World!\n")
    out.write("Python is awesome!\n")

# writelines() emits a sequence of strings (it adds no newlines for you)
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open('output.txt', 'w') as out:
    out.writelines(lines)

# 'a' mode appends at the end instead of truncating
with open('output.txt', 'a') as out:
    out.write("This is appended\n")

# print() can target a file through its file= argument
with open('output.txt', 'w') as out:
    print("Hello", "World", file=out)  # print supplies the space and newline
def generate_report(data, filename='report.txt'):
    """Generate a formatted plain-text sales report.

    Args:
        data: Mapping of item name -> numeric amount.
        filename: Destination path (overwritten if it already exists).

    Returns:
        The grand total of all amounts (also written in the footer).
    """
    with open(filename, 'w', encoding='utf-8') as file:
        # Header
        file.write("=" * 50 + "\n")
        file.write("SALES REPORT\n")
        file.write("=" * 50 + "\n\n")
        # Body: left-aligned item name, right-aligned 2-decimal amount
        total = 0
        for item, amount in data.items():
            file.write(f"{item:<30} ${amount:>8.2f}\n")
            total += amount
        # Footer
        file.write("\n" + "-" * 50 + "\n")
        file.write(f"{'TOTAL':<30} ${total:>8.2f}\n")
        file.write("=" * 50 + "\n")
    # Fixed: placeholder was garbled; report the actual destination path
    print(f"Report generated: {filename}")
    return total

# Usage
sales_data = {
    'Product A': 1250.50,
    'Product B': 890.75,
    'Product C': 2100.00
}
generate_report(sales_data)
import os
from pathlib import Path

# --- os.path: the traditional, string-based API ---
cwd = os.getcwd()
joined = os.path.join(cwd, 'data', 'file.txt')
print(f"File path: {joined}")

# Existence check
if os.path.exists('data.txt'):
    print("File exists")

# Size is only meaningful for regular files, so check first
if os.path.isfile('data.txt'):
    nbytes = os.path.getsize('data.txt')
    print(f"File size: {nbytes} bytes")

# Decompose a path into its pieces
directory, filename = os.path.split('/home/user/data.txt')
name, extension = os.path.splitext(filename)

# --- pathlib: the modern, object-oriented API (recommended) ---
path = Path('data') / 'subdir' / 'file.txt'  # '/' joins portably on every OS
print(path)

if path.exists():
    print(f"Size: {path.stat().st_size}")
    print(f"Name: {path.name}")
    print(f"Extension: {path.suffix}")
    print(f"Parent: {path.parent}")

# mkdir -p equivalent: create intermediate dirs, tolerate existing ones
Path('data/output').mkdir(parents=True, exist_ok=True)

# Glob for files matching a pattern
for entry in Path('data').glob('*.txt'):
    print(entry)
import csv

# Row-oriented reading: each row arrives as a list of strings
with open('data.csv', 'r') as fh:
    reader = csv.reader(fh)
    header = next(reader)  # consume the header row before the data rows
    for record in reader:
        print(record)

# Dict-oriented reading: the header row supplies the keys
with open('data.csv', 'r') as fh:
    reader = csv.DictReader(fh)
    for record in reader:
        print(record)
        print(f"Name: {record['name']}, Age: {record['age']}")
# Row-oriented writing; newline='' stops csv from doubling line endings
table = [
    ['Name', 'Age', 'City'],
    ['Alice', 30, 'NYC'],
    ['Bob', 25, 'LA'],
    ['Charlie', 35, 'Chicago'],
]
with open('output.csv', 'w', newline='') as fh:
    csv.writer(fh).writerows(table)

# Dict-oriented writing: fieldnames fixes the column order
records = [
    {'name': 'Alice', 'age': 30, 'city': 'NYC'},
    {'name': 'Bob', 'age': 25, 'city': 'LA'},
]
with open('output.csv', 'w', newline='') as fh:
    writer = csv.DictWriter(fh, fieldnames=['name', 'age', 'city'])
    writer.writeheader()
    writer.writerows(records)
import csv

def process_sales_csv(input_file, output_file):
    """Aggregate sales by region from a CSV and write a summary CSV.

    The input must have 'region' and 'amount' columns. The output has
    columns Region / Total Sales ($X,XXX.XX), sorted by region name.

    Args:
        input_file: Path to the source CSV.
        output_file: Path for the summary CSV (overwritten).

    Returns:
        Dict mapping region -> total sales as a float.
    """
    sales_by_region = {}
    # Read and aggregate; newline='' is the csv-module recommendation
    with open(input_file, 'r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        for row in reader:
            region = row['region']
            amount = float(row['amount'])
            sales_by_region[region] = sales_by_region.get(region, 0) + amount
    # Write summary
    with open(output_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Region', 'Total Sales'])
        for region, total in sorted(sales_by_region.items()):
            writer.writerow([region, f'${total:,.2f}'])
    print(f"Summary written to {output_file}")
    return sales_by_region

# Usage (guarded: importing this module must not require sales.csv)
if __name__ == '__main__':
    process_sales_csv('sales.csv', 'summary.csv')
import json

# json.load parses a file object straight into Python objects
with open('data.json', 'r') as fh:
    payload = json.load(fh)
    print(payload)
# Serialize a Python dict out to a JSON file
data = {
    'name': 'Alice',
    'age': 30,
    'skills': ['Python', 'JavaScript', 'SQL'],
    'active': True
}
with open('output.json', 'w') as fh:
    json.dump(data, fh, indent=4)  # indent=4 pretty-prints the output

# String round-trip: dumps -> str, loads -> Python object
json_string = json.dumps(data, indent=2)
parsed_data = json.loads(json_string)
# Practical example: persisting application configuration as JSON
config = {
    'database': {
        'host': 'localhost',
        'port': 5432,
        'name': 'mydb'
    },
    'api': {
        'key': 'secret123',
        'timeout': 30
    }
}

# Save the config, then reload it to prove the round trip works
with open('config.json', 'w') as fh:
    json.dump(config, fh, indent=4)
with open('config.json', 'r') as fh:
    loaded_config = json.load(fh)
db_host = loaded_config['database']['host']
import json
from datetime import datetime
class UserDataManager:
    """Persist a simple username -> profile mapping in a JSON file."""

    def __init__(self, filename='users.json'):
        # Backing JSON file path; the table is loaded eagerly so reads
        # never touch the disk afterwards.
        self.filename = filename
        self.users = self.load_users()

    def load_users(self):
        """Load users from the JSON file.

        Returns:
            The stored dict, or an empty dict when the file is missing
            or contains invalid JSON.
        """
        try:
            with open(self.filename, 'r', encoding='utf-8') as file:
                return json.load(file)
        except FileNotFoundError:
            return {}
        except json.JSONDecodeError:
            # Corrupt or empty file: start fresh instead of crashing on startup
            return {}

    def save_users(self):
        """Write the in-memory user table back to disk, pretty-printed."""
        with open(self.filename, 'w', encoding='utf-8') as file:
            json.dump(self.users, file, indent=4)

    def add_user(self, username, email):
        """Add (or overwrite) a user record and persist it immediately."""
        self.users[username] = {
            'email': email,
            'created': datetime.now().isoformat(),
            'active': True
        }
        self.save_users()

    def get_user(self, username):
        """Return the user's record dict, or None if unknown."""
        return self.users.get(username)

    def list_users(self):
        """Print 'username: email' for every stored user."""
        for username, data in self.users.items():
            print(f"{username}: {data['email']}")
# Usage: build a manager, register two users, then show the roster
user_store = UserDataManager()
for uname, mail in [('alice', 'alice@example.com'), ('bob', 'bob@example.com')]:
    user_store.add_user(uname, mail)
user_store.list_users()
Always pass encoding='utf-8' when opening text files, so behavior does not depend on the platform's default encoding.
import shutil
from pathlib import Path
from datetime import datetime
def backup_files(source_dir, backup_dir):
    """Copy every regular file in source_dir into a timestamped backup dir.

    Creates backup_dir/backup_YYYYMMDD_HHMMSS and copies each top-level
    file into it with metadata preserved. Subdirectories are not recursed.

    Args:
        source_dir: Directory whose files are backed up.
        backup_dir: Parent directory for the new backup folder.

    Returns:
        Path of the created backup directory.
    """
    source = Path(source_dir)
    # Timestamped folder name so repeated runs never overwrite each other
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup = Path(backup_dir) / f"backup_{timestamp}"
    backup.mkdir(parents=True, exist_ok=True)
    # Copy all top-level files
    file_count = 0
    for file_path in source.glob('*'):
        if file_path.is_file():
            # copy2 preserves timestamps and permissions, unlike copy
            shutil.copy2(file_path, backup / file_path.name)
            file_count += 1
    print(f"✅ Backed up {file_count} files to {backup}")
    return backup

# Usage (guarded: importing this module should not create directories)
if __name__ == '__main__':
    backup_files('data', 'backups')
With file I/O mastered, you can now read and write text files safely with `with`, process CSV and JSON data, manage paths using pathlib, and build practical tools like log analyzers and backup scripts.
Test your understanding of file I/O!