Updating Photo Metadata with ExifTool and Python
In this post, I’ll show you how I automated the process of updating photo metadata exported from Google Takeout. If you’re dealing with JSON files and media files from Google Photos, this guide will help you make sense of it all.
What You’ll Need
- Python (preferably Python 3.7 or higher).
- ExifTool for editing metadata in your media files:
- Download ExifTool from Phil Harvey’s website.
- Extract the `exiftool(-k).exe` file and rename it to `exiftool.exe`.
- Save it in a directory such as `C:\Tools\ExifTool`.
- My Python script (detailed below) to process and apply metadata.
Step 1: Install Python and Required Libraries
- Download and install Python from the official Python website.
- Install any required libraries. This script uses `subprocess` and `signal`, which are built into Python's standard library, so no extra installation is needed.
Step 2: Set Up ExifTool
- Download the ExifTool executable from here.
- Extract the ZIP file and rename the executable from `exiftool(-k).exe` to `exiftool.exe`.
- Move `exiftool.exe` to a location like `C:\Tools\ExifTool`.
- Confirm ExifTool is installed properly by running this command in the terminal:
C:\Tools\ExifTool\exiftool.exe -ver
Step 3: Download and Run the Script
Save the following Python script in a `.py` file (e.g., `process_google_takeout.py`):
import os
import json
import subprocess
from datetime import datetime
import logging
import signal
import sys
from collections import defaultdict
import time
# Configure logging: everything sent through log_message() below is written
# both to the console and to this log file in the working directory.
logging.basicConfig(
    filename='process_log_xmp.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s: %(message)s'
)
# Path to ExifTool
exiftool_path = r"C:\Tools\ExifTool\exiftool.exe"  # Update this path if needed
# File extensions to process (lower-case; compared against os.path.splitext results)
media_extensions = {".jpg", ".jpeg", ".png", ".heic", ".webp", ".gif", ".arw",
                    ".mp4", ".mov", ".mkv", ".mts", ".m2ts", ".avi", ".wmv", ".flv", ".webm", ".tiff"}
# Metrics accumulated over the whole run and reported by generate_summary()
metrics = {
    "total_files": 0,        # number of JSON sidecar files seen
    "skipped_files": 0,      # sidecars skipped (no match, bad JSON, missing dates)
    "processed_files": 0,    # media files successfully updated by ExifTool
    "extension_breakdown": defaultdict(int),  # processed count per media extension
    "start_time": time.time(),
}
def log_message(message):
    """Echo *message* to the console and record it in the log file."""
    print(message)
    logging.info(message)
def log_skipped(file_path, reason):
    """Record that *file_path* was skipped (with *reason*) and bump the counter."""
    metrics["skipped_files"] += 1
    log_message(f"SKIPPED: {file_path} - {reason}")
def log_directory_complete(directory):
    """Note in the log that every entry of *directory* has been handled."""
    log_message(f"COMPLETED: Processing directory {directory}")
def process_metadata(file_path, metadata):
    """Apply XMP metadata to a media file by invoking ExifTool.

    Args:
        file_path: Path of the media file to update.
        metadata: Dict with 'DateTimeOriginal', 'CreateDate', 'ModifyDate'
            (preformatted "%Y-%m-%dT%H:%M:%S" strings), 'Keywords' (list of
            people names) and 'Comment' (free-form description).

    Side effects: updates the module-level metrics counters; failures are
    logged via log_skipped() instead of raising.
    """
    exiftool_args = [
        exiftool_path,
        "-overwrite_original",
        f"-XMP:DateTimeOriginal={metadata['DateTimeOriginal']}",
        f"-XMP:CreateDate={metadata['CreateDate']}",
        f"-XMP:ModifyDate={metadata['ModifyDate']}",
    ]
    # XMP:Subject is a list-type tag in ExifTool: pass one -XMP:Subject=...
    # argument per keyword so each person becomes a separate subject entry.
    # (Joining with spaces would create a single mangled subject string.)
    for keyword in metadata["Keywords"]:
        exiftool_args.append(f"-XMP:Subject={keyword}")
    # Add a comment with additional information
    if metadata["Comment"]:
        exiftool_args.append(f"-XMP:Description={metadata['Comment']}")
    # The target file goes last on the ExifTool command line.
    exiftool_args.append(file_path)
    try:
        subprocess.run(exiftool_args, check=True, capture_output=True, text=True)
        metrics["processed_files"] += 1
        ext = os.path.splitext(file_path)[-1].lower()
        metrics["extension_breakdown"][ext] += 1
    except subprocess.CalledProcessError as e:
        log_skipped(file_path, f"Metadata update failed: {e.stderr}")
    except FileNotFoundError:
        # exiftool_path does not point at a runnable executable; log and
        # continue instead of crashing the whole run on the first file.
        log_skipped(file_path, f"ExifTool not found at {exiftool_path}")
def match_media_file(json_file, files_in_directory):
    """Find the media file that a Takeout JSON sidecar describes.

    Google Takeout names sidecars either "IMG_001.jpg.json" (full media file
    name plus .json — the standard convention) or "IMG_001.json". The full-name
    convention is tried first; the stripped base name is kept as a fallback,
    preserving the original strict-matching behavior.

    Args:
        json_file: File name (not path) of the JSON sidecar.
        files_in_directory: Iterable of file names in the same directory.

    Returns:
        The matching media file name, or None if nothing matches.
    """
    base_name = os.path.splitext(json_file)[0].lower()  # "img_001.jpg" or "img_001"
    fallback = None
    for candidate in files_in_directory:
        media_name, media_ext = os.path.splitext(candidate)
        if media_ext.lower() not in media_extensions:
            continue
        # "IMG_001.jpg.json": the sidecar's base name IS the media file name.
        if candidate.lower() == base_name:
            return candidate
        # "IMG_001.json": compare against the media name without extension.
        if fallback is None and media_name.lower() == base_name:
            fallback = candidate
    return fallback
def process_directory(directory, include_subfolders):
    """Process all JSON sidecars (and their media files) in *directory*.

    For every .json file found, locate the matching media file in the same
    directory, parse the sidecar, and hand the extracted timestamps, people
    keywords and origin comment to process_metadata().

    Args:
        directory: Directory to scan.
        include_subfolders: When True, recurse into subdirectories.

    Side effects: mutates the module-level metrics dict and writes log
    entries; problem files are logged via log_skipped() with a reason.
    """
    files = os.listdir(directory)
    for file in files:
        file_path = os.path.join(directory, file)
        # Recurse into subdirectories if applicable
        if os.path.isdir(file_path) and include_subfolders:
            process_directory(file_path, include_subfolders)
            continue
        # Only JSON sidecars drive processing; media files are reached
        # through their matching sidecar.
        if file.lower().endswith(".json"):
            metrics["total_files"] += 1
            json_path = file_path
            media_file = match_media_file(file, files)
            if not media_file:
                log_skipped(json_path, "No matching media file found")
                continue
            media_file_path = os.path.join(directory, media_file)
            try:
                with open(json_path, "r", encoding="utf-8") as f:
                    metadata_json = json.load(f)
            except json.JSONDecodeError as e:
                log_skipped(json_path, f"Invalid JSON format: {e}")
                continue
            # Extract Takeout timestamps (epoch seconds stored as strings).
            photo_taken_time = metadata_json.get("photoTakenTime", {}).get("timestamp")
            creation_time = metadata_json.get("creationTime", {}).get("timestamp")
            # NOTE(review): fromtimestamp() converts to the machine's LOCAL
            # timezone — confirm this matches the intended XMP semantics.
            date_taken = datetime.fromtimestamp(int(photo_taken_time)).strftime("%Y-%m-%dT%H:%M:%S") if photo_taken_time else None
            create_date = datetime.fromtimestamp(int(creation_time)).strftime("%Y-%m-%dT%H:%M:%S") if creation_time else None
            # People tagged in Google Photos become keyword candidates.
            keywords = metadata_json.get("people", [])
            keywords_list = [person.get("name") for person in keywords if person.get("name")]
            comment = f"Device: {metadata_json.get('googlePhotosOrigin', {}).get('mobileUpload', {}).get('deviceType', '')}; URL: {metadata_json.get('url', '')}"
            if not date_taken or not create_date:
                log_skipped(json_path, "Required metadata missing")
                continue
            metadata = {
                "DateTimeOriginal": date_taken,
                "CreateDate": create_date,
                "ModifyDate": create_date,
                "Keywords": keywords_list,
                "Comment": comment.strip("; ")
            }
            process_metadata(media_file_path, metadata)
    log_directory_complete(directory)
def generate_summary():
    """Emit a final report: file counts, per-extension breakdown, elapsed time."""
    elapsed = time.time() - metrics["start_time"]
    header_lines = [
        "\n==== Summary ====",
        f"Total Files: {metrics['total_files']}",
        f"Processed Files: {metrics['processed_files']}",
        f"Skipped Files: {metrics['skipped_files']}",
        "Extension Breakdown:",
    ]
    for line in header_lines:
        log_message(line)
    for ext, count in metrics["extension_breakdown"].items():
        log_message(f" {ext}: {count}")
    log_message(f"Time Elapsed: {elapsed:.2f} seconds")
    log_message("=================")
def handle_exit(signum, frame):
    """Signal handler (Ctrl+C): report the metrics gathered so far, then exit cleanly."""
    generate_summary()
    sys.exit(0)
if __name__ == "__main__":
    # Handle interrupts (Ctrl+C) so a partial run still prints its summary.
    signal.signal(signal.SIGINT, handle_exit)
    # The script operates on the directory it is launched from.
    base_directory = os.getcwd()
    include_subfolders = input("Include subfolders? (y/n): ").strip().lower() == "y"
    log_message(f"Starting metadata update for base directory: {base_directory}")
    try:
        process_directory(base_directory, include_subfolders)
    finally:
        # Always report metrics, even if processing aborted with an error.
        generate_summary()
Step 4: Run the Script
- Open a terminal or command prompt.
- Navigate to the folder where your script is saved:
cd "path_to_folder_containing_the_script"
- Run the script:
python process_google_takeout.py
- Follow the prompts to include or exclude subfolders.
Key Notes
- File Matching: Each `.json` file is matched to its corresponding media file by exact name. If a media file has `(1)` or `(2)` in its name, it will not match unless the `.json` file follows the same naming convention.
- Skipped Files: A file is logged as skipped if:
  - No matching media file is found for a `.json` sidecar.
  - The JSON is invalid or missing required metadata.
- Metrics: The script logs:
- Total files processed.
- Number of skipped files.
- Breakdown of file types processed.
- Time elapsed.
Example Use Case
Let’s say you’ve exported your Google Photos data to a folder `D:\GooglePhotosExport`. After downloading my script and ExifTool:
- Save `exiftool.exe` in `C:\Tools\ExifTool`.
- Place the script in `D:\GooglePhotosExport`.
- Run the script to process all files in the folder and its subfolders.
Troubleshooting
- ExifTool Not Found: Ensure the `exiftool.exe` path in the script matches where you saved it.
- Invalid JSON: Double-check the `.json` files for proper formatting.
Wrapping Up
This script saves hours of manual metadata editing. Whether you’re managing personal photo libraries or archiving data professionally, it’s a flexible and powerful tool.
Feel free to leave comments if you encounter issues or have suggestions!