Here's a script to reduce the size of a notebook with many PNG images

I use notebooks for tutorials, and they sometimes grow beyond 20 MB when they contain a few PNG screenshots.
Here’s a script that converts all PNG image attachments in markdown cells to compressed JPEG, which decreases the size of the notebook. In my case, it went down from 20 MB to 3 MB, without too much loss of image quality.

Hope this is useful to someone!

import nbformat
from PIL import Image
import io
import base64

def _flatten_onto_white(img):
    """Return an RGB copy of *img* with any transparency composited onto white."""
    has_alpha = img.mode in ("RGBA", "LA", "PA") or "transparency" in img.info
    if not has_alpha:
        return img.convert("RGB")

    rgba = img.convert("RGBA")
    background = Image.new("RGB", rgba.size, (255, 255, 255))
    # JPEG has no alpha channel; a plain convert("RGB") would just drop it and
    # expose whatever colour sits under the transparent pixels (usually black).
    background.paste(rgba, mask=rgba.split()[-1])
    return background


def compress_images_in_notebook(input_notebook_path, output_notebook_path):
    """Re-encode PNG attachments of markdown cells as JPEG and save a copy.

    Every ``image/png`` attachment found in a markdown cell is decoded,
    flattened onto a white background, saved as JPEG (quality 70) and stored
    under the ``image/jpeg`` MIME key in place of the PNG. An attachment is
    left untouched when the JPEG would not be smaller than the PNG.

    Args:
        input_notebook_path: Path of the notebook to read.
        output_notebook_path: Path the modified notebook is written to.
    """
    # Notebooks are UTF-8 JSON; do not depend on the platform default encoding.
    with open(input_notebook_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    for cell in notebook.cells:
        # Only markdown cells carrying attachments are of interest.
        if cell.cell_type != "markdown" or "attachments" not in cell:
            continue

        for attachment_data in cell['attachments'].values():
            if 'image/png' not in attachment_data:
                continue

            # Decode the base64 image data
            png_data = base64.b64decode(attachment_data['image/png'])

            # Convert the PNG to JPEG and compress
            with Image.open(io.BytesIO(png_data)) as img:
                jpeg_buffer = io.BytesIO()
                _flatten_onto_white(img).save(jpeg_buffer, format="JPEG", quality=70)  # Adjust quality as needed
                jpeg_data = jpeg_buffer.getvalue()

            # Flat graphics can compress better as PNG; never grow the notebook.
            if len(jpeg_data) >= len(png_data):
                continue

            # Swap the MIME entry: store the JPEG, drop the original PNG.
            attachment_data['image/jpeg'] = base64.b64encode(jpeg_data).decode('utf-8')
            del attachment_data['image/png']

    # Save the modified notebook
    with open(output_notebook_path, 'w', encoding='utf-8') as f:
        nbformat.write(notebook, f)

# Example invocation: read 'task1.ipynb' and write the compressed copy next to it.
compress_images_in_notebook(
    input_notebook_path='task1.ipynb',
    output_notebook_path='compressed_task1.ipynb',
)
5 Likes

Excellent :+1:

Even better: use the widely supported WebP image format.
That could lead to even smaller file sizes than JPEG.

3 Likes

Hi @stesim, that’s a great suggestion! With WebP, the result is another 4x smaller than with JPEG.

I’ve also made a uv script that you can run with one line in the terminal:
uv run https://gist.githubusercontent.com/kolibril13/d21f969c9a3f8638e88036b17139d8fb/raw/4a89041945c722af9429c6e46f70487db7e808b7/nb_image_compressor.py

Here’s the full script.

# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "nbformat",
#     "Pillow",
# ]
# ///
from pathlib import Path
import nbformat
from PIL import Image
import io
import base64

# Function to compress images in a notebook using WebP
def compress_images_in_notebook(input_notebook_path, output_notebook_path):
    """Re-encode PNG attachments of markdown cells as WebP and save a copy.

    Every ``image/png`` attachment found in a markdown cell is decoded, saved
    as WebP (quality 70) and stored under the ``image/webp`` MIME key in place
    of the PNG. Transparency is preserved, since WebP can store an alpha
    channel. An attachment is left untouched when the WebP would not be
    smaller than the PNG.

    Args:
        input_notebook_path: Path of the notebook to read.
        output_notebook_path: Path the modified notebook is written to.
    """
    # Notebooks are UTF-8 JSON; do not depend on the platform default encoding.
    with open(input_notebook_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    for cell in notebook.cells:
        # Only markdown cells carrying attachments are of interest.
        if cell.cell_type != "markdown" or "attachments" not in cell:
            continue

        for attachment_data in cell['attachments'].values():
            if 'image/png' not in attachment_data:
                continue

            # Decode the base64 image data
            png_data = base64.b64decode(attachment_data['image/png'])

            # Convert the PNG to WebP and compress
            with Image.open(io.BytesIO(png_data)) as img:
                # Keep the alpha channel when there is one; forcing RGB would
                # turn transparent regions into a solid (usually black) fill.
                has_alpha = img.mode in ("RGBA", "LA", "PA") or "transparency" in img.info
                target_mode = "RGBA" if has_alpha else "RGB"
                webp_buffer = io.BytesIO()
                img.convert(target_mode).save(webp_buffer, format="WEBP", quality=70)  # Adjust quality as needed
                webp_data = webp_buffer.getvalue()

            # Never grow the notebook: keep the PNG if WebP is not smaller.
            if len(webp_data) >= len(png_data):
                continue

            # Swap the MIME entry: store the WebP, drop the original PNG.
            attachment_data['image/webp'] = base64.b64encode(webp_data).decode('utf-8')
            del attachment_data['image/png']

    # Save the modified notebook
    with open(output_notebook_path, 'w', encoding='utf-8') as f:
        nbformat.write(notebook, f)

# Scan all notebooks in the current folder and compress images.
# Outputs are written next to the inputs with this suffix appended to the stem.
_OUTPUT_SUFFIX = '_compressed_webp'

# Materialise the listing first: new .ipynb files are created in this same
# folder while looping, and they must not be fed back into the loop.
for notebook_path in sorted(Path('.').glob('*.ipynb')):
    # Skip results of a previous run so that re-running the script does not
    # produce '*_compressed_webp_compressed_webp.ipynb' files.
    if notebook_path.stem.endswith(_OUTPUT_SUFFIX):
        continue
    output_path = notebook_path.with_stem(notebook_path.stem + _OUTPUT_SUFFIX)
    compress_images_in_notebook(notebook_path, output_path)
3 Likes