I use notebooks for tutorials, and sometimes they are more than 20 MB when I have a few screenshots in png format in them.
Here’s a script that compresses the PNG images attached to markdown cells by re-encoding them as JPEG, which decreases the size of the notebook. In my case, it went down from 20 MB to 3 MB, without too much loss of image quality.
Hope this is useful to someone!
import nbformat
from PIL import Image
import io
import base64
def _png_bytes_to_jpeg_bytes(png_data, quality):
    """Re-encode raw PNG bytes as JPEG bytes at the given quality."""
    with Image.open(io.BytesIO(png_data)) as img:
        has_alpha = img.mode in ("RGBA", "LA") or (
            img.mode == "P" and "transparency" in img.info
        )
        if has_alpha:
            # JPEG has no alpha channel. A plain convert("RGB") just drops
            # alpha, which usually shows transparent regions as black, so
            # flatten onto a white background instead.
            rgba = img.convert("RGBA")
            rgb = Image.new("RGB", rgba.size, (255, 255, 255))
            rgb.paste(rgba, mask=rgba.getchannel("A"))
        else:
            rgb = img.convert("RGB")

        jpeg_buffer = io.BytesIO()
        rgb.save(jpeg_buffer, format="JPEG", quality=quality)
        return jpeg_buffer.getvalue()


def compress_images_in_notebook(input_notebook_path, output_notebook_path, quality=70):
    """Convert PNG attachments of markdown cells to JPEG and save a copy.

    The notebook at ``input_notebook_path`` is read as nbformat version 4.
    For every markdown cell that has attachments, each attachment carrying
    an ``image/png`` entry is decoded, re-encoded as JPEG, stored under
    ``image/jpeg``, and the ``image/png`` entry is removed. The attachment
    name itself is left unchanged, so the cell source is not touched.
    The result is written to ``output_notebook_path``.

    Args:
        input_notebook_path: Path of the notebook to read.
        output_notebook_path: Path the modified notebook is written to.
        quality: JPEG quality passed to Pillow when saving (default 70).
            Lower values give smaller files at the cost of image quality.

    Returns:
        None. The output notebook file is the only result.
    """
    # Notebooks are JSON stored as UTF-8; be explicit so the platform's
    # locale encoding is never used by accident.
    with open(input_notebook_path, "r", encoding="utf-8") as f:
        notebook = nbformat.read(f, as_version=4)

    for cell in notebook.cells:
        if cell.cell_type != "markdown" or "attachments" not in cell:
            continue

        for attachment_data in cell["attachments"].values():
            if "image/png" not in attachment_data:
                continue

            png_data = base64.b64decode(attachment_data["image/png"])
            jpeg_data = _png_bytes_to_jpeg_bytes(png_data, quality)

            # Swap the MIME entry in place: only the inner per-attachment
            # dict is modified, not the dict being iterated.
            attachment_data["image/jpeg"] = base64.b64encode(jpeg_data).decode("ascii")
            del attachment_data["image/png"]

    with open(output_notebook_path, "w", encoding="utf-8") as f:
        nbformat.write(notebook, f)
# Run the function with your input and output file paths.
# Guarded so that importing this file does not trigger a conversion.
if __name__ == "__main__":
    compress_images_in_notebook('task1.ipynb', 'compressed_task1.ipynb')