PyMuPDF (imported in code as `fitz`) is one of the fastest and most capable Python libraries for PDF manipulation, including image extraction. Here's a complete, working solution.
# Install the dependencies first:
#   pip install PyMuPDF Pillow
import io
import os

import fitz  # PyMuPDF
from PIL import Image


def extract_images_from_pdf(pdf_path, output_dir="extracted_images"):
    """Extract the images embedded in a PDF and save each one as a PNG file.

    Every page is scanned for image references. Each image is rendered to a
    pixmap and encoded as PNG, so the bytes on disk always match the ``.png``
    file name regardless of how the image is stored inside the PDF.

    Args:
        pdf_path: Path of the PDF file to read.
        output_dir: Directory that receives the PNG files; it is created
            if it does not exist.

    Returns:
        The number of image files written.
    """
    os.makedirs(output_dir, exist_ok=True)
    total_images = 0

    # The context manager closes the document even if extraction fails midway.
    with fitz.open(pdf_path) as pdf_document:
        print(f"📄 Processing PDF: {pdf_path}")
        print(f"📁 Output directory: {output_dir}")

        for page_num, page in enumerate(pdf_document, start=1):
            image_list = page.get_images(full=True)
            if not image_list:
                print(f" Page {page_num}: No images found")
                continue

            print(f" Page {page_num}: Found {len(image_list)} image(s)")

            for img_index, img in enumerate(image_list, start=1):
                xref = img[0]  # Cross-reference number of the image object

                # Skip very small images, judged by the size of the embedded
                # (still encoded) image data.
                image_bytes = pdf_document.extract_image(xref)["image"]
                if len(image_bytes) < 1000:
                    print(f" Skipping tiny image {img_index}")
                    continue

                # The embedded data may be JPEG, JPX, etc., so it cannot be
                # written verbatim under a .png name; re-encode it instead.
                pix = fitz.Pixmap(pdf_document, xref)
                if pix.n - pix.alpha > 3:
                    # PNG cannot store CMYK: convert to RGB first.
                    pix = fitz.Pixmap(fitz.csRGB, pix)
                png_bytes = pix.tobytes("png")
                pix = None  # Release the pixmap's memory promptly

                filename = f"page_{page_num}_img_{img_index}.png"
                filepath = os.path.join(output_dir, filename)
                with open(filepath, "wb") as img_file:
                    img_file.write(png_bytes)

                total_images += 1
                print(f" ✓ Saved: {filename} ({len(png_bytes)} bytes)")

    print(f"\n🎉 Extracted {total_images} images successfully!")
    return total_images


# Usage
if __name__ == "__main__":
    pdf_file = "your_document.pdf"  # Replace with your PDF path
    extract_images_from_pdf(pdf_file)


# Advanced Version with Image Preview & Filtering
import io
import os

import fitz
from PIL import Image


def extract_images_advanced(pdf_path, output_dir="extracted_images", min_size=(100, 100)):
    """Extract images from a PDF as PNG files, skipping images that are too small.

    Args:
        pdf_path: Path of the PDF file to read.
        output_dir: Directory that receives the PNG files; it is created
            if it does not exist.
        min_size: ``(min_width, min_height)`` in pixels. An image narrower
            than ``min_size[0]`` or shorter than ``min_size[1]`` is skipped.

    Returns:
        A list with one dict per saved image, holding the keys ``'page'``
        (1-based page number), ``'index'`` (1-based position of the image in
        the page's image list), ``'size'`` (``(width, height)`` in pixels)
        and ``'file'`` (the file name inside ``output_dir``).
    """
    os.makedirs(output_dir, exist_ok=True)
    extracted = []

    # The context manager closes the document even if extraction fails midway.
    with fitz.open(pdf_path) as doc:
        for page_num, page in enumerate(doc, start=1):
            for img_index, img in enumerate(page.get_images(), start=1):
                xref = img[0]  # Cross-reference number of the image object
                pix = fitz.Pixmap(doc, xref)

                if pix.width < min_size[0] or pix.height < min_size[1]:
                    pix = None  # Release the pixmap's memory promptly
                    continue

                if pix.n - pix.alpha > 3:
                    # PNG cannot store CMYK: convert to RGB first.
                    pix = fitz.Pixmap(fitz.csRGB, pix)

                filename = f"page_{page_num}_img_{img_index}.png"
                filepath = os.path.join(output_dir, filename)
                with open(filepath, "wb") as f:
                    f.write(pix.tobytes("png"))

                extracted.append({
                    'page': page_num,
                    'index': img_index,
                    'size': (pix.width, pix.height),
                    'file': filename,
                })
                pix = None  # Release the pixmap's memory promptly

    return extracted


# Usage with filtering (min 100x100 pixels)
if __name__ == "__main__":
    images_found = extract_images_advanced("document.pdf")
    print(f"Extracted {len(images_found)} quality images")
    for img in images_found:
        print(f" Page {img['page']}: {img['size']}px -> {img['file']}")














Comments & Discussion
Join the discussion by logging into your account.