Module fusus.pdf
Expand source code Browse git
import os
import fitz
from .lib import DEFAULT_EXTENSION
"""Installation
```
pip3 install PyMuPDF
```
See [docs](https://pymupdf.readthedocs.io/en/latest/index.html)
"""
def pdf2png(inPdf, outDir, silent=True):
"""Extract all images in a PDF to an output directory.
"""
doc = fitz.open(inPdf)
if not os.path.exists(outDir):
os.makedirs(outDir, exist_ok=True)
p = 0
for i in range(len(doc)):
imgList = doc.getPageImageList(i)
for entry in imgList:
p += 1
xref = entry[0]
pix = fitz.Pixmap(doc, xref)
pix.writeImage(f"{outDir}/{p:>03}.{DEFAULT_EXTENSION}")
if not silent:
plural = "" if p == 1 else "s"
print(f"Written {p} image{plural}")
Functions
def pdf2png(inPdf, outDir, silent=True)
-
Extract all images in a PDF to an output directory.
Expand source code Browse git
def pdf2png(inPdf, outDir, silent=True): """Extract all images in a PDF to an output directory. """ doc = fitz.open(inPdf) if not os.path.exists(outDir): os.makedirs(outDir, exist_ok=True) p = 0 for i in range(len(doc)): imgList = doc.getPageImageList(i) for entry in imgList: p += 1 xref = entry[0] pix = fitz.Pixmap(doc, xref) pix.writeImage(f"{outDir}/{p:>03}.{DEFAULT_EXTENSION}") if not silent: plural = "" if p == 1 else "s" print(f"Written {p} image{plural}")