How can I arbitrarily rotate, rearrange, etc. PDF pages in Python?

With PyPDF2, you can write a script to accomplish this task that looks very similar to your pseudocode.

Here’s some sample code, using a nightly build of the Homotopy Type Theory textbook as input:

#!/usr/bin/env python3
from PyPDF2 import PdfFileReader, PdfFileWriter

# matrix helper class

class AfMatrix:
    """A matrix of a 2D affine transform.

    The six entries ``(a, b, c, d, e, f)`` stand for the 3x3 matrix::

        a b 0
        c d 0
        e f 1

    Points are treated as row vectors multiplied from the left, so
    transforms compose left-to-right (see :meth:`compose`).

    Iterating an instance yields its six entries in order as floats, which
    lets it be unpacked or passed anywhere a 6-element sequence of numbers
    is accepted.
    """

    __slots__ = ('__a', '__b', '__c', '__d', '__e', '__f')

    def __init__(self, a, b, c, d, e, f):
        """Store the six matrix entries, each converted to ``float``."""
        self.__a = float(a)
        self.__b = float(b)
        self.__c = float(c)
        self.__d = float(d)
        self.__e = float(e)
        self.__f = float(f)

    def __iter__(self):
        """Yield the entries in the order a, b, c, d, e, f."""
        yield self.__a
        yield self.__b
        yield self.__c
        yield self.__d
        yield self.__e
        yield self.__f

    def __repr__(self):
        """Return a constructor-style representation for debugging."""
        return '{}({})'.format(
            type(self).__name__,
            ', '.join(repr(entry) for entry in self)
        )

    def __hash__(self):
        """Hash like the tuple of entries, consistently with ``__eq__``."""
        return hash(tuple(self))

    def __eq__(self, other):
        """Compare entry-wise against another matrix or any iterable.

        Returns ``NotImplemented`` for operands that cannot be iterated
        (``None``, plain numbers, ...), so ``==`` evaluates to ``False``
        and ``!=`` to ``True`` instead of raising ``TypeError``.
        """
        try:
            other_entries = tuple(other)
        except TypeError:
            return NotImplemented
        return tuple(self) == other_entries

    @classmethod
    def compose(cls, *what):
        """Return the product of the given transforms, applied left to right.

        Each argument must be unpackable into six numbers (an ``AfMatrix``
        or a plain 6-element sequence).  With no arguments the identity
        transform is returned.
        """
        # Start from the identity matrix.
        a, b, c, d, e, f = (
            1, 0,
            0, 1,
            0, 0,
        )

        for rhs in what:
            A, B, C, D, E, F = rhs
            # Multiply the accumulated matrix (left) by rhs (right); the
            # implicit third column (0, 0, 1) is what adds E and F unscaled.
            a, b, c, d, e, f = (
                a * A + b * C,
                a * B + b * D,
                c * A + d * C,
                c * B + d * D,
                e * A + f * C + E,
                e * B + f * D + F,
            )

        return cls(
            a, b,
            c, d,
            e, f
        )

    @classmethod
    def translate(cls, x=0, y=0):
        """Return a translation by ``x`` horizontally and ``y`` vertically."""
        return cls(
            1, 0,
            0, 1,
            x, y
        )

    def __takes_origin(func):
        """Decorate a matrix factory with a keyword-only ``origin`` argument.

        The decorated factory builds its transform about ``origin`` instead
        of ``(0, 0)`` by moving ``origin`` to ``(0, 0)``, applying the
        transform, and moving it back.
        """
        # Imported locally: this runs once per decorated method while the
        # class body executes, and keeps the class self-contained.
        from functools import wraps

        @wraps(func)  # keep the factory's name and docstring
        def translated_func(cls, *args, origin=(0, 0), **kwargs):
            if origin == (0, 0):
                return func(cls, *args, **kwargs)
            return cls.compose(
                cls.translate(-origin[0], -origin[1]),
                func(cls, *args, **kwargs),
                cls.translate(origin[0], origin[1])
            )
        return translated_func

    @classmethod
    @__takes_origin
    def shear(cls, x=1, y=1):
        """Return a matrix scaling by ``x`` horizontally and ``y`` vertically.

        Despite its name, the matrix built here is a pure scaling (its
        off-diagonal entries are zero).  Accepts a keyword-only ``origin``
        naming the point that stays fixed.
        """
        return cls(
            x, 0,
            0, y,
            0, 0
        )

    @classmethod
    @__takes_origin
    def rotate(cls, angle):
        """Return a rotation by ``angle`` degrees.

        Positive angles turn counterclockwise in a coordinate system where
        Y grows upwards.  Accepts a keyword-only ``origin`` naming the
        centre of rotation.
        """
        from math import cos, sin, radians

        angle = radians(angle)
        C = cos(angle)
        S = sin(angle)

        # Transposed relative to the column-vector form, because points are
        # row vectors here.
        return cls(
             C,  S,
            -S,  C,
             0,  0
        )

#

# Demo script: build a three-page output PDF from the first eight pages of
# the input, using AfMatrix to place and transform the merged pages.
reader = PdfFileReader('hott-online-1272-ga50f9bd.pdf')
writer = PdfFileWriter()

# Input pages 0..7; the script only uses indices 0, 2-4, 6 and 7.
ipgs = [reader.getPage(i) for i in range(8)]

# page 1: input page 0, copied unmodified

writer.addPage(ipgs[0])

# page 2: input pages 2, 3 and 4, each rotated and stacked along Y

opg1src = ipgs[2:5]

# Starts as a 0x0 page; each merge below passes expand=True.
# NOTE(review): expand=True presumably grows the page to fit the merged
# content -- confirm against the PyPDF2 documentation.
opg1 = writer.addBlankPage(0, 0)

# Running Y offset at which the next rotated page is placed.
yaccum = 0
for ipg in opg1src:
    # Rotating by 90 degrees about (0, 0) sends (x, y) to (-y, x), so the
    # page ends up at negative X; shifting right by the page height brings
    # it back to X >= 0.  The Y shift stacks it on the previous pages.
    opg1.mergeTransformedPage(ipg, AfMatrix.compose(
        AfMatrix.rotate(90),
        AfMatrix.translate(x=ipg.mediaBox.getHeight(), y=yaccum)
    ), expand=True)
    # After the rotation the page occupies its original width along Y.
    yaccum += ipg.mediaBox.getWidth()

# page 3: input pages 6 and 7 overlaid on a page the size of input page 6

opg2 = writer.addBlankPage(
    ipgs[6].mediaBox.getWidth(),
    ipgs[6].mediaBox.getHeight()
)

# Input page 6: X coordinates scaled to one third (AfMatrix.shear builds a
# scaling matrix), Y unchanged.
opg2.mergeTransformedPage(ipgs[6], (
    AfMatrix.shear(x=1/3)
), expand=True)

# Input page 7, transforms applied in the order listed: shift left and down
# by 1/8 of the page size, rotate by -15 degrees about (page width, 0), then
# scale to 75% about that same point.
opg2.mergeTransformedPage(ipgs[7], AfMatrix.compose(
    AfMatrix.translate(
        x=-opg2.mediaBox.getWidth() / 8,
        y=-opg2.mediaBox.getHeight() / 8
    ),
    AfMatrix.rotate(-15, origin=(opg2.mediaBox.getWidth(), 0)),
    AfMatrix.shear(x=0.75, y=0.75, origin=(opg2.mediaBox.getWidth(), 0))
), expand=False)

# output: write the assembled document to sample.pdf

with open('sample.pdf', 'wb') as ostream:
    writer.write(ostream)

And here’s the output:

Pages of the HoTT textbook, transformed by the script. Page 1 contains the book cover, unmodified. Page 2 contains three front matter pages, rotated 90° counterclockwise, laid next to each other from bottom to top. Page 3 contains the first two pages of the table of contents, sheared and tilted.

Note on transformation matrices: in PDF and PostScript, the X coordinate grows rightwards and the Y coordinate grows upwards, like in mathematical custom (and unlike the custom in computer graphics, where Y grows downwards). Unlike mathematical custom, points are treated as row-vectors instead of column-vectors, and therefore appear on the left-hand side of matrix multiplication. This means matrix transformations compose left-to-right instead of right-to-left: the leftmost operation is applied first. Also, to make rotations by positive angles come out as counterclockwise (again like mathematical custom), the rotation matrix above appears transposed relative to its usual form.

When transforming pages, beware of content that falls off the page boundary on the original page; on the new page, it might actually render. (I have not found a solution to this yet.)


    import PyPDF2

    # Rotate every second page (odd zero-based index) of original.pdf by
    # 180 degrees and write the result to rotated.pdf.
    #
    # Both files are managed by `with` so they are closed even if reading,
    # rotating or writing raises.  The input stays open until the output
    # has been written.
    # NOTE(review): PyPDF2 presumably still reads from the input stream
    # during write() -- confirm before closing the input any earlier.
    with open('original.pdf', 'rb') as pdf_in:
        pdf_reader = PyPDF2.PdfFileReader(pdf_in)
        pdf_writer = PyPDF2.PdfFileWriter()

        for pagenum in range(pdf_reader.numPages):
            page = pdf_reader.getPage(pagenum)
            if pagenum % 2:
                page.rotateClockwise(180)
            pdf_writer.addPage(page)

        # Opened only after all pages were processed, so a failure above
        # does not leave an empty rotated.pdf behind.
        with open('rotated.pdf', 'wb') as pdf_out:
            pdf_writer.write(pdf_out)

Source

or

import fitz  # <- PyMuPDF v 1.9.3

# Open the PDF and read page n (zero-based).
document = fitz.open("mypdf.pdf")
target_page = document[n]

# Rotate the page by 90 degrees counter-clockwise.
target_page.setRotate(-90)

# Update the file in place - a sub-second matter.
document.save(document.name, incremental=True)
document.close()

source

Tags:

Python

Pdf