Module fusus.layout
Detect the page layout.
Pages consist of a header region, a body region, and a footer region, all of which are optional.
header The header consists of a caption and/or a page number. All headers will be discarded.
footer The footer consists of footnote bodies. All footers will be discarded.
body The body region consists of zero or more stripes.
stripe, block, line A stripe is a horizontal region of the body. If some parts of the body have two blocks and other parts have one block, we divide the body in stripes where each stripe has a fixed number of blocks, and neighbouring stripes have a different number of blocks.
If the whole body has the same number of blocks, we have just one stripe.
The stripes are numbered 1, 2, 3, … from top to bottom.
The block is the empty string if a stripe has just one block,
otherwise it is l
for the left block and r
for the right block.
We assume that all stripes on all pages have at most two blocks.
Blocks are divided into lines. The lines are numbered with the blocks that contain them.
Expand source code Browse git
"""Detect the page layout.
Pages consist of a header region, a body region, and a footer region, all of which
are optional.
**header**
The header consists of a caption and/or a page number.
All headers will be discarded.
**footer**
The footer consists of footnote bodies.
All footers will be discarded.
**body**
The body region consists of zero or more *stripes*.
**stripe, block, line**
A stripe is a horizontal region of the body.
If some parts of the body have two blocks and other parts have one block,
we divide the body in stripes where each stripe has a fixed number of blocks,
and neighbouring stripes have a different number of blocks.
If the whole body has the same number of blocks, we have just one stripe.
The stripes are numbered 1, 2, 3, ... from top to bottom.
The block is the empty string if a stripe has just one block,
otherwise it is `l` for the left block and `r` for the right block.
We assume that all stripes on all pages have at most two blocks.
Blocks are divided into *lines*.
The lines are numbered with the blocks that contain them.
"""
import collections
import cv2
from tf.core.helpers import rangesFromSet
from .lib import (
applyBandOffset,
FONT,
findRuns,
overlay,
showImage,
)
from .lines import getInkY
def addBlockName(img, top, left, right, marginX, letterColor, stripe, kind, size=1.0):
"""Adds the name of a block of the page near the block.
The function `fusus.page.Page.doLayout` divides the page into blocks.
This function puts the name of each block on the image, positioned
suitably w.r.t. the block.
Parameters
----------
img: image as np array
the image to operate on
top, left, right: integer
Where the top, left, right edges of the image are.
marginX: integer
Where we set the left margin
letterColor: color
The color of the letters
stripe: integer
The stripe number. Stripes are horizontal page-wide regions corresponding
to *vertical* block dividers.
kind: string
Whether the block spans the whole page width (`""`), is in the left block
("l") or in the right block ("r").
size: float
The font-size of the letters.
Returns
-------
None
The source image receives a modification.
"""
weight = 3
offsetX = 80 + marginX
halfOffsetX = int(offsetX // 2)
offsetY = 60
x = halfOffsetX if kind == "l" or kind == "" else (right - offsetX)
y = top + offsetY
sep = "" if not kind else "-"
cv2.putText(
img,
f"{stripe}{sep}{kind}",
(x, y),
FONT,
size,
letterColor,
weight,
cv2.LINE_AA,
)
def addHStroke(
img, isTop, i, block, thickness, top, left, right, letterColor, size=1.0
):
"""Marks a detected horizontal stroke on an image.
The layout algorithm detects horizontal strokes.
For feedback to the user, we draw a frame around the detected strokes and give
them a name.
Parameters
----------
img: image as np array
the image to operate on
isTop: boolean
whether the stroke separates the top header from the rest of the page.
i: integer
The number of the stroke
block: string {"l", "r", ""}
The block in which the stroke is found
thickness: integer
The thickness of the stroke as found on the image.
top, left, right: integer
Where the top, left, right edges of the image are.
letterColor: color
The color of the letters
size: float
The font-size of the letters.
Returns
-------
None
The source image receives a modification.
"""
weight = 3
colRep = f"-{block}" if block else ""
text = f"{'T' if isTop else 'B'}{i}{colRep}"
offsetX = 60
offsetY = 30 if isTop else -30 - 2 * thickness
x = int(left + (right - left - offsetX) // 2)
y = top - offsetY
cv2.putText(
img,
text,
(x, y),
FONT,
size,
letterColor,
weight,
cv2.LINE_AA,
)
def getStretches(C, info, stages, pageSize, horizontal, batch):
"""Gets significant horizontal or vertical strokes.
Significant strokes are those that are not part of letters,
but ones that are used as separators, e.g. of footnotes and blocks.
We single out 1-pixel wide lines longer than a small threshold
in the appropriate direction, and blacken the rest.
Then we blur in the perpendicular direction.
Now we single out longer 1-pixel wide lines and cluster in the perpendicular
direction.
Clusters are line segments with nearly the same constant coordinate.
If we do horizontal lines, clusters are pairs of x coordinates
for one y coordinate.
If we do vertical lines, clusters are pairs of y coordinates
for one x coordinate.
We return the clusters, i.e. a dict keyed by the fixed coordinate and
valued by the pair of segment coordinates.
Parameters
----------
C: object
The configuration object of the book engine.
info: function
To write messages to the console
stages: dict
Intermediate cv2 images, keyed by stage name
pageSize: int
The width or height in pixels of a complete page. Note that
the image we work with, might be a fraction of a page
horizontal: boolean
Whether we do horizontal of vertical lines.
batch: boolean
Whether we run in batch mode.
Returns
-------
dict
Per fixed coordinate the list of line segments on that coordinate.
A line segment is specified by its begin and end values and the thickness of
the cluster it is in.
"""
debug = C.debug
strokeColor = C.horizontalStrokeRGB if horizontal else C.verticalStrokeRGB
normalized = stages["normalized"]
img = normalized if horizontal else normalized.T
label = "HOR" if horizontal else "VER"
if not batch:
layout = stages["layout"]
out = layout if horizontal else cv2.transpose(layout)
minLength = int(pageSize // 30 if horizontal else pageSize // 50)
afterLength = int(pageSize // 10 if horizontal else pageSize // 17)
# initial blur
initBlur = (13, 7) if horizontal else (7, 13)
blurred = cv2.GaussianBlur(img, initBlur, 0, 0)
(th, threshed) = cv2.threshold(
blurred, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
)
# collect lines of a minimal length
sliced = threshed.copy()
for (n, row) in enumerate(sliced):
for (val, start, length) in zip(*findRuns(row)):
if val == 255:
if length < minLength:
row[start : start + length] = 0
if debug > 1:
showImage(sliced if horizontal else sliced.T)
# second blur, now stronger
strongBlur = (21, 11) if horizontal else (11, 21)
blurred = cv2.GaussianBlur(sliced, strongBlur, 0, 0)
(th, threshed) = cv2.threshold(
blurred, 50, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
)
if debug > 1:
showImage(threshed if horizontal else threshed.T)
# collect lines of a certain length, longer than before
lines = collections.defaultdict(set)
for (n, row) in enumerate(threshed):
for (val, start, length) in zip(*findRuns(row)):
if val == 255:
if length >= afterLength:
lines[n] |= set(range(start, start + length))
# cluster lines in bins corresponding to their constant coordinates:
# horizontal lines are clustered in bins on their y coordinate.
# vertical lines are clustered in bins on their x coordinate.
bins = []
for n in sorted(lines):
found = False
for (i, (b, e)) in enumerate(bins):
if b - 3 <= n <= e + 3:
if n < b:
bins[i][0] = n
if n > e:
bins[i][1] = n
found = True
break
if not found:
bins.append([n, n])
# combine the segments of all lines that are in the same bin
stretches = {}
for (b, e) in bins:
middle = int((b + e) // 2)
thickness = int((abs(e - b) + 1) // 2)
if thickness <= 1:
continue
theseStretches = set()
for n in range(b, e):
if n in lines:
theseStretches |= lines[n]
segments = []
for (m1, m2) in rangesFromSet(theseStretches):
segments.append((m1, m2 + 1, thickness))
stretches[middle] = segments
for (n, segments) in sorted(stretches.items()):
for (f, t, half) in segments:
info(f"{label} @ {n:>4} thick={half:>2} from {f:>4} to {t:>4}", tm=False)
if not batch:
cv2.rectangle(out, (f, n - half - 2), (t, n + half + 2), strokeColor, 3)
if not batch:
stages["layout"] = out if horizontal else cv2.transpose(out)
if not batch and debug > 1:
showImage(stages["layout"])
return stretches
def getStripes(stages, stretchesV):
"""Infer horizontal stripes from a set of vertical bars.
A vertical bar defines a stripe on the page, i.e. a horizontal band that
contains that bar.
Between the vertical bars there are also stripes, they are undivided stripes.
We assume the vertical bars split the page in two portions, and not more,
and that they occur more or less in the middle of the page.
If many vertical bars have been detected, we sort them by y1 ascending and then
y2 descending and then by x.
We filter the bars: if the last bar reached to y = height, we only consider
bars that start lower than height.
!!! note "Fine tuning needed later on"
The vertical strokes give a rough estimate:
it is possible that they start and end in the middle of the lines beside them.
We will need histograms for the fine tuning.
Parameters
----------
stages: dict
We need access to the normalized stage to get the page size.
stretchesV: dict
Vertical line segments per x-coordinate, as delivered by `getStretches`.
Returns
-------
list
A list of stripes, specified as (x, y1, y2) values,
where the y-coordinates y1 and y2 specify the vertical extent of the stripe,
and x is the x coordinate of the dividing vertical stroke if there is one
and `None` otherwise.
"""
normalized = stages["normalized"]
(maxH, maxW) = normalized.shape[0:2]
lastHeight = 0
segments = []
for (x, ys) in stretchesV.items():
for (y1, y2, thickness) in ys:
segments.append((y1, y2, x, thickness))
stripes = []
for (y1, y2, x, thickness) in sorted(
segments, key=lambda z: (z[0], -z[1], -z[3], -z[2] or -1)
):
if y1 > lastHeight:
stripes.append((None, lastHeight, y1))
stripes.append((x, y1, y2))
lastHeight = y2
if lastHeight < maxH:
stripes.append((None, lastHeight, maxH))
return stripes
def getBlocks(C, info, stages, pageH, stripes, stretchesH, batch):
"""Fine-tune stripes into blocks.
We enlarge the stripes vertically by roughly a line height
and call `adjustVertical` to get precise vertical demarcations
for the blocks at both sides of the stripe if there is one or else
for the undivided stripe.
The idea is:
If a stripe has a vertical bar, we slightly extend the boxes left and right
so that the top and bottom lines next to the bar are completely included.
If a stripe has no vertical bar, we shrink the box
so that partial top and bottom lines are delegated to the boxes above
and below.
We only shrink if the box is close to the boxes above or below.
We do not grow boxes across significant horizontal strokes.
We write the box layout unto the `layout` layer.
Parameters
----------
C: object
Configuration settings
stages: dict
We need access to several intermediate results.
pageH: int
The height of a full page in pixels (the image might be a fraction of a page)
stripes: list
The preliminary stripe division of the page, as delivered by
`getStripes`.
stretchesH: list
The horizontal stretches across which we do not shrink of enlarge
batch: boolean
Whether we run in batch mode.
Returns
-------
dict
Blocks keyed by stripe number and block specification
(one of `""`, `"l"`, `"r"`).
The values form dicts themselves, with in particular the bounding box
information under key `box` specified as four numbers:
left, top, right, bottom.
The dict is ordered.
"""
marginX = C.blockMarginX
blockColor = C.blockRGB
letterColor = C.letterRGB
blurred = stages["blurred"]
normalized = stages["normalized"]
(maxH, maxW) = normalized.shape[0:2]
leeHeight = int(pageH // 20)
blocks = {}
upperHStretch = min(stretchesH) if stretchesH else 0
lowerHStretch = max(stretchesH) if stretchesH else maxH
if not batch:
layout = stages["layout"]
for (stripe, (x, yMin, yMax)) in enumerate(stripes):
yMinLee = max((0, yMin - leeHeight))
yMaxLee = min((maxH, yMax + leeHeight))
if x is None:
(theYMin, theYMax) = adjustVertical(
C, info, blurred, pageH, 0, maxW, yMin, yMinLee, yMax, yMaxLee, False
)
blocks[(stripe, "")] = dict(
box=(marginX, theYMin, maxW - marginX, theYMax),
sep=x,
)
if not batch:
cv2.rectangle(
layout,
(marginX, theYMin),
(maxW - marginX, theYMax),
blockColor,
4,
)
addBlockName(layout, theYMin, 0, maxW, marginX, letterColor, stripe, "")
else:
yMinLeeBound = (
yMinLee
if upperHStretch == 0 or upperHStretch > yMin
else max((yMinLee, max(y for y in stretchesH if y <= yMin)))
)
yMaxLeeBound = (
yMaxLee
if lowerHStretch == maxH or lowerHStretch < yMax
else min((yMaxLee, min(y for y in stretchesH if y >= yMax)))
)
(theYMinL, theYMaxL) = adjustVertical(
C,
info,
blurred,
pageH,
0,
x,
yMin,
yMinLeeBound,
yMax,
yMaxLeeBound,
True,
)
(theYMinR, theYMaxR) = adjustVertical(
C,
info,
blurred,
pageH,
x,
maxW,
yMin,
yMinLeeBound,
yMax,
yMaxLeeBound,
True,
)
blocks[(stripe, "l")] = dict(
box=(marginX, theYMinL, x - marginX, theYMaxL), sep=x
)
blocks[(stripe, "r")] = dict(
box=(x + marginX, theYMinR, maxW - marginX, theYMaxR), sep=x
)
if not batch:
cv2.rectangle(
layout,
(marginX, theYMinL),
(x - marginX, theYMaxL),
blockColor,
4,
)
addBlockName(layout, theYMinL, 0, x, marginX, letterColor, stripe, "l")
cv2.rectangle(
layout,
(x + marginX, theYMinR),
(maxW - marginX, theYMaxR),
blockColor,
4,
)
addBlockName(
layout, theYMinR, x, maxW, marginX, letterColor, stripe, "r"
)
return collections.OrderedDict(sorted(blocks.items()))
def applyHRules(C, stages, stretchesH, stripes, blocks, batch, boxed):
"""Trims regions above horizontal top lines and below bottom lines.
Inspect the horizontal strokes and specifiy which ones are
top separators and which ones are bottom separators.
First we map each horizontal stretch to one of the page stripes.
If a stretch occurs between stripes, we map it to the stripe above.
A horizontal stroke is a top separator if
* it is mapped to the first stripe **and**
* it is situated in the top fragment of the page.
We mark the discarded material on the layout page by overlaying
it with gray.
Parameters
----------
C: object
Configuration settings
stages: dict
We need access to several intermediate results.
stretchesH: dict
Horizontal line segments per y-coordinate, as delivered by `getStretches`.
stripes: list
The preliminary stripe division of the page, as delivered by
`getStripes`.
blocks: dict
The blocks as delivered by `getBlocks`.
boxed: boolean
Whether we run in boxed mode (generate boxes around wiped marks).
Returns
-------
None
The blocks dict will be updated: each block value gets a new key `inner`
with the bounding box info after stripping the top and bottom material.
"""
mColor = C.marginRGB
whit = C.whiteGRS
white = C.whiteRGB
letterColor = C.letterRGB
normalized = stages["normalized"]
demargined = normalized.copy()
stages["demargined"] = demargined
if not batch:
layout = stages["layout"]
if not batch or boxed:
normalizedC = stages["normalizedC"]
demarginedC = normalizedC.copy()
stages["demarginedC"] = demarginedC
(maxH, maxW) = normalized.shape[0:2]
topCriterion = maxH / 6
topXCriterion = maxH / 4
for ((stripe, block), data) in blocks.items():
(bL, bT, bR, bB) = data["box"]
x = data["sep"]
top = None
bottom = None
for (y, xs) in sorted(stretchesH.items()):
if y < bT:
continue
if bB < y:
break
for (x1, x2, thickness) in xs:
if x is not None:
if block == "l" and x1 >= x:
continue
if block == "r" and x2 <= x:
continue
isTop = stripe == 0 and (
len(stripes) == 1
and y < topCriterion
or len(stripes) > 1
and y < topXCriterion
)
if isTop:
top = y + 2 * thickness + 2
else:
if bottom is None:
bottom = y - 2 * thickness - 2
if not batch:
addHStroke(
layout,
isTop,
stripe,
block,
thickness,
y,
x1,
x2,
letterColor,
)
top = bT if top is None else top
bottom = bB if bottom is None else bottom
left = bL + 2
right = bR - 2
data["inner"] = (left, top, right, bottom)
if top != bT:
if not batch:
overlay(layout, left, bT + 2, right, top, white, mColor)
cv2.rectangle(demargined, (left, bT), (right, top), whit, -1)
if not batch or boxed:
overlay(demarginedC, left, bT + 2, right, top, white, mColor)
if bottom != bB:
if not batch:
overlay(layout, left, bottom, right, bB - 2, white, mColor)
cv2.rectangle(demargined, (left, bottom), (right, bB), whit, -1)
if not batch or boxed:
overlay(demarginedC, left, bottom, right, bB - 2, white, mColor)
def grayInterBlocks(C, stages, blocks):
"""Overlay the space between blocks with gray.
Remove also the empty blocks from the block list.
Parameters
----------
C: object
Configuration settings
stages: dict
We need access to several intermediate results.
blocks: dict
The blocks as delivered by `getBlocks`.
The blocks dict will be updated: empty blocks will be deleted from it.
with the band data.
Returns
-------
None.
"""
mColor = C.marginRGB
white = C.whiteRGB
layout = stages["layout"]
(maxH, maxW) = layout.shape[0:2]
prevBB = [0, 0]
prevX = None
maxStripe = max(x[0] for x in blocks)
marginX = C.blockMarginX
# overlay the space between blocks
for ((stripe, block), data) in sorted(blocks.items()):
bT = data["box"][1]
bB = data["box"][3]
x = data["sep"]
if block == "":
if prevX is None:
pB = prevBB[0]
overlay(layout, marginX, pB, maxW - marginX, bT, white, mColor)
else:
for (i, pB) in enumerate(prevBB):
if pB < bT:
(lf, rt) = (
(marginX, prevX - marginX)
if i == 0
else (prevX + marginX, maxW - marginX)
)
overlay(layout, lf, pB, rt, bT, white, mColor)
prevBB = [bB, bB]
prevX = None
elif block == "l":
pB = prevBB[0]
if pB < bT:
overlay(layout, marginX, pB, x - marginX, bT, white, mColor)
prevBB[0] = bB
prevX = x
elif block == "r":
pB = prevBB[1]
if pB < bT:
overlay(layout, x + marginX, pB, maxW - marginX, bT, white, mColor)
prevBB[1] = bB
prevX = x
if stripe == maxStripe:
if block == "":
if bB < maxH:
overlay(layout, marginX, bB, maxW - marginX, maxH, white, mColor)
elif block == "l":
if bB < maxH:
overlay(layout, marginX, bB, x - marginX, maxH, white, mColor)
elif block == "r":
if bB < maxH:
overlay(
layout, bB, maxW - marginX, maxH, white, x + marginX, mColor
)
def adjustVertical(
C, info, blurred, pageH, left, right, yMin, yMinLee, yMax, yMaxLee, preferExtend
):
"""Adjust the height of blocks.
When we determine the vertical sizes of blocks from the vertical block separators
on the page, we may find that these separators are too short.
We remedy this by finding the line divisision of the ink left and right from the
dividing line, and enlarging the blocks left and right so that they contain
complete lines.
Parameters
----------
C: object
Configuration settings
info: function
To write messages to the console
blurred: image as np array
The input image. It must be the `blurred` stage of the source image,
which is blurred and inverted.
pageH: int
size of a full page in pixels
left, right: int
The left and right edges of the block
yMin: integer
the initial top edge of the block
yMinLee: integer
the top edge of the block when the leeway is applied
yMax: integer
the initial bottom edge of the block
yMaxLee: integer
the bottom edge of the block when the leeway is applied
preferExtend: boolean
Whether we want to increase or rather decrease the vertical size of the block.
Blocks next to dividing lines are meant to be increased, blocks that
span the whole page width are meant to be decreased.
Returns
-------
tuple
The corrected top and bottom heights of the block.
"""
theYMin = None
theYMax = None
if yMin == yMinLee:
theYMin = yMin
if yMax == yMaxLee:
theYMax = yMax
if theYMin is not None and theYMax is not None:
return (theYMin, theYMax)
lines = getInkY(
C, info, blurred, pageH, left, yMinLee, right, yMaxLee, False, imgOut=None
)
normHLee = yMaxLee - yMinLee
topProper = yMin - yMinLee
botProper = yMax - yMinLee
normLines = applyBandOffset(C, normHLee, "main", lines)
if theYMin is None:
if preferExtend:
# look for the first lower boundary in the top of the strict region
# then take the corresponding upper boundary
for (i, (up, lo)) in enumerate(normLines):
if up <= topProper and lo >= topProper:
theYMin = up + yMinLee
break
else:
# look for the first upper boundary in the top of the strict region
for (up, lo) in normLines:
if up >= topProper and lo <= botProper:
theYMin = up + yMinLee
break
if theYMin is None:
theYMin = yMin
if theYMax is None:
if preferExtend:
# look for the last upper boundary in the bottom of the strict region
# then take the corresponding lower boundary
for (i, (up, lo)) in enumerate(reversed(normLines)):
if up <= botProper and lo >= botProper:
theYMax = lo + yMinLee
break
else:
# look for the last lower boundary in the bottom of the strict region
for (up, lo) in reversed(normLines):
if lo <= botProper and up >= topProper:
theYMax = lo + yMinLee
break
if theYMax is None:
theYMax = yMax
return (theYMin, theYMax)
Functions
def addBlockName(img, top, left, right, marginX, letterColor, stripe, kind, size=1.0)
-
Adds the name of a block of the page near the block.
The function
Page.doLayout()
divides the page into blocks. This function puts the name of each block on the image, positioned suitably w.r.t. the block.Parameters
img
:image as np array
- the image to operate on
top
,left
,right
:integer
- Where the top, left, right edges of the image are.
marginX
:integer
- Where we set the left margin
letterColor
:color
- The color of the letters
stripe
:integer
- The stripe number. Stripes are horizontal page-wide regions corresponding to vertical block dividers.
kind
:string
- Whether the block spans the whole page width (
""
), is in the left block ("l") or in the right block ("r"). size
:float
- The font-size of the letters.
Returns
None
- The source image receives a modification.
Expand source code Browse git
def addBlockName(img, top, left, right, marginX, letterColor, stripe, kind, size=1.0): """Adds the name of a block of the page near the block. The function `fusus.page.Page.doLayout` divides the page into blocks. This function puts the name of each block on the image, positioned suitably w.r.t. the block. Parameters ---------- img: image as np array the image to operate on top, left, right: integer Where the top, left, right edges of the image are. marginX: integer Where we set the left margin letterColor: color The color of the letters stripe: integer The stripe number. Stripes are horizontal page-wide regions corresponding to *vertical* block dividers. kind: string Whether the block spans the whole page width (`""`), is in the left block ("l") or in the right block ("r"). size: float The font-size of the letters. Returns ------- None The source image receives a modification. """ weight = 3 offsetX = 80 + marginX halfOffsetX = int(offsetX // 2) offsetY = 60 x = halfOffsetX if kind == "l" or kind == "" else (right - offsetX) y = top + offsetY sep = "" if not kind else "-" cv2.putText( img, f"{stripe}{sep}{kind}", (x, y), FONT, size, letterColor, weight, cv2.LINE_AA, )
def addHStroke(img, isTop, i, block, thickness, top, left, right, letterColor, size=1.0)
-
Marks a detected horizontal stroke on an image.
The layout algorithm detects horizontal strokes. For feedback to the user, we draw a frame around the detected strokes and give them a name.
Parameters
img
:image as np array
- the image to operate on
isTop
:boolean
- whether the stroke separates the top header from the rest of the page.
i
:integer
- The number of the stroke
block
:string {"l", "r", ""}
- The block in which the stroke is found
thickness
:integer
- The thickness of the stroke as found on the image.
top
,left
,right
:integer
- Where the top, left, right edges of the image are.
letterColor
:color
- The color of the letters
size
:float
- The font-size of the letters.
Returns
None
- The source image receives a modification.
Expand source code Browse git
def addHStroke( img, isTop, i, block, thickness, top, left, right, letterColor, size=1.0 ): """Marks a detected horizontal stroke on an image. The layout algorithm detects horizontal strokes. For feedback to the user, we draw a frame around the detected strokes and give them a name. Parameters ---------- img: image as np array the image to operate on isTop: boolean whether the stroke separates the top header from the rest of the page. i: integer The number of the stroke block: string {"l", "r", ""} The block in which the stroke is found thickness: integer The thickness of the stroke as found on the image. top, left, right: integer Where the top, left, right edges of the image are. letterColor: color The color of the letters size: float The font-size of the letters. Returns ------- None The source image receives a modification. """ weight = 3 colRep = f"-{block}" if block else "" text = f"{'T' if isTop else 'B'}{i}{colRep}" offsetX = 60 offsetY = 30 if isTop else -30 - 2 * thickness x = int(left + (right - left - offsetX) // 2) y = top - offsetY cv2.putText( img, text, (x, y), FONT, size, letterColor, weight, cv2.LINE_AA, )
def adjustVertical(C, info, blurred, pageH, left, right, yMin, yMinLee, yMax, yMaxLee, preferExtend)
-
Adjust the height of blocks.
When we determine the vertical sizes of blocks from the vertical block separators on the page, we may find that these separators are too short.
We remedy this by finding the line divisision of the ink left and right from the dividing line, and enlarging the blocks left and right so that they contain complete lines.
Parameters
C
:object
- Configuration settings
info
:function
- To write messages to the console
blurred
:image as np array
- The input image. It must be the
blurred
stage of the source image, which is blurred and inverted. pageH
:int
- size of a full page in pixels
left
,right
:int
- The left and right edges of the block
yMin
:integer
- the initial top edge of the block
yMinLee
:integer
- the top edge of the block when the leeway is applied
yMax
:integer
- the initial bottom edge of the block
yMaxLee
:integer
- the bottom edge of the block when the leeway is applied
preferExtend
:boolean
- Whether we want to increase or rather decrease the vertical size of the block. Blocks next to dividing lines are meant to be increased, blocks that span the whole page width are meant to be decreased.
Returns
tuple
- The corrected top and bottom heights of the block.
Expand source code Browse git
def adjustVertical( C, info, blurred, pageH, left, right, yMin, yMinLee, yMax, yMaxLee, preferExtend ): """Adjust the height of blocks. When we determine the vertical sizes of blocks from the vertical block separators on the page, we may find that these separators are too short. We remedy this by finding the line divisision of the ink left and right from the dividing line, and enlarging the blocks left and right so that they contain complete lines. Parameters ---------- C: object Configuration settings info: function To write messages to the console blurred: image as np array The input image. It must be the `blurred` stage of the source image, which is blurred and inverted. pageH: int size of a full page in pixels left, right: int The left and right edges of the block yMin: integer the initial top edge of the block yMinLee: integer the top edge of the block when the leeway is applied yMax: integer the initial bottom edge of the block yMaxLee: integer the bottom edge of the block when the leeway is applied preferExtend: boolean Whether we want to increase or rather decrease the vertical size of the block. Blocks next to dividing lines are meant to be increased, blocks that span the whole page width are meant to be decreased. Returns ------- tuple The corrected top and bottom heights of the block. """ theYMin = None theYMax = None if yMin == yMinLee: theYMin = yMin if yMax == yMaxLee: theYMax = yMax if theYMin is not None and theYMax is not None: return (theYMin, theYMax) lines = getInkY( C, info, blurred, pageH, left, yMinLee, right, yMaxLee, False, imgOut=None ) normHLee = yMaxLee - yMinLee topProper = yMin - yMinLee botProper = yMax - yMinLee normLines = applyBandOffset(C, normHLee, "main", lines) if theYMin is None: if preferExtend: # look for the first lower boundary in the top of the strict region # then take the corresponding upper boundary for (i, (up, lo)) in enumerate(normLines): if up <= topProper and lo >= topProper: theYMin = up + yMinLee break else: # look for the first upper boundary in the top of the strict region for (up, lo) in normLines: if up >= topProper and lo <= botProper: theYMin = up + yMinLee break if theYMin is None: theYMin = yMin if theYMax is None: if preferExtend: # look for the last upper boundary in the bottom of the strict region # then take the corresponding lower boundary for (i, (up, lo)) in enumerate(reversed(normLines)): if up <= botProper and lo >= botProper: theYMax = lo + yMinLee break else: # look for the last lower boundary in the bottom of the strict region for (up, lo) in reversed(normLines): if lo <= botProper and up >= topProper: theYMax = lo + yMinLee break if theYMax is None: theYMax = yMax return (theYMin, theYMax)
def applyHRules(C, stages, stretchesH, stripes, blocks, batch, boxed)
-
Trims regions above horizontal top lines and below bottom lines.
Inspect the horizontal strokes and specifiy which ones are top separators and which ones are bottom separators.
First we map each horizontal stretch to one of the page stripes. If a stretch occurs between stripes, we map it to the stripe above.
A horizontal stroke is a top separator if * it is mapped to the first stripe and * it is situated in the top fragment of the page.
We mark the discarded material on the layout page by overlaying it with gray.
Parameters
C
:object
- Configuration settings
stages
:dict
- We need access to several intermediate results.
stretchesH
:dict
- Horizontal line segments per y-coordinate, as delivered by
getStretches()
. stripes
:list
- The preliminary stripe division of the page, as delivered by
getStripes()
. blocks
:dict
- The blocks as delivered by
getBlocks()
. boxed
:boolean
- Whether we run in boxed mode (generate boxes around wiped marks).
Returns
None
- The blocks dict will be updated: each block value gets a new key
inner
with the bounding box info after stripping the top and bottom material.
Expand source code Browse git
def applyHRules(C, stages, stretchesH, stripes, blocks, batch, boxed): """Trims regions above horizontal top lines and below bottom lines. Inspect the horizontal strokes and specifiy which ones are top separators and which ones are bottom separators. First we map each horizontal stretch to one of the page stripes. If a stretch occurs between stripes, we map it to the stripe above. A horizontal stroke is a top separator if * it is mapped to the first stripe **and** * it is situated in the top fragment of the page. We mark the discarded material on the layout page by overlaying it with gray. Parameters ---------- C: object Configuration settings stages: dict We need access to several intermediate results. stretchesH: dict Horizontal line segments per y-coordinate, as delivered by `getStretches`. stripes: list The preliminary stripe division of the page, as delivered by `getStripes`. blocks: dict The blocks as delivered by `getBlocks`. boxed: boolean Whether we run in boxed mode (generate boxes around wiped marks). Returns ------- None The blocks dict will be updated: each block value gets a new key `inner` with the bounding box info after stripping the top and bottom material. """ mColor = C.marginRGB whit = C.whiteGRS white = C.whiteRGB letterColor = C.letterRGB normalized = stages["normalized"] demargined = normalized.copy() stages["demargined"] = demargined if not batch: layout = stages["layout"] if not batch or boxed: normalizedC = stages["normalizedC"] demarginedC = normalizedC.copy() stages["demarginedC"] = demarginedC (maxH, maxW) = normalized.shape[0:2] topCriterion = maxH / 6 topXCriterion = maxH / 4 for ((stripe, block), data) in blocks.items(): (bL, bT, bR, bB) = data["box"] x = data["sep"] top = None bottom = None for (y, xs) in sorted(stretchesH.items()): if y < bT: continue if bB < y: break for (x1, x2, thickness) in xs: if x is not None: if block == "l" and x1 >= x: continue if block == "r" and x2 <= x: continue isTop = stripe == 0 and ( len(stripes) == 1 and y < topCriterion or len(stripes) > 1 and y < topXCriterion ) if isTop: top = y + 2 * thickness + 2 else: if bottom is None: bottom = y - 2 * thickness - 2 if not batch: addHStroke( layout, isTop, stripe, block, thickness, y, x1, x2, letterColor, ) top = bT if top is None else top bottom = bB if bottom is None else bottom left = bL + 2 right = bR - 2 data["inner"] = (left, top, right, bottom) if top != bT: if not batch: overlay(layout, left, bT + 2, right, top, white, mColor) cv2.rectangle(demargined, (left, bT), (right, top), whit, -1) if not batch or boxed: overlay(demarginedC, left, bT + 2, right, top, white, mColor) if bottom != bB: if not batch: overlay(layout, left, bottom, right, bB - 2, white, mColor) cv2.rectangle(demargined, (left, bottom), (right, bB), whit, -1) if not batch or boxed: overlay(demarginedC, left, bottom, right, bB - 2, white, mColor)
def getBlocks(C, info, stages, pageH, stripes, stretchesH, batch)
-
Fine-tune stripes into blocks.
We enlarge the stripes vertically by roughly a line height and call
adjustVertical()
to get precise vertical demarcations for the blocks at both sides of the stripe if there is one or else for the undivided stripe.The idea is:
If a stripe has a vertical bar, we slightly extend the boxes left and right so that the top and bottom lines next to the bar are completely included.
If a stripe has no vertical bar, we shrink the box so that partial top and bottom lines are delegated to the boxes above and below. We only shrink if the box is close to the boxes above or below. We do not grow boxes across significant horizontal strokes.
We write the box layout unto the
layout
layer.Parameters
C
:object
- Configuration settings
stages
:dict
- We need access to several intermediate results.
pageH
:int
- The height of a full page in pixels (the image might be a fraction of a page)
stripes
:list
- The preliminary stripe division of the page, as delivered by
getStripes()
. stretchesH
:list
- The horizontal stretches across which we do not shrink of enlarge
batch
:boolean
- Whether we run in batch mode.
Returns
dict
-
Blocks keyed by stripe number and block specification (one of
""
,"l"
,"r"
). The values form dicts themselves, with in particular the bounding box information under keybox
specified as four numbers: left, top, right, bottom.The dict is ordered.
Expand source code Browse git
def getBlocks(C, info, stages, pageH, stripes, stretchesH, batch): """Fine-tune stripes into blocks. We enlarge the stripes vertically by roughly a line height and call `adjustVertical` to get precise vertical demarcations for the blocks at both sides of the stripe if there is one or else for the undivided stripe. The idea is: If a stripe has a vertical bar, we slightly extend the boxes left and right so that the top and bottom lines next to the bar are completely included. If a stripe has no vertical bar, we shrink the box so that partial top and bottom lines are delegated to the boxes above and below. We only shrink if the box is close to the boxes above or below. We do not grow boxes across significant horizontal strokes. We write the box layout unto the `layout` layer. Parameters ---------- C: object Configuration settings stages: dict We need access to several intermediate results. pageH: int The height of a full page in pixels (the image might be a fraction of a page) stripes: list The preliminary stripe division of the page, as delivered by `getStripes`. stretchesH: list The horizontal stretches across which we do not shrink of enlarge batch: boolean Whether we run in batch mode. Returns ------- dict Blocks keyed by stripe number and block specification (one of `""`, `"l"`, `"r"`). The values form dicts themselves, with in particular the bounding box information under key `box` specified as four numbers: left, top, right, bottom. The dict is ordered. """ marginX = C.blockMarginX blockColor = C.blockRGB letterColor = C.letterRGB blurred = stages["blurred"] normalized = stages["normalized"] (maxH, maxW) = normalized.shape[0:2] leeHeight = int(pageH // 20) blocks = {} upperHStretch = min(stretchesH) if stretchesH else 0 lowerHStretch = max(stretchesH) if stretchesH else maxH if not batch: layout = stages["layout"] for (stripe, (x, yMin, yMax)) in enumerate(stripes): yMinLee = max((0, yMin - leeHeight)) yMaxLee = min((maxH, yMax + leeHeight)) if x is None: (theYMin, theYMax) = adjustVertical( C, info, blurred, pageH, 0, maxW, yMin, yMinLee, yMax, yMaxLee, False ) blocks[(stripe, "")] = dict( box=(marginX, theYMin, maxW - marginX, theYMax), sep=x, ) if not batch: cv2.rectangle( layout, (marginX, theYMin), (maxW - marginX, theYMax), blockColor, 4, ) addBlockName(layout, theYMin, 0, maxW, marginX, letterColor, stripe, "") else: yMinLeeBound = ( yMinLee if upperHStretch == 0 or upperHStretch > yMin else max((yMinLee, max(y for y in stretchesH if y <= yMin))) ) yMaxLeeBound = ( yMaxLee if lowerHStretch == maxH or lowerHStretch < yMax else min((yMaxLee, min(y for y in stretchesH if y >= yMax))) ) (theYMinL, theYMaxL) = adjustVertical( C, info, blurred, pageH, 0, x, yMin, yMinLeeBound, yMax, yMaxLeeBound, True, ) (theYMinR, theYMaxR) = adjustVertical( C, info, blurred, pageH, x, maxW, yMin, yMinLeeBound, yMax, yMaxLeeBound, True, ) blocks[(stripe, "l")] = dict( box=(marginX, theYMinL, x - marginX, theYMaxL), sep=x ) blocks[(stripe, "r")] = dict( box=(x + marginX, theYMinR, maxW - marginX, theYMaxR), sep=x ) if not batch: cv2.rectangle( layout, (marginX, theYMinL), (x - marginX, theYMaxL), blockColor, 4, ) addBlockName(layout, theYMinL, 0, x, marginX, letterColor, stripe, "l") cv2.rectangle( layout, (x + marginX, theYMinR), (maxW - marginX, theYMaxR), blockColor, 4, ) addBlockName( layout, theYMinR, x, maxW, marginX, letterColor, stripe, "r" ) return collections.OrderedDict(sorted(blocks.items()))
def getStretches(C, info, stages, pageSize, horizontal, batch)
-
Gets significant horizontal or vertical strokes.
Significant strokes are those that are not part of letters, but ones that are used as separators, e.g. of footnotes and blocks.
We single out 1-pixel wide lines longer than a small threshold in the appropriate direction, and blacken the rest. Then we blur in the perpendicular direction. Now we single out longer 1-pixel wide lines and cluster in the perpendicular direction.
Clusters are line segments with nearly the same constant coordinate. If we do horizontal lines, clusters are pairs of x coordinates for one y coordinate. If we do vertical lines, clusters are pairs of y coordinates for one x coordinate. We return the clusters, i.e. a dict keyed by the fixed coordinate and valued by the pair of segment coordinates.
Parameters
C
:object
- The configuration object of the book engine.
info
:function
- To write messages to the console
stages
:dict
- Intermediate cv2 images, keyed by stage name
pageSize
:int
- The width or height in pixels of a complete page. Note that the image we work with, might be a fraction of a page
horizontal
:boolean
- Whether we do horizontal of vertical lines.
batch
:boolean
- Whether we run in batch mode.
Returns
dict
- Per fixed coordinate the list of line segments on that coordinate. A line segment is specified by its begin and end values and the thickness of the cluster it is in.
Expand source code Browse git
def getStretches(C, info, stages, pageSize, horizontal, batch): """Gets significant horizontal or vertical strokes. Significant strokes are those that are not part of letters, but ones that are used as separators, e.g. of footnotes and blocks. We single out 1-pixel wide lines longer than a small threshold in the appropriate direction, and blacken the rest. Then we blur in the perpendicular direction. Now we single out longer 1-pixel wide lines and cluster in the perpendicular direction. Clusters are line segments with nearly the same constant coordinate. If we do horizontal lines, clusters are pairs of x coordinates for one y coordinate. If we do vertical lines, clusters are pairs of y coordinates for one x coordinate. We return the clusters, i.e. a dict keyed by the fixed coordinate and valued by the pair of segment coordinates. Parameters ---------- C: object The configuration object of the book engine. info: function To write messages to the console stages: dict Intermediate cv2 images, keyed by stage name pageSize: int The width or height in pixels of a complete page. Note that the image we work with, might be a fraction of a page horizontal: boolean Whether we do horizontal of vertical lines. batch: boolean Whether we run in batch mode. Returns ------- dict Per fixed coordinate the list of line segments on that coordinate. A line segment is specified by its begin and end values and the thickness of the cluster it is in. """ debug = C.debug strokeColor = C.horizontalStrokeRGB if horizontal else C.verticalStrokeRGB normalized = stages["normalized"] img = normalized if horizontal else normalized.T label = "HOR" if horizontal else "VER" if not batch: layout = stages["layout"] out = layout if horizontal else cv2.transpose(layout) minLength = int(pageSize // 30 if horizontal else pageSize // 50) afterLength = int(pageSize // 10 if horizontal else pageSize // 17) # initial blur initBlur = (13, 7) if horizontal else (7, 13) blurred = cv2.GaussianBlur(img, initBlur, 0, 0) (th, threshed) = cv2.threshold( blurred, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU ) # collect lines of a minimal length sliced = threshed.copy() for (n, row) in enumerate(sliced): for (val, start, length) in zip(*findRuns(row)): if val == 255: if length < minLength: row[start : start + length] = 0 if debug > 1: showImage(sliced if horizontal else sliced.T) # second blur, now stronger strongBlur = (21, 11) if horizontal else (11, 21) blurred = cv2.GaussianBlur(sliced, strongBlur, 0, 0) (th, threshed) = cv2.threshold( blurred, 50, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU ) if debug > 1: showImage(threshed if horizontal else threshed.T) # collect lines of a certain length, longer than before lines = collections.defaultdict(set) for (n, row) in enumerate(threshed): for (val, start, length) in zip(*findRuns(row)): if val == 255: if length >= afterLength: lines[n] |= set(range(start, start + length)) # cluster lines in bins corresponding to their constant coordinates: # horizontal lines are clustered in bins on their y coordinate. # vertical lines are clustered in bins on their x coordinate. bins = [] for n in sorted(lines): found = False for (i, (b, e)) in enumerate(bins): if b - 3 <= n <= e + 3: if n < b: bins[i][0] = n if n > e: bins[i][1] = n found = True break if not found: bins.append([n, n]) # combine the segments of all lines that are in the same bin stretches = {} for (b, e) in bins: middle = int((b + e) // 2) thickness = int((abs(e - b) + 1) // 2) if thickness <= 1: continue theseStretches = set() for n in range(b, e): if n in lines: theseStretches |= lines[n] segments = [] for (m1, m2) in rangesFromSet(theseStretches): segments.append((m1, m2 + 1, thickness)) stretches[middle] = segments for (n, segments) in sorted(stretches.items()): for (f, t, half) in segments: info(f"{label} @ {n:>4} thick={half:>2} from {f:>4} to {t:>4}", tm=False) if not batch: cv2.rectangle(out, (f, n - half - 2), (t, n + half + 2), strokeColor, 3) if not batch: stages["layout"] = out if horizontal else cv2.transpose(out) if not batch and debug > 1: showImage(stages["layout"]) return stretches
def getStripes(stages, stretchesV)
-
Infer horizontal stripes from a set of vertical bars.
A vertical bar defines a stripe on the page, i.e. a horizontal band that contains that bar.
Between the vertical bars there are also stripes, they are undivided stripes.
We assume the vertical bars split the page in two portions, and not more, and that they occur more or less in the middle of the page.
If many vertical bars have been detected, we sort them by y1 ascending and then y2 descending and then by x.
We filter the bars: if the last bar reached to y = height, we only consider bars that start lower than height.
Fine tuning needed later on
The vertical strokes give a rough estimate: it is possible that they start and end in the middle of the lines beside them. We will need histograms for the fine tuning.
Parameters
stages
:dict
- We need access to the normalized stage to get the page size.
stretchesV
:dict
- Vertical line segments per x-coordinate, as delivered by
getStretches()
.
Returns
list
- A list of stripes, specified as (x, y1, y2) values,
where the y-coordinates y1 and y2 specify the vertical extent of the stripe,
and x is the x coordinate of the dividing vertical stroke if there is one
and
None
otherwise.
Expand source code Browse git
def getStripes(stages, stretchesV): """Infer horizontal stripes from a set of vertical bars. A vertical bar defines a stripe on the page, i.e. a horizontal band that contains that bar. Between the vertical bars there are also stripes, they are undivided stripes. We assume the vertical bars split the page in two portions, and not more, and that they occur more or less in the middle of the page. If many vertical bars have been detected, we sort them by y1 ascending and then y2 descending and then by x. We filter the bars: if the last bar reached to y = height, we only consider bars that start lower than height. !!! note "Fine tuning needed later on" The vertical strokes give a rough estimate: it is possible that they start and end in the middle of the lines beside them. We will need histograms for the fine tuning. Parameters ---------- stages: dict We need access to the normalized stage to get the page size. stretchesV: dict Vertical line segments per x-coordinate, as delivered by `getStretches`. Returns ------- list A list of stripes, specified as (x, y1, y2) values, where the y-coordinates y1 and y2 specify the vertical extent of the stripe, and x is the x coordinate of the dividing vertical stroke if there is one and `None` otherwise. """ normalized = stages["normalized"] (maxH, maxW) = normalized.shape[0:2] lastHeight = 0 segments = [] for (x, ys) in stretchesV.items(): for (y1, y2, thickness) in ys: segments.append((y1, y2, x, thickness)) stripes = [] for (y1, y2, x, thickness) in sorted( segments, key=lambda z: (z[0], -z[1], -z[3], -z[2] or -1) ): if y1 > lastHeight: stripes.append((None, lastHeight, y1)) stripes.append((x, y1, y2)) lastHeight = y2 if lastHeight < maxH: stripes.append((None, lastHeight, maxH)) return stripes
def grayInterBlocks(C, stages, blocks)
-
Overlay the space between blocks with gray.
Remove also the empty blocks from the block list.
Parameters
C
:object
- Configuration settings
stages
:dict
- We need access to several intermediate results.
blocks
:dict
- The blocks as delivered by
getBlocks()
. The blocks dict will be updated: empty blocks will be deleted from it. with the band data.
Returns
None.
Expand source code Browse git
def grayInterBlocks(C, stages, blocks): """Overlay the space between blocks with gray. Remove also the empty blocks from the block list. Parameters ---------- C: object Configuration settings stages: dict We need access to several intermediate results. blocks: dict The blocks as delivered by `getBlocks`. The blocks dict will be updated: empty blocks will be deleted from it. with the band data. Returns ------- None. """ mColor = C.marginRGB white = C.whiteRGB layout = stages["layout"] (maxH, maxW) = layout.shape[0:2] prevBB = [0, 0] prevX = None maxStripe = max(x[0] for x in blocks) marginX = C.blockMarginX # overlay the space between blocks for ((stripe, block), data) in sorted(blocks.items()): bT = data["box"][1] bB = data["box"][3] x = data["sep"] if block == "": if prevX is None: pB = prevBB[0] overlay(layout, marginX, pB, maxW - marginX, bT, white, mColor) else: for (i, pB) in enumerate(prevBB): if pB < bT: (lf, rt) = ( (marginX, prevX - marginX) if i == 0 else (prevX + marginX, maxW - marginX) ) overlay(layout, lf, pB, rt, bT, white, mColor) prevBB = [bB, bB] prevX = None elif block == "l": pB = prevBB[0] if pB < bT: overlay(layout, marginX, pB, x - marginX, bT, white, mColor) prevBB[0] = bB prevX = x elif block == "r": pB = prevBB[1] if pB < bT: overlay(layout, x + marginX, pB, maxW - marginX, bT, white, mColor) prevBB[1] = bB prevX = x if stripe == maxStripe: if block == "": if bB < maxH: overlay(layout, marginX, bB, maxW - marginX, maxH, white, mColor) elif block == "l": if bB < maxH: overlay(layout, marginX, bB, x - marginX, maxH, white, mColor) elif block == "r": if bB < maxH: overlay( layout, bB, maxW - marginX, maxH, white, x + marginX, mColor )