|
import itertools |
|
import numpy as np |
|
from typing import List, Set |
|
from collections import Counter |
|
import networkx as nx |
|
from shapely.geometry import Polygon |
|
|
|
from ..utils import TextBlock, Quadrilateral, quadrilateral_can_merge_region |
|
|
|
def split_text_region( |
|
bboxes: List[Quadrilateral], |
|
connected_region_indices: Set[int], |
|
width, |
|
height, |
|
gamma = 0.5, |
|
sigma = 2 |
|
) -> List[Set[int]]: |
|
|
|
connected_region_indices = list(connected_region_indices) |
|
|
|
|
|
if len(connected_region_indices) == 1: |
|
return [set(connected_region_indices)] |
|
|
|
|
|
if len(connected_region_indices) == 2: |
|
fs1 = bboxes[connected_region_indices[0]].font_size |
|
fs2 = bboxes[connected_region_indices[1]].font_size |
|
fs = max(fs1, fs2) |
|
|
|
|
|
|
|
|
|
|
|
if bboxes[connected_region_indices[0]].distance(bboxes[connected_region_indices[1]]) < (1 + gamma) * fs \ |
|
and abs(bboxes[connected_region_indices[0]].angle - bboxes[connected_region_indices[1]].angle) < 0.2 * np.pi: |
|
return [set(connected_region_indices)] |
|
else: |
|
return [set([connected_region_indices[0]]), set([connected_region_indices[1]])] |
|
|
|
|
|
G = nx.Graph() |
|
for idx in connected_region_indices: |
|
G.add_node(idx) |
|
for (u, v) in itertools.combinations(connected_region_indices, 2): |
|
G.add_edge(u, v, weight=bboxes[u].distance(bboxes[v])) |
|
|
|
edges = nx.algorithms.tree.minimum_spanning_edges(G, algorithm='kruskal', data=True) |
|
edges = sorted(edges, key=lambda a: a[2]['weight'], reverse=True) |
|
distances_sorted = [a[2]['weight'] for a in edges] |
|
fontsize = np.mean([bboxes[idx].font_size for idx in connected_region_indices]) |
|
distances_std = np.std(distances_sorted) |
|
distances_mean = np.mean(distances_sorted) |
|
std_threshold = max(0.3 * fontsize + 5, 5) |
|
|
|
b1, b2 = bboxes[edges[0][0]], bboxes[edges[0][1]] |
|
max_poly_distance = Polygon(b1.pts).distance(Polygon(b2.pts)) |
|
max_centroid_alignment = min(abs(b1.centroid[0] - b2.centroid[0]), abs(b1.centroid[1] - b2.centroid[1])) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (distances_sorted[0] <= distances_mean + distances_std * sigma \ |
|
or distances_sorted[0] <= fontsize * (1 + gamma)) \ |
|
and (distances_std < std_threshold \ |
|
or max_poly_distance == 0 and max_centroid_alignment < 5): |
|
return [set(connected_region_indices)] |
|
else: |
|
|
|
|
|
G = nx.Graph() |
|
for idx in connected_region_indices: |
|
G.add_node(idx) |
|
|
|
for edge in edges[1:]: |
|
G.add_edge(edge[0], edge[1]) |
|
ans = [] |
|
for node_set in nx.algorithms.components.connected_components(G): |
|
ans.extend(split_text_region(bboxes, node_set, width, height)) |
|
return ans |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def merge_bboxes_text_region(bboxes: List[Quadrilateral], width, height): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
G = nx.Graph() |
|
for i, box in enumerate(bboxes): |
|
G.add_node(i, box=box) |
|
|
|
for ((u, ubox), (v, vbox)) in itertools.combinations(enumerate(bboxes), 2): |
|
|
|
if quadrilateral_can_merge_region(ubox, vbox, aspect_ratio_tol=1.3, font_size_ratio_tol=2, |
|
char_gap_tolerance=1, char_gap_tolerance2=3): |
|
G.add_edge(u, v) |
|
|
|
|
|
region_indices: List[Set[int]] = [] |
|
for node_set in nx.algorithms.components.connected_components(G): |
|
region_indices.extend(split_text_region(bboxes, node_set, width, height)) |
|
|
|
|
|
for node_set in region_indices: |
|
|
|
nodes = list(node_set) |
|
txtlns: List[Quadrilateral] = np.array(bboxes)[nodes] |
|
|
|
|
|
fg_r = round(np.mean([box.fg_r for box in txtlns])) |
|
fg_g = round(np.mean([box.fg_g for box in txtlns])) |
|
fg_b = round(np.mean([box.fg_b for box in txtlns])) |
|
bg_r = round(np.mean([box.bg_r for box in txtlns])) |
|
bg_g = round(np.mean([box.bg_g for box in txtlns])) |
|
bg_b = round(np.mean([box.bg_b for box in txtlns])) |
|
|
|
|
|
dirs = [box.direction for box in txtlns] |
|
majority_dir_top_2 = Counter(dirs).most_common(2) |
|
if len(majority_dir_top_2) == 1 : |
|
majority_dir = majority_dir_top_2[0][0] |
|
elif majority_dir_top_2[0][1] == majority_dir_top_2[1][1] : |
|
max_aspect_ratio = -100 |
|
for box in txtlns : |
|
if box.aspect_ratio > max_aspect_ratio : |
|
max_aspect_ratio = box.aspect_ratio |
|
majority_dir = box.direction |
|
if 1.0 / box.aspect_ratio > max_aspect_ratio : |
|
max_aspect_ratio = 1.0 / box.aspect_ratio |
|
majority_dir = box.direction |
|
else : |
|
majority_dir = majority_dir_top_2[0][0] |
|
|
|
|
|
if majority_dir == 'h': |
|
nodes = sorted(nodes, key=lambda x: bboxes[x].centroid[1]) |
|
elif majority_dir == 'v': |
|
nodes = sorted(nodes, key=lambda x: -bboxes[x].centroid[0]) |
|
txtlns = np.array(bboxes)[nodes] |
|
|
|
|
|
yield txtlns, (fg_r, fg_g, fg_b), (bg_r, bg_g, bg_b) |
|
|
|
async def dispatch(textlines: List[Quadrilateral], width: int, height: int, verbose: bool = False) -> List[TextBlock]: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_regions: List[TextBlock] = [] |
|
for (txtlns, fg_color, bg_color) in merge_bboxes_text_region(textlines, width, height): |
|
total_logprobs = 0 |
|
for txtln in txtlns: |
|
total_logprobs += np.log(txtln.prob) * txtln.area |
|
total_logprobs /= sum([txtln.area for txtln in textlines]) |
|
|
|
font_size = int(min([txtln.font_size for txtln in txtlns])) |
|
angle = np.rad2deg(np.mean([txtln.angle for txtln in txtlns])) - 90 |
|
if abs(angle) < 3: |
|
angle = 0 |
|
lines = [txtln.pts for txtln in txtlns] |
|
texts = [txtln.text for txtln in txtlns] |
|
region = TextBlock(lines, texts, font_size=font_size, angle=angle, prob=np.exp(total_logprobs), |
|
fg_color=fg_color, bg_color=bg_color) |
|
text_regions.append(region) |
|
return text_regions |
|
|