Spaces:
Sleeping
Sleeping
| # References | |
| # https://sashamaps.net/docs/resources/20-colors/ | |
| import numpy as np | |
| import cv2 | |
| from scipy import ndimage as ndi | |
| from PIL import Image, ImageDraw, ImageCms, ExifTags, ImageEnhance | |
| import requests | |
| from pathlib import Path | |
| import pandas as pd | |
| from scipy.sparse import coo_matrix | |
| from skimage.feature import peak_local_max | |
| from skimage.morphology import local_maxima | |
| from skimage.segmentation import watershed | |
| from moviepy.video.io.bindings import mplfig_to_npimage | |
| import io | |
| import os | |
| from enum import Enum | |
# Palette of 20 RGB triples (see the sashamaps "20 colors" reference cited at
# the top of this file). Code in this module indexes it by label / cluster id.
COLORS = (
    (230, 25, 75),
    (60, 180, 75),
    (255, 255, 25),
    (0, 130, 200),
    (245, 130, 48),
    (145, 30, 180),
    (70, 240, 250),
    (240, 50, 230),
    (210, 255, 60),
    (250, 190, 212),
    (0, 128, 128),
    (220, 190, 255),
    (170, 110, 40),
    (255, 250, 200),
    (128, 0, 0),
    (170, 255, 195),
    (128, 128, 0),
    (255, 215, 180),
    (0, 0, 128),
    (128, 128, 128),
)
class PC_TYPE(Enum):
    """Closed set of five method identifiers.

    NOTE(review): the member names look like key-point / corner detection
    methods, but nothing in this part of the file consumes the enum, so
    confirm the intended meaning against its callers.
    """

    HARRIS = 1
    EDGES_CONTOURS = 2
    GFTT = 3
    FAST = 4
    KAZE = 5
| def _to_2d(img): | |
| # it use just first channel. if you want rgb2gray, use _to_grayscale | |
| if img.ndim == 3: | |
| return img[:, :, 0] | |
| else: | |
| return img | |
| def _to_3d(img): | |
| if img.ndim == 2: | |
| return np.dstack([img, img, img]) | |
| else: | |
| return img | |
def _to_byte(img: Image.Image, format) -> bytes:
    """Encode a PIL image into bytes using the given file format.

    Args:
        img: PIL image to serialize.
        format: Format name forwarded to ``Image.save`` (e.g. ``"PNG"``).

    Returns:
        The encoded image as a ``bytes`` object.
    """
    # BytesIO is an in-memory file-like buffer, which is what Image.save expects.
    buffer = io.BytesIO()
    img.save(buffer, format=format)
    return buffer.getvalue()
| def _get_width_and_height(img): | |
| if img.ndim == 2: | |
| h, w = img.shape | |
| else: | |
| h, w, _ = img.shape | |
| return w, h | |
def _get_resolution(img):
    """Return the pixel count (width * height) of ``img``."""
    width, height = _get_width_and_height(img)
    return width * height
def _to_pil(img):
    """Return ``img`` as a PIL image.

    PIL images pass through untouched; anything else is handed to
    ``Image.fromarray`` with ``mode="RGB"``.
    """
    if isinstance(img, Image.Image):
        return img
    return Image.fromarray(img, mode="RGB")
| def _to_array(img): | |
| img = np.array(img) | |
| return img | |
| def _bool_to_uint8(img): | |
| uint8 = img.astype("uint8") | |
| if ( | |
| np.array_equal(np.unique(uint8), np.array([0, 1])) | |
| or np.array_equal(np.unique(uint8), np.array([0])) | |
| or np.array_equal(np.unique(uint8), np.array([1])) | |
| ): | |
| return uint8 * 255 | |
| else: | |
| return uint8 | |
def _figure_to_array(fig):
    """Render ``fig`` to an array using moviepy's ``mplfig_to_npimage``."""
    return mplfig_to_npimage(fig)
def _preprocess_image(img):
    """Normalize an array into a displayable 3-channel image.

    * int32 arrays are treated as segmentation maps and recolored.
    * bool arrays become uint8 0/255.
    * Remaining 2-D arrays are stacked to 3 channels when they contain only
      0 and/or 255, and passed through the jet colormap otherwise.
    """
    if img.dtype == "int32":
        img = _repaint_segmentation_map(img)
    if img.dtype == "bool":
        img = img.astype("uint8") * 255
    if img.ndim != 2:
        return img
    present = np.unique(img)
    is_mask = any(np.array_equal(present, ref) for ref in ([0, 255], [0], [255]))
    return _to_3d(img) if is_mask else _apply_jet_colormap(img)
def _blend_two_images(img1, img2, alpha=0.5):
    """Alpha-blend two images with ``Image.blend`` and return an array."""
    first, second = _to_pil(img1), _to_pil(img2)
    return _to_array(Image.blend(im1=first, im2=second, alpha=alpha))
def _repaint_segmentation_map(seg_map):
    """Color a label map using ``COLORS``.

    Label 0 stays black; every other label ``k`` is painted with
    ``COLORS[k % len(COLORS)]``. The three uint8 channels are stacked along
    the last axis.
    """
    planes = [_get_canvas_same_size_as_image(seg_map, black=True) for _ in range(3)]
    color_slot = seg_map % len(COLORS)
    foreground = seg_map != 0
    for slot, rgb in enumerate(COLORS):
        # One mask per palette entry, reused for the three channels.
        hit = (color_slot == slot) & foreground
        for plane, value in zip(planes, rgb):
            plane[hit] = value
    return np.dstack(planes)
| def _get_canvas_same_size_as_image(img, black=False): | |
| if black: | |
| return np.zeros_like(img).astype("uint8") | |
| else: | |
| return (np.ones_like(img) * 255).astype("uint8") | |
| def _get_canvas(w, h, black=False): | |
| if black: | |
| return np.zeros((h, w, 3)).astype("uint8") | |
| else: | |
| return (np.ones((h, w, 3)) * 255).astype("uint8") | |
def _invert_image(mask):
    """Return the bitwise complement of ``mask`` after casting it to uint8."""
    as_bytes = mask.astype("uint8")
    return cv2.bitwise_not(as_bytes)
def _to_grayscale(img):
    """Convert an RGB image to single-channel gray with ``cv2.cvtColor``."""
    return cv2.cvtColor(src=img, code=cv2.COLOR_RGB2GRAY)
def _erode_mask(mask, kernel_size=3):
    """Erode ``mask`` with a square ``kernel_size`` structuring element.

    Boolean masks are first converted to uint8 0/255.
    """
    if mask.dtype == "bool":
        mask = mask.astype("uint8") * 255
    element = cv2.getStructuringElement(
        shape=cv2.MORPH_RECT, ksize=(kernel_size, kernel_size)
    )
    return cv2.erode(src=mask, kernel=element)
def _dilate_mask(mask, kernel_size=3):
    """Dilate ``mask`` with a square ``kernel_size`` structuring element.

    A ``kernel_size`` of 0 returns ``mask`` untouched. Boolean masks are
    converted to uint8 0/255 before dilation.
    """
    if kernel_size == 0:
        return mask
    if mask.dtype == "bool":
        mask = mask.astype("uint8") * 255
    element = cv2.getStructuringElement(
        shape=cv2.MORPH_RECT, ksize=(kernel_size, kernel_size)
    )
    return cv2.dilate(src=mask, kernel=element)
def _gaussian_blur_mask(mask, kernel_size=5):
    """Grow ``mask`` by Gaussian-blurring it and keeping every non-zero pixel.

    Returns a uint8 array holding 255 wherever the blurred mask is non-zero
    and 0 elsewhere.
    """
    softened = cv2.GaussianBlur(src=mask, ksize=(kernel_size, kernel_size), sigmaX=0)
    touched = softened != 0
    return touched.astype("uint8") * 255
def _blur(img, v=0.04):
    """Gaussian-blur ``img`` with a kernel proportional to its shorter side.

    The kernel size is ``round(min(w, h) * v)`` forced to an odd number
    (``// 2 * 2 + 1``), as required by ``cv2.GaussianBlur``.
    """
    w, h = _get_width_and_height(img)
    odd_size = round(min(w, h) * v) // 2 * 2 + 1
    return cv2.GaussianBlur(
        src=img.copy(order="C"), ksize=(odd_size, odd_size), sigmaX=0
    )
def _get_adaptive_thresholded_image(img, invert=False, block_size=3):
    """Binarize an RGB image with mean adaptive thresholding.

    Args:
        img: RGB image; it is converted to gray first.
        invert: Use ``THRESH_BINARY_INV`` instead of ``THRESH_BINARY``.
        block_size: Neighborhood size passed to ``cv2.adaptiveThreshold``.

    Returns:
        The thresholded image (values 0 or 255).
    """
    gray = cv2.cvtColor(src=img, code=cv2.COLOR_RGB2GRAY)
    mode = cv2.THRESH_BINARY_INV if invert else cv2.THRESH_BINARY
    return cv2.adaptiveThreshold(
        src=gray,
        maxValue=255,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
        thresholdType=mode,
        blockSize=block_size,
        C=0,
    )
| def _make_segmentation_map_rectangle(seg_map): | |
| seg_map_copied = seg_map.copy(order="C") | |
| for idx in range(1, np.max(seg_map_copied) + 1): | |
| seg_map_sub = seg_map_copied == idx | |
| nonzero_x = np.where((seg_map_sub != 0).any(axis=0))[0] | |
| nonzero_y = np.where((seg_map_sub != 0).any(axis=1))[0] | |
| if nonzero_x.size != 0 and nonzero_y.size != 0: | |
| seg_map_copied[ | |
| nonzero_y[0] : nonzero_y[-1], nonzero_x[0] : nonzero_x[-1] | |
| ] = idx | |
| return seg_map_copied | |
def _apply_jet_colormap(img):
    """Apply OpenCV's JET colormap to the inverted image ``255 - img``."""
    inverted = 255 - img
    return cv2.applyColorMap(src=inverted, colormap=cv2.COLORMAP_JET)
def _reverse_jet_colormap(img):
    """Map a jet-colored image back to the gray levels that produced it.

    A lookup table is built by coloring all 256 gray levels with
    ``_apply_jet_colormap``; every pixel of ``img`` must be one of those
    colors, otherwise the lookup raises ``KeyError``.
    """
    levels = np.arange(256, dtype=np.uint8)
    colored = _apply_jet_colormap(levels).reshape(256, 3)
    level_of_color = {tuple(color): level for color, level in zip(colored, levels)}
    return np.apply_along_axis(
        lambda pixel: level_of_color[tuple(pixel)], axis=2, arr=img
    )
| def _get_pixel_counts(arr, sort=False, include_zero=False): | |
| unique, cnts = np.unique(arr, return_counts=True) | |
| idx2cnt = dict(zip(unique, cnts)) | |
| if not include_zero: | |
| if 0 in idx2cnt: | |
| idx2cnt.pop(0) | |
| if not sort: | |
| return idx2cnt | |
| else: | |
| return dict(sorted(idx2cnt.items(), key=lambda x: x[1], reverse=True)) | |
def _combine_masks(masks):
    """Merge masks with an element-wise maximum.

    Each mask is promoted to 3 channels before merging; the accumulator
    starts as a black canvas shaped like the first mask.
    """
    merged = _get_canvas_same_size_as_image(img=masks[0], black=True)
    for current in masks:
        merged = np.maximum(_to_3d(merged), _to_3d(current))
    return merged
def _get_local_maxima_coordinates(region_score_map, region_seg_map=None, th=150):
    """Find local maxima of a score map, at most 24 per labeled region.

    Args:
        region_score_map: Score map searched for peaks.
        region_seg_map: Label map restricting the search. When omitted, it
            is built by thresholding the score map at ``th`` and labeling
            4-connected components.
        th: Threshold used only when ``region_seg_map`` is omitted.
            (Original note: 150 works better when ``src_lang="ja"``.)

    Returns:
        An ``(N, 2)`` array of peak coordinates in ``(x, y)`` order.
    """
    labels = region_seg_map
    if labels is None:
        _, binary = cv2.threshold(
            src=region_score_map, thresh=th, maxval=255, type=cv2.THRESH_BINARY
        )
        _, labels = cv2.connectedComponents(image=binary, connectivity=4)
    peaks_yx = peak_local_max(
        image=region_score_map,
        min_distance=5,
        labels=labels,
        num_peaks_per_label=24,
    )
    # peak_local_max yields (row, col); flip to (x, y).
    return peaks_yx[:, ::-1]
def _get_local_maxima_array(region_score_map, region_seg_map=None, th=150):
    """Return a boolean map that is True at local maxima of the score map.

    Args:
        region_score_map: 2-D score map searched for peaks.
        region_seg_map: Optional label map forwarded to
            ``_get_local_maxima_coordinates`` (previously it was accepted
            but silently replaced by ``None``).
        th: Threshold forwarded to ``_get_local_maxima_coordinates``.

    Returns:
        Boolean array with the shape of ``region_score_map``.
    """
    peaks_xy = _get_local_maxima_coordinates(
        region_score_map, region_seg_map=region_seg_map, th=th
    )
    local_max = np.zeros(region_score_map.shape, dtype=bool)
    # Coordinates come back as (x, y); arrays are indexed [row, col].
    local_max[peaks_xy[:, 1], peaks_xy[:, 0]] = True
    return local_max
def _mask_image(img, mask, invert=False):
    """Keep only the part of ``img`` covered by ``mask``.

    Args:
        img (PIL image or np.ndarray): Image to filter.
        mask (PIL image or np.ndarray): Mask of shape (H, W), or (H, W, C)
            in which case only its first channel is used.
        invert (bool, optional): Keep the area outside the mask instead.

    Returns:
        np.ndarray: ``img`` with pixels outside the selected area zeroed.
    """
    pixels = _to_array(img)
    selector = _to_2d(_to_array(mask))
    if invert:
        selector = _invert_image(selector)
    return cv2.bitwise_and(src1=pixels, src2=pixels, mask=selector.astype("uint8"))
def _ignore_small_regions_in_mask(mask, area_thresh=10):
    """Drop 4-connected mask components smaller than ``area_thresh`` pixels.

    Args:
        mask: Mask image; only the first channel of a 3-D mask is used.
        area_thresh: Minimum component area (in pixels) to keep.

    Returns:
        3-channel uint8 mask (0/255) containing only the kept components.
    """
    mask = _to_2d(mask)
    _, seg_map, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype("uint8"), connectivity=4
    )
    kept_labels = np.flatnonzero(stats[:, cv2.CC_STAT_AREA] >= area_thresh)
    # Label 0 is the background. Exclude it explicitly instead of dropping
    # "the first qualifying label", which removed a real component whenever
    # the background itself was smaller than the threshold.
    kept_labels = kept_labels[kept_labels != 0]
    keep = np.isin(seg_map, kept_labels)
    return _to_3d(keep.astype("uint8") * 255)
def _crop_image(img, l, t, r, b):
    """Crop ``img`` to the box (l, t, r, b), clamped to the image bounds.

    Coordinates are truncated to int; ``r`` and ``b`` are exclusive.
    """
    w, h = _get_width_and_height(img)
    top, bottom = int(max(0, t)), int(min(h, b))
    left, right = int(max(0, l)), int(min(w, r))
    return img[top:bottom, left:right, ...]
def _bboxes_to_mask(img, bboxes):
    """Rasterize bounding boxes into a 3-channel 0/255 mask shaped like ``img``.

    ``bboxes`` rows must expose ``bbox_x1``, ``bbox_y1``, ``bbox_x2`` and
    ``bbox_y2``; the x2 / y2 edges are exclusive.
    """
    canvas = _get_canvas_same_size_as_image(img=img, black=True)
    for box in bboxes.itertuples():
        rows = slice(box.bbox_y1, box.bbox_y2)
        cols = slice(box.bbox_x1, box.bbox_x2)
        canvas[rows, cols] = 255
    return _to_3d(canvas)
def _apply_watershed(mask, region_score_map, th=150):
    """Segment ``mask`` by watershed, seeded at local maxima of the score map.

    Args:
        mask: Area to segment (first channel is used).
        region_score_map: Score map; its negative is the watershed surface.
        th: Threshold forwarded to ``_get_local_maxima_array``.

    Returns:
        Label map produced by ``skimage.segmentation.watershed``.
    """
    peaks = _get_local_maxima_array(region_score_map, th=th)
    _, seeds = cv2.connectedComponents(image=peaks.astype("uint8"), connectivity=4)
    return watershed(image=-region_score_map, markers=seeds, mask=_to_2d(mask))
def _perform_watershed(score_map, score_thresh=80):
    """Watershed-segment a score map using its strong local maxima as seeds.

    Seeds are the local maxima of the score map after values below 190 are
    zeroed; the segmented area is where the score exceeds ``score_thresh``.
    """
    _, region_mask = cv2.threshold(
        src=score_map, thresh=score_thresh, maxval=255, type=cv2.THRESH_BINARY
    )
    strong_scores = score_map.copy()
    strong_scores[strong_scores < 190] = 0
    peaks = local_maxima(image=strong_scores, allow_borders=False)
    _, seeds = cv2.connectedComponents(image=peaks.astype("int8"), connectivity=8)
    return watershed(image=-score_map, markers=seeds, mask=_to_2d(region_mask))
def _get_region_segmentation_map(region_score_map, region_thresh=30):
    """Threshold the score map at ``region_thresh`` and watershed the result."""
    _, binary = cv2.threshold(
        src=region_score_map, thresh=region_thresh, maxval=255, type=cv2.THRESH_BINARY
    )
    return _apply_watershed(region_score_map=region_score_map, mask=binary)
def _combine_two_segmentation_maps(seg_map1, seg_map2):
    """Merge two label maps into one and blank out the most frequent label.

    Non-zero labels of ``seg_map2`` are shifted by the number of distinct
    values in ``seg_map1`` minus one before the maps are added. The label
    with the largest pixel count in the merged map is then set to 0.
    """
    shifted = _mask_image(
        img=seg_map2 + len(np.unique(seg_map1)) - 1, mask=(seg_map2 != 0)
    )
    merged = seg_map1 + shifted
    counts = _get_pixel_counts(merged, sort=True, include_zero=True)
    dominant = list(counts)[0]
    return _mask_image(img=merged, mask=(merged != dominant))
def _get_image_segmentation_map(img, region_score_map=None, block_size=3):
    """Segment an image from its adaptive-threshold components.

    The image is binarized twice (normal and inverted adaptive threshold),
    each result is labeled with 4-connected components, and the two label
    maps are merged with ``_combine_two_segmentation_maps``.

    Args:
        img: RGB image.
        region_score_map: Optional score map. When given, the image is
            first restricted to the area where the score exceeds 20,
            dilated with a 16-pixel kernel.
        block_size: Neighborhood size for the adaptive threshold.

    Returns:
        The combined label map.
    """
    if region_score_map is not None:
        _, region_mask = cv2.threshold(
            src=region_score_map, thresh=20, maxval=255, type=cv2.THRESH_BINARY
        )
        # _dilate_mask's parameter is ``mask``; the former ``img=`` keyword
        # raised TypeError on every call that took this branch.
        region_mask = _dilate_mask(mask=region_mask, kernel_size=16)
        img_masked = _mask_image(img=img, mask=region_mask)
    else:
        img_masked = img
    img_thr1 = _get_adaptive_thresholded_image(
        img=img_masked, invert=False, block_size=block_size
    )
    img_thr2 = _get_adaptive_thresholded_image(
        img=img_masked, invert=True, block_size=block_size
    )
    _, seg_map1 = cv2.connectedComponents(image=img_thr1, connectivity=4)
    _, seg_map2 = cv2.connectedComponents(image=img_thr2, connectivity=4)
    return _combine_two_segmentation_maps(seg_map1=seg_map1, seg_map2=seg_map2)
def _get_segmentation_map_overlapping_mask(seg_map, mask, overlap_thresh=0.6):
    """Select the segments that lie mostly inside ``mask``.

    A segment is kept when the fraction of its pixels covered by non-zero
    ``mask`` pixels is strictly greater than ``overlap_thresh``.

    Returns:
        3-channel uint8 mask (0/255) covering the kept segments.
    """
    totals = _get_pixel_counts(seg_map, sort=True, include_zero=False)
    covered_map = _mask_image(img=seg_map, mask=(mask != 0))
    covered = _get_pixel_counts(covered_map, sort=False, include_zero=False)
    mostly_inside = [
        label
        for label, total in totals.items()
        if covered.get(label, 0) / total > overlap_thresh
    ]
    selected = np.isin(seg_map, mostly_inside).astype("uint8")
    return _to_3d(selected * 255)
def _split_segmentation_map(seg_map, pccs):
    """Split a label map by whether a segment holds an "inside" center.

    Args:
        seg_map: Integer label map.
        pccs: DataFrame with integer ``x`` / ``y`` columns and a boolean
            ``inside`` column.

    Returns:
        ``(seg_map1, seg_map2)``: the segments whose label is found under
        at least one inside center, and all remaining pixels.
    """
    inside_rows = pccs[pccs["inside"]]
    hit_labels = [seg_map[row.y, row.x] for row in inside_rows.itertuples()]
    selected = np.isin(seg_map, hit_labels)
    seg_map1 = _mask_image(img=seg_map, mask=selected)
    seg_map2 = _mask_image(img=seg_map, mask=~selected)
    return seg_map1, seg_map2
def _segmentation_map_to_mask(seg_map):
    """Turn a label map into a 3-channel 0/255 mask of its non-zero pixels."""
    foreground = seg_map != 0
    return _to_3d(foreground.astype("uint8") * 255)
def _get_pseudo_character_centers_from_mask(mask, bboxes: pd.DataFrame = None):
    """Compute the center of every connected component (character) in a mask.

    Args:
        mask: Mask image; only its first channel is used. Components are
            8-connected.
        bboxes: Optional DataFrame with ``bbox_x1``, ``bbox_y1``,
            ``bbox_x2`` and ``bbox_y2`` columns. ``None`` (the default) or
            an empty frame marks every center as inside.

    Returns:
        DataFrame with integer ``x`` / ``y`` centroid columns and a boolean
        ``inside`` column telling whether the center lies strictly inside
        at least one box.
    """
    num_labels, _, _, centroids = cv2.connectedComponentsWithStats(
        image=_to_2d(mask), connectivity=8
    )
    # Label 0 is the background, so centroids start at index 1.
    centers = [(int(cx), int(cy)) for cx, cy in centroids[1:num_labels]]
    pccs = pd.DataFrame(centers, columns=["x", "y"])
    # The default is None, which has no ``.empty``; treat it like "no boxes".
    if bboxes is None or bboxes.empty:
        pccs["inside"] = True
        return pccs
    # Broadcast (n_centers, 1) against (n_boxes,) to test all pairs at once.
    xs = pccs["x"].values[:, None]
    ys = pccs["y"].values[:, None]
    pccs["inside"] = (
        (xs > bboxes["bbox_x1"].values)
        & (xs < bboxes["bbox_x2"].values)
        & (ys > bboxes["bbox_y1"].values)
        & (ys < bboxes["bbox_y2"].values)
    ).any(axis=1)
    return pccs
def _get_pseudo_character_centers(
    region_score_map, region_seg_map=None, bboxes=pd.DataFrame()
):
    """Locate score-map peaks and flag those that fall inside a box.

    Args:
        region_score_map: Score map searched for local maxima.
        region_seg_map: Optional label map forwarded to
            ``_get_local_maxima_coordinates``.
        bboxes: DataFrame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2`` and
            ``bbox_y2`` columns; an empty frame marks every peak as inside.

    Returns:
        DataFrame with ``x``, ``y`` and boolean ``inside`` columns.
    """
    peaks = _get_local_maxima_coordinates(
        region_score_map, region_seg_map=region_seg_map
    )
    pccs = pd.DataFrame(peaks, columns=["x", "y"])
    if bboxes.empty:
        pccs["inside"] = True
        return pccs
    # Broadcast (n_peaks, 1) against (n_boxes,) to test all pairs at once.
    xs = pccs["x"].values[:, None]
    ys = pccs["y"].values[:, None]
    within_x = (xs > bboxes["bbox_x1"].values) & (xs < bboxes["bbox_x2"].values)
    within_y = (ys > bboxes["bbox_y1"].values) & (ys < bboxes["bbox_y2"].values)
    pccs["inside"] = (within_x & within_y).any(axis=1)
    return pccs
def _convert_region_score_map_to_region_mask(region_score_map, region_score_thresh=170):
    """Build a mask of the score-map regions that contain a strong peak.

    The score map is binarized at 30 and split into 4-connected components;
    a component is kept only when its maximum score reaches
    ``region_score_thresh``.

    Returns:
        3-channel uint8 mask (0/255) of the kept components.
    """
    _, binary = cv2.threshold(
        src=region_score_map, thresh=30, maxval=255, type=cv2.THRESH_BINARY
    )
    n_labels, seg_map, _, _ = cv2.connectedComponentsWithStats(
        image=_to_2d(binary), connectivity=4
    )
    kept = _get_canvas_same_size_as_image(img=binary, black=True)
    for label in range(1, n_labels):
        region = seg_map == label
        if np.max(region_score_map[region]) < region_score_thresh:
            continue
        kept[region] = 255
    return _to_3d(kept)
def _split_mask(mask, region_score_map=None, bboxes=pd.DataFrame(), th=30):
    """Split a mask into the part to erase and the part to restore.

    The two results are meant for inpainting: ``mask1`` marks what should
    be erased and ``mask2`` what should be restored. They may overlap.

    How it works: the score map (or, when it is not given, the map returned
    by ``get_dst_mask(mask)``) is turned into a label map by watershed,
    seeded at the local maxima of each thresholded region. Pseudo character
    centers are then found with ``peak_local_max`` on the score map and that
    label map, and each one is checked against the boxes. Segments holding
    an in-box center form ``seg_map1``; everything else forms ``seg_map2``.
    The portion of ``mask1`` outside every box is added to ``mask2``.

    Args:
        mask (np.ndarray): (H, W, 3) mask with values 0 or 255.
        region_score_map (np.ndarray): Region score map (CRAFT output), a
            heat map emphasizing character centers.
        bboxes (pd.DataFrame): Boxes with ``bbox_x1``, ``bbox_y1``,
            ``bbox_x2`` and ``bbox_y2`` columns.
        th: Threshold forwarded to ``_apply_watershed``.

    Returns:
        tuple of np.ndarray: ``(mask1, mask2)``.
    """
    score_map = region_score_map
    if score_map is None:
        score_map = _to_2d(get_dst_mask(mask))
    seg_map = _apply_watershed(mask=mask, region_score_map=score_map, th=th)
    pccs = _get_pseudo_character_centers(
        region_score_map=score_map, region_seg_map=seg_map, bboxes=bboxes
    )
    box_mask = _bboxes_to_mask(seg_map, bboxes)
    seg_map1, seg_map2 = _split_segmentation_map(seg_map=seg_map, pccs=pccs)
    mask1 = _segmentation_map_to_mask(seg_map1)
    outside_boxes = _to_3d(_mask_image(mask1, box_mask, invert=True))
    mask2 = _combine_masks([_segmentation_map_to_mask(seg_map2), outside_boxes])
    return mask1, mask2
def get_word_segmentation_map(region_score_map, affinity_score_map):
    """Label word regions from region and affinity score maps.

    Both maps are binarized at 70, the two masks are added, and the result
    is labeled with 4-connected components.
    """
    binary_masks = []
    for score_map in (region_score_map, affinity_score_map):
        _, binary = cv2.threshold(
            src=score_map, thresh=70, maxval=255, type=cv2.THRESH_BINARY
        )
        binary_masks.append(binary)
    word_mask = binary_masks[0] + binary_masks[1]
    _, word_labels = cv2.connectedComponents(image=word_mask, connectivity=4)
    return word_labels
def get_line_segmentation_map(line_score_map):
    """Binarize a line score map at 130 and label 4-connected components."""
    _, binary = cv2.threshold(
        src=line_score_map, thresh=130, maxval=255, type=cv2.THRESH_BINARY
    )
    _, line_labels = cv2.connectedComponents(image=binary, connectivity=4)
    return line_labels
| def _get_3d_block_segmentation_map(img, bboxes): | |
| segmentation_map_block = np.zeros( | |
| shape=(img.shape[0], img.shape[1], len(bboxes) + 1) | |
| ) | |
| for idx, (xmin, ymin, xmax, ymax) in enumerate( | |
| bboxes[["xmin", "ymin", "xmax", "ymax"]].values, start=1 | |
| ): | |
| segmentation_map_block[ymin:ymax, xmin:xmax, idx] = 255 | |
| return segmentation_map_block | |
def compare_images(img1, img2, flag=cv2.CMP_EQ):
    """Compare two images element-wise with ``cv2.compare``.

    Elements where the comparison holds become 255 and the rest 0. See the
    ``cv2.CMP_*`` constants for ``flag`` (``CMP_EQ`` marks equal elements,
    ``CMP_NE`` marks differing ones).
    """
    comparison = cv2.compare(img1, img2, flag)
    return comparison
def convert_webp_png_get_data(img: np.ndarray):
    """Round-trip an image through a temporary PNG file and return its bytes.

    The image is converted to RGB, written as PNG, and read back through
    ``load_image``; the bytes returned are the ones ``load_image`` produces
    for a local file.

    Args:
        img: Image array (or PIL image) to convert.

    Returns:
        The byte payload reported by ``load_image``.
    """
    import tempfile
    import uuid

    rgb_img = _to_pil(img).convert("RGB")
    # A unique name in the temp directory avoids clashes between concurrent
    # calls, which the former fixed "temp.png" in the working directory did
    # not. The hex name cannot contain "http", which ``load_image`` uses to
    # detect URLs.
    tmp_path = os.path.join(tempfile.gettempdir(), f"webp2png_{uuid.uuid4().hex}.png")
    try:
        rgb_img.save(tmp_path)
        _, data, _ = load_image(tmp_path, with_byte=True, with_format=True)
    finally:
        # Clean up even when saving or loading fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return data
def add_water_mark(original_img, water_mark_img_path):
    """Paste a watermark image onto the center of an image.

    A watermark larger than the image in either dimension is first shrunk,
    keeping its aspect ratio, so that it fits. The watermark's own alpha
    channel is used as the paste mask.

    Args:
        original_img: NumPy array or PIL image. A PIL image is pasted onto
            in place.
        water_mark_img_path: Path of the watermark image file.

    Returns:
        The watermarked RGB image, as an array when an array was given and
        as a PIL image otherwise.
    """
    return_np = isinstance(original_img, np.ndarray)
    if return_np:
        original_img = _to_pil(original_img)
    watermark = Image.open(water_mark_img_path).convert("RGBA")
    width_o, height_o = original_img.size
    width_wm, height_wm = watermark.size
    # Only shrink a watermark that does not fit; smaller ones keep their size.
    if width_wm > width_o or height_wm > height_o:
        # The smaller of the two ratios makes both dimensions fit.
        ratio = min(width_o / width_wm, height_o / height_wm)
        watermark = watermark.resize(
            (int(width_wm * ratio), int(height_wm * ratio)), Image.Resampling.LANCZOS
        )
        width_wm, height_wm = watermark.size
    position = ((width_o - width_wm) // 2, (height_o - height_wm) // 2)
    original_img.paste(watermark, position, watermark)
    rgb_image = original_img.convert("RGB")
    return _to_array(rgb_image) if return_np else rgb_image
def load_image(url_or_path, with_byte=False, with_format=False):
    """Load an image from a URL or a local path into a NumPy array.

    Remote images (any location containing ``"http"``) are downloaded with
    ``requests``. Modes L / P / PA / RGBA are converted to RGB, the EXIF
    orientation (3, 6 or 8) is applied, and CMYK images are converted to
    sRGB with the ICC profiles under ``resources/``. Local RGBA images are
    converted to RGB.

    Args:
        url_or_path: URL or file path (``str`` or ``pathlib.Path``).
        with_byte: Also return the image bytes. For a URL these are the
            downloaded bytes; for a local file they are a JPEG re-encoding
            of the loaded image.
        with_format: With ``with_byte``, also return the format reported
            by Pillow. Ignored when ``with_byte`` is false.

    Returns:
        ``img``, ``(img, image_bytes)`` or ``(img, image_bytes, format)``.
    """
    # Coerce first so that pathlib.Path inputs survive the substring test.
    location = str(url_or_path)
    # NOTE(review): a local path that merely contains "http" is also treated
    # as a URL; kept as-is for backward compatibility.
    if "http" in location:
        response = requests.get(location)
        image_bytes = response.content
        pil_image = Image.open(io.BytesIO(image_bytes))
        img_format = pil_image.format
        # Not every Pillow image class provides ``_getexif``; treat a
        # missing reader like "no EXIF" instead of raising AttributeError.
        exif_reader = getattr(pil_image, "_getexif", None)
        if img_format == "GIF" or exif_reader is None:
            img_exif = None
        else:
            img_exif = exif_reader()
        if pil_image.mode in ("L", "P", "PA", "RGBA"):
            pil_image = pil_image.convert("RGB")
        if img_exif:
            orientation = next(
                (
                    value
                    for tag, value in img_exif.items()
                    if ExifTags.TAGS.get(tag) == "Orientation"
                ),
                None,
            )
            degrees = {3: 180, 6: 270, 8: 90}.get(orientation)
            if degrees is not None:
                pil_image = pil_image.rotate(degrees, expand=True)
        if pil_image.mode == "CMYK":
            cmyk_profile = ImageCms.ImageCmsProfile("resources/USWebCoatedSWOP.icc")
            srgb_profile = ImageCms.ImageCmsProfile(
                "resources/sRGB Color Space Profile.icm"
            )
            pil_image = ImageCms.profileToProfile(
                pil_image, cmyk_profile, srgb_profile, outputMode="RGB"
            )
        img = np.array(pil_image)
    else:
        pil_image = Image.open(location)
        img_format = pil_image.format
        if pil_image.mode == "RGBA":
            pil_image = pil_image.convert("RGB")
        buffer = io.BytesIO()
        pil_image.save(buffer, format="JPEG")
        image_bytes = buffer.getvalue()
        img = np.array(pil_image)
    if with_byte:
        if with_format:
            return img, image_bytes, img_format
        return img, image_bytes
    return img
def save_image(img1, img2=None, alpha=0.5, path="") -> "bool | None":
    """Write an image (optionally blended with a second one) to ``path``.

    Both inputs go through ``_preprocess_image`` first. Parent directories
    are created as needed.

    Args:
        img1: Image array to save.
        img2: Optional second array blended over ``img1``.
        alpha: Blend factor passed to ``_blend_two_images``.
        path: Destination file. A ``.gif`` suffix saves the original
            ``img1`` through Pillow and ignores ``img2``; any other suffix
            is written with ``cv2.imwrite``.

    Returns:
        ``True`` for the ``.gif`` branch, ``None`` otherwise. (The former
        ``-> None`` annotation did not match the GIF branch.)
    """
    first = _preprocess_image(_to_array(img1.copy(order="C")))
    if img2 is None:
        img_arr = first
    else:
        second = _to_array(_preprocess_image(_to_array(img2.copy(order="C"))))
        img_arr = _to_array(_blend_two_images(img1=first, img2=second, alpha=alpha))
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    if os.path.splitext(str(path))[1] == ".gif":
        _to_pil(img1).save(str(path))
        return True
    if img_arr.ndim == 3:
        # OpenCV writes BGR, so reverse the channel order of the RGB array.
        cv2.imwrite(
            filename=str(path),
            img=img_arr[:, :, ::-1],
            params=[cv2.IMWRITE_JPEG_QUALITY, 100],
        )
    elif img_arr.ndim == 2:
        cv2.imwrite(
            filename=str(path), img=img_arr, params=[cv2.IMWRITE_JPEG_QUALITY, 100]
        )
    return None
def show_image(img1, img2=None, alpha=0.5):
    """Display an image, or the alpha blend of two images, with Pillow."""
    base = _to_pil(_preprocess_image(_to_array(img1)))
    if img2 is not None:
        overlay = _to_pil(_preprocess_image(_to_array(img2)))
        Image.blend(im1=base, im2=overlay, alpha=alpha).show()
        return
    base.show()
def draw_bboxes(img, bboxes: pd.DataFrame, index=False):
    """Visualize bounding boxes on top of an image.

    Boxes are drawn on a black canvas (dark-green fill first, bright-green
    outline second) which is then blended with ``img``.

    Args:
        img: Image the boxes belong to.
        bboxes: DataFrame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2`` and
            ``bbox_y2`` columns.
        index: Also write each row's zero-padded index above its box.

    Returns:
        The blended image as an array.
    """
    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
    draw = ImageDraw.Draw(canvas)
    outline_color, fill_color = (0, 255, 0), (0, 100, 0)

    # Line width scales with the shorter side of each box (keyed by index).
    thickness_by_index = {}
    for row in bboxes.itertuples():
        shorter = min(row.bbox_x2 - row.bbox_x1, row.bbox_y2 - row.bbox_y1)
        thickness_by_index[row.Index] = max(1, shorter // 22)

    # All fills go first so that no fill can cover another box's outline.
    for row in bboxes.itertuples():
        draw.rectangle(
            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
            outline=None,
            fill=fill_color,
            width=thickness_by_index[row.Index],
        )
    for row in bboxes.itertuples():
        draw.rectangle(
            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
            outline=outline_color,
            fill=None,
            width=thickness_by_index[row.Index],
        )

    if index:
        from data_utils.rendering_utils import _get_font

        pad = max(map(len, map(str, bboxes.index)))
        for row in bboxes.itertuples():
            shorter = min(row.bbox_x2 - row.bbox_x1, row.bbox_y2 - row.bbox_y1)
            font_size = max(10, min(40, shorter // 4))
            draw.text(
                xy=(row.bbox_x1, row.bbox_y1 - 4),
                text=str(row.Index).zfill(pad),
                fill="white",
                stroke_fill="black",
                stroke_width=2,
                font=_get_font(lang="en", font_size=font_size),
                anchor="ls",
            )
    return _blend_two_images(img1=canvas, img2=img, alpha=0.4)
def visualize_clusters(img, bboxes, index=False):
    """Visualize boxes colored by their cluster id on top of an image.

    Boxes are drawn on a black canvas (cluster-colored fill first, white
    outline second) which is then blended with ``img``.

    Args:
        img: Image the boxes belong to.
        bboxes: DataFrame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2``,
            ``bbox_y2`` and an integer ``cluster`` column.
        index: Also write each box's cluster id above it.

    Returns:
        The blended image as an array.
    """
    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
    draw = ImageDraw.Draw(canvas)

    # Wrap around the palette so cluster ids beyond its length reuse colors
    # instead of raising IndexError; ids already in range (including the
    # negative ones Python indexing accepted) map to the same color as before.
    fill_by_index = {
        row.Index: COLORS[row.cluster % len(COLORS)] for row in bboxes.itertuples()
    }

    # All fills go first so that no fill can cover another box's outline.
    for row in bboxes.itertuples():
        draw.rectangle(
            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
            outline=None,
            fill=fill_by_index[row.Index],
            width=1,
        )
    for row in bboxes.itertuples():
        draw.rectangle(
            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
            outline=(255, 255, 255),
            fill=None,
            width=1,
        )

    if index:
        # Imported lazily (as draw_bboxes does) so the font helper is only
        # required when labels are actually drawn.
        from data_utils.rendering_utils import _get_font

        for row in bboxes.itertuples():
            shorter = min(row.bbox_x2 - row.bbox_x1, row.bbox_y2 - row.bbox_y1)
            font_size = max(14, min(40, shorter * 0.35))
            draw.text(
                xy=(row.bbox_x1, row.bbox_y1 - 4),
                text=str(row.cluster),
                fill="white",
                stroke_fill="black",
                stroke_width=2,
                font=_get_font(lang="en", font_size=font_size),
                anchor="ls",
            )
    return _blend_two_images(img1=canvas, img2=img, alpha=0.25)
def draw_bboxes_and_textboxes(bboxes, img):
    """Draw each row's bbox (green, width 4) and tbox (red, width 2).

    Drawing happens on a copy of ``img``. ``bboxes`` rows must expose the
    ``bbox_*`` and ``tbox_*`` corner columns.
    """
    canvas = img.copy(order="C")
    for row in bboxes.itertuples():
        rectangles = (
            ((row.bbox_x1, row.bbox_y1), (row.bbox_x2, row.bbox_y2), (0, 255, 0), 4),
            ((row.tbox_x1, row.tbox_y1), (row.tbox_x2, row.tbox_y2), (255, 0, 0), 2),
        )
        for top_left, bottom_right, color, thickness in rectangles:
            cv2.rectangle(
                img=canvas,
                pt1=top_left,
                pt2=bottom_right,
                color=color,
                thickness=thickness,
            )
    return canvas
def draw_pseudo_character_centers(img, pccs, margin=4):
    """Draw a red dot of radius ``margin`` at every center and blend with ``img``.

    ``pccs`` rows must expose ``x`` and ``y``.
    """
    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
    draw = ImageDraw.Draw(canvas)
    for center in pccs.itertuples():
        bounds = (
            center.x - margin,
            center.y - margin,
            center.x + margin,
            center.y + margin,
        )
        draw.ellipse(xy=bounds, outline=(255, 0, 0), fill=(100, 0, 0))
    return _blend_two_images(img1=canvas, img2=img, alpha=0.3)
def _resize_image(img, w, h):
    """Resize ``img`` to ``w`` x ``h``.

    ``INTER_AREA`` is used when either dimension shrinks, and
    ``INTER_LANCZOS4`` otherwise.
    """
    ori_w, ori_h = _get_width_and_height(img)
    shrinking = w < ori_w or h < ori_h
    method = cv2.INTER_AREA if shrinking else cv2.INTER_LANCZOS4
    return cv2.resize(src=img, dsize=(w, h), interpolation=method)
def _resize_image_using_shorter_side(img, img_size=1530):
    """Shrink ``img`` so that its shorter side equals ``img_size``.

    The aspect ratio is kept. Images whose shorter side is already at most
    ``img_size`` are returned unchanged.
    """
    ori_w, ori_h = _get_width_and_height(img)
    if min(ori_w, ori_h) <= img_size:
        return img
    if ori_w < ori_h:
        target = (img_size, round(ori_h * (img_size / ori_w)))
    else:
        target = (round(ori_w * (img_size / ori_h)), img_size)
    return cv2.resize(src=img, dsize=target, interpolation=cv2.INTER_AREA)
def _resize_image_using_longer_side(img, img_size=2560):
    """Shrink ``img`` so that its longer side equals ``img_size``.

    The aspect ratio is kept. Images whose longer side is already at most
    ``img_size`` are returned unchanged.
    """
    ori_w, ori_h = _get_width_and_height(img)
    if max(ori_w, ori_h) <= img_size:
        return img
    if ori_w < ori_h:
        target = (round(ori_w * (img_size / ori_h)), img_size)
    else:
        target = (img_size, round(ori_h * (img_size / ori_w)))
    return cv2.resize(src=img, dsize=target, interpolation=cv2.INTER_AREA)
def _split_image_3(img, print=False):
    """Split an image into three overlapping halves along its longer axis.

    The pieces are the first half, a half starting at the first quarter,
    and the last half. Portrait and square images are split by rows,
    landscape images by columns.

    Args:
        img: 2-D or 3-D image array.
        print: When true, log the resolution change to stdout.

    Returns:
        ``(img1, img2, img3)``, each 2-D if the input was 2-D.
    """
    # The ``print`` parameter shadows the builtin inside this function, so
    # the real function is reached through ``builtins``. Calling the bool
    # raised TypeError whenever print=True.
    import builtins

    is_2d = img.ndim == 2
    img = _to_3d(img)
    w, h = _get_width_and_height(img)
    if h >= w:
        if print:
            builtins.print(f"Resolution: {w}, {h} -> {w}, {h // 2}")
        img1 = img[: h // 2, :, :]
        img2 = img[h // 4 : h // 4 + h // 2, :, :]
        img3 = img[-h // 2 :, :, :]
    else:
        if print:
            builtins.print(f"Resolution: {w}, {h} -> {w // 2}, {h}")
        img1 = img[:, : w // 2, :]
        img2 = img[:, w // 2 // 2 : w // 2 // 2 + w // 2, :]
        img3 = img[:, -w // 2 :, :]
    if is_2d:
        img1 = _to_2d(img1)
        img2 = _to_2d(img2)
        img3 = _to_2d(img3)
    return img1, img2, img3
def _split_image_2(img, print=False):
    """Split an image into its first and last halves along the longer axis.

    Portrait and square images are split by rows, landscape images by
    columns.

    Args:
        img: 2-D or 3-D image array.
        print: When true, log the resolution change to stdout.

    Returns:
        ``(first_half, last_half)``, each 2-D if the input was 2-D.
    """
    # The ``print`` parameter shadows the builtin inside this function, so
    # the real function is reached through ``builtins``. Calling the bool
    # raised TypeError whenever print=True.
    import builtins

    is_2d = img.ndim == 2
    img = _to_3d(img)
    w, h = _get_width_and_height(img)
    if h >= w:
        if print:
            builtins.print(f"Resolution: {w}, {h} -> {w}, {h // 2}")
        img1 = img[: h // 2, :, :]
        img3 = img[-h // 2 :, :, :]
    else:
        if print:
            builtins.print(f"Resolution: {w}, {h} -> {w // 2}, {h}")
        img1 = img[:, : w // 2, :]
        img3 = img[:, -w // 2 :, :]
    if is_2d:
        img1 = _to_2d(img1)
        img3 = _to_2d(img3)
    return img1, img3
def _combine_images_3(img, img1, img2, img3):
    """Reassemble three overlapping half-size pieces into one 2-D canvas.

    The pieces are placed where ``_split_image_3`` cut them; overlapping
    areas keep the element-wise maximum. Returns ``None`` when all three
    pieces are ``None``.

    Args:
        img: Image providing the target size and split orientation.
        img1, img2, img3: First, middle and last pieces (first channel used).
    """
    if img1 is None and img2 is None and img3 is None:
        return None
    first, middle, last = _to_2d(img1), _to_2d(img2), _to_2d(img3)
    canvas = _get_canvas_same_size_as_image(_to_2d(img), black=True)
    w, h = _get_width_and_height(img)
    if h >= w:
        half, start = h // 2, h // 2 // 2
        tail = -h // 2  # floor of -h/2: differs from -(h // 2) for odd h
        canvas[:half, :] = first
        canvas[start : start + half, :] = np.maximum(
            canvas[start : start + half, :], middle
        )
        canvas[tail:, :] = np.maximum(canvas[tail:, :], last)
    else:
        half, start = w // 2, w // 2 // 2
        tail = -w // 2  # floor of -w/2: differs from -(w // 2) for odd w
        canvas[:, :half] = first
        canvas[:, start : start + half] = np.maximum(
            canvas[:, start : start + half], middle
        )
        canvas[:, tail:] = np.maximum(canvas[:, tail:], last)
    return canvas
def _combine_images_2(img, img1, img2):
    """Reassemble two half-size pieces into a canvas shaped like ``img``.

    The first piece is copied in; where the second piece overlaps it, the
    element-wise maximum is kept. Returns ``None`` when both pieces are
    ``None``.
    """
    if img1 is None and img2 is None:
        return None
    canvas = _get_canvas_same_size_as_image(img, black=True)
    w, h = _get_width_and_height(img)
    if h >= w:
        tail = -h // 2  # floor of -h/2: differs from -(h // 2) for odd h
        canvas[: h // 2, :] = img1
        canvas[tail:, :] = np.maximum(canvas[tail:, :], img2)
    else:
        tail = -w // 2  # floor of -w/2: differs from -(w // 2) for odd w
        canvas[:, : w // 2] = img1
        canvas[:, tail:] = np.maximum(canvas[:, tail:], img2)
    return canvas
def _rotate_90_degrees(img, counterclockwise=False):
    """Rotate an image by a quarter turn using ``cv2.rotate``.

    Args:
        img (np.ndarray): Image to rotate.
        counterclockwise (bool): Rotate counter-clockwise when true,
            clockwise otherwise (the default).

    Returns:
        np.ndarray: The rotated image.
    """
    if counterclockwise:
        rotate_code = cv2.ROTATE_90_COUNTERCLOCKWISE
    else:
        rotate_code = cv2.ROTATE_90_CLOCKWISE
    return cv2.rotate(src=img, rotateCode=rotate_code)
def save_image_patches(img, bboxes, dir):
    """Crop every bounding box out of ``img`` and save it as a JPEG file.

    Boxes that are taller than wide are rotated 90 degrees clockwise before
    saving, so every stored patch is at least as wide as it is tall.  Files
    are named after the row index, zero-padded to four digits.

    Args:
        img (np.ndarray): Source image.
        bboxes: Table iterated with ``itertuples()``; each row must expose
            ``bbox_x1``, ``bbox_y1``, ``bbox_x2`` and ``bbox_y2``.
        dir (str | os.PathLike): Output directory.
    """
    for box in bboxes.itertuples():
        left, top, right, bottom = box.bbox_x1, box.bbox_y1, box.bbox_x2, box.bbox_y2
        patch = _crop_image(img=img, l=left, t=top, r=right, b=bottom)
        is_portrait = (bottom - top) > (right - left)
        if is_portrait:
            patch = _rotate_90_degrees(patch, counterclockwise=False)
        save_image(img1=patch, path=Path(dir) / f"{str(box.Index).zfill(4)}.jpg")
def get_minimum_area_bounding_rectangle(mask):
    """Return the axis-aligned bounding box of the non-zero pixels of a mask.

    The mask is cast to ``uint8`` first and only its first channel is
    inspected.

    Args:
        mask (np.ndarray): 2-D or 3-D mask.

    Returns:
        tuple[int, int, int, int]: ``(x1, y1, x2, y2)`` where ``x2`` and
        ``y2`` are the indices of the last non-zero column and row
        (inclusive).  ``(0, 0, 0, 0)`` is returned for an all-zero mask.
    """
    occupied = _to_2d(mask.astype("uint8")) != 0
    cols = np.where(occupied.any(axis=0))[0]
    rows = np.where(occupied.any(axis=1))[0]
    if len(cols) == 0 or len(rows) == 0:
        return 0, 0, 0, 0
    return int(cols[0]), int(rows[0]), int(cols[-1]), int(rows[-1])
def get_minimum_area_bounding_rectangle2(mask, l, t, r, b):
    """Shrink a box toward the non-zero columns/rows of a mask.

    The mask is cast to ``uint8`` and only its first channel is inspected.
    Each side of the box moves to the nearest occupied column/row that lies
    strictly inside the given bound; a side with no such column/row keeps its
    original value.

    NOTE(review): occupied columns and rows are collected over the whole
    mask, not only inside the box, and the comparisons are strict (a non-zero
    pixel exactly on ``l`` or ``t`` is skipped) — confirm this is intended.

    Args:
        mask (np.ndarray): 2-D or 3-D mask.
        l: Left bound (x).
        t: Top bound (y).
        r: Right bound (x).
        b: Bottom bound (y).

    Returns:
        tuple: ``(new_l, new_t, new_r, new_b)``.
    """
    occupied = _to_2d(mask.astype("uint8")) != 0
    cols = np.where(occupied.any(axis=0))[0]
    rows = np.where(occupied.any(axis=1))[0]

    # Candidates strictly inside each bound.  An empty selection is the only
    # expected "failure", so it is tested explicitly instead of catching
    # every exception, which would also hide genuine errors.
    cols_after_l = cols[cols > l]
    rows_after_t = rows[rows > t]
    cols_before_r = cols[cols < r]
    rows_before_b = rows[rows < b]

    new_l = cols_after_l[0] if cols_after_l.size else l
    new_t = rows_after_t[0] if rows_after_t.size else t
    new_r = cols_before_r[-1] if cols_before_r.size else r
    new_b = rows_before_b[-1] if rows_before_b.size else b
    return new_l, new_t, new_r, new_b
def _downsample_image(img):
    """Resize an image to half its width and height (floor division)."""
    width, height = _get_width_and_height(img)
    return _resize_image(img, w=width // 2, h=height // 2)
def _upsample_image(img):
    """Resize an image to twice its width and height."""
    width, height = _get_width_and_height(img)
    return _resize_image(img, w=width * 2, h=height * 2)
def _get_pseudo_image(img, mask, invert=False):
    """Collect the pixels of ``img`` selected by ``mask`` into a 1-row image.

    Args:
        img (np.ndarray): 3-channel image, indexed as ``img[y, x, :]``.
        mask (np.ndarray): Mask; pixels whose first channel is non-zero are
            selected.
        invert (bool): When true the mask is passed through ``_invert_image``
            first, so the complementary pixels are selected.

    Returns:
        np.ndarray: Array of shape ``(1, N, 3)`` holding the selected pixels.
    """
    selector = _invert_image(mask) if invert else mask
    ys, xs = np.nonzero(_to_2d(selector))
    selected = img[ys, xs, :]
    return selected.reshape((1, -1, 3))
def resize_coordinates_and_image_to_fit_to_maximum_pixel_counts(
    bboxes, img, max_pixel_counts=1530
):
    """Shrink an image and its box coordinates so neither side exceeds a limit.

    Nothing is changed when the image already fits.  Otherwise the image is
    resized with Lanczos interpolation, keeping its aspect ratio, and the
    ``xmin``/``ymin``/``xmax``/``ymax`` columns of ``bboxes`` are scaled by
    the same ratio and truncated to integers.  ``bboxes`` is modified in
    place.

    Args:
        bboxes: Table with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns.
        img (np.ndarray): Image the boxes refer to.
        max_pixel_counts (int): Maximum allowed length of either image side,
            in pixels.

    Returns:
        tuple: ``(bboxes, img)`` after the optional rescaling.
    """
    w, h = _get_width_and_height(img)
    ratio = min(max_pixel_counts / h, max_pixel_counts / w)

    def _scaled(value):
        return int(value * ratio)

    if ratio < 1:
        for column in ("xmin", "ymin", "xmax", "ymax"):
            bboxes[column] = bboxes[column].apply(_scaled)
        img = cv2.resize(
            src=img,
            dsize=(_scaled(w), _scaled(h)),
            interpolation=cv2.INTER_LANCZOS4,
        )
    return bboxes, img
def get_image_patches_3(img, text_stroke_mask, mask1, mask2):
    """Cut an image and two masks into patches around text-stroke regions.

    The text-stroke mask is turned into a splitting mask, whose 4-connected
    components define the patch rectangles.

    Args:
        img (np.ndarray): Image to crop.
        text_stroke_mask (np.ndarray): Mask that drives the splitting.
        mask1 (np.ndarray): First mask, cropped alongside the image.
        mask2 (np.ndarray): Second mask, cropped alongside the image.

    Returns:
        list[dict]: One dict per component with the keys ``xmin``, ``ymin``,
        ``xmax``, ``ymax``, ``img``, ``mask1`` and ``mask2``.
    """
    region_mask = get_splitting_mask(text_stroke_mask)
    _count, _labels, stats, _centroids = cv2.connectedComponentsWithStats(
        image=_to_2d(region_mask), connectivity=4
    )

    patches = []
    # Row 0 of the statistics is skipped: OpenCV reserves label 0 for the
    # background.
    for left, top, box_w, box_h, _area in stats[1:]:
        right = left + box_w
        bottom = top + box_h
        window = {"l": left, "t": top, "r": right, "b": bottom}
        patches.append(
            {
                "xmin": left,
                "ymin": top,
                "xmax": right,
                "ymax": bottom,
                "img": _crop_image(img=img, **window),
                "mask1": _crop_image(img=mask1, **window),
                "mask2": _crop_image(img=mask2, **window),
            }
        )
    return patches
def get_image_patches_2(img, mask1, mask2):
    """Cut an image and two masks into patches around the regions of ``mask1``.

    ``mask1`` is turned into a splitting mask, whose 4-connected components
    define the patch rectangles.

    Args:
        img (np.ndarray): Image to crop.
        mask1 (np.ndarray): Mask that drives the splitting; also cropped.
        mask2 (np.ndarray): Second mask, cropped alongside the image.

    Returns:
        list[dict]: One dict per component with the keys ``x1``, ``y1``,
        ``x2``, ``y2``, ``img``, ``mask1`` and ``mask2``.
    """
    region_mask = get_splitting_mask(mask1)
    _count, _labels, stats, _centroids = cv2.connectedComponentsWithStats(
        image=_to_2d(region_mask), connectivity=4
    )

    patches = []
    # Row 0 of the statistics is skipped: OpenCV reserves label 0 for the
    # background.
    for left, top, box_w, box_h, _area in stats[1:]:
        right = left + box_w
        bottom = top + box_h
        window = {"l": left, "t": top, "r": right, "b": bottom}
        patches.append(
            {
                "x1": left,
                "y1": top,
                "x2": right,
                "y2": bottom,
                "img": _crop_image(img=img, **window),
                "mask1": _crop_image(img=mask1, **window),
                "mask2": _crop_image(img=mask2, **window),
            }
        )
    return patches
def get_splitting_mask(text_stroke_mask, kernel_size=200):
    """Dilate a text-stroke mask so nearby strokes fuse into larger regions.

    Args:
        text_stroke_mask (np.ndarray): Mask of text strokes.
        kernel_size (int): Size passed to ``_dilate_mask``.  Defaults to 200,
            the value that used to be hard-coded here.

    Returns:
        The dilated mask, as returned by ``_dilate_mask``.
    """
    return _dilate_mask(text_stroke_mask, kernel_size=kernel_size)
def enhance_sharpness(img, factor=2):
    """Increase the sharpness of an image.

    Three approaches were tried by the original author: a sharpening kernel
    on the V channel in HSV, an unsharp mask on the Y channel in YCrCb, and
    PIL's ``ImageEnhance.Sharpness``.  PIL changed the original colours the
    least, so it is the one used here.

    Args:
        img (np.ndarray): Image.
        factor (float): Enhancement factor passed to PIL; 1.0 leaves the
            image unchanged and larger values sharpen more.  Defaults to 2,
            the value that used to be hard-coded here.

    Returns:
        np.ndarray: The sharpened image.
    """
    pil_img = _to_pil(img)
    sharpened = ImageEnhance.Sharpness(pil_img).enhance(factor)
    return _to_array(sharpened)
def mask2point(mask):
    """Return the coordinates of all pixels equal to 255 in a mask.

    Only the first channel of a 3-D mask is inspected.

    Args:
        mask (np.ndarray): Mask, e.g. (H, W, 3) with values 0 or 255.

    Returns:
        np.ndarray: Array of shape ``(N, 2)``; each row is ``(y, x)``.
    """
    return np.argwhere(_to_2d(mask) == 255)
def get_corner(corner_coords):
    """Estimate the four outer corners spanned by a set of points.

    The points are divided into four quadrants around their centroid.  Each
    returned corner combines the most extreme x and the most extreme y found
    inside its quadrant, so it is not necessarily one of the input points.

    Args:
        corner_coords (np.ndarray): Array of shape ``(N, 2)``; each row is
            ``(y, x)``.

    Returns:
        tuple: ``(lt, rt, rb, lb)`` — left-top, right-top, right-bottom and
        left-bottom corners, each as an ``(x, y)`` pair.

    Raises:
        ValueError: If the input is not a non-empty ``(N, 2)`` array, or if
            one of the quadrants contains no point.
    """
    corner_coords = np.asarray(corner_coords)
    if (
        corner_coords.ndim != 2
        or corner_coords.shape[1] != 2
        or corner_coords.shape[0] == 0
    ):
        raise ValueError(
            "corner_coords must be a non-empty (N, 2) array of (y, x) points, "
            f"got shape {corner_coords.shape}"
        )

    ys = corner_coords[:, 0]
    xs = corner_coords[:, 1]
    cy, cx = np.mean(corner_coords, axis=0)
    upper = ys < cy
    left = xs < cx
    quadrants = {
        "left-top": upper & left,
        "right-top": upper & ~left,
        "right-bottom": ~upper & ~left,
        "left-bottom": ~upper & left,
    }

    # Fail with a readable message instead of NumPy's "zero-size array to
    # reduction operation" error when a quadrant has no candidate point.
    empty = [name for name, selected in quadrants.items() if not selected.any()]
    if empty:
        raise ValueError(
            "no corner candidates in quadrant(s): " + ", ".join(empty)
        )

    in_lt = quadrants["left-top"]
    in_rt = quadrants["right-top"]
    in_rb = quadrants["right-bottom"]
    in_lb = quadrants["left-bottom"]
    lt = xs[in_lt].min(), ys[in_lt].min()
    rt = xs[in_rt].max(), ys[in_rt].min()
    rb = xs[in_rb].max(), ys[in_rb].max()
    lb = xs[in_lb].min(), ys[in_lb].max()
    return lt, rt, rb, lb
def get_dst_mask(mask):
    """Compute a normalised distance-transform image of a mask.

    Only the first channel of the mask is used.  The L2 distance transform
    (mask size 5) is rescaled with min-max normalisation to the range
    0-255 and stored as 8-bit values.

    Args:
        mask (np.ndarray): Mask image.

    Returns:
        np.ndarray: Three-channel image holding the normalised distances.
    """
    distances = cv2.distanceTransform(_to_2d(mask), cv2.DIST_L2, 5)
    # Normalise the distance values to the 0-255 range.
    normalized = cv2.normalize(
        distances, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
    )
    return _to_3d(normalized)
def unwarp(img, src, dst):
    """Warp an image with the perspective transform that maps src onto dst.

    Args:
        img (np.ndarray): Image to warp.
        src: Four source points, passed to ``cv2.getPerspectiveTransform``.
        dst: Four destination points, passed to
            ``cv2.getPerspectiveTransform``.

    Returns:
        tuple: ``(warped, M)`` — the warped image, which keeps the width and
        height of ``img``, and the transform matrix.
    """
    height, width = img.shape[0], img.shape[1]
    # Transform matrix that maps the src points onto the dst points.
    matrix = cv2.getPerspectiveTransform(src, dst)
    # Apply it with linear interpolation; the output size equals the input's.
    warped = cv2.warpPerspective(img, matrix, (width, height), flags=cv2.INTER_LINEAR)
    return warped, matrix
def _detect_corner_coords(gray, method):
    """Detect corner candidates in a grayscale image as an (N, 2) array of (y, x)."""
    if not isinstance(method, PC_TYPE):
        raise ValueError(
            f"Invalid method: {method}. Expected one of {list(PC_TYPE)}."
        )

    if method == PC_TYPE.HARRIS:
        # (H, W) map of corner scores; keep pixels above 0.5 % of the maximum.
        score = cv2.cornerHarris(gray, 5, 3, 0.04)
        coords = np.argwhere(score > 0.005 * score.max())
    elif method == PC_TYPE.EDGES_CONTOURS:
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)
        contours, _ = cv2.findContours(
            edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        points = [point for contour in contours for point in contour]
        # Contour points are (x, y); flip them to (y, x).
        coords = np.array(points).reshape(-1, 2)[..., ::-1]
    elif method == PC_TYPE.GFTT:
        found = cv2.goodFeaturesToTrack(
            gray, 0, 0.01, 5, blockSize=3, useHarrisDetector=True, k=0.03
        )
        if found is None:
            # OpenCV may return None rather than an empty array when no
            # corner is found.
            coords = np.empty((0, 2), dtype=np.float32)
        else:
            coords = found.reshape(found.shape[0], 2)[..., ::-1]
    elif method == PC_TYPE.FAST:
        keypoints = cv2.FastFeatureDetector_create(50).detect(gray)
        coords = np.array([[kp.pt[1], kp.pt[0]] for kp in keypoints])
    elif method == PC_TYPE.KAZE:
        keypoints = cv2.KAZE_create().detect(gray)
        coords = np.array([[kp.pt[1], kp.pt[0]] for kp in keypoints])
    else:
        raise ValueError(f"Unsupported method: {method}.")

    # Guarantee an (N, 2) shape even when a detector found nothing.
    return coords.reshape(-1, 2)


def perspective_correction(img, src=None, vis=False, method: PC_TYPE = PC_TYPE.HARRIS):
    """Warp an image so that a quadrilateral region fills the whole frame.

    When ``src`` is not given, corner candidates are detected with
    ``method`` and the four outer corners are derived with ``get_corner``.

    Side effects: the result is always written to ``cv_result.png`` in the
    current working directory; with ``vis=True`` and automatic detection the
    detected candidates are additionally drawn and written to
    ``vis_corner.png``.

    Args:
        img (np.ndarray): Image of shape (H, W, C) with values 0-255.
        src: Optional source quadrilateral
            ``[[ltx, lty], [rtx, rty], [rbx, rby], [lbx, lby]]``.  Any
            array-like is accepted; it is converted to float32.
        vis (bool): Save a visualisation of the detected corner candidates.
        method (PC_TYPE): Corner detection method used when ``src`` is None.

    Returns:
        np.ndarray: The warped image, same width and height as ``img``.

    Raises:
        ValueError: If ``method`` is not a ``PC_TYPE`` member, or if no
            corner candidate could be detected.
    """
    if src is None:
        gray = _to_grayscale(img)
        corner_coords = _detect_corner_coords(gray, method)
        if len(corner_coords) == 0:
            raise ValueError(
                f"No corner candidates detected with method {method}."
            )
        if vis:
            view_img = img.copy()
            for y, x in corner_coords:
                cv2.circle(view_img, (int(x), int(y)), 3, (255, 0, 0), 2)
            save_image(view_img, path="vis_corner.png")
        lt, rt, rb, lb = get_corner(corner_coords)
        src = [lt, rt, rb, lb]

    # cv2.getPerspectiveTransform needs float32 points, so a caller-supplied
    # nested list or float64 array is converted here as well.
    src = np.asarray(src, dtype=np.float32)
    height, width = img.shape[0], img.shape[1]
    dst = np.float32(
        [
            (0, 0),
            (width - 1, 0),
            (width - 1, height - 1),
            (0, height - 1),
        ]
    )
    result, _ = unwarp(img, src, dst)
    # NOTE(review): this debug output is written on every call — confirm
    # whether it should depend on ``vis``.
    save_image(result, path="cv_result.png")
    return result
if __name__ == "__main__":
    # Manual smoke test: download a sample photo and run the perspective
    # correction with the corner visualisation enabled.
    sample_url = "https://d2reotjpatzlok.cloudfront.net/qr-place/item/QR_20240726_2441_2_LZ1ZFCT38HN7PPCEZR8H.jpg"
    img, _raw_bytes, _img_format = load_image(
        sample_url, with_byte=True, with_format=True
    )
    perspective_correction(img, vis=True)