diff --git a/.gitignore b/.gitignore
index 058598c..87b7783 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,7 @@
 *.pyc
-.DS_Store
\ No newline at end of file
+.DS_Store
+*.egg-info/
+dist/
+build/
+*.egg
+__pycache__/
\ No newline at end of file
diff --git a/README.md b/README.md
index 9ce8d29..2ee393b 100644
--- a/README.md
+++ b/README.md
@@ -15,16 +15,25 @@ This project makes use of the transform and imutils modules from pyimagesearch (
 ![Image Directory of images to be processed](https://github.com/andrewdcampbell/doc_scanner/blob/master/before_after.gif)
 #### Here are some examples of images before and after scan:
-
+
-
+
-
+
-
+
 ### Usage
+You can install the package directly from the repository:
+```
+pip install git+https://github.com/user/repo.git
+```
+Then run it from your terminal:
+```
+document-scanner (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
+```
+Alternatively, clone the repository and run:
 ```
 python scan.py (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
 ```
diff --git a/pyimagesearch/__init__.py b/opencv_document_scanner/__init__.py
similarity index 100%
rename from pyimagesearch/__init__.py
rename to opencv_document_scanner/__init__.py
diff --git a/polygon_interacter.py b/opencv_document_scanner/polygon_interacter.py
similarity index 100%
rename from polygon_interacter.py
rename to opencv_document_scanner/polygon_interacter.py
diff --git a/opencv_document_scanner/pyimagesearch/__init__.py b/opencv_document_scanner/pyimagesearch/__init__.py
new file mode 100644
index 0000000..57600df
--- /dev/null
+++ b/opencv_document_scanner/pyimagesearch/__init__.py
@@ -0,0 +1 @@
+from . import * # noqa: F403
diff --git a/pyimagesearch/imutils.py b/opencv_document_scanner/pyimagesearch/imutils.py
similarity index 100%
rename from pyimagesearch/imutils.py
rename to opencv_document_scanner/pyimagesearch/imutils.py
diff --git a/pyimagesearch/transform.py b/opencv_document_scanner/pyimagesearch/transform.py
similarity index 100%
rename from pyimagesearch/transform.py
rename to opencv_document_scanner/pyimagesearch/transform.py
diff --git a/opencv_document_scanner/scan.py b/opencv_document_scanner/scan.py
new file mode 100644
index 0000000..4ad866f
--- /dev/null
+++ b/opencv_document_scanner/scan.py
@@ -0,0 +1,359 @@
+# USAGE:
+# python scan.py (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
+# For example, to scan a single image with interactive mode:
+# python scan.py --image sample_images/desk.JPG -i
+# To scan all images in a directory automatically:
+# python scan.py --images sample_images
+
+# Scanned images will be output to a directory named 'output'
+
+from .pyimagesearch import transform
+from .pyimagesearch import imutils
+from scipy.spatial import distance as dist
+from matplotlib.patches import Polygon
+from . import polygon_interacter as poly_i
+import numpy as np
+import matplotlib.pyplot as plt
+import itertools
+import math
+import cv2
+from pylsd.lsd import lsd
+
+import argparse
+import os
+
+class DocScanner(object):
+    """An image scanner"""
+
+    def __init__(self, interactive=False, MIN_QUAD_AREA_RATIO=0.25, MAX_QUAD_ANGLE_RANGE=40):
+        """
+        Args:
+            interactive (boolean): If True, user can adjust screen contour before
+                transformation occurs in interactive pyplot window.
+            MIN_QUAD_AREA_RATIO (float): A contour will be rejected if its corners
+                do not form a quadrilateral that covers at least MIN_QUAD_AREA_RATIO
+                of the original image. Defaults to 0.25.
+            MAX_QUAD_ANGLE_RANGE (int): A contour will also be rejected if the range
+                of its interior angles exceeds MAX_QUAD_ANGLE_RANGE. Defaults to 40.
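As a rough, self-contained sketch of the two thresholds described above (the helper `quad_is_acceptable` and the sample numbers are illustrative only, not part of this patch or of the `DocScanner` API):

```python
import numpy as np

def quad_is_acceptable(quad_area, im_width, im_height, interior_angles,
                       min_quad_area_ratio=0.25, max_quad_angle_range=40):
    # Reject quads that cover too little of the image, or whose interior
    # angles vary too much (i.e. the shape is far from a rectangle).
    big_enough = quad_area > im_width * im_height * min_quad_area_ratio
    regular_enough = np.ptp(interior_angles) < max_quad_angle_range
    return big_enough and regular_enough

# A 400x260 quad in a 500x666 image with near-right interior angles passes:
print(quad_is_acceptable(400 * 260, 500, 666, [88, 92, 85, 95]))  # True
```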
+ """ + self.interactive = interactive + self.MIN_QUAD_AREA_RATIO = MIN_QUAD_AREA_RATIO + self.MAX_QUAD_ANGLE_RANGE = MAX_QUAD_ANGLE_RANGE + + def filter_corners(self, corners, min_dist=20): + """Filters corners that are within min_dist of others""" + def predicate(representatives, corner): + return all(dist.euclidean(representative, corner) >= min_dist + for representative in representatives) + + filtered_corners = [] + for c in corners: + if predicate(filtered_corners, c): + filtered_corners.append(c) + return filtered_corners + + def angle_between_vectors_degrees(self, u, v): + """Returns the angle between two vectors in degrees""" + return np.degrees( + math.acos(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))) + + def get_angle(self, p1, p2, p3): + """ + Returns the angle between the line segment from p2 to p1 + and the line segment from p2 to p3 in degrees + """ + a = np.radians(np.array(p1)) + b = np.radians(np.array(p2)) + c = np.radians(np.array(p3)) + + avec = a - b + cvec = c - b + + return self.angle_between_vectors_degrees(avec, cvec) + + def angle_range(self, quad): + """ + Returns the range between max and min interior angles of quadrilateral. + The input quadrilateral must be a numpy array with vertices ordered clockwise + starting with the top left vertex. + """ + tl, tr, br, bl = quad + ura = self.get_angle(tl[0], tr[0], br[0]) + ula = self.get_angle(bl[0], tl[0], tr[0]) + lra = self.get_angle(tr[0], br[0], bl[0]) + lla = self.get_angle(br[0], bl[0], tl[0]) + + angles = [ura, ula, lra, lla] + return np.ptp(angles) + + def get_corners(self, img): + """ + Returns a list of corners ((x, y) tuples) found in the input image. With proper + pre-processing and filtering, it should output at most 10 potential corners. + This is a utility function used by get_contours. The input image is expected + to be rescaled and Canny filtered prior to be passed in. + """ + lines = lsd(img) + + # massages the output from LSD + # LSD operates on edges. One "line" has 2 edges, and so we need to combine the edges back into lines + # 1. separate out the lines into horizontal and vertical lines. + # 2. Draw the horizontal lines back onto a canvas, but slightly thicker and longer. + # 3. Run connected-components on the new canvas + # 4. Get the bounding box for each component, and the bounding box is final line. + # 5. The ends of each line is a corner + # 6. Repeat for vertical lines + # 7. Draw all the final lines onto another canvas. 
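A toy illustration of steps 2-7 above (an assumption for illustration, not part of this patch): the final horizontal and vertical lines are drawn onto two canvases with pixel value 1, so pixels where the two canvases sum to 2 mark corner candidates:

```python
import numpy as np
import cv2

canvas_h = np.zeros((100, 100), dtype=np.uint8)
canvas_v = np.zeros((100, 100), dtype=np.uint8)
cv2.line(canvas_h, (10, 50), (90, 50), 1, 1)  # one "final" horizontal line
cv2.line(canvas_v, (50, 10), (50, 90), 1, 1)  # one "final" vertical line

# Overlapping pixels (sum == 2) are corner candidates.
corners_y, corners_x = np.where(canvas_h + canvas_v == 2)
print([(int(x), int(y)) for x, y in zip(corners_x, corners_y)])  # [(50, 50)]
```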
Where the lines overlap are also corners + + corners = [] + if lines is not None: + # separate out the horizontal and vertical lines, and draw them back onto separate canvases + lines = lines.squeeze().astype(np.int32).tolist() + horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8) + vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8) + for line in lines: + x1, y1, x2, y2, _ = line + if abs(x2 - x1) > abs(y2 - y1): + (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[0]) + cv2.line(horizontal_lines_canvas, (max(x1 - 5, 0), y1), (min(x2 + 5, img.shape[1] - 1), y2), 255, 2) + else: + (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[1]) + cv2.line(vertical_lines_canvas, (x1, max(y1 - 5, 0)), (x2, min(y2 + 5, img.shape[0] - 1)), 255, 2) + + lines = [] + + # find the horizontal lines (connected-components -> bounding boxes -> final lines) + (contours, hierarchy) = cv2.findContours(horizontal_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2] + horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8) + for contour in contours: + contour = contour.reshape((contour.shape[0], contour.shape[2])) + min_x = np.amin(contour[:, 0], axis=0) + 2 + max_x = np.amax(contour[:, 0], axis=0) - 2 + left_y = int(np.average(contour[contour[:, 0] == min_x][:, 1])) + right_y = int(np.average(contour[contour[:, 0] == max_x][:, 1])) + lines.append((min_x, left_y, max_x, right_y)) + cv2.line(horizontal_lines_canvas, (min_x, left_y), (max_x, right_y), 1, 1) + corners.append((min_x, left_y)) + corners.append((max_x, right_y)) + + # find the vertical lines (connected-components -> bounding boxes -> final lines) + (contours, hierarchy) = cv2.findContours(vertical_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2] + vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8) + for contour in contours: + contour = contour.reshape((contour.shape[0], contour.shape[2])) + min_y = np.amin(contour[:, 1], axis=0) + 2 + max_y = np.amax(contour[:, 1], axis=0) - 2 + top_x = int(np.average(contour[contour[:, 1] == min_y][:, 0])) + bottom_x = int(np.average(contour[contour[:, 1] == max_y][:, 0])) + lines.append((top_x, min_y, bottom_x, max_y)) + cv2.line(vertical_lines_canvas, (top_x, min_y), (bottom_x, max_y), 1, 1) + corners.append((top_x, min_y)) + corners.append((bottom_x, max_y)) + + # find the corners + corners_y, corners_x = np.where(horizontal_lines_canvas + vertical_lines_canvas == 2) + corners += zip(corners_x, corners_y) + + # remove corners in close proximity + corners = self.filter_corners(corners) + return corners + + def is_valid_contour(self, cnt, IM_WIDTH, IM_HEIGHT): + """Returns True if the contour satisfies all requirements set at instantitation""" + + return (len(cnt) == 4 and cv2.contourArea(cnt) > IM_WIDTH * IM_HEIGHT * self.MIN_QUAD_AREA_RATIO + and self.angle_range(cnt) < self.MAX_QUAD_ANGLE_RANGE) + + + def get_contour(self, rescaled_image): + """ + Returns a numpy array of shape (4, 2) containing the vertices of the four corners + of the document in the image. It considers the corners returned from get_corners() + and uses heuristics to choose the four corners that most likely represent + the corners of the document. If no corners were found, or the four corners represent + a quadrilateral that is too small or convex, it returns the original four corners. 
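The method body below ranks candidate quadrilaterals first by area and then by how uniform their interior angles are. A self-contained sketch of that ranking idiom (`angle_spread` is an illustrative stand-in, not the module's `angle_range`):

```python
import numpy as np
import cv2

def angle_spread(quad):
    """Rough interior-angle spread in degrees for an ordered 4x1x2 int32 quad."""
    pts = quad.reshape(4, 2).astype(float)
    angles = []
    for i in range(4):
        a, b, c = pts[i - 1], pts[i], pts[(i + 1) % 4]
        u, v = a - b, c - b
        cos = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
        angles.append(np.degrees(np.arccos(np.clip(cos, -1, 1))))
    return np.ptp(angles)

quads = [
    np.array([[[0, 0]], [[400, 0]], [[380, 300]], [[0, 280]]], dtype=np.int32),  # skewed
    np.array([[[0, 0]], [[400, 0]], [[400, 300]], [[0, 300]]], dtype=np.int32),  # rectangle
]
quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5]  # largest first
quads = sorted(quads, key=angle_spread)                       # most regular first
print(angle_spread(quads[0]))  # the rectangle wins: spread is 0.0
```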
+ """ + + # these constants are carefully chosen + MORPH = 9 + CANNY = 84 + HOUGH = 25 + + IM_HEIGHT, IM_WIDTH, _ = rescaled_image.shape + + # convert the image to grayscale and blur it slightly + gray = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY) + gray = cv2.GaussianBlur(gray, (7,7), 0) + + # dilate helps to remove potential holes between edge segments + kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(MORPH,MORPH)) + dilated = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel) + + # find edges and mark them in the output map using the Canny algorithm + edged = cv2.Canny(dilated, 0, CANNY) + test_corners = self.get_corners(edged) + + approx_contours = [] + + if len(test_corners) >= 4: + quads = [] + + for quad in itertools.combinations(test_corners, 4): + points = np.array(quad) + points = transform.order_points(points) + points = np.array([[p] for p in points], dtype = "int32") + quads.append(points) + + # get top five quadrilaterals by area + quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5] + # sort candidate quadrilaterals by their angle range, which helps remove outliers + quads = sorted(quads, key=self.angle_range) + + approx = quads[0] + if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT): + approx_contours.append(approx) + + # for debugging: uncomment the code below to draw the corners and countour found + # by get_corners() and overlay it on the image + + # cv2.drawContours(rescaled_image, [approx], -1, (20, 20, 255), 2) + # plt.scatter(*zip(*test_corners)) + # plt.imshow(rescaled_image) + # plt.show() + + # also attempt to find contours directly from the edged image, which occasionally + # produces better results + (cnts, hierarchy) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5] + + # loop over the contours + for c in cnts: + # approximate the contour + approx = cv2.approxPolyDP(c, 80, True) + if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT): + approx_contours.append(approx) + break + + # If we did not find any valid contours, just use the whole image + if not approx_contours: + TOP_RIGHT = (IM_WIDTH, 0) + BOTTOM_RIGHT = (IM_WIDTH, IM_HEIGHT) + BOTTOM_LEFT = (0, IM_HEIGHT) + TOP_LEFT = (0, 0) + screenCnt = np.array([[TOP_RIGHT], [BOTTOM_RIGHT], [BOTTOM_LEFT], [TOP_LEFT]]) + + else: + screenCnt = max(approx_contours, key=cv2.contourArea) + + return screenCnt.reshape(4, 2) + + def interactive_get_contour(self, screenCnt, rescaled_image): + poly = Polygon(screenCnt, animated=True, fill=False, color="yellow", linewidth=5) + fig, ax = plt.subplots() + ax.add_patch(poly) + ax.set_title(('Drag the corners of the box to the corners of the document. 
\n' + 'Close the window when finished.')) + p = poly_i.PolygonInteractor(ax, poly) + plt.imshow(rescaled_image) + plt.show() + + new_points = p.get_poly_points()[:4] + new_points = np.array([[p] for p in new_points], dtype = "int32") + return new_points.reshape(4, 2) + + def scan(self, image_path): + + RESCALED_HEIGHT = 500.0 + OUTPUT_DIR = 'output' + + # load the image and compute the ratio of the old height + # to the new height, clone it, and resize it + image = cv2.imread(image_path) + + assert(image is not None) + + ratio = image.shape[0] / RESCALED_HEIGHT + orig = image.copy() + rescaled_image = imutils.resize(image, height = int(RESCALED_HEIGHT)) + + # get the contour of the document + screenCnt = self.get_contour(rescaled_image) + + if self.interactive: + screenCnt = self.interactive_get_contour(screenCnt, rescaled_image) + + # apply the perspective transformation + warped = transform.four_point_transform(orig, screenCnt * ratio) + + # convert the warped image to grayscale + gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY) + + # sharpen image + sharpen = cv2.GaussianBlur(gray, (0,0), 3) + sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0) + + # apply adaptive threshold to get black and white effect + thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15) + + # save the transformed image + basename = os.path.basename(image_path) + cv2.imwrite(OUTPUT_DIR + '/' + basename, thresh) + print("Proccessed " + basename) + + +def scan_document(image_path, interactive=False): + """ + Wrapper function to scan a single document image. + + Args: + image_path (str): Path to the image file to scan + interactive (bool): If True, allows manual corner adjustment + """ + scanner = DocScanner(interactive) + # Create output directory if it doesn't exist + if not os.path.exists('output'): + os.makedirs('output') + scanner.scan(image_path) + +def scan_documents(directory_path, interactive=False): + """ + Wrapper function to scan all documents in a directory. 
+ + Args: + directory_path (str): Path to directory containing images + interactive (bool): If True, allows manual corner adjustment + """ + scanner = DocScanner(interactive) + # Create output directory if it doesn't exist + if not os.path.exists('output'): + os.makedirs('output') + + valid_formats = [".jpg", ".jpeg", ".jp2", ".png", ".bmp", ".tiff", ".tif"] + get_ext = lambda f: os.path.splitext(f)[1].lower() + + im_files = [f for f in os.listdir(directory_path) if get_ext(f) in valid_formats] + for im in im_files: + scanner.scan(os.path.join(directory_path, im)) + +def main(): + ap = argparse.ArgumentParser() + group = ap.add_mutually_exclusive_group(required=True) + group.add_argument("--images", help="Directory of images to be scanned") + group.add_argument("--image", help="Path to single image to be scanned") + ap.add_argument("-i", action='store_true', + help = "Flag for manually verifying and/or setting document corners") + + args = vars(ap.parse_args()) + im_dir = args["images"] + im_file_path = args["image"] + interactive_mode = args["i"] + + if im_file_path: + scan_document(im_file_path, interactive_mode) + else: + scan_documents(im_dir, interactive_mode) + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..fa9f9e0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel" +] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/scan.py b/scan.py index 4aa835e..b69bc20 100644 --- a/scan.py +++ b/scan.py @@ -1,333 +1,4 @@ -# USAGE: -# python scan.py (--images | --image ) [-i] -# For example, to scan a single image with interactive mode: -# python scan.py --image sample_images/desk.JPG -i -# To scan all images in a directory automatically: -# python scan.py --images sample_images - -# Scanned images will be output to directory named 'output' - -from pyimagesearch import transform -from pyimagesearch import imutils -from scipy.spatial import distance as dist -from matplotlib.patches import Polygon -import polygon_interacter as poly_i -import numpy as np -import matplotlib.pyplot as plt -import itertools -import math -import cv2 -from pylsd.lsd import lsd - -import argparse -import os - -class DocScanner(object): - """An image scanner""" - - def __init__(self, interactive=False, MIN_QUAD_AREA_RATIO=0.25, MAX_QUAD_ANGLE_RANGE=40): - """ - Args: - interactive (boolean): If True, user can adjust screen contour before - transformation occurs in interactive pyplot window. - MIN_QUAD_AREA_RATIO (float): A contour will be rejected if its corners - do not form a quadrilateral that covers at least MIN_QUAD_AREA_RATIO - of the original image. Defaults to 0.25. - MAX_QUAD_ANGLE_RANGE (int): A contour will also be rejected if the range - of its interior angles exceeds MAX_QUAD_ANGLE_RANGE. Defaults to 40. 
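A short usage sketch of the two wrapper functions defined above, assuming the package from this patch is installed and a `sample_images` directory exists (paths are taken from the USAGE comment):

```python
from opencv_document_scanner.scan import scan_document, scan_documents

scan_document("sample_images/desk.JPG")             # writes output/desk.JPG
scan_documents("sample_images", interactive=False)  # scans every supported image
```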
- """ - self.interactive = interactive - self.MIN_QUAD_AREA_RATIO = MIN_QUAD_AREA_RATIO - self.MAX_QUAD_ANGLE_RANGE = MAX_QUAD_ANGLE_RANGE - - def filter_corners(self, corners, min_dist=20): - """Filters corners that are within min_dist of others""" - def predicate(representatives, corner): - return all(dist.euclidean(representative, corner) >= min_dist - for representative in representatives) - - filtered_corners = [] - for c in corners: - if predicate(filtered_corners, c): - filtered_corners.append(c) - return filtered_corners - - def angle_between_vectors_degrees(self, u, v): - """Returns the angle between two vectors in degrees""" - return np.degrees( - math.acos(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))) - - def get_angle(self, p1, p2, p3): - """ - Returns the angle between the line segment from p2 to p1 - and the line segment from p2 to p3 in degrees - """ - a = np.radians(np.array(p1)) - b = np.radians(np.array(p2)) - c = np.radians(np.array(p3)) - - avec = a - b - cvec = c - b - - return self.angle_between_vectors_degrees(avec, cvec) - - def angle_range(self, quad): - """ - Returns the range between max and min interior angles of quadrilateral. - The input quadrilateral must be a numpy array with vertices ordered clockwise - starting with the top left vertex. - """ - tl, tr, br, bl = quad - ura = self.get_angle(tl[0], tr[0], br[0]) - ula = self.get_angle(bl[0], tl[0], tr[0]) - lra = self.get_angle(tr[0], br[0], bl[0]) - lla = self.get_angle(br[0], bl[0], tl[0]) - - angles = [ura, ula, lra, lla] - return np.ptp(angles) - - def get_corners(self, img): - """ - Returns a list of corners ((x, y) tuples) found in the input image. With proper - pre-processing and filtering, it should output at most 10 potential corners. - This is a utility function used by get_contours. The input image is expected - to be rescaled and Canny filtered prior to be passed in. - """ - lines = lsd(img) - - # massages the output from LSD - # LSD operates on edges. One "line" has 2 edges, and so we need to combine the edges back into lines - # 1. separate out the lines into horizontal and vertical lines. - # 2. Draw the horizontal lines back onto a canvas, but slightly thicker and longer. - # 3. Run connected-components on the new canvas - # 4. Get the bounding box for each component, and the bounding box is final line. - # 5. The ends of each line is a corner - # 6. Repeat for vertical lines - # 7. Draw all the final lines onto another canvas. 
Where the lines overlap are also corners - - corners = [] - if lines is not None: - # separate out the horizontal and vertical lines, and draw them back onto separate canvases - lines = lines.squeeze().astype(np.int32).tolist() - horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8) - vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8) - for line in lines: - x1, y1, x2, y2, _ = line - if abs(x2 - x1) > abs(y2 - y1): - (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[0]) - cv2.line(horizontal_lines_canvas, (max(x1 - 5, 0), y1), (min(x2 + 5, img.shape[1] - 1), y2), 255, 2) - else: - (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[1]) - cv2.line(vertical_lines_canvas, (x1, max(y1 - 5, 0)), (x2, min(y2 + 5, img.shape[0] - 1)), 255, 2) - - lines = [] - - # find the horizontal lines (connected-components -> bounding boxes -> final lines) - (contours, hierarchy) = cv2.findContours(horizontal_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) - contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2] - horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8) - for contour in contours: - contour = contour.reshape((contour.shape[0], contour.shape[2])) - min_x = np.amin(contour[:, 0], axis=0) + 2 - max_x = np.amax(contour[:, 0], axis=0) - 2 - left_y = int(np.average(contour[contour[:, 0] == min_x][:, 1])) - right_y = int(np.average(contour[contour[:, 0] == max_x][:, 1])) - lines.append((min_x, left_y, max_x, right_y)) - cv2.line(horizontal_lines_canvas, (min_x, left_y), (max_x, right_y), 1, 1) - corners.append((min_x, left_y)) - corners.append((max_x, right_y)) - - # find the vertical lines (connected-components -> bounding boxes -> final lines) - (contours, hierarchy) = cv2.findContours(vertical_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) - contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2] - vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8) - for contour in contours: - contour = contour.reshape((contour.shape[0], contour.shape[2])) - min_y = np.amin(contour[:, 1], axis=0) + 2 - max_y = np.amax(contour[:, 1], axis=0) - 2 - top_x = int(np.average(contour[contour[:, 1] == min_y][:, 0])) - bottom_x = int(np.average(contour[contour[:, 1] == max_y][:, 0])) - lines.append((top_x, min_y, bottom_x, max_y)) - cv2.line(vertical_lines_canvas, (top_x, min_y), (bottom_x, max_y), 1, 1) - corners.append((top_x, min_y)) - corners.append((bottom_x, max_y)) - - # find the corners - corners_y, corners_x = np.where(horizontal_lines_canvas + vertical_lines_canvas == 2) - corners += zip(corners_x, corners_y) - - # remove corners in close proximity - corners = self.filter_corners(corners) - return corners - - def is_valid_contour(self, cnt, IM_WIDTH, IM_HEIGHT): - """Returns True if the contour satisfies all requirements set at instantitation""" - - return (len(cnt) == 4 and cv2.contourArea(cnt) > IM_WIDTH * IM_HEIGHT * self.MIN_QUAD_AREA_RATIO - and self.angle_range(cnt) < self.MAX_QUAD_ANGLE_RANGE) - - - def get_contour(self, rescaled_image): - """ - Returns a numpy array of shape (4, 2) containing the vertices of the four corners - of the document in the image. It considers the corners returned from get_corners() - and uses heuristics to choose the four corners that most likely represent - the corners of the document. If no corners were found, or the four corners represent - a quadrilateral that is too small or convex, it returns the original four corners. 
- """ - - # these constants are carefully chosen - MORPH = 9 - CANNY = 84 - HOUGH = 25 - - IM_HEIGHT, IM_WIDTH, _ = rescaled_image.shape - - # convert the image to grayscale and blur it slightly - gray = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY) - gray = cv2.GaussianBlur(gray, (7,7), 0) - - # dilate helps to remove potential holes between edge segments - kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(MORPH,MORPH)) - dilated = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel) - - # find edges and mark them in the output map using the Canny algorithm - edged = cv2.Canny(dilated, 0, CANNY) - test_corners = self.get_corners(edged) - - approx_contours = [] - - if len(test_corners) >= 4: - quads = [] - - for quad in itertools.combinations(test_corners, 4): - points = np.array(quad) - points = transform.order_points(points) - points = np.array([[p] for p in points], dtype = "int32") - quads.append(points) - - # get top five quadrilaterals by area - quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5] - # sort candidate quadrilaterals by their angle range, which helps remove outliers - quads = sorted(quads, key=self.angle_range) - - approx = quads[0] - if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT): - approx_contours.append(approx) - - # for debugging: uncomment the code below to draw the corners and countour found - # by get_corners() and overlay it on the image - - # cv2.drawContours(rescaled_image, [approx], -1, (20, 20, 255), 2) - # plt.scatter(*zip(*test_corners)) - # plt.imshow(rescaled_image) - # plt.show() - - # also attempt to find contours directly from the edged image, which occasionally - # produces better results - (cnts, hierarchy) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5] - - # loop over the contours - for c in cnts: - # approximate the contour - approx = cv2.approxPolyDP(c, 80, True) - if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT): - approx_contours.append(approx) - break - - # If we did not find any valid contours, just use the whole image - if not approx_contours: - TOP_RIGHT = (IM_WIDTH, 0) - BOTTOM_RIGHT = (IM_WIDTH, IM_HEIGHT) - BOTTOM_LEFT = (0, IM_HEIGHT) - TOP_LEFT = (0, 0) - screenCnt = np.array([[TOP_RIGHT], [BOTTOM_RIGHT], [BOTTOM_LEFT], [TOP_LEFT]]) - - else: - screenCnt = max(approx_contours, key=cv2.contourArea) - - return screenCnt.reshape(4, 2) - - def interactive_get_contour(self, screenCnt, rescaled_image): - poly = Polygon(screenCnt, animated=True, fill=False, color="yellow", linewidth=5) - fig, ax = plt.subplots() - ax.add_patch(poly) - ax.set_title(('Drag the corners of the box to the corners of the document. 
\n' - 'Close the window when finished.')) - p = poly_i.PolygonInteractor(ax, poly) - plt.imshow(rescaled_image) - plt.show() - - new_points = p.get_poly_points()[:4] - new_points = np.array([[p] for p in new_points], dtype = "int32") - return new_points.reshape(4, 2) - - def scan(self, image_path): - - RESCALED_HEIGHT = 500.0 - OUTPUT_DIR = 'output' - - # load the image and compute the ratio of the old height - # to the new height, clone it, and resize it - image = cv2.imread(image_path) - - assert(image is not None) - - ratio = image.shape[0] / RESCALED_HEIGHT - orig = image.copy() - rescaled_image = imutils.resize(image, height = int(RESCALED_HEIGHT)) - - # get the contour of the document - screenCnt = self.get_contour(rescaled_image) - - if self.interactive: - screenCnt = self.interactive_get_contour(screenCnt, rescaled_image) - - # apply the perspective transformation - warped = transform.four_point_transform(orig, screenCnt * ratio) - - # convert the warped image to grayscale - gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY) - - # sharpen image - sharpen = cv2.GaussianBlur(gray, (0,0), 3) - sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0) - - # apply adaptive threshold to get black and white effect - thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15) - - # save the transformed image - basename = os.path.basename(image_path) - cv2.imwrite(OUTPUT_DIR + '/' + basename, thresh) - print("Proccessed " + basename) - +from opencv_document_scanner.scan import main if __name__ == "__main__": - ap = argparse.ArgumentParser() - group = ap.add_mutually_exclusive_group(required=True) - group.add_argument("--images", help="Directory of images to be scanned") - group.add_argument("--image", help="Path to single image to be scanned") - ap.add_argument("-i", action='store_true', - help = "Flag for manually verifying and/or setting document corners") - - args = vars(ap.parse_args()) - im_dir = args["images"] - im_file_path = args["image"] - interactive_mode = args["i"] - - scanner = DocScanner(interactive_mode) - - valid_formats = [".jpg", ".jpeg", ".jp2", ".png", ".bmp", ".tiff", ".tif"] - - get_ext = lambda f: os.path.splitext(f)[1].lower() - - # Scan single image specified by command line argument --image - if im_file_path: - scanner.scan(im_file_path) - - # Scan all valid images in directory specified by command line argument --images - else: - im_files = [f for f in os.listdir(im_dir) if get_ext(f) in valid_formats] - for im in im_files: - scanner.scan(im_dir + '/' + im) + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b836204 --- /dev/null +++ b/setup.py @@ -0,0 +1,48 @@ +from setuptools import setup, find_packages + +# Read README content +with open("README.md", "r", encoding="utf-8") as fh: + long_description = fh.read() + +setup( + name="opencv-document-scanner", + version="0.1.0", + description="A document scanner application using OpenCV", + long_description=long_description, + long_description_content_type="text/markdown", + author="Andrew Campbell", + author_email="andrewc@captricity.com", + packages=find_packages(exclude=["tests*"]), + install_requires=[ + "opencv-python>=4.5.0", + "numpy>=1.19.0", + "imutils>=0.5.4", + "matplotlib>=3.3.0", + "pylsd @ git+https://github.com/primetang/pylsd@refs/pull/17/head" + ], + python_requires=">=3.7", + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: 
Python :: 3",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+        "Topic :: Multimedia :: Graphics",
+        "Operating System :: OS Independent",
+    ],
+    keywords="opencv document scanner image-processing computer-vision",
+    project_urls={
+        "Bug Reports": "https://github.com/username/OpenCV-Document-Scanner/issues",
+        "Source": "https://github.com/username/OpenCV-Document-Scanner",
+    },
+    include_package_data=True,
+    zip_safe=False,
+    entry_points={
+        'console_scripts': [
+            'document-scanner=opencv_document_scanner.scan:main',
+        ],
+    },
+)
\ No newline at end of file
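The `console_scripts` entry above points the installed `document-scanner` command at `opencv_document_scanner.scan:main`; an equivalent invocation from Python (argument values taken from the USAGE comment, assuming the package is installed) would be:

```python
import sys
from opencv_document_scanner.scan import main

sys.argv = ["document-scanner", "--images", "sample_images"]
main()  # scanned copies are written to ./output
```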