diff --git a/.gitignore b/.gitignore
index 058598c..87b7783 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,7 @@
*.pyc
-.DS_Store
\ No newline at end of file
+.DS_Store
+*.egg-info/
+dist/
+build/
+*.egg
+__pycache__/
\ No newline at end of file
diff --git a/README.md b/README.md
index 9ce8d29..2ee393b 100644
--- a/README.md
+++ b/README.md
@@ -15,16 +15,25 @@ This project makes use of the transform and imutils modules from pyimagesearch (

#### Here are some examples of images before and after scan:
-
+
-
+
-
+
-
+
### Usage
+You can install the package straight from GitHub with:
+```
+pip install git+https://github.com/user/repo.git
+```
+Then run the scanner from your terminal:
+```
+document-scanner (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
+```
+Alternatively, clone the repo and run the script directly:
```
python scan.py (--images | --image ) [-i]
```
diff --git a/pyimagesearch/__init__.py b/opencv_document_scanner/__init__.py
similarity index 100%
rename from pyimagesearch/__init__.py
rename to opencv_document_scanner/__init__.py
diff --git a/polygon_interacter.py b/opencv_document_scanner/polygon_interacter.py
similarity index 100%
rename from polygon_interacter.py
rename to opencv_document_scanner/polygon_interacter.py
diff --git a/opencv_document_scanner/pyimagesearch/__init__.py b/opencv_document_scanner/pyimagesearch/__init__.py
new file mode 100644
index 0000000..57600df
--- /dev/null
+++ b/opencv_document_scanner/pyimagesearch/__init__.py
@@ -0,0 +1 @@
+from . import imutils, transform  # noqa: F401 -- explicit re-export; a self star-import binds nothing
diff --git a/pyimagesearch/imutils.py b/opencv_document_scanner/pyimagesearch/imutils.py
similarity index 100%
rename from pyimagesearch/imutils.py
rename to opencv_document_scanner/pyimagesearch/imutils.py
diff --git a/pyimagesearch/transform.py b/opencv_document_scanner/pyimagesearch/transform.py
similarity index 100%
rename from pyimagesearch/transform.py
rename to opencv_document_scanner/pyimagesearch/transform.py
diff --git a/opencv_document_scanner/scan.py b/opencv_document_scanner/scan.py
new file mode 100644
index 0000000..4ad866f
--- /dev/null
+++ b/opencv_document_scanner/scan.py
@@ -0,0 +1,359 @@
+# USAGE (run from the repository root through the top-level scan.py shim):
+# python scan.py (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
+# For example, to scan a single image with interactive mode:
+# python scan.py --image sample_images/desk.JPG -i
+# To scan all images in a directory automatically:
+# python scan.py --images sample_images
+
+# Scanned images are written to a directory named 'output'
+
+from .pyimagesearch import transform
+from .pyimagesearch import imutils
+from scipy.spatial import distance as dist
+from matplotlib.patches import Polygon
+from . import polygon_interacter as poly_i
+import numpy as np
+import matplotlib.pyplot as plt
+import itertools
+import math
+import cv2
+from pylsd.lsd import lsd
+
+import argparse
+import os
+
+class DocScanner(object):
+ """An image scanner"""
+
+ def __init__(self, interactive=False, MIN_QUAD_AREA_RATIO=0.25, MAX_QUAD_ANGLE_RANGE=40):
+     """
+     Stores the scanner configuration.
+
+     Args:
+         interactive (boolean): When True, the detected contour is shown in an
+             interactive pyplot window so the user can adjust it before the
+             transformation is applied.
+         MIN_QUAD_AREA_RATIO (float): Minimum fraction of the image area that a
+             candidate quadrilateral must cover to be accepted. Defaults to 0.25.
+         MAX_QUAD_ANGLE_RANGE (int): Maximum allowed spread between the largest
+             and smallest interior angle of a candidate. Defaults to 40.
+     """
+     # acceptance thresholds consulted by is_valid_contour()
+     self.MAX_QUAD_ANGLE_RANGE = MAX_QUAD_ANGLE_RANGE
+     self.MIN_QUAD_AREA_RATIO = MIN_QUAD_AREA_RATIO
+     # decides whether scan() calls interactive_get_contour()
+     self.interactive = interactive
+
+ def filter_corners(self, corners, min_dist=20):
+     """
+     Drops every corner that lies closer than min_dist to a corner already kept.
+     Corners are visited in input order, so the first corner of a cluster wins.
+     """
+     kept = []
+     for candidate in corners:
+         for survivor in kept:
+             if dist.euclidean(survivor, candidate) >= min_dist:
+                 continue
+             # candidate falls inside the exclusion radius of a kept corner
+             break
+         else:
+             kept.append(candidate)
+     return kept
+
+ def angle_between_vectors_degrees(self, u, v):
+     """
+     Returns the angle between two vectors in degrees.
+
+     Args:
+         u, v: Vectors (array-likes) of equal length.
+
+     Returns:
+         The angle, in the range [0, 180]. If either vector has zero length the
+         cosine is undefined (nan) and the result is nan as well.
+     """
+     cosine = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
+     # Rounding can push the cosine of (anti)parallel vectors a hair outside
+     # [-1, 1], where math.acos raises ValueError; clamp it back first.
+     cosine = np.clip(cosine, -1.0, 1.0)
+     return np.degrees(math.acos(cosine))
+
+ def get_angle(self, p1, p2, p3):
+     """
+     Returns, in degrees, the angle at vertex p2 formed by the segments
+     p2 -> p1 and p2 -> p3.
+     """
+     # np.radians only rescales every coordinate by the same factor (pi / 180),
+     # so the angle between the two edge vectors is unaffected by it.
+     vertex = np.radians(np.array(p2))
+     to_p1 = np.radians(np.array(p1)) - vertex
+     to_p3 = np.radians(np.array(p3)) - vertex
+     return self.angle_between_vectors_degrees(to_p1, to_p3)
+
+ def angle_range(self, quad):
+     """
+     Returns the spread (max minus min) of the four interior angles of a quadrilateral.
+     quad must be a numpy array whose vertices are ordered clockwise beginning with
+     the top-left one; each vertex is read from index 0 of its entry (the nesting
+     used by the contours passed in from is_valid_contour and get_contour).
+     """
+     tl, tr, br, bl = (vertex[0] for vertex in quad)
+     interior = [
+         self.get_angle(tl, tr, br),  # upper right
+         self.get_angle(bl, tl, tr),  # upper left
+         self.get_angle(tr, br, bl),  # lower right
+         self.get_angle(br, bl, tl),  # lower left
+     ]
+     return np.ptp(interior)
+
+ def get_corners(self, img):
+     """
+     Returns a list of corners ((x, y) tuples) found in the input image. With proper
+     pre-processing and filtering, it should output at most 10 potential corners.
+     This is a utility function used by get_contour. The input image is expected
+     to be rescaled and Canny filtered prior to being passed in.
+
+     Args:
+         img: Edge map to analyse (get_contour passes the output of cv2.Canny).
+
+     Returns:
+         A list of (x, y) corner candidates thinned by filter_corners(); the list
+         is empty when the line segment detector returns None.
+     """
+     lines = lsd(img)
+
+     # massages the output from LSD
+     # LSD operates on edges. One "line" has 2 edges, and so we need to combine the edges back into lines
+     # 1. separate out the lines into horizontal and vertical lines.
+     # 2. Draw the horizontal lines back onto a canvas, but slightly thicker and longer.
+     # 3. Run connected-components on the new canvas
+     # 4. Get the bounding box for each component, and the bounding box is final line.
+     # 5. The ends of each line is a corner
+     # 6. Repeat for vertical lines
+     # 7. Draw all the final lines onto another canvas. Where the lines overlap are also corners
+
+     corners = []
+     if lines is not None:
+         # separate out the horizontal and vertical lines, and draw them back onto separate canvases.
+         # np.atleast_2d keeps a lone detection as one row; a bare squeeze() would flatten
+         # it to 1-D and break the five-value unpacking in the loop below.
+         lines = np.atleast_2d(lines.squeeze()).astype(np.int32).tolist()
+         horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
+         vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
+         for x1, y1, x2, y2, _ in lines:
+             if abs(x2 - x1) > abs(y2 - y1):
+                 # mostly horizontal: order the endpoints left-to-right, then lengthen by 5 px each side
+                 (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[0])
+                 cv2.line(horizontal_lines_canvas, (max(x1 - 5, 0), y1), (min(x2 + 5, img.shape[1] - 1), y2), 255, 2)
+             else:
+                 # mostly vertical: order the endpoints top-to-bottom, then lengthen by 5 px each side
+                 (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[1])
+                 cv2.line(vertical_lines_canvas, (x1, max(y1 - 5, 0)), (x2, min(y2 + 5, img.shape[0] - 1)), 255, 2)
+
+         # find the horizontal lines (connected-components -> bounding boxes -> final lines)
+         contours, _ = cv2.findContours(horizontal_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
+         # only the two longest components are kept
+         contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
+         horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
+         for contour in contours:
+             contour = contour.reshape((contour.shape[0], contour.shape[2]))
+             # step 2 px inwards from both extremes of the component
+             min_x = np.amin(contour[:, 0], axis=0) + 2
+             max_x = np.amax(contour[:, 0], axis=0) - 2
+             left_y = int(np.average(contour[contour[:, 0] == min_x][:, 1]))
+             right_y = int(np.average(contour[contour[:, 0] == max_x][:, 1]))
+             # redrawn with value 1 so that a crossing with a vertical line sums to 2
+             cv2.line(horizontal_lines_canvas, (min_x, left_y), (max_x, right_y), 1, 1)
+             corners.append((min_x, left_y))
+             corners.append((max_x, right_y))
+
+         # find the vertical lines (connected-components -> bounding boxes -> final lines)
+         contours, _ = cv2.findContours(vertical_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
+         contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
+         vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
+         for contour in contours:
+             contour = contour.reshape((contour.shape[0], contour.shape[2]))
+             min_y = np.amin(contour[:, 1], axis=0) + 2
+             max_y = np.amax(contour[:, 1], axis=0) - 2
+             top_x = int(np.average(contour[contour[:, 1] == min_y][:, 0]))
+             bottom_x = int(np.average(contour[contour[:, 1] == max_y][:, 0]))
+             cv2.line(vertical_lines_canvas, (top_x, min_y), (bottom_x, max_y), 1, 1)
+             corners.append((top_x, min_y))
+             corners.append((bottom_x, max_y))
+
+         # find the corners: pixels where a horizontal and a vertical final line overlap
+         corners_y, corners_x = np.where(horizontal_lines_canvas + vertical_lines_canvas == 2)
+         corners += zip(corners_x, corners_y)
+
+     # remove corners in close proximity
+     corners = self.filter_corners(corners)
+     return corners
+
+ def is_valid_contour(self, cnt, IM_WIDTH, IM_HEIGHT):
+     """
+     Tells whether cnt meets the requirements set at instantiation: exactly four
+     vertices, an area above MIN_QUAD_AREA_RATIO of the image area, and an
+     interior-angle range below MAX_QUAD_ANGLE_RANGE.
+     """
+     if len(cnt) != 4:
+         return False
+     min_area = IM_WIDTH * IM_HEIGHT * self.MIN_QUAD_AREA_RATIO
+     if not cv2.contourArea(cnt) > min_area:
+         return False
+     return self.angle_range(cnt) < self.MAX_QUAD_ANGLE_RANGE
+
+
+ def get_contour(self, rescaled_image):
+     """
+     Returns a numpy array of shape (4, 2) containing the vertices of the four corners
+     of the document in the image. It considers the corners returned from get_corners()
+     and uses heuristics to choose the four corners that most likely represent
+     the corners of the document. If no candidate passes is_valid_contour() (none was
+     found, or every candidate is too small or its interior angles vary too much),
+     the four corners of the whole image are returned instead.
+
+     Args:
+         rescaled_image: Three-channel BGR image (its shape is unpacked into height,
+             width and channels, and it is converted with cv2.COLOR_BGR2GRAY).
+     """
+
+     # these constants are carefully chosen
+     MORPH = 9
+     CANNY = 84
+
+     IM_HEIGHT, IM_WIDTH, _ = rescaled_image.shape
+
+     # convert the image to grayscale and blur it slightly
+     gray = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)
+     gray = cv2.GaussianBlur(gray, (7,7), 0)
+
+     # morphological closing helps to remove potential holes between edge segments
+     kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(MORPH,MORPH))
+     dilated = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
+
+     # find edges and mark them in the output map using the Canny algorithm
+     edged = cv2.Canny(dilated, 0, CANNY)
+     test_corners = self.get_corners(edged)
+
+     approx_contours = []
+
+     if len(test_corners) >= 4:
+         quads = []
+
+         # every 4-subset of the candidate corners is a candidate quadrilateral
+         for quad in itertools.combinations(test_corners, 4):
+             points = np.array(quad)
+             points = transform.order_points(points)
+             # wrap each vertex in its own row, the nesting angle_range() indexes into
+             points = np.array([[p] for p in points], dtype = "int32")
+             quads.append(points)
+
+         # get top five quadrilaterals by area
+         quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5]
+         # sort candidate quadrilaterals by their angle range, which helps remove outliers
+         quads = sorted(quads, key=self.angle_range)
+
+         approx = quads[0]
+         if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
+             approx_contours.append(approx)
+
+         # for debugging: uncomment the code below to draw the corners and contour found
+         # by get_corners() and overlay it on the image
+
+         # cv2.drawContours(rescaled_image, [approx], -1, (20, 20, 255), 2)
+         # plt.scatter(*zip(*test_corners))
+         # plt.imshow(rescaled_image)
+         # plt.show()
+
+     # also attempt to find contours directly from the edged image, which occasionally
+     # produces better results
+     cnts, _ = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+     cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
+
+     # loop over the contours, largest first, and stop at the first valid one
+     for c in cnts:
+         # approximate the contour
+         approx = cv2.approxPolyDP(c, 80, True)
+         if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
+             approx_contours.append(approx)
+             break
+
+     # If we did not find any valid contours, just use the whole image
+     if not approx_contours:
+         TOP_RIGHT = (IM_WIDTH, 0)
+         BOTTOM_RIGHT = (IM_WIDTH, IM_HEIGHT)
+         BOTTOM_LEFT = (0, IM_HEIGHT)
+         TOP_LEFT = (0, 0)
+         screenCnt = np.array([[TOP_RIGHT], [BOTTOM_RIGHT], [BOTTOM_LEFT], [TOP_LEFT]])
+
+     else:
+         # between the two detection strategies, prefer the larger quadrilateral
+         screenCnt = max(approx_contours, key=cv2.contourArea)
+
+     return screenCnt.reshape(4, 2)
+
+ def interactive_get_contour(self, screenCnt, rescaled_image):
+     """
+     Draws screenCnt as a polygon over rescaled_image in a pyplot figure, hands the
+     polygon to PolygonInteractor, and after plt.show() returns the first four
+     polygon points as an int32 numpy array of shape (4, 2).
+     """
+     outline = Polygon(screenCnt, animated=True, fill=False, color="yellow", linewidth=5)
+     _, axes = plt.subplots()
+     axes.add_patch(outline)
+     title = ('Drag the corners of the box to the corners of the document. \n'
+              'Close the window when finished.')
+     axes.set_title(title)
+     interactor = poly_i.PolygonInteractor(axes, outline)
+     plt.imshow(rescaled_image)
+     plt.show()
+
+     # only the first four points are used
+     adjusted = [[point] for point in interactor.get_poly_points()[:4]]
+     return np.array(adjusted, dtype = "int32").reshape(4, 2)
+
+ def scan(self, image_path):
+     """
+     Scans one image file and writes the result into the 'output' directory.
+
+     The document contour is located on a copy produced by imutils.resize with a
+     height of 500 (and adjusted by the user when self.interactive is set). The
+     original image is then perspective-transformed to that contour, converted to
+     grayscale, sharpened, adaptively thresholded and saved under the base name of
+     image_path.
+
+     Args:
+         image_path (str): Path to the image file to scan.
+
+     Raises:
+         IOError: If cv2.imread cannot load image_path.
+     """
+
+     RESCALED_HEIGHT = 500.0
+     OUTPUT_DIR = 'output'
+
+     # load the image and compute the ratio of the old height
+     # to the new height, clone it, and resize it
+     image = cv2.imread(image_path)
+
+     # cv2.imread reports failure by returning None; an assert would be stripped under -O
+     if image is None:
+         raise IOError("Could not read image: " + str(image_path))
+
+     ratio = image.shape[0] / RESCALED_HEIGHT
+     orig = image.copy()
+     rescaled_image = imutils.resize(image, height = int(RESCALED_HEIGHT))
+
+     # get the contour of the document
+     screenCnt = self.get_contour(rescaled_image)
+
+     if self.interactive:
+         screenCnt = self.interactive_get_contour(screenCnt, rescaled_image)
+
+     # apply the perspective transformation; the contour was found on the rescaled
+     # copy, so it is scaled back up to the coordinates of the original image
+     warped = transform.four_point_transform(orig, screenCnt * ratio)
+
+     # convert the warped image to grayscale
+     gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
+
+     # sharpen image (unsharp mask: weighted original minus its Gaussian blur)
+     sharpen = cv2.GaussianBlur(gray, (0,0), 3)
+     sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0)
+
+     # apply adaptive threshold to get black and white effect
+     thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15)
+
+     # save the transformed image; make sure the target directory exists first so
+     # that calling scan() directly (without the module-level wrappers) also works
+     basename = os.path.basename(image_path)
+     os.makedirs(OUTPUT_DIR, exist_ok=True)
+     cv2.imwrite(os.path.join(OUTPUT_DIR, basename), thresh)
+     print("Proccessed " + basename)
+
+
+def scan_document(image_path, interactive=False):
+    """
+    Wrapper function to scan a single document image.
+
+    Creates the 'output' directory if needed, then runs DocScanner.scan on the image.
+
+    Args:
+        image_path (str): Path to the image file to scan
+        interactive (bool): If True, allows manual corner adjustment
+    """
+    scanner = DocScanner(interactive)
+    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
+    os.makedirs('output', exist_ok=True)
+    scanner.scan(image_path)
+
+def scan_documents(directory_path, interactive=False):
+    """
+    Wrapper function to scan all documents in a directory.
+
+    Only regular files whose extension (case-insensitive) is one of the supported
+    image formats are scanned; sub-directories are not descended into.
+
+    Args:
+        directory_path (str): Path to directory containing images
+        interactive (bool): If True, allows manual corner adjustment
+    """
+    scanner = DocScanner(interactive)
+    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
+    os.makedirs('output', exist_ok=True)
+
+    valid_formats = (".jpg", ".jpeg", ".jp2", ".png", ".bmp", ".tiff", ".tif")
+
+    # os.listdir() returns entries in arbitrary order; sort for a reproducible run
+    for name in sorted(os.listdir(directory_path)):
+        if os.path.splitext(name)[1].lower() not in valid_formats:
+            continue
+        path = os.path.join(directory_path, name)
+        # a directory that happens to carry an image extension cannot be scanned
+        if not os.path.isfile(path):
+            continue
+        scanner.scan(path)
+
+def main():
+    """
+    Command-line entry point: parses --images / --image (exactly one of the two is
+    required) and the optional -i flag, then scans either the single image or
+    every image in the directory.
+    """
+    parser = argparse.ArgumentParser()
+    source = parser.add_mutually_exclusive_group(required=True)
+    source.add_argument("--images", help="Directory of images to be scanned")
+    source.add_argument("--image", help="Path to single image to be scanned")
+    parser.add_argument("-i", action='store_true',
+        help = "Flag for manually verifying and/or setting document corners")
+
+    options = parser.parse_args()
+
+    # a non-empty --image takes the single-file path; otherwise scan the directory
+    if options.image:
+        scan_document(options.image, options.i)
+    else:
+        scan_documents(options.images, options.i)
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..fa9f9e0
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+ "setuptools>=42",
+ "wheel"
+]
+build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/scan.py b/scan.py
index 4aa835e..b69bc20 100644
--- a/scan.py
+++ b/scan.py
@@ -1,333 +1,4 @@
-# USAGE:
-# python scan.py (--images | --image ) [-i]
-# For example, to scan a single image with interactive mode:
-# python scan.py --image sample_images/desk.JPG -i
-# To scan all images in a directory automatically:
-# python scan.py --images sample_images
-
-# Scanned images will be output to directory named 'output'
-
-from pyimagesearch import transform
-from pyimagesearch import imutils
-from scipy.spatial import distance as dist
-from matplotlib.patches import Polygon
-import polygon_interacter as poly_i
-import numpy as np
-import matplotlib.pyplot as plt
-import itertools
-import math
-import cv2
-from pylsd.lsd import lsd
-
-import argparse
-import os
-
-class DocScanner(object):
- """An image scanner"""
-
- def __init__(self, interactive=False, MIN_QUAD_AREA_RATIO=0.25, MAX_QUAD_ANGLE_RANGE=40):
- """
- Args:
- interactive (boolean): If True, user can adjust screen contour before
- transformation occurs in interactive pyplot window.
- MIN_QUAD_AREA_RATIO (float): A contour will be rejected if its corners
- do not form a quadrilateral that covers at least MIN_QUAD_AREA_RATIO
- of the original image. Defaults to 0.25.
- MAX_QUAD_ANGLE_RANGE (int): A contour will also be rejected if the range
- of its interior angles exceeds MAX_QUAD_ANGLE_RANGE. Defaults to 40.
- """
- self.interactive = interactive
- self.MIN_QUAD_AREA_RATIO = MIN_QUAD_AREA_RATIO
- self.MAX_QUAD_ANGLE_RANGE = MAX_QUAD_ANGLE_RANGE
-
- def filter_corners(self, corners, min_dist=20):
- """Filters corners that are within min_dist of others"""
- def predicate(representatives, corner):
- return all(dist.euclidean(representative, corner) >= min_dist
- for representative in representatives)
-
- filtered_corners = []
- for c in corners:
- if predicate(filtered_corners, c):
- filtered_corners.append(c)
- return filtered_corners
-
- def angle_between_vectors_degrees(self, u, v):
- """Returns the angle between two vectors in degrees"""
- return np.degrees(
- math.acos(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))))
-
- def get_angle(self, p1, p2, p3):
- """
- Returns the angle between the line segment from p2 to p1
- and the line segment from p2 to p3 in degrees
- """
- a = np.radians(np.array(p1))
- b = np.radians(np.array(p2))
- c = np.radians(np.array(p3))
-
- avec = a - b
- cvec = c - b
-
- return self.angle_between_vectors_degrees(avec, cvec)
-
- def angle_range(self, quad):
- """
- Returns the range between max and min interior angles of quadrilateral.
- The input quadrilateral must be a numpy array with vertices ordered clockwise
- starting with the top left vertex.
- """
- tl, tr, br, bl = quad
- ura = self.get_angle(tl[0], tr[0], br[0])
- ula = self.get_angle(bl[0], tl[0], tr[0])
- lra = self.get_angle(tr[0], br[0], bl[0])
- lla = self.get_angle(br[0], bl[0], tl[0])
-
- angles = [ura, ula, lra, lla]
- return np.ptp(angles)
-
- def get_corners(self, img):
- """
- Returns a list of corners ((x, y) tuples) found in the input image. With proper
- pre-processing and filtering, it should output at most 10 potential corners.
- This is a utility function used by get_contours. The input image is expected
- to be rescaled and Canny filtered prior to be passed in.
- """
- lines = lsd(img)
-
- # massages the output from LSD
- # LSD operates on edges. One "line" has 2 edges, and so we need to combine the edges back into lines
- # 1. separate out the lines into horizontal and vertical lines.
- # 2. Draw the horizontal lines back onto a canvas, but slightly thicker and longer.
- # 3. Run connected-components on the new canvas
- # 4. Get the bounding box for each component, and the bounding box is final line.
- # 5. The ends of each line is a corner
- # 6. Repeat for vertical lines
- # 7. Draw all the final lines onto another canvas. Where the lines overlap are also corners
-
- corners = []
- if lines is not None:
- # separate out the horizontal and vertical lines, and draw them back onto separate canvases
- lines = lines.squeeze().astype(np.int32).tolist()
- horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
- vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
- for line in lines:
- x1, y1, x2, y2, _ = line
- if abs(x2 - x1) > abs(y2 - y1):
- (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[0])
- cv2.line(horizontal_lines_canvas, (max(x1 - 5, 0), y1), (min(x2 + 5, img.shape[1] - 1), y2), 255, 2)
- else:
- (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[1])
- cv2.line(vertical_lines_canvas, (x1, max(y1 - 5, 0)), (x2, min(y2 + 5, img.shape[0] - 1)), 255, 2)
-
- lines = []
-
- # find the horizontal lines (connected-components -> bounding boxes -> final lines)
- (contours, hierarchy) = cv2.findContours(horizontal_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
- contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
- horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
- for contour in contours:
- contour = contour.reshape((contour.shape[0], contour.shape[2]))
- min_x = np.amin(contour[:, 0], axis=0) + 2
- max_x = np.amax(contour[:, 0], axis=0) - 2
- left_y = int(np.average(contour[contour[:, 0] == min_x][:, 1]))
- right_y = int(np.average(contour[contour[:, 0] == max_x][:, 1]))
- lines.append((min_x, left_y, max_x, right_y))
- cv2.line(horizontal_lines_canvas, (min_x, left_y), (max_x, right_y), 1, 1)
- corners.append((min_x, left_y))
- corners.append((max_x, right_y))
-
- # find the vertical lines (connected-components -> bounding boxes -> final lines)
- (contours, hierarchy) = cv2.findContours(vertical_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
- contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
- vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
- for contour in contours:
- contour = contour.reshape((contour.shape[0], contour.shape[2]))
- min_y = np.amin(contour[:, 1], axis=0) + 2
- max_y = np.amax(contour[:, 1], axis=0) - 2
- top_x = int(np.average(contour[contour[:, 1] == min_y][:, 0]))
- bottom_x = int(np.average(contour[contour[:, 1] == max_y][:, 0]))
- lines.append((top_x, min_y, bottom_x, max_y))
- cv2.line(vertical_lines_canvas, (top_x, min_y), (bottom_x, max_y), 1, 1)
- corners.append((top_x, min_y))
- corners.append((bottom_x, max_y))
-
- # find the corners
- corners_y, corners_x = np.where(horizontal_lines_canvas + vertical_lines_canvas == 2)
- corners += zip(corners_x, corners_y)
-
- # remove corners in close proximity
- corners = self.filter_corners(corners)
- return corners
-
- def is_valid_contour(self, cnt, IM_WIDTH, IM_HEIGHT):
- """Returns True if the contour satisfies all requirements set at instantitation"""
-
- return (len(cnt) == 4 and cv2.contourArea(cnt) > IM_WIDTH * IM_HEIGHT * self.MIN_QUAD_AREA_RATIO
- and self.angle_range(cnt) < self.MAX_QUAD_ANGLE_RANGE)
-
-
- def get_contour(self, rescaled_image):
- """
- Returns a numpy array of shape (4, 2) containing the vertices of the four corners
- of the document in the image. It considers the corners returned from get_corners()
- and uses heuristics to choose the four corners that most likely represent
- the corners of the document. If no corners were found, or the four corners represent
- a quadrilateral that is too small or convex, it returns the original four corners.
- """
-
- # these constants are carefully chosen
- MORPH = 9
- CANNY = 84
- HOUGH = 25
-
- IM_HEIGHT, IM_WIDTH, _ = rescaled_image.shape
-
- # convert the image to grayscale and blur it slightly
- gray = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)
- gray = cv2.GaussianBlur(gray, (7,7), 0)
-
- # dilate helps to remove potential holes between edge segments
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(MORPH,MORPH))
- dilated = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
-
- # find edges and mark them in the output map using the Canny algorithm
- edged = cv2.Canny(dilated, 0, CANNY)
- test_corners = self.get_corners(edged)
-
- approx_contours = []
-
- if len(test_corners) >= 4:
- quads = []
-
- for quad in itertools.combinations(test_corners, 4):
- points = np.array(quad)
- points = transform.order_points(points)
- points = np.array([[p] for p in points], dtype = "int32")
- quads.append(points)
-
- # get top five quadrilaterals by area
- quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5]
- # sort candidate quadrilaterals by their angle range, which helps remove outliers
- quads = sorted(quads, key=self.angle_range)
-
- approx = quads[0]
- if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
- approx_contours.append(approx)
-
- # for debugging: uncomment the code below to draw the corners and countour found
- # by get_corners() and overlay it on the image
-
- # cv2.drawContours(rescaled_image, [approx], -1, (20, 20, 255), 2)
- # plt.scatter(*zip(*test_corners))
- # plt.imshow(rescaled_image)
- # plt.show()
-
- # also attempt to find contours directly from the edged image, which occasionally
- # produces better results
- (cnts, hierarchy) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
- cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
-
- # loop over the contours
- for c in cnts:
- # approximate the contour
- approx = cv2.approxPolyDP(c, 80, True)
- if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
- approx_contours.append(approx)
- break
-
- # If we did not find any valid contours, just use the whole image
- if not approx_contours:
- TOP_RIGHT = (IM_WIDTH, 0)
- BOTTOM_RIGHT = (IM_WIDTH, IM_HEIGHT)
- BOTTOM_LEFT = (0, IM_HEIGHT)
- TOP_LEFT = (0, 0)
- screenCnt = np.array([[TOP_RIGHT], [BOTTOM_RIGHT], [BOTTOM_LEFT], [TOP_LEFT]])
-
- else:
- screenCnt = max(approx_contours, key=cv2.contourArea)
-
- return screenCnt.reshape(4, 2)
-
- def interactive_get_contour(self, screenCnt, rescaled_image):
- poly = Polygon(screenCnt, animated=True, fill=False, color="yellow", linewidth=5)
- fig, ax = plt.subplots()
- ax.add_patch(poly)
- ax.set_title(('Drag the corners of the box to the corners of the document. \n'
- 'Close the window when finished.'))
- p = poly_i.PolygonInteractor(ax, poly)
- plt.imshow(rescaled_image)
- plt.show()
-
- new_points = p.get_poly_points()[:4]
- new_points = np.array([[p] for p in new_points], dtype = "int32")
- return new_points.reshape(4, 2)
-
- def scan(self, image_path):
-
- RESCALED_HEIGHT = 500.0
- OUTPUT_DIR = 'output'
-
- # load the image and compute the ratio of the old height
- # to the new height, clone it, and resize it
- image = cv2.imread(image_path)
-
- assert(image is not None)
-
- ratio = image.shape[0] / RESCALED_HEIGHT
- orig = image.copy()
- rescaled_image = imutils.resize(image, height = int(RESCALED_HEIGHT))
-
- # get the contour of the document
- screenCnt = self.get_contour(rescaled_image)
-
- if self.interactive:
- screenCnt = self.interactive_get_contour(screenCnt, rescaled_image)
-
- # apply the perspective transformation
- warped = transform.four_point_transform(orig, screenCnt * ratio)
-
- # convert the warped image to grayscale
- gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
-
- # sharpen image
- sharpen = cv2.GaussianBlur(gray, (0,0), 3)
- sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0)
-
- # apply adaptive threshold to get black and white effect
- thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15)
-
- # save the transformed image
- basename = os.path.basename(image_path)
- cv2.imwrite(OUTPUT_DIR + '/' + basename, thresh)
- print("Proccessed " + basename)
-
+from opencv_document_scanner.scan import main
if __name__ == "__main__":
- ap = argparse.ArgumentParser()
- group = ap.add_mutually_exclusive_group(required=True)
- group.add_argument("--images", help="Directory of images to be scanned")
- group.add_argument("--image", help="Path to single image to be scanned")
- ap.add_argument("-i", action='store_true',
- help = "Flag for manually verifying and/or setting document corners")
-
- args = vars(ap.parse_args())
- im_dir = args["images"]
- im_file_path = args["image"]
- interactive_mode = args["i"]
-
- scanner = DocScanner(interactive_mode)
-
- valid_formats = [".jpg", ".jpeg", ".jp2", ".png", ".bmp", ".tiff", ".tif"]
-
- get_ext = lambda f: os.path.splitext(f)[1].lower()
-
- # Scan single image specified by command line argument --image
- if im_file_path:
- scanner.scan(im_file_path)
-
- # Scan all valid images in directory specified by command line argument --images
- else:
- im_files = [f for f in os.listdir(im_dir) if get_ext(f) in valid_formats]
- for im in im_files:
- scanner.scan(im_dir + '/' + im)
+ main()
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..b836204
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,51 @@
+from setuptools import setup, find_packages
+
+# Read README content
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+setup(
+    name="opencv-document-scanner",
+    version="0.1.0",
+    description="A document scanner application using OpenCV",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="Andrew Campbell",
+    author_email="andrewc@captricity.com",
+    packages=find_packages(exclude=["tests*"]),
+    # Every third-party package imported by opencv_document_scanner/scan.py must be
+    # listed here; scipy provides the scipy.spatial.distance module it imports.
+    install_requires=[
+        "opencv-python>=4.5.0",
+        "numpy>=1.19.0",
+        "scipy>=1.5.0",
+        "imutils>=0.5.4",
+        "matplotlib>=3.3.0",
+        "pylsd @ git+https://github.com/primetang/pylsd@refs/pull/17/head"
+    ],
+    python_requires=">=3.7",
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+        "Topic :: Multimedia :: Graphics",
+        "Operating System :: OS Independent",
+    ],
+    keywords="opencv document scanner image-processing computer-vision",
+    project_urls={
+        "Bug Reports": "https://github.com/username/OpenCV-Document-Scanner/issues",
+        "Source": "https://github.com/username/OpenCV-Document-Scanner",
+    },
+    include_package_data=True,
+    zip_safe=False,
+    entry_points={
+        'console_scripts': [
+            'document-scanner=opencv_document_scanner.scan:main',
+        ],
+    },
+)
\ No newline at end of file