Merge pull request #4 from bendyworks/image-orientation

Image orientation
bendyworks · Mar 31, 2020 · be4ad84 · be4ad84
2 parents 2180c17 + 860b6fb
commit be4ad84
Show file tree

Hide file tree

Showing 13 changed files with 199 additions and 100 deletions.
diff --git a/README.md b/README.md
@@ -7,6 +7,22 @@ Machine Learning Plugin for Capacitor. Currently offered implementations include
 
     TextDetector expects the image to be sent in portrait mode only, i.e. with text facing up. It will try to process even otherwise, but note that it might result in gibberish.
 
+## Compatibility Chart
+
+| Feature                          | ios                         | android                     |
+| -------------------------------- | --------------------------- | --------------------------- |
+| ML Framework                     | CoreML Vision               | Firebase MLKit              |
+| Text Detection with Still Images | Yes                         | Yes                         |
+| Detects lines of text            | Yes                         | Yes                         |
+| Bounding Coordinates for Text    | Yes                         | Yes                         |
+| Image Orientation                | Yes (Up, Left, Right, Down) | Yes (Up, Left, Right, Down) |
+| Skewed Text                      | Yes                         | Unreliable                  |
+| Rotated Text (<~ 45deg)          | Yes                         | Yes (but with noise)        |
+| On-Device                        | Yes                         | Yes                         |
+| SDK/ios Version                  | ios 13.0 or newer           | Targets API level >= 16<br>Uses Gradle >= 4.1<br>com.android.tools.build:gradle >= v3.2.1<br>compileSdkVersion >= 28 |
+| | | |
+
+
 ## Installation
 
 ```
@@ -17,7 +33,8 @@ npm install cap-ml
 
 TextDetector exposes only one method `detectText` that returns a Promise with an array of text detections -
 ```
-detectText(filename: string): Promise<TextDetection[]>
+// Orientation here is not the current orientation of the image, but the direction in which the image should be turned to make it upright
+detectText(filename: string, orientation?: ImageOrientation): Promise<TextDetection[]>
 
 ```
 TextDetection looks like  -
@@ -30,6 +47,16 @@ interface TextDetection {
   text: string;
 }
 ```
+
+ImageOrientation is an enum  -
+```
+enum ImageOrientation {
+  Up = "UP",
+  Down = "DOWN",
+  Left = "LEFT",
+  Right = "RIGHT",
+}
+```
 bottomLeft[x,y], bottomRight[x,y], topLeft[x,y], topRight[x,y] provide the coordinates for the bounding quadrangle for the detected 'text'. Often, this would be a rectangle, but the text might be skewed.
 
 
@@ -54,6 +81,9 @@ and used like:
   const td = new TextDetector();
   const textDetections = await td.detectText(imageFile.path!)
 
+  # or with orientation -
+  # const textDetections = await td.detectText(imageFile.path!, ImageOrientation.Up)
+
   # textDetections is an array of detected texts and corresponding bounding box coordinates
   # which can be accessed like -
   textDetections.forEach((detection: TextDetection) => {
@@ -85,10 +115,22 @@ If you're using it in an Android app (generated through Ionic), there is an addi
   - Open the app in Android Studio by running `npx cap open android` from the sample app's root directory. ie here, at examples/text-detection/ImageReader
   - Open app/manifests/AndroidManifest.xml
   - Add the corresponding permissions to the app -
-    <uses-permission android:name="android.permission.INTERNET" />
-    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
-    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
-    <uses-permission android:name="android.permission.CAMERA" />
+    - android.permission.INTERNET
+    - android.permission.READ_EXTERNAL_STORAGE
+    - android.permission.WRITE_EXTERNAL_STORAGE
+    - android.permission.CAMERA
+
+  -  Note: The sample app is set up to download Firebase's OCR model for Text Detection upon installing the app. The app may error out with something like -  `Considering local module com.google.android.gms.vision.ocr:0 and remote module com.google.android.gms.vision.ocr:0.
+  E/Vision: Error loading module com.google.android.gms.vision.ocr optional module true: com.google.android.gms.dynamite.DynamiteModule$LoadingException: No acceptable module found. Local version is 0 and remote version is 0.`.
+
+      This is a known bug with Google Play Services.
+
+      Follow these steps -
+      1. Uninstall app from the device/emulator.
+      2. Update 'Google Play Services' - make sure you have the latest version.
+      3. Clear cache and storage for 'Google Play Services'.
+      4. Restart the device/emulator
+      5. Install and run the app.
 
 ## Development
 

diff --git a/android/src/main/java/com/bendyworks/capML/CapML.java b/android/src/main/java/com/bendyworks/capML/CapML.java
@@ -1,37 +1,61 @@
 package com.bendyworks.capML;
 
+import android.graphics.Bitmap;
+import android.graphics.Matrix;
 import android.net.Uri;
+import android.provider.MediaStore;
 
-import com.getcapacitor.JSObject;
 import com.getcapacitor.NativePlugin;
 import com.getcapacitor.Plugin;
 import com.getcapacitor.PluginCall;
 import com.getcapacitor.PluginMethod;
+import com.google.firebase.ml.vision.common.FirebaseVisionImageMetadata;
 
 import java.io.File;
+import java.io.IOException;
 
 @NativePlugin()
 public class CapML extends Plugin {
 
   @PluginMethod() // plugin entry point: loads the image named by "filename", rotates it per "orientation", and runs text detection on it
-  public void detectText(PluginCall call) {
+  public void detectText(PluginCall call) throws IOException { // IOException presumably originates from the MediaStore getBitmap call below — confirm
     String filename = call.getString("filename");
     if (filename == null) {
       call.reject("filename not specified");
       return;
     }
-    // remove file:// from the filename
-    filename = filename.substring(7);
-    File file = new File(filename);
+    String orientation = call.getString("orientation"); // optional on the JS side; NOTE(review): presumably null when omitted — confirm
+    int rotation = this.orientationToRotation(orientation); // "UP"/"RIGHT"/"DOWN"/"LEFT" -> 0/90/180/270 degrees
 
-    if (file.exists()) {
-      Uri uri = Uri.fromFile(file);
+    Bitmap bitmap  = MediaStore.Images.Media.getBitmap(this.getContext().getContentResolver(), Uri.parse(filename)); // filename is parsed as a Uri and resolved through the content resolver
+    if (bitmap == null) {
+      call.reject("Could not load image from path");
+      return;
+    } else {
+      int width = bitmap.getWidth();
+      int height = bitmap.getHeight();
+
+      Matrix matrix = new Matrix();
+      matrix.setRotate((float)rotation); // rotation, in degrees, that should turn the image upright
+      Bitmap rotatedBitmap = Bitmap.createBitmap(bitmap, 0, 0, width, height, matrix, true); // full-size copy of the source bitmap with the rotation matrix applied
 
       TextDetector td = new TextDetector();
-      td.detectText(call, this.getContext(), uri, 0);
-    } else {
-      call.reject("File not found");
-      return;
+      td.detectText(call, rotatedBitmap); // detection runs on the rotated bitmap; presumably TextDetector reports results through `call` — confirm
+    }
+  }
+
+  /**
+   * Maps the orientation string supplied by the caller to the rotation, in degrees,
+   * that detectText passes to Matrix.setRotate.
+   *
+   * @param orientation one of "UP", "RIGHT", "DOWN" or "LEFT"; may be null when the
+   *                    caller did not supply an orientation
+   * @return 0, 90, 180 or 270 for the four known values; 0 for null or any
+   *         unrecognized value
+   */
+  private int orientationToRotation(String orientation) {
+    // The orientation argument is optional, and switching on a null String throws
+    // NullPointerException rather than reaching the default branch.
+    if (orientation == null) {
+      return 0;
+    }
+    switch (orientation) {
+      case "UP":
+        return 0;
+      case "RIGHT":
+        return 90;
+      case "DOWN":
+        return 180;
+      case "LEFT":
+        return 270;
+      default:
+        return 0;
+    }
+  }
 }
diff --git a/android/src/main/java/com/bendyworks/capML/TextDetector.kt b/android/src/main/java/com/bendyworks/capML/TextDetector.kt
@@ -1,46 +1,44 @@
 package com.bendyworks.capML
 
-import com.getcapacitor.PluginCall
-import com.getcapacitor.JSObject
-
-import android.content.Context
-import android.graphics.Rect
-import android.net.Uri
+import android.graphics.Bitmap
 import android.util.NoSuchPropertyException
+import com.getcapacitor.JSObject
+import com.getcapacitor.PluginCall
 import com.google.firebase.ml.vision.FirebaseVision
 import com.google.firebase.ml.vision.common.FirebaseVisionImage
 import com.google.firebase.ml.vision.text.FirebaseVisionTextRecognizer
 import org.json.JSONArray
-import kotlin.collections.ArrayList
 
 
 class TextDetector {
-  fun detectText(call: PluginCall, context: Context, fileUri: Uri, degrees: Int) {
+  fun detectText(call: PluginCall, bitmap: Bitmap) {
     val image: FirebaseVisionImage
     val detectedText = ArrayList<Any>()
 
     try {
-      image = FirebaseVisionImage.fromFilePath(context, fileUri)
-
-      // getting the height and width of the image to perform scaling
-      val bitmap = image.getBitmap()
-      val width = bitmap.getWidth()
-      val height = bitmap.getHeight()
+      image = FirebaseVisionImage.fromBitmap(bitmap)
+      val width = bitmap.width
+      val height = bitmap.height
 
       val textDetector: FirebaseVisionTextRecognizer = FirebaseVision.getInstance().getOnDeviceTextRecognizer();
 
       textDetector.processImage(image)
         .addOnSuccessListener { detectedBlocks ->
           for (block in detectedBlocks.textBlocks) {
             for (line in block.lines) {
-              val rect: Rect = line.boundingBox ?: throw NoSuchPropertyException("FirebaseVisionTextRecognizer.processImage: could not get bounding coordinates")
+              // Gets the four corner points in clockwise direction starting with top-left.
+              val cornerPoints = line.cornerPoints ?: throw NoSuchPropertyException("FirebaseVisionTextRecognizer.processImage: could not get bounding coordinates")
+              val topLeft = cornerPoints[0]
+              val topRight = cornerPoints[1]
+              val bottomRight = cornerPoints[2]
+              val bottomLeft = cornerPoints[3]
 
               val textDetection = mapOf(
                 // normalizing coordinates
-                "topLeft" to listOf<Double?>((rect.left).toDouble()/width, (height - rect.top).toDouble()/height),
-                "topRight" to listOf<Double?>((rect.right).toDouble()/width, (height - rect.top).toDouble()/height),
-                "bottomLeft" to listOf<Double?>((rect.left).toDouble()/width, (height - rect.bottom).toDouble()/height),
-                "bottomRight" to listOf<Double?>((rect.right).toDouble()/width, (height - rect.bottom).toDouble()/height),
+                "topLeft" to listOf<Double?>((topLeft.x).toDouble()/width, (height - topLeft.y).toDouble()/height),
+                "topRight" to listOf<Double?>((topRight.x).toDouble()/width, (height - topRight.y).toDouble()/height),
+                "bottomLeft" to listOf<Double?>((bottomLeft.x).toDouble()/width, (height - bottomLeft.y).toDouble()/height),
+                "bottomRight" to listOf<Double?>((bottomRight.x).toDouble()/width, (height - bottomRight.y).toDouble()/height),
                 "text" to line.text
               )
               detectedText.add(textDetection)

diff --git a/examples/text-detection/ImageReader/android/app/build.gradle b/examples/text-detection/ImageReader/android/app/build.gradle
@@ -41,7 +41,7 @@ dependencies {
     implementation project(':capacitor-cordova-android-plugins')
   implementation 'com.google.firebase:firebase-analytics:17.2.3'
   implementation 'com.google.firebase:firebase-ml-vision:24.0.1'
-  implementation "androidx.core:core-ktx:+"
+  implementation 'androidx.core:core-ktx:+'
   implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk7:$kotlin_version"
 }
 

diff --git a/examples/text-detection/ImageReader/ios/App/App.xcodeproj/project.pbxproj b/examples/text-detection/ImageReader/ios/App/App.xcodeproj/project.pbxproj
@@ -345,6 +345,7 @@
 			buildSettings = {
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
 				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = "";
 				INFOPLIST_FILE = App/Info.plist;
 				IPHONEOS_DEPLOYMENT_TARGET = 11.0;
 				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
@@ -363,6 +364,7 @@
 			buildSettings = {
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
 				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = "";
 				INFOPLIST_FILE = App/Info.plist;
 				IPHONEOS_DEPLOYMENT_TARGET = 11.0;
 				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";

diff --git a/examples/text-detection/ImageReader/package-lock.json b/examples/text-detection/ImageReader/package-lock.json
diff --git a/examples/text-detection/ImageReader/package.json b/examples/text-detection/ImageReader/package.json
@@ -19,8 +19,8 @@
     "@angular/platform-browser": "~8.2.14",
     "@angular/platform-browser-dynamic": "~8.2.14",
     "@angular/router": "~8.2.14",
-    "@capacitor/android": "^1.5.1",
-    "@capacitor/core": "^1.5.1",
+    "@capacitor/android": "^2.0.0-beta.0",
+    "@capacitor/core": "^2.0.0-beta.0",
     "@ionic-native/core": "^5.0.0",
     "@ionic-native/splash-screen": "^5.0.0",
     "@ionic-native/status-bar": "^5.0.0",

diff --git a/examples/text-detection/ImageReader/src/app/home/home.page.html b/examples/text-detection/ImageReader/src/app/home/home.page.html
@@ -24,7 +24,7 @@
     </ion-item>
 
     <svg #svgContainer [attr.width.px]="scaleX" [attr.height.px]="scaleY">
-      <image [attr.xlink:href]="imageFile.webPath" [attr.width]="scaleX" [attr.height]="scaleY"></image>
+      <image [attr.xlink:href]="imageFile.webPath" [attr.width]="scaleX" [attr.height]="scaleY" [attr.transform]="'rotate('+rotation+' '+scaleX/2+' '+scaleY/2+')'"></image>
     </svg>
   </div>