Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Text Recognition Text Bounding Box Off #65

Open
delasign opened this issue May 24, 2023 · 0 comments
Open

Text Recognition Text Bounding Box Off #65

delasign opened this issue May 24, 2023 · 0 comments

Comments

@delasign
Copy link

delasign commented May 24, 2023

Android Studio Version: Android Studio Flamingo | 2022.2.1 Patch 1

Problem: The Text Recognition bounding box is off.

This is best illustrated by the following Stack Overflow post, whose accepted solution doesn't solve the problem for me.
https://stackoverflow.com/questions/66624836/android-locating-words-on-the-screen-google-ml-kit-bounding-boxes-are-off-a-bi

Reference image from the Stack Overflow post (please note that my code doesn't draw boxes with this styling):
https://i.stack.imgur.com/idVzM.jpg

I tried to follow the ML Kit codelab (https://codelabs.developers.google.com/codelabs/mlkit-android#4), but it doesn't run.

--

What I'm doing:

I am passing a bitmap selected from the photo gallery to the text recognizer, and the resulting bounding boxes are not positioned precisely over the text.

Code found below

--

build.gradle.kts

// Gradle plugins applied to this module, in the original application order.
plugins {
    // Android application build support.
    id("com.android.application")
    // Kotlin/Android support; shorthand for id("org.jetbrains.kotlin.android").
    kotlin("android")
    // kotlinx.serialization compiler plugin; long form of kotlin("plugin.serialization").
    id("org.jetbrains.kotlin.plugin.serialization")
}

// Version components; defaultConfig combines them into versionCode and versionName.
val versionMajor: Int = 1
val versionMinor: Int = 0
val versionPatch: Int = 1

// Bump for dogfood builds, public betas, etc.
val versionBuild: Int = 1


// Android module configuration: SDK levels, versioning, release shrinking and Compose setup.
android {
    namespace = "--"
    compileSdk = 33

    defaultConfig {
        applicationId = "--"
        minSdk = 30
        targetSdk = 33

        // Packs major/minor/patch/build into a single integer version code.
        versionCode = versionMajor * 10000 + versionMinor * 1000 + versionPatch * 100 + versionBuild
        versionName = "$versionMajor.$versionMinor.$versionPatch"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables.useSupportLibrary = true
    }

    buildTypes {
        getByName("release") {
            // Code shrinking, obfuscation and optimization apply to the
            // release build type only.
            isMinifyEnabled = true

            // Resource shrinking is carried out by the Android Gradle plugin.
            isShrinkResources = true

            // Default ProGuard rules bundled with the Android Gradle plugin,
            // followed by the project's own rules file.
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }

    // Java and Kotlin both target Java 8 bytecode.
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }

    // Jetpack Compose support and the Compose compiler extension version.
    buildFeatures.compose = true
    composeOptions.kotlinCompilerExtensionVersion = "1.4.5"

    // Keep the listed META-INF licence files out of the packaged resources.
    packaging.resources.excludes.add("/META-INF/{AL2.0,LGPL2.1}")
}

// CameraX
// All androidx.camera artifacts must share one version. Previously camera-camera2 and
// camera-lifecycle were pinned to 1.0.1 while camera-view was hard-coded to 1.3.0-alpha06,
// which pulls in a newer camera-core transitively and leaves the artifacts mismatched.
val cameraxVersion = "1.3.0-alpha06"

dependencies {

    // AndroidX core, lifecycle, activity and fragment
    implementation("androidx.core:core-ktx:1.10.1")
    implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.1")
    implementation("androidx.activity:activity-compose:1.7.1")
    implementation("androidx.fragment:fragment-ktx:1.5.7")

    // Compose (artifact versions come from the BOM)
    implementation(platform("androidx.compose:compose-bom:2022.10.00"))
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-graphics")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.compose.material3:material3")

    // Tests and debug tooling
    testImplementation("junit:junit:4.13.2")
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
    androidTestImplementation(platform("androidx.compose:compose-bom:2022.10.00"))
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")

    // Serialization and layout
    implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.5.0")
    implementation("com.google.code.gson:gson:2.8.9")
    implementation("androidx.constraintlayout:constraintlayout-compose:1.0.1")

    // CAMERA (every artifact uses the shared cameraxVersion)
    implementation("androidx.camera:camera-camera2:$cameraxVersion")
    implementation("androidx.camera:camera-lifecycle:$cameraxVersion")
    implementation("androidx.camera:camera-view:$cameraxVersion")

    // Datastore
    implementation("androidx.datastore:datastore-preferences:1.0.0")
    implementation("androidx.datastore:datastore-preferences-rxjava2:1.0.0")
    implementation("androidx.datastore:datastore-preferences-rxjava3:1.0.0")

    // MLKit
    implementation("com.google.android.gms:play-services-mlkit-text-recognition:19.0.0")
}

--

Algorithm:

/**
 * Runs text recognition on [image] and reports the recognised text blocks whose text
 * passes `isPrice` through `onProcessMedia`.
 *
 * `onProcessMedia` is invoked exactly once per recognition attempt: with the matching
 * blocks on success, or with an empty array when recognition does not succeed.
 * (Previously a failure invoked it twice, once from the failure listener and once from
 * the complete listener.)
 *
 * Nothing happens, and the callback is not invoked, when no display is available from
 * `context`.
 *
 * @param image bitmap to analyse; handed to the recognizer with a rotation of 0 degrees.
 */
@SuppressLint("UnsafeOptInUsageError")
fun TextRecognitionCoordinator.analyzeMedia(image: Bitmap) {
    // Guard kept from the original implementation; the rotation value itself was never used.
    if (context?.display == null) return

    CoroutineScope(Dispatchers.IO).launch {
        // Named distinctly so the bitmap parameter is no longer shadowed.
        val inputImage = InputImage.fromBitmap(image, 0)

        // A single complete listener covers success and failure, so the callback
        // below cannot fire more than once.
        recognizer.process(inputImage)
            .addOnCompleteListener { task ->
                val priceBlocks: Array<Text.TextBlock> = if (task.isSuccessful) {
                    if (TextRecognitionCoordinator.debug) {
                        Log.i(
                            "${TextRecognitionCoordinator.identifier}",
                            "${DebuggingIdentifiers.actionOrEventSucceded} processImage | processed recognition"
                        )
                    }

                    val matches = task.result.textBlocks
                        .filter { block -> isPrice(block.text) }
                        .onEach { block ->
                            if (TextRecognitionCoordinator.debug) {
                                Log.i(
                                    "${TextRecognitionCoordinator.identifier}",
                                    "${DebuggingIdentifiers.actionOrEventSucceded} analyzeMedia | text: ${block.text}"
                                )
                            }
                        }
                        .toTypedArray()

                    if (TextRecognitionCoordinator.debug) {
                        Log.i(
                            "${TextRecognitionCoordinator.identifier}",
                            "${DebuggingIdentifiers.actionOrEventSucceded} analyzeMedia | closing with count : ${matches.count()}"
                        )
                    }
                    matches
                } else {
                    // Only log when the task carries an exception.
                    task.exception?.let { e ->
                        Log.e(
                            "${TextRecognitionCoordinator.identifier}",
                            "${DebuggingIdentifiers.actionOrEventFailed} analyzeMedia | Failed to recognize text. Error: $e"
                        )
                    }
                    emptyArray()
                }

                // Return the matching blocks (empty when recognition did not succeed).
                onProcessMedia?.invoke(priceBlocks)
            }
    }
}

--

Composable that's drawn on a full screen card

/**
 * Draws a red box for [element], positioned and sized by scaling the element's bounding
 * box by the screen-to-image ratios.
 *
 * Draws nothing when the element has no bounding box or when the image dimensions are
 * not positive.
 *
 * `screenWidth`, `screenHeight`, `imageWidth` and `imageHeight` are defined outside this
 * function; they are assumed to be plain numeric values (TODO confirm).
 *
 * @param element recognised text block whose bounding box is drawn.
 */
@Composable
            fun createBoundingBox(element: Text.TextBlock) {
                val boundingBox = element.boundingBox ?: return

                Log.i(
                    "${identifier}",
                    "${DebuggingIdentifiers.actionOrEventSucceded} bounding box ${boundingBox} | top ${boundingBox.top} | left ${boundingBox.left}."
                )

                // Note that screenWidth and screenHeight are the device's width and height.
                // Convert to Float BEFORE dividing: with integer operands the ratio would be
                // truncated (e.g. 1080 / 4000 == 0), collapsing every box.
                val sourceWidth = imageWidth.toFloat()
                val sourceHeight = imageHeight.toFloat()
                // A non-positive image size cannot produce a meaningful scale factor.
                if (sourceWidth <= 0f || sourceHeight <= 0f) return

                val wRatio = screenWidth.toFloat() / sourceWidth
                val hRatio = screenHeight.toFloat() / sourceHeight

                val elementTop = boundingBox.top * hRatio
                val elementLeft = boundingBox.left * wRatio
                val elementWidth = boundingBox.width() * wRatio
                val elementHeight = boundingBox.height() * hRatio

                // NOTE(review): the values below are passed to `.dp`. If screenWidth/screenHeight
                // are in pixels rather than dp, the results are pixel values and must be divided
                // by the display density before `.dp`, otherwise every box is offset and enlarged
                // by the density factor. TODO confirm the units of screenWidth/screenHeight.
                // NOTE(review): scaling width and height independently presumes the image is
                // stretched to fill the whole screen; a fitted or cropped image would also need
                // an offset. TODO confirm how the image is displayed.
                Box(
                    modifier = Modifier
                        .constrainAs(createRef()) {
                            top.linkTo(
                                parent.top,
                                elementTop.dp
                            )
                            absoluteLeft.linkTo(
                                parent.absoluteLeft,
                                elementLeft.dp
                            )
                            width = Dimension.value(elementWidth.dp)
                            height = Dimension.value(elementHeight.dp)
                        }
                        .background(Color.Red),
                    content = {}
                )
            }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant