diff --git a/examples/colab/ocr/ocr_form_relation.ipynb b/examples/colab/ocr/ocr_form_relation.ipynb index 3c6d5952..8d29870b 100644 --- a/examples/colab/ocr/ocr_form_relation.ipynb +++ b/examples/colab/ocr/ocr_form_relation.ipynb @@ -64,567 +64,75 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "06:59:10, INFO Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "06:59:10, INFO Parsed Nlu_ref=visual_form_relation_extractor as lang=en\n", - "Adding visual_form_relation_extractor to internal component_list\n", - "Adding visual_form_relation_extractor to internal component_list\n", - "Adding visual_form_relation_extractor to internal component_list\n", - "Adding visual_form_relation_extractor to internal component_list\n", - "06:59:10, INFO Adding visual_form_relation_extractor to internal component_list\n", - "Satisfying dependencies\n", - "Satisfying dependencies\n", - "Satisfying dependencies\n", - "Satisfying dependencies\n", - "06:59:10, INFO Satisfying dependencies\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "06:59:10, INFO ========================================================================\n", - "Resolution Status provided_features_no_ref = {'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'visual_classifier_prediction'}\n", - "06:59:10, INFO Resolution Status provided_features_no_ref = {'visual_classifier_prediction'}\n", - "Resolution Status required_features_no_ref = {'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_entity'}\n", - "06:59:10, INFO Resolution Status required_features_no_ref = {'text_entity'}\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "06:59:10, INFO Resolution Status provided_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "06:59:10, INFO Resolution Status required_features_ref = set()\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "06:59:10, INFO Resolution Status is_trainable = False\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "06:59:10, INFO Resolution Status conversion_candidates = []\n", - "Resolution Status missing_features_no_ref = {'text_entity'}\n", - "Resolution Status missing_features_no_ref = {'text_entity'}\n", - "Resolution Status missing_features_no_ref = {'text_entity'}\n", - "Resolution Status missing_features_no_ref = {'text_entity'}\n", - "06:59:10, INFO Resolution Status missing_features_no_ref = {'text_entity'}\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "06:59:10, INFO Resolution Status conversion_candidates = set()\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "06:59:10, INFO ========================================================================\n", - "Getting default for missing_feature_type=text_entity\n", - "Getting default for missing_feature_type=text_entity\n", - "Getting default for missing_feature_type=text_entity\n", - "Getting default for missing_feature_type=text_entity\n", - "06:59:10, INFO Getting default for missing_feature_type=text_entity\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ + "🚨 Outdated Medical Secrets in license file. Version=5.3.1 but should be Version=5.1.1\n", + "🚨 Outdated OCR Secrets in license file. Version=5.3.1 but should be Version=5.0.2\n", + "πŸ“‹ Loading license number 0 from C:\\Users\\gadde/.johnsnowlabs\\licenses/license_number_0_for_Spark-Healthcare_Spark-OCR.json\n", + "πŸ‘· Trying to install compatible secrets. Use nlp.settings.enforce_versions=False if you want to install outdated secrets.\n", + "πŸ‘· Trying to install compatible secrets. Use nlp.settings.enforce_versions=False if you want to install outdated secrets.\n", + "πŸ‘· Setting up John Snow Labs home in C:\\Users\\gadde/.johnsnowlabs, this might take a few minutes.\n", + "Downloading 🫘+πŸš€ Java Library spark-nlp-assembly-5.1.1.jar\n", + "πŸ™† JSL Home setup in C:\\Users\\gadde/.johnsnowlabs\n", + "πŸ€“ Looks like you are missing some jars, trying fetching them ...\n", + "πŸ‘· Trying to install compatible secrets. Use nlp.settings.enforce_versions=False if you want to install outdated secrets.\n", + "Downloading 🫘+πŸ’Š Java Library spark-nlp-jsl-5.1.1.jar\n", + "Downloading 🫘+πŸ•Ά Java Library spark-ocr-assembly-5.0.2.jar\n", + "πŸ™† JSL Home setup in C:\\Users\\gadde/.johnsnowlabs\n", + "πŸ‘· Trying to install compatible secrets. Use nlp.settings.enforce_versions=False if you want to install outdated secrets.\n", + "πŸ‘Œ Launched \u001B[92mcpu optimized\u001B[39m session with with: πŸš€Spark-NLP==5.3.1, πŸ’ŠSpark-Healthcare==5.1.1, πŸ•ΆSpark-OCR==5.0.2, running on ⚑ PySpark==3.1.2\n", "Warning::Spark Session already created, some configs may not take.\n", "Warning::Spark Session already created, some configs may not take.\n", - "lilt_roberta_funsd_v1 download started this may take some time.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Resolved for missing components the following NLU components : []\n", - "Resolved for missing components the following NLU components : []\n", - "Resolved for missing components the following NLU components : []\n", - "Resolved for missing components the following NLU components : []\n", - "06:59:11, INFO Resolved for missing components the following NLU components : []\n", - "adding visual_document_ner\n", - "adding visual_document_ner\n", - "adding visual_document_ner\n", - "adding visual_document_ner\n", - "06:59:11, INFO adding visual_document_ner\n", - "Adding visual_document_ner to internal component_list\n", - "Adding visual_document_ner to internal component_list\n", - "Adding visual_document_ner to internal component_list\n", - "Adding visual_document_ner to internal component_list\n", - "06:59:11, INFO Adding visual_document_ner to internal component_list\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "06:59:11, INFO ========================================================================\n", - "Resolution Status provided_features_no_ref = {'text_entity', 'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'text_entity', 'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'text_entity', 'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'text_entity', 'visual_classifier_prediction'}\n", - "06:59:11, INFO Resolution Status provided_features_no_ref = {'text_entity', 'visual_classifier_prediction'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'text_entity'}\n", - "06:59:11, INFO Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'text_entity'}\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "06:59:11, INFO Resolution Status provided_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "06:59:11, INFO Resolution Status required_features_ref = set()\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "06:59:11, INFO Resolution Status is_trainable = False\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "06:59:11, INFO Resolution Status conversion_candidates = []\n", - "Resolution Status missing_features_no_ref = {'text_tokenized', 'ocr_image'}\n", - "Resolution Status missing_features_no_ref = {'text_tokenized', 'ocr_image'}\n", - "Resolution Status missing_features_no_ref = {'text_tokenized', 'ocr_image'}\n", - "Resolution Status missing_features_no_ref = {'text_tokenized', 'ocr_image'}\n", - "06:59:11, INFO Resolution Status missing_features_no_ref = {'text_tokenized', 'ocr_image'}\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "06:59:11, INFO Resolution Status conversion_candidates = set()\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "06:59:11, INFO ========================================================================\n", - "Getting default for missing_feature_type=text_tokenized\n", - "Getting default for missing_feature_type=text_tokenized\n", - "Getting default for missing_feature_type=text_tokenized\n", - "Getting default for missing_feature_type=text_tokenized\n", - "06:59:11, INFO Getting default for missing_feature_type=text_tokenized\n", - "Getting default for missing_feature_type=ocr_image\n", - "Getting default for missing_feature_type=ocr_image\n", - "Getting default for missing_feature_type=ocr_image\n", - "Getting default for missing_feature_type=ocr_image\n", - "06:59:11, INFO Getting default for missing_feature_type=ocr_image\n", - "Resolved for missing components the following NLU components : [, ]\n", - "Resolved for missing components the following NLU components : [, ]\n", - "Resolved for missing components the following NLU components : [, ]\n", - "Resolved for missing components the following NLU components : [, ]\n", - "06:59:11, INFO Resolved for missing components the following NLU components : [, ]\n", - "adding hocr_tokenizer\n", - "adding hocr_tokenizer\n", - "adding hocr_tokenizer\n", - "adding hocr_tokenizer\n", - "06:59:11, INFO adding hocr_tokenizer\n", - "Adding hocr_tokenizer to internal component_list\n", - "Adding hocr_tokenizer to internal component_list\n", - "Adding hocr_tokenizer to internal component_list\n", - "Adding hocr_tokenizer to internal component_list\n", - "06:59:11, INFO Adding hocr_tokenizer to internal component_list\n", - "adding binary2image\n", - "adding binary2image\n", - "adding binary2image\n", - "adding binary2image\n", - "06:59:11, INFO adding binary2image\n", - "Adding binary2image to internal component_list\n", - "Adding binary2image to internal component_list\n", - "Adding binary2image to internal component_list\n", - "Adding binary2image to internal component_list\n", - "06:59:11, INFO Adding binary2image to internal component_list\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "06:59:11, INFO ========================================================================\n", - "Resolution Status provided_features_no_ref = {'text_entity', 'text_tokenized', 'ocr_image', 'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'text_entity', 'text_tokenized', 'ocr_image', 'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'text_entity', 'text_tokenized', 'ocr_image', 'visual_classifier_prediction'}\n", - "Resolution Status provided_features_no_ref = {'text_entity', 'text_tokenized', 'ocr_image', 'visual_classifier_prediction'}\n", - "06:59:11, INFO Resolution Status provided_features_no_ref = {'text_entity', 'text_tokenized', 'ocr_image', 'visual_classifier_prediction'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "06:59:11, INFO Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "06:59:11, INFO Resolution Status provided_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "06:59:11, INFO Resolution Status required_features_ref = set()\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "06:59:11, INFO Resolution Status is_trainable = False\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "06:59:11, INFO Resolution Status conversion_candidates = []\n", - "Resolution Status missing_features_no_ref = {'hocr'}\n", - "Resolution Status missing_features_no_ref = {'hocr'}\n", - "Resolution Status missing_features_no_ref = {'hocr'}\n", - "Resolution Status missing_features_no_ref = {'hocr'}\n", - "06:59:11, INFO Resolution Status missing_features_no_ref = {'hocr'}\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "06:59:11, INFO Resolution Status conversion_candidates = set()\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "06:59:11, INFO ========================================================================\n", - "Getting default for missing_feature_type=hocr\n", - "Getting default for missing_feature_type=hocr\n", - "Getting default for missing_feature_type=hocr\n", - "Getting default for missing_feature_type=hocr\n", - "06:59:11, INFO Getting default for missing_feature_type=hocr\n", - "Resolved for missing components the following NLU components : []\n", - "Resolved for missing components the following NLU components : []\n", - "Resolved for missing components the following NLU components : []\n", - "Resolved for missing components the following NLU components : []\n", - "06:59:11, INFO Resolved for missing components the following NLU components : []\n", - "adding image2hocr\n", - "adding image2hocr\n", - "adding image2hocr\n", - "adding image2hocr\n", - "06:59:11, INFO adding image2hocr\n", - "Adding image2hocr to internal component_list\n", - "Adding image2hocr to internal component_list\n", - "Adding image2hocr to internal component_list\n", - "Adding image2hocr to internal component_list\n", - "06:59:11, INFO Adding image2hocr to internal component_list\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "06:59:11, INFO ========================================================================\n", - "Resolution Status provided_features_no_ref = {'text_tokenized', 'visual_classifier_prediction', 'hocr', 'ocr_image', 'text_entity'}\n", - "Resolution Status provided_features_no_ref = {'text_tokenized', 'visual_classifier_prediction', 'hocr', 'ocr_image', 'text_entity'}\n", - "Resolution Status provided_features_no_ref = {'text_tokenized', 'visual_classifier_prediction', 'hocr', 'ocr_image', 'text_entity'}\n", - "Resolution Status provided_features_no_ref = {'text_tokenized', 'visual_classifier_prediction', 'hocr', 'ocr_image', 'text_entity'}\n", - "06:59:11, INFO Resolution Status provided_features_no_ref = {'text_tokenized', 'visual_classifier_prediction', 'hocr', 'ocr_image', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "06:59:11, INFO Resolution Status required_features_no_ref = {'text_tokenized', 'ocr_image', 'hocr', 'text_entity'}\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "Resolution Status provided_features_ref = set()\n", - "06:59:11, INFO Resolution Status provided_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "Resolution Status required_features_ref = set()\n", - "06:59:11, INFO Resolution Status required_features_ref = set()\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "Resolution Status is_trainable = False\n", - "06:59:11, INFO Resolution Status is_trainable = False\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "lilt_roberta_funsd_v1 download started this may take some time.\n", "Approximate size to download 419.6 MB\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "Resolution Status conversion_candidates = []\n", - "06:59:11, INFO Resolution Status conversion_candidates = []\n", - "Resolution Status missing_features_no_ref = set()\n", - "Resolution Status missing_features_no_ref = set()\n", - "Resolution Status missing_features_no_ref = set()\n", - "Resolution Status missing_features_no_ref = set()\n", - "06:59:11, INFO Resolution Status missing_features_no_ref = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "Resolution Status conversion_candidates = set()\n", - "06:59:11, INFO Resolution Status conversion_candidates = set()\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "========================================================================\n", - "06:59:11, INFO ========================================================================\n", - "!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!* ALL DEPENDENCIES SATISFIED !*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*\n", - "!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!* ALL DEPENDENCIES SATISFIED !*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*\n", - "!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!* ALL DEPENDENCIES SATISFIED !*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*\n", - "!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!* ALL DEPENDENCIES SATISFIED !*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*\n", - "06:59:11, INFO !*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!* ALL DEPENDENCIES SATISFIED !*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*\n", - "Fixing column names\n", - "Fixing column names\n", - "Fixing column names\n", - "Fixing column names\n", - "06:59:11, INFO Fixing column names\n", - "Fixing input and output column names\n", - "Fixing input and output column names\n", - "Fixing input and output column names\n", - "Fixing input and output column names\n", - "06:59:11, INFO Fixing input and output column names\n", - "Checking for component_to_resolve visual_form_relation_extractor wether inputs {'text_entity'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve visual_form_relation_extractor wether inputs {'text_entity'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve visual_form_relation_extractor wether inputs {'text_entity'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve visual_form_relation_extractor wether inputs {'text_entity'} is satisfied by another component_to_resolve in the component_list \n", - "06:59:11, INFO Checking for component_to_resolve visual_form_relation_extractor wether inputs {'text_entity'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve visual_document_ner wether inputs {'text_tokenized', 'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve visual_document_ner wether inputs {'text_tokenized', 'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve visual_document_ner wether inputs {'text_tokenized', 'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve visual_document_ner wether inputs {'text_tokenized', 'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "06:59:11, INFO Checking for component_to_resolve visual_document_ner wether inputs {'text_tokenized', 'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve hocr_tokenizer wether inputs {'hocr'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve hocr_tokenizer wether inputs {'hocr'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve hocr_tokenizer wether inputs {'hocr'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve hocr_tokenizer wether inputs {'hocr'} is satisfied by another component_to_resolve in the component_list \n", - "06:59:11, INFO Checking for component_to_resolve hocr_tokenizer wether inputs {'hocr'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve binary2image wether inputs {'content', 'path'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve binary2image wether inputs {'content', 'path'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve binary2image wether inputs {'content', 'path'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve binary2image wether inputs {'content', 'path'} is satisfied by another component_to_resolve in the component_list \n", - "06:59:11, INFO Checking for component_to_resolve binary2image wether inputs {'content', 'path'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve image2hocr wether inputs {'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve image2hocr wether inputs {'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve image2hocr wether inputs {'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "Checking for component_to_resolve image2hocr wether inputs {'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "06:59:11, INFO Checking for component_to_resolve image2hocr wether inputs {'ocr_image'} is satisfied by another component_to_resolve in the component_list \n", - "Optimizing component_list component_to_resolve order\n", - "Optimizing component_list component_to_resolve order\n", - "Optimizing component_list component_to_resolve order\n", - "Optimizing component_list component_to_resolve order\n", - "06:59:11, INFO Optimizing component_list component_to_resolve order\n", - "Starting to optimize component_to_resolve order \n", - "Starting to optimize component_to_resolve order \n", - "Starting to optimize component_to_resolve order \n", - "Starting to optimize component_to_resolve order \n", - "06:59:11, INFO Starting to optimize component_to_resolve order \n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "06:59:11, INFO Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve binary2image\n", - "Optimizing order for component_to_resolve binary2image\n", - "Optimizing order for component_to_resolve binary2image\n", - "Optimizing order for component_to_resolve binary2image\n", - "06:59:11, INFO Optimizing order for component_to_resolve binary2image\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "06:59:11, INFO Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve image2hocr\n", - "Optimizing order for component_to_resolve image2hocr\n", - "Optimizing order for component_to_resolve image2hocr\n", - "Optimizing order for component_to_resolve image2hocr\n", - "06:59:11, INFO Optimizing order for component_to_resolve image2hocr\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "06:59:11, INFO Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve image2hocr\n", - "Optimizing order for component_to_resolve image2hocr\n", - "Optimizing order for component_to_resolve image2hocr\n", - "Optimizing order for component_to_resolve image2hocr\n", - "06:59:11, INFO Optimizing order for component_to_resolve image2hocr\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve hocr_tokenizer\n", - "06:59:11, INFO Optimizing order for component_to_resolve hocr_tokenizer\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_document_ner\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_document_ner\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "06:59:11, INFO Optimizing order for component_to_resolve visual_form_relation_extractor\n", - "Renaming duplicates cols\n", - "Renaming duplicates cols\n", - "Renaming duplicates cols\n", - "Renaming duplicates cols\n", - "06:59:11, INFO Renaming duplicates cols\n", - "Done with component_list optimizing\n", - "Done with component_list optimizing\n", - "Done with component_list optimizing\n", - "Done with component_list optimizing\n", - "06:59:11, INFO Done with component_list optimizing\n", - "Fitting on empty Dataframe, could not infer correct training method. This is intended for non-trainable pipelines.\n", - "Fitting on empty Dataframe, could not infer correct training method. This is intended for non-trainable pipelines.\n", - "Fitting on empty Dataframe, could not infer correct training method. This is intended for non-trainable pipelines.\n", - "Fitting on empty Dataframe, could not infer correct training method. This is intended for non-trainable pipelines.\n", - "06:59:11, INFO Fitting on empty Dataframe, could not infer correct training method. This is intended for non-trainable pipelines.\n", - "Configuring Light Pipeline Usage\n", - "Configuring Light Pipeline Usage\n", - "Configuring Light Pipeline Usage\n", - "Configuring Light Pipeline Usage\n", - "06:59:11, INFO Configuring Light Pipeline Usage\n" - ] - }, + } + ], + "source": [ + "from johnsnowlabs import nlp,visual\n", + "model = nlp.load('visual_form_relation_extractor')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-05-13T08:27:37.781697200Z", + "start_time": "2024-05-13T08:17:43.901075500Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Warning::Spark Session already created, some configs may not take.\n", "Warning::Spark Session already created, some configs may not take.\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Inferred and set output level of pipeline to relation\n", - "Inferred and set output level of pipeline to relation\n", - "Inferred and set output level of pipeline to relation\n", - "Inferred and set output level of pipeline to relation\n", - "06:59:15, INFO Inferred and set output level of pipeline to relation\n", - "Extracting for same_level_cols = ['meta_visual_classifier_prediction_bbox1', 'meta_visual_classifier_prediction_entity1_begin', 'meta_visual_classifier_prediction_x', 'meta_visual_classifier_prediction_bbox2', 'meta_visual_classifier_prediction_entity1_end', 'meta_visual_classifier_prediction_entity2', 'meta_visual_classifier_prediction_entity1', 'meta_visual_classifier_prediction_entity2_end', 'meta_visual_classifier_prediction_height', 'meta_visual_classifier_prediction_y', 'meta_visual_classifier_prediction_width', 'meta_visual_classifier_prediction_entity2_begin']\n", - "\n", - "Extracting for same_level_cols = ['meta_visual_classifier_prediction_bbox1', 'meta_visual_classifier_prediction_entity1_begin', 'meta_visual_classifier_prediction_x', 'meta_visual_classifier_prediction_bbox2', 'meta_visual_classifier_prediction_entity1_end', 'meta_visual_classifier_prediction_entity2', 'meta_visual_classifier_prediction_entity1', 'meta_visual_classifier_prediction_entity2_end', 'meta_visual_classifier_prediction_height', 'meta_visual_classifier_prediction_y', 'meta_visual_classifier_prediction_width', 'meta_visual_classifier_prediction_entity2_begin']\n", - "\n", - "Extracting for same_level_cols = ['meta_visual_classifier_prediction_bbox1', 'meta_visual_classifier_prediction_entity1_begin', 'meta_visual_classifier_prediction_x', 'meta_visual_classifier_prediction_bbox2', 'meta_visual_classifier_prediction_entity1_end', 'meta_visual_classifier_prediction_entity2', 'meta_visual_classifier_prediction_entity1', 'meta_visual_classifier_prediction_entity2_end', 'meta_visual_classifier_prediction_height', 'meta_visual_classifier_prediction_y', 'meta_visual_classifier_prediction_width', 'meta_visual_classifier_prediction_entity2_begin']\n", - "\n", - "Extracting for same_level_cols = ['meta_visual_classifier_prediction_bbox1', 'meta_visual_classifier_prediction_entity1_begin', 'meta_visual_classifier_prediction_x', 'meta_visual_classifier_prediction_bbox2', 'meta_visual_classifier_prediction_entity1_end', 'meta_visual_classifier_prediction_entity2', 'meta_visual_classifier_prediction_entity1', 'meta_visual_classifier_prediction_entity2_end', 'meta_visual_classifier_prediction_height', 'meta_visual_classifier_prediction_y', 'meta_visual_classifier_prediction_width', 'meta_visual_classifier_prediction_entity2_begin']\n", - "\n", - "06:59:19, INFO Extracting for same_level_cols = ['meta_visual_classifier_prediction_bbox1', 'meta_visual_classifier_prediction_entity1_begin', 'meta_visual_classifier_prediction_x', 'meta_visual_classifier_prediction_bbox2', 'meta_visual_classifier_prediction_entity1_end', 'meta_visual_classifier_prediction_entity2', 'meta_visual_classifier_prediction_entity1', 'meta_visual_classifier_prediction_entity2_end', 'meta_visual_classifier_prediction_height', 'meta_visual_classifier_prediction_y', 'meta_visual_classifier_prediction_width', 'meta_visual_classifier_prediction_entity2_begin']\n", - "\n" - ] } ], "source": [ - "from johnsnowlabs import nlp, visual\n", - "import nlu\n", - "\n", - "model = nlu.load('visual_form_relation_extractor',verbose=True)\n", - "res = model.predict(['form.png','form2.png'])" + "res = model.predict(['form.png','form2.jpg'])" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-05-05T01:29:19.227738100Z", - "start_time": "2024-05-05T01:29:10.870752Z" - } + "collapsed": false } }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "outputs": [ { "data": { - "text/plain": " form_relation_prediction_key \\\n0 patient name \n0 lab no \n0 uhid \n0 sample date \n0 age / gender \n0 report date \n0 prescribed by \n0 opd / ipd opd \n0 sample type \n0 specimen \n0 result : \n1 name : \n1 study date : \n1 bp : \n1 mrn : \n1 patient location : \n1 hr : \n1 dob : \n1 gender : \n1 height : \n1 age : \n1 weight : \n1 reason for study : \n1 bsa : \n1 history : \n1 medications : \n1 performed . \n1 . \n\n form_relation_prediction_value \\\n0 mrs meera singh \n0 477737 \n0 248275 \n0 20 / 04 / 2019 1 : 55 9 \n0 33 yrs / female \n0 22 / 04 / 2019 10 : 56 \n0 dr . chetna jain \n0 sec - \n0 urine \n0 mid stream urine \n0 culture sterile after 48 hours of aerobic incu... \n1 dribbler , bbb \n1 12 - 09 - 2006 , 6 : 34 \n1 120 / 80 mmhg \n1 12341820060912 \n1 room \n1 100 bpm \n1 19 - 06 - 1979 \n1 male \n1 123 cm \n1 27 years \n1 25 kg \n1 mi \n1 0 . 92 m \n1 asfgfdgsdg \n1 heparine , paracetamol \n1 the study technically limited . \n1 no \n\n path \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
form_relation_prediction_keyform_relation_prediction_valuepath
0patient namemrs meera singhfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0lab no477737file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0uhid248275file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0sample date20 / 04 / 2019 1 : 55 9file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0age / gender33 yrs / femalefile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0report date22 / 04 / 2019 10 : 56file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0prescribed bydr . chetna jainfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0opd / ipd opdsec -file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0sample typeurinefile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0specimenmid stream urinefile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0result :culture sterile after 48 hours of aerobic incu...file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1name :dribbler , bbbfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1study date :12 - 09 - 2006 , 6 : 34file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1bp :120 / 80 mmhgfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1mrn :12341820060912file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1patient location :roomfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1hr :100 bpmfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1dob :19 - 06 - 1979file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1gender :malefile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1height :123 cmfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1age :27 yearsfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1weight :25 kgfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1reason for study :mifile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1bsa :0 . 92 mfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1history :asfgfdgsdgfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1medications :heparine , paracetamolfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1performed .the study technically limited .file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1.nofile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
\n
" + "text/plain": " form_relation_prediction_key form_relation_prediction_value \\\n0 division allied health \n0 course hce \n0 number 116 \n0 title calculations medical dosage \n0 credits 2 \n0 developed by dr . by taz \n0 lecture / lab lecture / o ratio 2 \n0 course activity no \n0 cip code 51 . 0800 \n0 semester fall and \n0 ge category none \n0 separate lab no \n0 course awareness no \n0 course no \n1 name : dribbler , bbb \n1 study date : 12 - 09 - 2006 , 6 : 34 \n1 bp : 120 / 80 mmhg \n1 mrn : 12341820060912 \n1 patient location : room \n1 hr : 100 bpm \n1 dob : 19 - 06 - 1979 \n1 gender : male \n1 height : 123 cm \n1 age : 27 years \n1 weight : 25 kg \n1 reason for study : mi \n1 bsa : 0 . 92 m \n1 history : asfgfdgsdg \n1 medications : heparine , paracetamol \n1 performed . the study technically limited . \n1 . no \n\n path \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n0 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... \n1 file:/F:/Work/repos/nlu_new/ner/nlu/examples/c... ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
form_relation_prediction_keyform_relation_prediction_valuepath
0divisionallied healthfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0coursehcefile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0number116file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0titlecalculations medical dosagefile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0credits2file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0developed bydr . by tazfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0lecture / lab lecture / o ratio2file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0course activitynofile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0cip code51 . 0800file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0semesterfall andfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0ge categorynonefile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0separate labnofile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0course awarenessnofile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
0coursenofile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1name :dribbler , bbbfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1study date :12 - 09 - 2006 , 6 : 34file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1bp :120 / 80 mmhgfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1mrn :12341820060912file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1patient location :roomfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1hr :100 bpmfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1dob :19 - 06 - 1979file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1gender :malefile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1height :123 cmfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1age :27 yearsfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1weight :25 kgfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1reason for study :mifile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1bsa :0 . 92 mfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1history :asfgfdgsdgfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1medications :heparine , paracetamolfile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1performed .the study technically limited .file:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
1.nofile:/F:/Work/repos/nlu_new/ner/nlu/examples/c...
\n
" }, - "execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -636,8 +144,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-05T01:33:38.903202300Z", - "start_time": "2024-05-05T01:33:38.883843200Z" + "end_time": "2024-05-13T08:40:51.701641600Z", + "start_time": "2024-05-13T08:40:51.627215600Z" } } }, diff --git a/tests/datasets/ocr/images/form.png b/tests/datasets/ocr/images/form.png index e69de29b..da663616 100644 Binary files a/tests/datasets/ocr/images/form.png and b/tests/datasets/ocr/images/form.png differ diff --git a/tests/datasets/ocr/images/form2.jpg b/tests/datasets/ocr/images/form2.jpg new file mode 100644 index 00000000..fbd8dd5f Binary files /dev/null and b/tests/datasets/ocr/images/form2.jpg differ diff --git a/tests/datasets/ocr/images/form2.png b/tests/datasets/ocr/images/form2.png deleted file mode 100644 index e69de29b..00000000