From 39492f0a505950bc86fb326ad6bd618df8ea3559 Mon Sep 17 00:00:00 2001 From: Pikus16 Date: Fri, 24 May 2024 16:55:52 +0000 Subject: [PATCH 1/5] add verbosity to glip --- mmdet/models/detectors/glip.py | 33 ++++++++++++++---------- mmdet/models/detectors/grounding_dino.py | 15 ++++++----- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/mmdet/models/detectors/glip.py b/mmdet/models/detectors/glip.py index 45cfe7d39fd..0948d4a2ba9 100644 --- a/mmdet/models/detectors/glip.py +++ b/mmdet/models/detectors/glip.py @@ -11,9 +11,10 @@ from mmdet.structures import SampleList from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig from .single_stage import SingleStageDetector +from os.path import expanduser -def find_noun_phrases(caption: str) -> list: +def find_noun_phrases(caption: str, verbose: bool = True) -> list: """Find noun phrases in a caption using nltk. Args: caption (str): The caption to analyze. @@ -27,8 +28,8 @@ def find_noun_phrases(caption: str) -> list: """ try: import nltk - nltk.download('punkt', download_dir='~/nltk_data') - nltk.download('averaged_perceptron_tagger', download_dir='~/nltk_data') + nltk.download('punkt', download_dir=expanduser('~/nltk_data'), quiet=verbose) + nltk.download('averaged_perceptron_tagger', download_dir=expanduser('~/nltk_data'), quiet=verbose) except ImportError: raise RuntimeError('nltk is not installed, please install it by: ' 'pip install nltk.') @@ -66,7 +67,7 @@ def remove_punctuation(text: str) -> str: return text.strip() -def run_ner(caption: str) -> Tuple[list, list]: +def run_ner(caption: str, verbose: bool = False) -> Tuple[list, list]: """Run NER on a caption and return the tokens and noun phrases. Args: caption (str): The input caption. @@ -76,10 +77,11 @@ def run_ner(caption: str) -> Tuple[list, list]: - tokens_positive (List): A list of token positions. - noun_phrases (List): A list of noun phrases. """ - noun_phrases = find_noun_phrases(caption) + noun_phrases = find_noun_phrases(caption,verbose=verbose) noun_phrases = [remove_punctuation(phrase) for phrase in noun_phrases] noun_phrases = [phrase for phrase in noun_phrases if phrase != ''] - print('noun_phrases:', noun_phrases) + if verbose: + print('noun_phrases:', noun_phrases) relevant_phrases = noun_phrases labels = noun_phrases @@ -274,7 +276,8 @@ def get_tokens_and_prompts( self, original_caption: Union[str, list, tuple], custom_entities: bool = False, - enhanced_text_prompts: Optional[ConfigType] = None + enhanced_text_prompts: Optional[ConfigType] = None, + verbose: bool = False ) -> Tuple[dict, str, list, list]: """Get the tokens positive and prompts for the caption.""" if isinstance(original_caption, (list, tuple)) or custom_entities: @@ -300,7 +303,7 @@ def get_tokens_and_prompts( original_caption = original_caption.strip(self._special_tokens) tokenized = self.language_model.tokenizer([original_caption], return_tensors='pt') - tokens_positive, noun_phrases = run_ner(original_caption) + tokens_positive, noun_phrases = run_ner(original_caption, verbose=verbose) entities = noun_phrases caption_string = original_caption @@ -318,6 +321,7 @@ def get_tokens_positive_and_prompts( custom_entities: bool = False, enhanced_text_prompt: Optional[ConfigType] = None, tokens_positive: Optional[list] = None, + verbose:bool = False ) -> Tuple[dict, str, Tensor, list]: if tokens_positive is not None: if tokens_positive == -1: @@ -354,7 +358,8 @@ def get_tokens_positive_and_prompts( else: tokenized, caption_string, tokens_positive, entities = \ self.get_tokens_and_prompts( - original_caption, custom_entities, enhanced_text_prompt) + original_caption, custom_entities, enhanced_text_prompt, + verbose=verbose) positive_map_label_to_token, positive_map = self.get_positive_map( tokenized, tokens_positive) if tokenized.input_ids.shape[1] > self.language_model.max_tokens: @@ -367,7 +372,8 @@ def get_tokens_positive_and_prompts( def get_tokens_positive_and_prompts_chunked( self, original_caption: Union[list, tuple], - enhanced_text_prompts: Optional[ConfigType] = None): + enhanced_text_prompts: Optional[ConfigType] = None, + verbose: bool = False): chunked_size = self.test_cfg.get('chunked_size', -1) original_caption = [clean_label_name(i) for i in original_caption] @@ -409,7 +415,8 @@ def get_tokens_positive_and_prompts_chunked( entities_chunked def loss(self, batch_inputs: Tensor, - batch_data_samples: SampleList) -> Union[dict, list]: + batch_data_samples: SampleList, + verbose: bool = False) -> Union[dict, list]: # TODO: Only open vocabulary tasks are supported for training now. text_prompts = [ data_samples.text for data_samples in batch_data_samples @@ -427,7 +434,7 @@ def loss(self, batch_inputs: Tensor, # so there is no need to calculate them multiple times. tokenized, caption_string, tokens_positive, _ = \ self.get_tokens_and_prompts( - text_prompts[0], True) + text_prompts[0], True, verbose=verbose) new_text_prompts = [caption_string] * len(batch_inputs) for gt_label in gt_labels: new_tokens_positive = [ @@ -440,7 +447,7 @@ def loss(self, batch_inputs: Tensor, for text_prompt, gt_label in zip(text_prompts, gt_labels): tokenized, caption_string, tokens_positive, _ = \ self.get_tokens_and_prompts( - text_prompt, True) + text_prompt, True, verbose=verbose) new_tokens_positive = [ tokens_positive[label] for label in gt_label ] diff --git a/mmdet/models/detectors/grounding_dino.py b/mmdet/models/detectors/grounding_dino.py index b1ab7c2da16..8a0ae9cb00e 100644 --- a/mmdet/models/detectors/grounding_dino.py +++ b/mmdet/models/detectors/grounding_dino.py @@ -135,7 +135,8 @@ def get_tokens_and_prompts( self, original_caption: Union[str, list, tuple], custom_entities: bool = False, - enhanced_text_prompts: Optional[ConfigType] = None + enhanced_text_prompts: Optional[ConfigType] = None, + verbose: bool = False ) -> Tuple[dict, str, list]: """Get the tokens positive and prompts for the caption.""" if isinstance(original_caption, (list, tuple)) or custom_entities: @@ -176,7 +177,7 @@ def get_tokens_and_prompts( padding='max_length' if self.language_model.pad_to_max else 'longest', return_tensors='pt') - tokens_positive, noun_phrases = run_ner(original_caption) + tokens_positive, noun_phrases = run_ner(original_caption, verbose=verbose) entities = noun_phrases caption_string = original_caption @@ -198,6 +199,7 @@ def get_tokens_positive_and_prompts( custom_entities: bool = False, enhanced_text_prompt: Optional[ConfigType] = None, tokens_positive: Optional[list] = None, + verbose: bool = False ) -> Tuple[dict, str, Tensor, list]: """Get the tokens positive and prompts for the caption. @@ -250,7 +252,7 @@ def get_tokens_positive_and_prompts( else: tokenized, caption_string, tokens_positive, entities = \ self.get_tokens_and_prompts( - original_caption, custom_entities, enhanced_text_prompt) + original_caption, custom_entities, enhanced_text_prompt, verbose=verbose) positive_map_label_to_token, positive_map = self.get_positive_map( tokenized, tokens_positive) return positive_map_label_to_token, caption_string, \ @@ -417,7 +419,8 @@ def pre_decoder( return decoder_inputs_dict, head_inputs_dict def loss(self, batch_inputs: Tensor, - batch_data_samples: SampleList) -> Union[dict, list]: + batch_data_samples: SampleList, + verbose: bool = False) -> Union[dict, list]: text_prompts = [ data_samples.text for data_samples in batch_data_samples ] @@ -455,7 +458,7 @@ def loss(self, batch_inputs: Tensor, # so there is no need to calculate them multiple times. tokenized, caption_string, tokens_positive, _ = \ self.get_tokens_and_prompts( - text_prompts[0], True) + text_prompts[0], True, verbose = verbose) new_text_prompts = [caption_string] * len(batch_inputs) for gt_label in gt_labels: new_tokens_positive = [ @@ -468,7 +471,7 @@ def loss(self, batch_inputs: Tensor, for text_prompt, gt_label in zip(text_prompts, gt_labels): tokenized, caption_string, tokens_positive, _ = \ self.get_tokens_and_prompts( - text_prompt, True) + text_prompt, True, verbose=verbose) new_tokens_positive = [ tokens_positive[label] for label in gt_label ] From 4d001f29cefc95fa1c3d658449a79523d4b4ce99 Mon Sep 17 00:00:00 2001 From: Pikus16 Date: Fri, 24 May 2024 17:03:27 +0000 Subject: [PATCH 2/5] fix glip --- mmdet/models/detectors/glip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmdet/models/detectors/glip.py b/mmdet/models/detectors/glip.py index 0948d4a2ba9..da4a76959a0 100644 --- a/mmdet/models/detectors/glip.py +++ b/mmdet/models/detectors/glip.py @@ -28,8 +28,8 @@ def find_noun_phrases(caption: str, verbose: bool = True) -> list: """ try: import nltk - nltk.download('punkt', download_dir=expanduser('~/nltk_data'), quiet=verbose) - nltk.download('averaged_perceptron_tagger', download_dir=expanduser('~/nltk_data'), quiet=verbose) + nltk.download('punkt', download_dir=expanduser('~/nltk_data'), quiet=not verbose) + nltk.download('averaged_perceptron_tagger', download_dir=expanduser('~/nltk_data'), quiet=not verbose) except ImportError: raise RuntimeError('nltk is not installed, please install it by: ' 'pip install nltk.') From 489cc0f6520107a796af8ade90ef23e1af5311ad Mon Sep 17 00:00:00 2001 From: Pikus16 Date: Fri, 24 May 2024 17:56:08 +0000 Subject: [PATCH 3/5] lint --- mmdet/models/detectors/glip.py | 13 ++++++++----- mmdet/models/detectors/grounding_dino.py | 8 +++++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/mmdet/models/detectors/glip.py b/mmdet/models/detectors/glip.py index da4a76959a0..b9d1c157bdd 100644 --- a/mmdet/models/detectors/glip.py +++ b/mmdet/models/detectors/glip.py @@ -28,8 +28,10 @@ def find_noun_phrases(caption: str, verbose: bool = True) -> list: """ try: import nltk - nltk.download('punkt', download_dir=expanduser('~/nltk_data'), quiet=not verbose) - nltk.download('averaged_perceptron_tagger', download_dir=expanduser('~/nltk_data'), quiet=not verbose) + nltk.download('punkt', download_dir=expanduser('~/nltk_data'), + quiet=not verbose) + nltk.download('averaged_perceptron_tagger', download_dir=expanduser('~/nltk_data'), + quiet=not verbose) except ImportError: raise RuntimeError('nltk is not installed, please install it by: ' 'pip install nltk.') @@ -77,7 +79,7 @@ def run_ner(caption: str, verbose: bool = False) -> Tuple[list, list]: - tokens_positive (List): A list of token positions. - noun_phrases (List): A list of noun phrases. """ - noun_phrases = find_noun_phrases(caption,verbose=verbose) + noun_phrases = find_noun_phrases(caption, verbose=verbose) noun_phrases = [remove_punctuation(phrase) for phrase in noun_phrases] noun_phrases = [phrase for phrase in noun_phrases if phrase != ''] if verbose: @@ -303,7 +305,8 @@ def get_tokens_and_prompts( original_caption = original_caption.strip(self._special_tokens) tokenized = self.language_model.tokenizer([original_caption], return_tensors='pt') - tokens_positive, noun_phrases = run_ner(original_caption, verbose=verbose) + tokens_positive, noun_phrases = run_ner(original_caption, + verbose=verbose) entities = noun_phrases caption_string = original_caption @@ -321,7 +324,7 @@ def get_tokens_positive_and_prompts( custom_entities: bool = False, enhanced_text_prompt: Optional[ConfigType] = None, tokens_positive: Optional[list] = None, - verbose:bool = False + verbose: bool = False ) -> Tuple[dict, str, Tensor, list]: if tokens_positive is not None: if tokens_positive == -1: diff --git a/mmdet/models/detectors/grounding_dino.py b/mmdet/models/detectors/grounding_dino.py index 8a0ae9cb00e..6af41553526 100644 --- a/mmdet/models/detectors/grounding_dino.py +++ b/mmdet/models/detectors/grounding_dino.py @@ -177,7 +177,8 @@ def get_tokens_and_prompts( padding='max_length' if self.language_model.pad_to_max else 'longest', return_tensors='pt') - tokens_positive, noun_phrases = run_ner(original_caption, verbose=verbose) + tokens_positive, noun_phrases = run_ner(original_caption, + verbose=verbose) entities = noun_phrases caption_string = original_caption @@ -252,7 +253,8 @@ def get_tokens_positive_and_prompts( else: tokenized, caption_string, tokens_positive, entities = \ self.get_tokens_and_prompts( - original_caption, custom_entities, enhanced_text_prompt, verbose=verbose) + original_caption, custom_entities, enhanced_text_prompt, + verbose=verbose) positive_map_label_to_token, positive_map = self.get_positive_map( tokenized, tokens_positive) return positive_map_label_to_token, caption_string, \ @@ -458,7 +460,7 @@ def loss(self, batch_inputs: Tensor, # so there is no need to calculate them multiple times. tokenized, caption_string, tokens_positive, _ = \ self.get_tokens_and_prompts( - text_prompts[0], True, verbose = verbose) + text_prompts[0], True, verbose=verbose) new_text_prompts = [caption_string] * len(batch_inputs) for gt_label in gt_labels: new_tokens_positive = [ From 6ddc32ef1346dd56285ecdc79f51c744da18ae00 Mon Sep 17 00:00:00 2001 From: Pikus16 Date: Fri, 24 May 2024 19:25:08 +0000 Subject: [PATCH 4/5] lint --- mmdet/models/detectors/glip.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mmdet/models/detectors/glip.py b/mmdet/models/detectors/glip.py index b9d1c157bdd..3744e57f8e1 100644 --- a/mmdet/models/detectors/glip.py +++ b/mmdet/models/detectors/glip.py @@ -30,7 +30,8 @@ def find_noun_phrases(caption: str, verbose: bool = True) -> list: import nltk nltk.download('punkt', download_dir=expanduser('~/nltk_data'), quiet=not verbose) - nltk.download('averaged_perceptron_tagger', download_dir=expanduser('~/nltk_data'), + nltk.download('averaged_perceptron_tagger', + download_dir=expanduser('~/nltk_data'), quiet=not verbose) except ImportError: raise RuntimeError('nltk is not installed, please install it by: ' From 5e96c590310796d92a241bc3d80285f1c7011f43 Mon Sep 17 00:00:00 2001 From: Pikus16 Date: Fri, 24 May 2024 19:55:24 +0000 Subject: [PATCH 5/5] lint --- mmdet/models/detectors/glip.py | 44 ++++++++++++------------ mmdet/models/detectors/grounding_dino.py | 31 ++++++++--------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/mmdet/models/detectors/glip.py b/mmdet/models/detectors/glip.py index 3744e57f8e1..80c8a1fb872 100644 --- a/mmdet/models/detectors/glip.py +++ b/mmdet/models/detectors/glip.py @@ -2,6 +2,7 @@ import copy import re import warnings +from os.path import expanduser from typing import Optional, Tuple, Union import torch @@ -11,7 +12,6 @@ from mmdet.structures import SampleList from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig from .single_stage import SingleStageDetector -from os.path import expanduser def find_noun_phrases(caption: str, verbose: bool = True) -> list: @@ -28,11 +28,12 @@ def find_noun_phrases(caption: str, verbose: bool = True) -> list: """ try: import nltk - nltk.download('punkt', download_dir=expanduser('~/nltk_data'), - quiet=not verbose) - nltk.download('averaged_perceptron_tagger', - download_dir=expanduser('~/nltk_data'), - quiet=not verbose) + nltk.download( + 'punkt', download_dir=expanduser('~/nltk_data'), quiet=not verbose) + nltk.download( + 'averaged_perceptron_tagger', + download_dir=expanduser('~/nltk_data'), + quiet=not verbose) except ImportError: raise RuntimeError('nltk is not installed, please install it by: ' 'pip install nltk.') @@ -276,12 +277,11 @@ def to_plain_text_prompts(self, original_caption): return caption_string, tokens_positive def get_tokens_and_prompts( - self, - original_caption: Union[str, list, tuple], - custom_entities: bool = False, - enhanced_text_prompts: Optional[ConfigType] = None, - verbose: bool = False - ) -> Tuple[dict, str, list, list]: + self, + original_caption: Union[str, list, tuple], + custom_entities: bool = False, + enhanced_text_prompts: Optional[ConfigType] = None, + verbose: bool = False) -> Tuple[dict, str, list, list]: """Get the tokens positive and prompts for the caption.""" if isinstance(original_caption, (list, tuple)) or custom_entities: if custom_entities and isinstance(original_caption, str): @@ -306,8 +306,8 @@ def get_tokens_and_prompts( original_caption = original_caption.strip(self._special_tokens) tokenized = self.language_model.tokenizer([original_caption], return_tensors='pt') - tokens_positive, noun_phrases = run_ner(original_caption, - verbose=verbose) + tokens_positive, noun_phrases = run_ner( + original_caption, verbose=verbose) entities = noun_phrases caption_string = original_caption @@ -320,13 +320,12 @@ def get_positive_map(self, tokenized, tokens_positive): return positive_map_label_to_token, positive_map def get_tokens_positive_and_prompts( - self, - original_caption: Union[str, list, tuple], - custom_entities: bool = False, - enhanced_text_prompt: Optional[ConfigType] = None, - tokens_positive: Optional[list] = None, - verbose: bool = False - ) -> Tuple[dict, str, Tensor, list]: + self, + original_caption: Union[str, list, tuple], + custom_entities: bool = False, + enhanced_text_prompt: Optional[ConfigType] = None, + tokens_positive: Optional[list] = None, + verbose: bool = False) -> Tuple[dict, str, Tensor, list]: if tokens_positive is not None: if tokens_positive == -1: if not original_caption.endswith('.'): @@ -418,7 +417,8 @@ def get_tokens_positive_and_prompts_chunked( positive_map_chunked, \ entities_chunked - def loss(self, batch_inputs: Tensor, + def loss(self, + batch_inputs: Tensor, batch_data_samples: SampleList, verbose: bool = False) -> Union[dict, list]: # TODO: Only open vocabulary tasks are supported for training now. diff --git a/mmdet/models/detectors/grounding_dino.py b/mmdet/models/detectors/grounding_dino.py index 6af41553526..fc89b90f905 100644 --- a/mmdet/models/detectors/grounding_dino.py +++ b/mmdet/models/detectors/grounding_dino.py @@ -132,12 +132,11 @@ def to_plain_text_prompts(self, original_caption): return caption_string, tokens_positive def get_tokens_and_prompts( - self, - original_caption: Union[str, list, tuple], - custom_entities: bool = False, - enhanced_text_prompts: Optional[ConfigType] = None, - verbose: bool = False - ) -> Tuple[dict, str, list]: + self, + original_caption: Union[str, list, tuple], + custom_entities: bool = False, + enhanced_text_prompts: Optional[ConfigType] = None, + verbose: bool = False) -> Tuple[dict, str, list]: """Get the tokens positive and prompts for the caption.""" if isinstance(original_caption, (list, tuple)) or custom_entities: if custom_entities and isinstance(original_caption, str): @@ -177,8 +176,8 @@ def get_tokens_and_prompts( padding='max_length' if self.language_model.pad_to_max else 'longest', return_tensors='pt') - tokens_positive, noun_phrases = run_ner(original_caption, - verbose=verbose) + tokens_positive, noun_phrases = run_ner( + original_caption, verbose=verbose) entities = noun_phrases caption_string = original_caption @@ -195,13 +194,12 @@ def get_positive_map(self, tokenized, tokens_positive): return positive_map_label_to_token, positive_map def get_tokens_positive_and_prompts( - self, - original_caption: Union[str, list, tuple], - custom_entities: bool = False, - enhanced_text_prompt: Optional[ConfigType] = None, - tokens_positive: Optional[list] = None, - verbose: bool = False - ) -> Tuple[dict, str, Tensor, list]: + self, + original_caption: Union[str, list, tuple], + custom_entities: bool = False, + enhanced_text_prompt: Optional[ConfigType] = None, + tokens_positive: Optional[list] = None, + verbose: bool = False) -> Tuple[dict, str, Tensor, list]: """Get the tokens positive and prompts for the caption. Args: @@ -420,7 +418,8 @@ def pre_decoder( head_inputs_dict['text_token_mask'] = text_token_mask return decoder_inputs_dict, head_inputs_dict - def loss(self, batch_inputs: Tensor, + def loss(self, + batch_inputs: Tensor, batch_data_samples: SampleList, verbose: bool = False) -> Union[dict, list]: text_prompts = [