Merge pull request #184 from EvolvingLMMs-Lab/pufanyi/pypi/0.2.2
New pypi
Luodian authored Aug 8, 2024
2 parents 2af043e + a365bf7 commit 3f89773
Showing 11 changed files with 70 additions and 65 deletions.
9 changes: 7 additions & 2 deletions README.md
@@ -49,7 +49,7 @@ cd lmms-eval
pip install -e .
```

If you wanted to test llava, you will have to clone their repo from [LLaVA](https://github.com/haotian-liu/LLaVA) and
If you want to test LLaVA, you will have to clone their repo from [LLaVA](https://github.com/haotian-liu/LLaVA) and
```bash
# for llava 1.5
# git clone https://github.com/haotian-liu/LLaVA
@@ -69,7 +69,7 @@ You can check the [environment install script](miscs/repr_scripts.sh) and [torch

</details>

If you want to test on caption dataset such as `coco`, `refcoco`, and `nocaps`, you will need to have `java==1.8.0 ` to let pycocoeval api to work. If you don't have it, you can install by using conda
If you want to test on caption dataset such as `coco`, `refcoco`, and `nocaps`, you will need to have `java==1.8.0` to let pycocoeval api to work. If you don't have it, you can install by using conda
```
conda install openjdk=8
```
@@ -93,6 +93,11 @@ We also provide the raw data exported from Weights & Biases for the detailed res
</details>
<br>

If you want to test [VILA](https://github.com/NVlabs/VILA), you should install the following dependencies:

```bash
pip install s2wrapper@git+https://github.com/bfshi/scaling_on_scales
```
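
A quick sanity check before launching a VILA run (not from the repo; it assumes the package installed above exposes the import name `s2wrapper`, which the diff does not state):

```python
# Hedged check for the optional VILA dependency installed above.
# Assumption: bfshi/scaling_on_scales installs under the import name `s2wrapper`.
try:
    import s2wrapper  # noqa: F401
    print("s2wrapper is available")
except ImportError:
    print("s2wrapper is missing; install it with the pip command above")
```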

Our Development will be continuing on the main branch, and we encourage you to give us feedback on what features are desired and how to improve the library further, or ask questions, either in issues or PRs on GitHub.

4 changes: 1 addition & 3 deletions lmms_eval/api/samplers.py
@@ -37,9 +37,7 @@ def get_context(self, doc, num_fewshot):
+ (
str(self.doc_to_target(doc)[0])
if type(self.doc_to_target(doc)) is list
else self.doc_to_target(doc)
if (self.config.doc_to_choice is None or type(self.doc_to_target(doc)) is str)
else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
else self.doc_to_target(doc) if (self.config.doc_to_choice is None or type(self.doc_to_target(doc)) is str) else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
)
for doc in selected_docs
]
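The single remaining line above folds three cases into one conditional expression. A standalone sketch of the same selection logic, using a hypothetical helper rather than the library's sampler internals:

```python
# Sketch of the target-selection logic in the hunk above (hypothetical helper, not part of
# lmms_eval): prefer the first element when the target is a list, return the target directly
# when there is no choice list or it is already a string, otherwise index into the choices.
def resolve_target(target, choices=None):
    if isinstance(target, list):
        return str(target[0])
    if choices is None or isinstance(target, str):
        return target
    return str(choices[target])

print(resolve_target(["A", "B"]))          # "A"   -> list target, take the first element
print(resolve_target("yes"))               # "yes" -> plain string target
print(resolve_target(1, ["cat", "dog"]))   # "dog" -> integer target indexes the choices
```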
2 changes: 1 addition & 1 deletion lmms_eval/models/__init__.py
@@ -41,7 +41,7 @@
"video_llava": "VideoLLaVA",
"vila": "VILA",
"xcomposer2_4KHD": "XComposer2_4KHD",
"xcomposer2d5": "XComposer2D5"
"xcomposer2d5": "XComposer2D5",
}

for model_name, model_class in AVAILABLE_MODELS.items():
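The change here is only a trailing comma after the last registry entry, which keeps future model additions to one-line diffs. For context, a sketch of the pattern this dict feeds (hypothetical package path; the actual import mechanics in lmms_eval may differ): each name is resolved lazily, so a missing optional dependency only disables that one model.

```python
# Sketch of a name-to-class model registry with lazy imports (hypothetical package path).
import importlib

AVAILABLE_MODELS = {
    "video_llava": "VideoLLaVA",
    "vila": "VILA",
}

loaded = {}
for model_name, model_class in AVAILABLE_MODELS.items():
    try:
        module = importlib.import_module(f"my_models.{model_name}")  # hypothetical package
        loaded[model_class] = getattr(module, model_class)
    except ImportError as e:
        print(f"Skipping {model_name}: {e}")  # one missing dependency does not break the rest
```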
2 changes: 1 addition & 1 deletion lmms_eval/models/llava_hf.py
@@ -320,7 +320,7 @@ def _collate(x):
pad_token_id=self.tokenizer.eos_token_id,
eos_token_id=self.specified_eot_token_id,
)
cont = cont[:, inputs["input_ids"].shape[-1]:]
cont = cont[:, inputs["input_ids"].shape[-1] :]
except Exception as e:
eval_logger.error(f"Error {e} in generating")
cont = ""
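The llava_hf.py change is formatting only (spacing around the slice bound), but the line itself implements a common post-generation step: `generate` returns the prompt followed by the continuation, and slicing at `input_ids.shape[-1]` drops the echoed prompt. A toy sketch, not taken from the model code:

```python
# Toy tensors illustrating the prompt-stripping slice used in the hunk above.
import torch

input_ids = torch.tensor([[11, 12, 13, 14]])                       # stand-in prompt ids
generated = torch.cat([input_ids, torch.tensor([[21, 22, 23]])], dim=-1)  # prompt + new tokens

continuation = generated[:, input_ids.shape[-1]:]                  # drop the echoed prompt
print(continuation)  # tensor([[21, 22, 23]])
```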
2 changes: 2 additions & 0 deletions lmms_eval/models/llava_vid.py
@@ -28,11 +28,13 @@

try:
from llavavid.model.language_model.llava_qwen import LlavaQwenConfig

AutoConfig.register("llava_qwen", LlavaQwenConfig)
except ImportError:
eval_logger.debug("No Qwen for llava vid")

from llavavid.model.language_model.llava_llama import LlavaConfig

AutoConfig.register("llava_llama", LlavaConfig)


46 changes: 22 additions & 24 deletions lmms_eval/models/mantis.py
@@ -27,10 +27,10 @@
from mantis.models.mllava import LlavaForConditionalGeneration, MLlavaProcessor
from mantis.models.mfuyu import MFuyuForCausalLM, MFuyuProcessor
from mantis.models.conversation import conv_mllava_v1 as default_conv, conv_templates

except Exception as e:
eval_logger.debug("Mantis is not installed. Please install Mantis to use this model.\nError: %s" % e)

try:
from transformers import AutoModelForVision2Seq, AutoProcessor
except Exception as e:
@@ -42,13 +42,14 @@

try:
import flash_attn

best_fit_attn_implementation = "flash_attention_2"
except ImportError:
best_fit_attn_implementation = "eager"

DEFAULT_IMAGE_TOKEN = "<image>"


@register_model("mantis")
class Mantis(lmms):
"""
@@ -84,35 +84,35 @@ def __init__(
else:
self._device = torch.device(f"cuda:{accelerator.local_process_index}")
self.device_map = f"cuda:{accelerator.local_process_index}"

self._is_idefics = "idefics" in pretrained.lower()
if isinstance(dtype, str) and dtype != "auto":
dtype = getattr(torch, dtype)

# Here we load the "non-idefics" Mantis model.
if not self._is_idefics:
if 'fuyu' in pretrained.lower():
if "fuyu" in pretrained.lower():
self._processor = MFuyuProcessor.from_pretrained(pretrained)
self._model = MFuyuForCausalLM.from_pretrained(pretrained, device_map=self.device_map, attn_implementation=attn_implementation, torch_dtype=dtype)
else:
self._processor = MLlavaProcessor.from_pretrained(pretrained)
self._model = LlavaForConditionalGeneration.from_pretrained(pretrained, device_map=self.device_map, attn_implementation=attn_implementation, torch_dtype=dtype)

else:
self._processor = AutoProcessor.from_pretrained(pretrained)
self._model = AutoModelForVision2Seq.from_pretrained(pretrained, device_map=self.device_map, torch_dtype=dtype)
eval_logger.info(f"Using {type(self._model)} to instantiate the Mantis model.")

self._tokenizer = self._processor.tokenizer

self._config = self._model.config
self.model.eval()
self.model.tie_weights()
self.truncation = truncation
self.batch_size_per_gpu = int(batch_size)
self.use_cache = use_cache
self.truncate_context = truncate_context

if accelerator.num_processes > 1:
assert accelerator.distributed_type in [DistributedType.FSDP, DistributedType.MULTI_GPU, DistributedType.DEEPSPEED], "Unsupported distributed type provided. Only DDP and FSDP are supported."
# If you want to use DistributedType.DEEPSPEED, you have to run accelerate config before using the model
@@ -222,7 +223,7 @@ def flatten(self, input):

def generate_until(self, requests: List[Instance]) -> List[str]:
res = []

def _collate(x):
# the negative sign on len(toks) sorts descending - this has a few advantages:
# - time estimates will always be over not underestimates, which is more useful for planning
@@ -243,11 +244,11 @@ def _collate(x):
for chunk in chunks:
contexts, all_gen_kwargs, doc_to_visuals, doc_id, tasks, splits = zip(*chunk)
visuals = [doc_to_visual(self.task_dict[task][split][ids]) for ids, task, split, doc_to_visual in zip(doc_id, tasks, splits, doc_to_visuals)]

# we assume all gen kwargs in the batch are the same
# this is safe to assume because the `grouper` object ensures it.
gen_kwargs = all_gen_kwargs[0]

until = gen_kwargs.pop("until", None)
image_aspect_ratio = gen_kwargs.pop("image_aspect_ratio", None)

@@ -261,7 +262,7 @@ def _collate(x):
prompts = []
for visual, context in zip(visuals, contexts):
if self._is_idefics:
# Follow the idefics implementation:
# Follow the idefics implementation:
content = []
if DEFAULT_IMAGE_TOKEN not in context:
for _ in visual:
@@ -274,27 +275,24 @@ def _collate(x):
# We follow the Mantis code base: https://github.com/TIGER-AI-Lab/Mantis/blob/main/mantis/models/mllava/utils.py#L33 to make sure they are consistent
# Users don't need to define chat template as it is done here
if "llama-3" in self._model.language_model.name_or_path.lower():
conv = conv_templates['llama_3']
terminators = [
self._processor.tokenizer.eos_token_id,
self._processor.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
conv = conv_templates["llama_3"]
terminators = [self._processor.tokenizer.eos_token_id, self._processor.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
else:
conv = default_conv
terminators = None

gen_kwargs["eos_token_id"] = terminators

conv = conv.copy()
conv.append_message(conv.roles[0], context)
conv.append_message(conv.roles[1], "")
prompt = conv.get_prompt()
prompts.append(prompt)
inputs = self._processor(images=visuals, text=prompts, return_tensors="pt", truncation=True)
if "image_patches" in inputs.keys():
inputs["image_patches"] = inputs["image_patches"][0] # FIXME: Fuyu model would return a list instead of a pytorch tensor. This weird behavior needs fixing.
inputs = {k: v.to(self.device) for k, v in inputs.items()}
inputs["image_patches"] = inputs["image_patches"][0] # FIXME: Fuyu model would return a list instead of a pytorch tensor. This weird behavior needs fixing.
inputs = {k: v.to(self.device) for k, v in inputs.items()}

output_ids = self.model.generate(**inputs, **gen_kwargs)
for output_id, input_id in zip(output_ids, inputs["input_ids"]):
generated_id = output_id[len(input_id) :]
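Beyond formatting, the mantis.py hunks keep the `_collate` comment about sorting requests by negative token length. A toy illustration of why descending order helps, separate from the lmms_eval implementation:

```python
# Toy data showing the descending-length ordering described in the _collate comment above:
# the longest requests are scheduled first, so batch-size problems surface early and
# running-time estimates err on the high side rather than the low side.
requests = [
    "short prompt",
    "a considerably longer prompt with many more tokens in it than the others",
    "medium length prompt here",
]

ordered = sorted(requests, key=lambda text: (-len(text.split()), text))
for r in ordered:
    print(len(r.split()), r)  # longest first
```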
6 changes: 2 additions & 4 deletions lmms_eval/models/xcomposer2d5.py
@@ -40,7 +40,6 @@ def __init__(
if not os.path.exists(self.tmp_folder):
os.makedirs(self.tmp_folder)
eval_logger.info(f"Using temporary folder: {self.tmp_folder}")


batch_size = int(batch_size)
assert batch_size == 1, f"Batch size should be 1 for InternVL2, but got {batch_size}."
@@ -57,7 +56,7 @@ def __init__(
else:
self._device = torch.device(f"cuda:{accelerator.local_process_index}")
self.device_map = f"cuda:{accelerator.local_process_index}"

self.path = pretrained
self._model = AutoModel.from_pretrained(self.path, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map=self.device_map).half().eval()
self._tokenizer = AutoTokenizer.from_pretrained(self.path, trust_remote_code=True)
@@ -130,7 +129,6 @@ def rank(self):
def world_size(self):
return self._world_size


def flatten(self, input):
new_list = []
for i in input:
@@ -168,7 +166,7 @@ def generate_until(self, requests) -> List[str]:
gen_kwargs["num_beams"] = 1

try:
with torch.autocast(device_type='cuda', dtype=torch.float16):
with torch.autocast(device_type="cuda", dtype=torch.float16):
response, his = self.model.chat(self.tokenizer, contexts, image, do_sample=False, num_beams=1, use_meta=True, max_new_tokens=gen_kwargs["max_new_tokens"])
except Exception as e:
eval_logger.error(f"Error : {e}")
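The reformatted `torch.autocast(device_type="cuda", dtype=torch.float16)` line above wraps the chat call so that eligible ops run in half precision without converting the model in place. A minimal, standalone sketch of that pattern (toy tensors, not the model's chat call):

```python
# Toy sketch of autocast: ops inside the context run in float16 on CUDA where safe.
import torch

if torch.cuda.is_available():
    x = torch.randn(4, 4, device="cuda")
    w = torch.randn(4, 4, device="cuda")
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        y = x @ w
    print(y.dtype)  # torch.float16
else:
    print("CUDA not available; this sketch targets GPU execution")
```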