Skip to content

Commit

Permalink
Update H2OVL
Browse files: browse the repository at this point in the history
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 committed Jan 30, 2025
1 parent a47f92a commit ddf55c2
Show file tree
Hide file tree
Showing 7 changed files with 490 additions and 386 deletions.
2 changes: 1 addition & 1 deletion docs/source/models/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ See [this page](#generative-models) for more information on how to use generativ
* `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc.
*
* ✅︎
*
* ✅︎
- * `Idefics3ForConditionalGeneration`
* Idefics3
* T + I
Expand Down
14 changes: 7 additions & 7 deletions tests/models/decoder_only/vision_language/test_h2ovl.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from transformers import AutoConfig

# Import the functions to test
from vllm.model_executor.models.h2ovl import (calculate_targets,
from vllm.model_executor.models.h2ovl import (calculate_h2ovl_targets,
image_to_pixel_values_wrapper)
from vllm.multimodal.image import rescale_image_size

Expand All @@ -27,16 +27,16 @@ def run_preprocessing_test(
max_dynamic_patch = config.max_dynamic_patch

width, height = image.size
use_MSAC = config.use_msac
use_msac = config.use_msac

# Create the mapper function with the provided configuration
mapper = image_to_pixel_values_wrapper(config, max_dynamic_patch, use_MSAC)
mapper = image_to_pixel_values_wrapper(config, max_dynamic_patch, use_msac)
pixel_values = mapper(image)

# Calculate the expected number of blocks
if use_MSAC:
if use_msac:
# First pass
blocks1, _, _, aspect_ratio = calculate_targets(
blocks1, _, _, aspect_ratio = calculate_h2ovl_targets(
width,
height,
config.min_dynamic_patch,
Expand All @@ -47,7 +47,7 @@ def run_preprocessing_test(
)

# Second pass
blocks2, _, _, _ = calculate_targets(
blocks2, _, _, _ = calculate_h2ovl_targets(
width,
height,
config.min_dynamic_patch,
Expand All @@ -68,7 +68,7 @@ def run_preprocessing_test(
expected_blocks = total_blocks

else:
blocks, _, _, _ = calculate_targets(
blocks, _, _, _ = calculate_h2ovl_targets(
width,
height,
config.min_dynamic_patch,
Expand Down
32 changes: 19 additions & 13 deletions tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ def __init__(self, hf_runner: HfRunner):
trust_remote_code=True)
self.vision_config = self.config.vision_config
self.use_thumbnail = self.config.use_thumbnail
self.use_msac = self.config.use_msac
self.min_num = self.config.min_dynamic_patch
self.max_num = self.config.max_dynamic_patch
self.image_size = self.vision_config.image_size
Expand All @@ -347,18 +348,19 @@ def __call__(self, text: str, images: Union[Image, List[Image]],
**kwargs):
# yapf: disable
from vllm.model_executor.models.h2ovl import (
IMG_CONTEXT, IMG_END, IMG_START, image_to_pixel_values)
IMG_CONTEXT, IMG_END, IMG_START, image_to_pixel_values_h2ovl)

# yapf: enable
images = [images] if isinstance(images, Image) else images
pixel_values = [
image_to_pixel_values(image,
self.image_size,
self.min_num,
self.max_num,
self.use_thumbnail,
use_MSAC=self.config.use_msac).to(
self.dtype) for image in images
image_to_pixel_values_h2ovl(
image,
input_size=self.image_size,
min_num=self.min_num,
max_num=self.max_num,
use_thumbnail=self.use_thumbnail,
use_msac=self.use_msac,
).to(self.dtype) for image in images
]
num_patches_list = [
pixel_value.shape[0] for pixel_value in pixel_values
Expand Down Expand Up @@ -406,13 +408,17 @@ def __init__(self, hf_runner: HfRunner):
def __call__(self, text: str, images: Union[Image, List[Image]],
**kwargs):
from vllm.model_executor.models.internvl import (
IMG_CONTEXT, IMG_END, IMG_START, image_to_pixel_values)
IMG_CONTEXT, IMG_END, IMG_START,
image_to_pixel_values_internvl)
images = [images] if isinstance(images, Image) else images
pixel_values = [
image_to_pixel_values(image, self.image_size, self.min_num,
self.max_num,
self.use_thumbnail).to(self.dtype)
for image in images
image_to_pixel_values_internvl(
image,
input_size=self.image_size,
min_num=self.min_num,
max_num=self.max_num,
use_thumbnail=self.use_thumbnail,
).to(self.dtype) for image in images
]
num_patches_list = [
pixel_value.shape[0] for pixel_value in pixel_values
Expand Down
1 change: 1 addition & 0 deletions tests/models/multimodal/processing/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def _test_processing_correctness(
"facebook/chameleon-7b",
"deepseek-ai/deepseek-vl2-tiny",
"adept/fuyu-8b",
"h2oai/h2ovl-mississippi-800m",
"OpenGVLab/InternVL2-1B",
"llava-hf/llava-1.5-7b-hf",
"llava-hf/llava-v1.6-mistral-7b-hf",
Expand Down
Loading

0 comments on commit ddf55c2

Please sign in to comment.