Skip to content

Commit

Permalink
rename path for ChatRex
Browse files Browse the repository at this point in the history
  • Loading branch information
Mountchicken committed Nov 26, 2024
1 parent 40e6401 commit 0d29204
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 41 deletions.
23 changes: 11 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
----

# 1. Introduction 📚
**TL;DR: ChatRex is a MLLM skilled in perception that can respond to questions while simultaneously grounding its answers to the referenced objects.**
**TL;DR: ChatRex is an MLLM skilled in perception that can respond to questions while simultaneously grounding its answers to the referenced objects.**

ChatRex is a Multimodal Large Language Model (MLLM) designed to seamlessly integrate fine-grained object perception and robust language understanding. By adopting a decoupled architecture with a retrieval-based approach for object detection and leveraging high-resolution visual inputs, ChatRex addresses key challenges in perception tasks. It is powered by the Rexverse-2M dataset with diverse image-region-text annotations. ChatRex can be applied to various scenarios requiring fine-grained perception, such as object detection, grounded conversation, grounded image captioning and region
understanding.
Expand All @@ -45,16 +45,15 @@ pip install -v -e .

## 2.1 Download Pre-trained Models
We provide model checkpoints for both the ***Universal Proposal Network (UPN)*** and the ***ChatRex model***. You can download the pre-trained models from the following links:
- [UPN Checkpoint](https://drive.google)
- [UPN Checkpoint](https://github.com/IDEA-Research/ChatRex/releases/download/upn-large/upn_large.pth)
- [ChatRex-7B Checkpoint](https://huggingface.co/IDEA-Research/ChatRex-7B)

Or you can also use the following command to download the pre-trained models:
```bash
mkdir checkpoints
mkdir checkpoints/upn
# download UPN checkpoint
wget -O checkpoints/upn/upn_large.pth https://drive.google.com/file/d/
# download ChatRex checkpoint from huggingface IDEA-Research/ChatRex-7B
wget -O checkpoints/upn/upn_large.pth https://github.com/IDEA-Research/ChatRex/releases/download/upn-large/upn_large.pth
# Download ChatRex checkpoint from Hugging Face
git lfs install
git clone https://huggingface.co/IDEA-Research/ChatRex-7B checkpoints/chatrex
Expand Down Expand Up @@ -190,15 +189,15 @@ from chatrex.upn import UPNWrapper
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down Expand Up @@ -308,15 +307,15 @@ from chatrex.upn import UPNWrapper
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down Expand Up @@ -402,15 +401,15 @@ from chatrex.upn import UPNWrapper
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down Expand Up @@ -506,15 +505,15 @@ from chatrex.upn import UPNWrapper
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down
4 changes: 2 additions & 2 deletions gradio_demos/chatrex_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
from chatrex.upn import UPNWrapper

processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down
4 changes: 2 additions & 2 deletions tests/test_chatrex_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down
4 changes: 2 additions & 2 deletions tests/test_chatrex_grounded_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down
4 changes: 2 additions & 2 deletions tests/test_chatrex_grounded_image_caption.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down
37 changes: 18 additions & 19 deletions tests/test_chatrex_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,38 @@
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

# load the model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to('cuda')
).to("cuda")

inputs = processor.process(
image=Image.open(
'tests/images/test_chatrex_install.jpg'
),
image=Image.open("tests/images/test_chatrex_install.jpg"),
question="Can you provide me with a brief description of <obj0>?",
bbox=[[73.88417,56.62228,227.69223,216.34338]] # box in xyxy format
bbox=[[73.88417, 56.62228, 227.69223, 216.34338]], # box in xyxy format
)


inputs = {k: v.to("cuda") for k, v in inputs.items()}

# perform inference
gen_config = GenerationConfig(
max_new_tokens=512,
do_sample=False,
eos_token_id=processor.tokenizer.eos_token_id,
pad_token_id=(
processor.tokenizer.pad_token_id
if processor.tokenizer.pad_token_id is not None
else processor.tokenizer.eos_token_id
))
prediction = model.generate(inputs, gen_config=gen_config, tokenizer=processor.tokenizer)
print(f'prediction:', prediction)

max_new_tokens=512,
do_sample=False,
eos_token_id=processor.tokenizer.eos_token_id,
pad_token_id=(
processor.tokenizer.pad_token_id
if processor.tokenizer.pad_token_id is not None
else processor.tokenizer.eos_token_id
),
)
prediction = model.generate(
inputs, gen_config=gen_config, tokenizer=processor.tokenizer
)
print(f"prediction:", prediction)
4 changes: 2 additions & 2 deletions tests/test_chatrex_region_caption.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
if __name__ == "__main__":
# load the processor
processor = AutoProcessor.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
device_map="cuda",
)

print(f"loading chatrex model...")
# load chatrex model
model = AutoModelForCausalLM.from_pretrained(
"checkpoints/chatrex7b",
"IDEA-Research/ChatRex-7B",
trust_remote_code=True,
use_safetensors=True,
).to("cuda")
Expand Down

0 comments on commit 0d29204

Please sign in to comment.