Commit 17c1826: final

lllljx0316 committed Jan 8, 2025 (initial commit, 0 parents)
Showing 585 changed files with 173,093 additions and 0 deletions.
177 changes: 177 additions & 0 deletions .gitignore
@@ -0,0 +1,177 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject date/other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Project-specific: ignore dataset contents but keep the placeholder file
# so the (otherwise empty) directory stays tracked
dataset/*
!dataset/.keep

# Generated outputs, training runs, and model checkpoints
segres/*
result/*
runs/*
ImageBind/.checkpoints/*
segment-anything-2/checkpoints/*
turtle_tasks/*
*.pth

# FAISS indexes and pickled caption/metadata artifacts
modelApi/trained_index_20.faiss
modelApi/info_dict_new_datasets.pickle
*.faiss
*.pickle
Binary file added .test.log.swp
Binary file not shown.
34 changes: 34 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,34 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python Debugger: Module",
            "type": "debugpy",
            "request": "launch",
            "module": "uvicorn",
            "args": [
                "FaissApi:app",
                "--reload",
                "--host",
                "0.0.0.0",
                "--port",
                "8001"
            ],
            "cwd": "${workspaceFolder}/modelApi/"
        },
        {
            "name": "Python Debugger: Django",
            "type": "debugpy",
            "request": "launch",
            "args": [
                "runserver"
            ],
            "django": true,
            "autoStartBrowser": false,
            "program": "${workspaceFolder}/backend/manage.py"
        }
    ]
}
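The first configuration debugs the retrieval service by launching uvicorn as a module with the FaissApi:app target from modelApi/, reloading on change and listening on 0.0.0.0:8001; the second attaches the debugger to the Django dev server via backend/manage.py runserver. FaissApi itself is not part of this commit, so the following is only a minimal sketch of the entry point the config implies, assuming FastAPI and a placeholder endpoint:

# Hypothetical sketch of modelApi/FaissApi.py; the real module is not shown in this diff.
from fastapi import FastAPI

app = FastAPI()  # uvicorn resolves the "FaissApi:app" target to this object

@app.get("/health")
def health():
    # Placeholder route; the actual service presumably wraps a FAISS index.
    return {"status": "ok"}

Run it the way the launch config does: uvicorn FaissApi:app --reload --host 0.0.0.0 --port 8001 from inside modelApi/.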
81 changes: 81 additions & 0 deletions BLIP.py
@@ -0,0 +1,81 @@
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration, BlipProcessor, BlipForConditionalGeneration
from utils import iterative_all_files
import torch
from functools import partial
import pickle
import os
from pathlib import Path


processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
device = "cuda:1" if torch.cuda.is_available() else "cpu"
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
text_ls = []
text_dict = {}
# prompt= '''Describe a visual style for an image that captures [main subject, e.g., "a serene forest scene"] with a focus on [color palette, e.g., "cool, muted colors like soft blues and greens"]. This style should evoke [emotion or atmosphere, e.g., "tranquility and mystery"], using [lighting, e.g., "soft, diffused lighting that creates gentle shadows"]. Incorporate characteristics of [specific art style or era, if applicable, e.g., "Impressionist paintings, focusing on texture and light play"] for added visual interest. The style should also include any additional elements, e.g., "minimal details in the background to maintain focus on the main subject". The generated style is'''
# prompt = "an element in historical map, which is"
prompt = ""
# processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
# device = "cuda:1" if torch.cuda.is_available() else "cpu"
# model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b").to(device)

# img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'


def blip_model(image_path, text_dict, text=""):
    raw_image = Image.open(image_path).convert('RGB')
    # Conditional image captioning: prepend the prompt text when one is given.
    if text == "":
        inputs = processor(raw_image, return_tensors="pt").to(device)
    else:
        inputs = processor(raw_image, text, return_tensors="pt").to(device)

    out = model.generate(**inputs, max_length=265)
    # print(processor.decode(out[0][155:], skip_special_tokens=True))
    # text_ls.append(processor.decode(out[0]).replace(prompt, "").strip())
    text_dict[image_path.stem] = processor.decode(out[0]).replace(prompt, "").strip()


def cluster_sentences(text_ls):
    pass

# def semantic_filter()


if __name__ == '__main__':
    # Generate a caption for every segmented image, one sub-collection at a time.
    # blip_model("result/segres/Bodleian Library/0ac39b91-cd26-4d05-a47c-5439aef2747d/11.png", "The pattern is")
    print(os.getcwd())

    sub_dataset_dir = Path('result/segres_BLIP_witout_prompt/Ryhiner-Sammlung')

    # List the first-level subdirectories.
    first_level_dirs = [p.name for p in sub_dataset_dir.iterdir() if p.is_dir()]

    for dir in first_level_dirs:
        relative_dir = sub_dataset_dir / dir
        print(relative_dir)
        text_dict = {}
        process_image = partial(blip_model, text=prompt, text_dict=text_dict)
        iterative_all_files(relative_dir, process_image, suffix_filter=[".png"])
        # Drop the whole-map caption and strip the trailing "[SEP]" token from each entry.
        text_dict.pop('final', None)
        for key, value in text_dict.items():
            text_dict[key] = value[:-5]
        print(text_dict)
        with open(relative_dir / 'text_res.pickle', 'wb') as f:
            pickle.dump(text_dict, f)


# with open('./result/textres/text_embedding_style.pickle', 'wb') as pickle_file:
#     pickle.dump(text_ls, pickle_file)
# Generate embeddings:
# text_ls = [s[165:] for s in text_ls]
# print(text_ls)

# visualization

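Both captioning scripts depend on iterative_all_files from the local utils module, which does not appear in this commit. Judging from the call sites (a directory, a per-file callback, and suffix_filter=[".png"]), it evidently walks a directory tree and applies the callback to each matching file; a minimal sketch under that assumption:

# Hypothetical reconstruction of utils.iterative_all_files; the real helper is not in this diff.
from pathlib import Path

def iterative_all_files(root, process, suffix_filter=None):
    # Recursively visit files under root and apply `process` to each whose suffix matches.
    for path in sorted(Path(root).rglob('*')):
        if path.is_file() and (suffix_filter is None or path.suffix in suffix_filter):
            process(path)

Note the callback receives a pathlib.Path, which matches blip_model's use of image_path.stem.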
103 changes: 103 additions & 0 deletions BLIP2.py
@@ -0,0 +1,103 @@

import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration, BlipProcessor, BlipForConditionalGeneration, AutoProcessor
from utils import iterative_all_files
import torch
from functools import partial
import pickle
import os
from pathlib import Path
from tqdm import tqdm
import numpy as np


processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
device = "cuda:1" if torch.cuda.is_available() else "cpu"
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16).to(device)
text_ls = []
text_dict = {}
# prompt= '''Describe a visual style for an image that captures [main subject, e.g., "a serene forest scene"] with a focus on [color palette, e.g., "cool, muted colors like soft blues and greens"]. This style should evoke [emotion or atmosphere, e.g., "tranquility and mystery"], using [lighting, e.g., "soft, diffused lighting that creates gentle shadows"]. Incorporate characteristics of [specific art style or era, if applicable, e.g., "Impressionist paintings, focusing on texture and light play"] for added visual interest. The style should also include any additional elements, e.g., "minimal details in the background to maintain focus on the main subject". The generated style is'''
prompt = "Question: Analyze the image and provide a concise, domain-specific description using terminology from the historical map domain. Answer one word. Answer:"
prompt2 = "Question: Analyze the image and provide a concise, domain-specific description using terminology from the historical map domain. Answer one sentence. Answer:"

# processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
# device = "cuda:1" if torch.cuda.is_available() else "cpu"
# model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b").to(device)

# img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'


def blip_model(image_path, text_dict):
    raw_image = Image.open(image_path).convert('RGB')
    # Conditional image captioning: try the one-word prompt first.
    # Cast inputs to float16 to match the model's torch_dtype.
    if prompt == "":
        inputs = processor(raw_image, return_tensors="pt").to(device, torch.float16)
    else:
        inputs = processor(raw_image, prompt, return_tensors="pt").to(device, torch.float16)

    generated_ids = model.generate(**inputs, max_new_tokens=100)
    # text_dict[image_path.stem] = processor.decode(out[0]).replace(prompt, "").strip()
    now_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    # Fall back to the one-sentence prompt when the one-word prompt yields nothing.
    if now_text == '':
        inputs = processor(raw_image, prompt2, return_tensors="pt").to(device, torch.float16)
        generated_ids = model.generate(**inputs, max_new_tokens=100)
        now_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

    text_dict[image_path.stem] = now_text


def cluster_sentences(text_ls):
    pass

# def semantic_filter()


def sub_dataset_process(sub_dataset_dir: Path):
    if sub_dataset_dir.name in ["Bibliotheque Nationale de France", "Ryhiner-Sammlung"]:
        return
    # List the first-level subdirectories.
    first_level_dirs = [p.name for p in sub_dataset_dir.iterdir() if p.is_dir()]
    # Randomly sample about 5% of the directories (drawn with replacement, so duplicates are possible).
    total_elements = len(first_level_dirs)
    # mask_ls = np.zeros(total_elements, dtype=int)
    first_level_dirs = np.array(first_level_dirs)[np.random.choice(total_elements, int(total_elements * 0.05) + 1)]

    for dir in tqdm(first_level_dirs):
        relative_dir = sub_dataset_dir / dir
        print(relative_dir)
        if (relative_dir / 'text_res.pickle').exists():
            continue
        text_dict = {}

        process_image = partial(blip_model, text_dict=text_dict)
        iterative_all_files(relative_dir, process_image, suffix_filter=[".png"])

        # Drop the whole-map caption; BLIP-2 output needs no [SEP] trimming.
        text_dict.pop('final', None)
        print(text_dict)
        with open(relative_dir / 'text_res.pickle', 'wb') as f:
            pickle.dump(text_dict, f)


if __name__ == '__main__':
    # Caption a sampled subset of every sub-collection under result/segres/.
    # blip_model("result/segres/Bodleian Library/0ac39b91-cd26-4d05-a47c-5439aef2747d/11.png", "The pattern is")
    print(os.getcwd())

    dataset_dir = Path('result/segres/')
    subfolders = [folder for folder in dataset_dir.iterdir() if folder.is_dir()]
    for subfolder in subfolders:
        sub_dataset_process(subfolder)


# with open('./result/textres/text_embedding_style.pickle', 'wb') as pickle_file:
#     pickle.dump(text_ls, pickle_file)
# Generate embeddings:
# text_ls = [s[165:] for s in text_ls]
# print(text_ls)

# visualization

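Each processed segment directory ends up with a text_res.pickle mapping segment stems to generated captions. A short consumer sketch (the path below reuses the example directory from the commented-out call above and is illustrative only):

# Reading back one of the text_res.pickle files written by BLIP.py / BLIP2.py.
import pickle
from pathlib import Path

res_path = Path('result/segres/Bodleian Library/0ac39b91-cd26-4d05-a47c-5439aef2747d/text_res.pickle')
with open(res_path, 'rb') as f:
    captions = pickle.load(f)  # dict: segment stem -> caption string

for stem, caption in sorted(captions.items()):
    print(f"{stem}: {caption}")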
Binary file added ImageBind/.assets/bird_audio.wav
Binary file not shown.
Binary file added ImageBind/.assets/bird_image.jpg
Binary file not shown.
Binary file added ImageBind/.assets/car_audio.wav
Binary file not shown.
Binary file added ImageBind/.assets/car_image.jpg
Binary file not shown.
Binary file added ImageBind/.assets/dog_audio.wav
Binary file not shown.
Binary file added ImageBind/.assets/dog_image.jpg
Binary file not shown.
