Commit 17c1826: final

lllljx0316 committed Jan 8, 2025 (initial commit, 0 parents)
Showing 585 changed files with 173,093 additions and 0 deletions.
177 changes: 177 additions & 0 deletions .gitignore
@@ -0,0 +1,177 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject date/other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Project-specific: ignore dataset contents but keep the placeholder file
# so the (otherwise empty) directory stays tracked
dataset/*
!dataset/.keep

# Generated outputs, training runs, and model checkpoints
segres/*
result/*
runs/*
ImageBind/.checkpoints/*
segment-anything-2/checkpoints/*
turtle_tasks/*
*.pth

# FAISS indexes and pickled caption/metadata artifacts
modelApi/trained_index_20.faiss
modelApi/info_dict_new_datasets.pickle
*.faiss
*.pickle
Binary file added .test.log.swp
Binary file not shown.
34 changes: 34 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,34 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python Debugger: Module",
            "type": "debugpy",
            "request": "launch",
            "module": "uvicorn",
            "args": [
                "FaissApi:app",
                "--reload",
                "--host",
                "0.0.0.0",
                "--port",
                "8001"
            ],
            "cwd": "${workspaceFolder}/modelApi/"
        },
        {
            "name": "Python Debugger: Django",
            "type": "debugpy",
            "request": "launch",
            "args": [
                "runserver"
            ],
            "django": true,
            "autoStartBrowser": false,
            "program": "${workspaceFolder}/backend/manage.py"
        }
    ]
}
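The first configuration debugs the retrieval service by launching uvicorn as a module with the FaissApi:app target from modelApi/, reloading on change and listening on 0.0.0.0:8001; the second attaches the debugger to the Django dev server via backend/manage.py runserver. FaissApi itself is not part of this commit, so the following is only a minimal sketch of the entry point the config implies, assuming FastAPI and a placeholder endpoint:

# Hypothetical sketch of modelApi/FaissApi.py; the real module is not shown in this diff.
from fastapi import FastAPI

app = FastAPI()  # uvicorn resolves the "FaissApi:app" target to this object

@app.get("/health")
def health():
    # Placeholder route; the actual service presumably wraps a FAISS index.
    return {"status": "ok"}

Run it the way the launch config does: uvicorn FaissApi:app --reload --host 0.0.0.0 --port 8001 from inside modelApi/.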
81 changes: 81 additions & 0 deletions BLIP.py
@@ -0,0 +1,81 @@
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration, BlipProcessor, BlipForConditionalGeneration
from utils import iterative_all_files
import torch
from functools import partial
import pickle
import os
from pathlib import Path


processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
device = "cuda:1" if torch.cuda.is_available() else "cpu"
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
text_ls = []
text_dict = {}
# prompt= '''Describe a visual style for an image that captures [main subject, e.g., "a serene forest scene"] with a focus on [color palette, e.g., "cool, muted colors like soft blues and greens"]. This style should evoke [emotion or atmosphere, e.g., "tranquility and mystery"], using [lighting, e.g., "soft, diffused lighting that creates gentle shadows"]. Incorporate characteristics of [specific art style or era, if applicable, e.g., "Impressionist paintings, focusing on texture and light play"] for added visual interest. The style should also include any additional elements, e.g., "minimal details in the background to maintain focus on the main subject". The generated style is'''
# prompt = "an element in historical map, which is"
prompt = ""
# processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
# device = "cuda:1" if torch.cuda.is_available() else "cpu"
# model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b").to(device)

# img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'


def blip_model(image_path, text_dict, text=""):
    raw_image = Image.open(image_path).convert('RGB')
    # Conditional image captioning: prepend the prompt text when one is given.
    if text == "":
        inputs = processor(raw_image, return_tensors="pt").to(device)
    else:
        inputs = processor(raw_image, text, return_tensors="pt").to(device)

    out = model.generate(**inputs, max_length=265)
    # print(processor.decode(out[0][155:], skip_special_tokens=True))
    # text_ls.append(processor.decode(out[0]).replace(prompt, "").strip())
    text_dict[image_path.stem] = processor.decode(out[0]).replace(prompt, "").strip()


def cluster_sentences(text_ls):
    pass

# def semantic_filter()


if __name__ == '__main__':
    # Generate a caption for every segmented image, one sub-collection at a time.
    # blip_model("result/segres/Bodleian Library/0ac39b91-cd26-4d05-a47c-5439aef2747d/11.png", "The pattern is")
    print(os.getcwd())

    sub_dataset_dir = Path('result/segres_BLIP_witout_prompt/Ryhiner-Sammlung')

    # List the first-level subdirectories.
    first_level_dirs = [p.name for p in sub_dataset_dir.iterdir() if p.is_dir()]

    for dir in first_level_dirs:
        relative_dir = sub_dataset_dir / dir
        print(relative_dir)
        text_dict = {}
        process_image = partial(blip_model, text=prompt, text_dict=text_dict)
        iterative_all_files(relative_dir, process_image, suffix_filter=[".png"])
        # Drop the whole-map caption and strip the trailing "[SEP]" token from each entry.
        text_dict.pop('final', None)
        for key, value in text_dict.items():
            text_dict[key] = value[:-5]
        print(text_dict)
        with open(relative_dir / 'text_res.pickle', 'wb') as f:
            pickle.dump(text_dict, f)


# with open('./result/textres/text_embedding_style.pickle', 'wb') as pickle_file:
#     pickle.dump(text_ls, pickle_file)
# Generate embeddings:
# text_ls = [s[165:] for s in text_ls]
# print(text_ls)

# visualization

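Both captioning scripts depend on iterative_all_files from the local utils module, which does not appear in this commit. Judging from the call sites (a directory, a per-file callback, and suffix_filter=[".png"]), it evidently walks a directory tree and applies the callback to each matching file; a minimal sketch under that assumption:

# Hypothetical reconstruction of utils.iterative_all_files; the real helper is not in this diff.
from pathlib import Path

def iterative_all_files(root, process, suffix_filter=None):
    # Recursively visit files under root and apply `process` to each whose suffix matches.
    for path in sorted(Path(root).rglob('*')):
        if path.is_file() and (suffix_filter is None or path.suffix in suffix_filter):
            process(path)

Note the callback receives a pathlib.Path, which matches blip_model's use of image_path.stem.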
103 changes: 103 additions & 0 deletions BLIP2.py
@@ -0,0 +1,103 @@

import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration, BlipProcessor, BlipForConditionalGeneration, AutoProcessor
from utils import iterative_all_files
import torch
from functools import partial
import pickle
import os
from pathlib import Path
from tqdm import tqdm
import numpy as np


processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
device = "cuda:1" if torch.cuda.is_available() else "cpu"
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16).to(device)
text_ls = []
text_dict = {}
# prompt= '''Describe a visual style for an image that captures [main subject, e.g., "a serene forest scene"] with a focus on [color palette, e.g., "cool, muted colors like soft blues and greens"]. This style should evoke [emotion or atmosphere, e.g., "tranquility and mystery"], using [lighting, e.g., "soft, diffused lighting that creates gentle shadows"]. Incorporate characteristics of [specific art style or era, if applicable, e.g., "Impressionist paintings, focusing on texture and light play"] for added visual interest. The style should also include any additional elements, e.g., "minimal details in the background to maintain focus on the main subject". The generated style is'''
prompt = "Question: Analyze the image and provide a concise, domain-specific description using terminology from the historical map domain. Answer one word. Answer:"
prompt2 = "Question: Analyze the image and provide a concise, domain-specific description using terminology from the historical map domain. Answer one sentence. Answer:"

# processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
# device = "cuda:1" if torch.cuda.is_available() else "cpu"
# model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b").to(device)

# img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'


def blip_model(image_path, text_dict):
    raw_image = Image.open(image_path).convert('RGB')
    # Conditional image captioning: try the one-word prompt first.
    # Cast inputs to float16 to match the model's torch_dtype.
    if prompt == "":
        inputs = processor(raw_image, return_tensors="pt").to(device, torch.float16)
    else:
        inputs = processor(raw_image, prompt, return_tensors="pt").to(device, torch.float16)

    generated_ids = model.generate(**inputs, max_new_tokens=100)
    # text_dict[image_path.stem] = processor.decode(out[0]).replace(prompt, "").strip()
    now_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    # Fall back to the one-sentence prompt when the one-word prompt yields nothing.
    if now_text == '':
        inputs = processor(raw_image, prompt2, return_tensors="pt").to(device, torch.float16)
        generated_ids = model.generate(**inputs, max_new_tokens=100)
        now_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

    text_dict[image_path.stem] = now_text


def cluster_sentences(text_ls):
    pass

# def semantic_filter()


def sub_dataset_process(sub_dataset_dir: Path):
    if sub_dataset_dir.name in ["Bibliotheque Nationale de France", "Ryhiner-Sammlung"]:
        return
    # List the first-level subdirectories.
    first_level_dirs = [p.name for p in sub_dataset_dir.iterdir() if p.is_dir()]
    # Randomly sample about 5% of the directories (drawn with replacement, so duplicates are possible).
    total_elements = len(first_level_dirs)
    # mask_ls = np.zeros(total_elements, dtype=int)
    first_level_dirs = np.array(first_level_dirs)[np.random.choice(total_elements, int(total_elements * 0.05) + 1)]

    for dir in tqdm(first_level_dirs):
        relative_dir = sub_dataset_dir / dir
        print(relative_dir)
        if (relative_dir / 'text_res.pickle').exists():
            continue
        text_dict = {}

        process_image = partial(blip_model, text_dict=text_dict)
        iterative_all_files(relative_dir, process_image, suffix_filter=[".png"])

        # Drop the whole-map caption; BLIP-2 output needs no [SEP] trimming.
        text_dict.pop('final', None)
        print(text_dict)
        with open(relative_dir / 'text_res.pickle', 'wb') as f:
            pickle.dump(text_dict, f)


if __name__ == '__main__':
    # Caption a sampled subset of every sub-collection under result/segres/.
    # blip_model("result/segres/Bodleian Library/0ac39b91-cd26-4d05-a47c-5439aef2747d/11.png", "The pattern is")
    print(os.getcwd())

    dataset_dir = Path('result/segres/')
    subfolders = [folder for folder in dataset_dir.iterdir() if folder.is_dir()]
    for subfolder in subfolders:
        sub_dataset_process(subfolder)


# with open('./result/textres/text_embedding_style.pickle', 'wb') as pickle_file:
#     pickle.dump(text_ls, pickle_file)
# Generate embeddings:
# text_ls = [s[165:] for s in text_ls]
# print(text_ls)

# visualization

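Each processed segment directory ends up with a text_res.pickle mapping segment stems to generated captions. A short consumer sketch (the path below reuses the example directory from the commented-out call above and is illustrative only):

# Reading back one of the text_res.pickle files written by BLIP.py / BLIP2.py.
import pickle
from pathlib import Path

res_path = Path('result/segres/Bodleian Library/0ac39b91-cd26-4d05-a47c-5439aef2747d/text_res.pickle')
with open(res_path, 'rb') as f:
    captions = pickle.load(f)  # dict: segment stem -> caption string

for stem, caption in sorted(captions.items()):
    print(f"{stem}: {caption}")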
Binary file added ImageBind/.assets/bird_audio.wav
Binary file not shown.
Binary file added ImageBind/.assets/bird_image.jpg
Binary file not shown.
Binary file added ImageBind/.assets/car_audio.wav
Binary file not shown.
Binary file added ImageBind/.assets/car_image.jpg
Binary file not shown.
Binary file added ImageBind/.assets/dog_audio.wav
Binary file not shown.
Binary file added ImageBind/.assets/dog_image.jpg
Binary file not shown.
