diff --git a/cli/README.md b/cli/README.md
deleted file mode 100644
index c049f0d..0000000
--- a/cli/README.md
+++ /dev/null
@@ -1,199 +0,0 @@
-# Face Dataset
-## Sample Label of 3rd Tools
-
-Facepp
-
-```yaml
-{
- "request_id": "1699959145,1bb349e4-c83f-4a39-ad4e-8dcdc0cd34fb",
- "time_used": 0,
- "faces": [
- {
- "face_token": "974b332135a90dd90ac8c685fa983a69",
- "face_rectangle": {
- "top": 158,
- "left": 66,
- "width": 191,
- "height": 191
- },
- "attributes": {
- "gender": {
- "value": "Female"
- },
- "age": {
- "value": 3
- },
- "headpose": {
- "pitch_angle": 6.6730194,
- "roll_angle": -2.4714718,
- "yaw_angle": 3.7413023
- },
- "eyestatus": {
- "left_eye_status": {
- "no_glass_eye_open": 99.357,
- "no_glass_eye_close": 0,
- "normal_glass_eye_open": 0.635,
- "normal_glass_eye_close": 0.001,
- "dark_glasses": 0,
- "occlusion": 0.007
- },
- "right_eye_status": {
- "no_glass_eye_open": 99.763,
- "no_glass_eye_close": 0.011,
- "normal_glass_eye_open": 0.054,
- "normal_glass_eye_close": 0.005,
- "dark_glasses": 0.01,
- "occlusion": 0.158
- }
- },
- "emotion": {
- "anger": 22.269,
- "disgust": 1.227,
- "fear": 0.41,
- "happiness": 9.247,
- "neutral": 12.255,
- "sadness": 54.181,
- "surprise": 0.41
- },
- "facequality": {
- "value": 90.754,
- "threshold": 70.1
- },
- "ethnicity": {
- "value": ""
- },
- "beauty": {
- "male_score": 52.808,
- "female_score": 47.62
- },
- "mouthstatus": {
- "surgical_mask_or_respirator": 0,
- "other_occlusion": 0.004,
- "close": 0.245,
- "open": 99.751
- },
- "glass": {
- "value": "None"
- }
- }
- }
- ],
- "image_id": "zGJ7eeUqKzu0NJBxIodt7w==",
- "face_num": 1
-}
-```
-
-
-
-
-Baidu
-
-```yaml
-{
- "face_list": [
- {
- "face_token": "2df98fc10747d685e4dd4794f91a9c26",
- "location": {
- "left": 18.03,
- "top": 41.28,
- "width": 88,
- "height": 74,
- "rotation": -2
- },
- "face_probability": 1,
- "angle": {
- "yaw": 5.83,
- "pitch": 2.12,
- "roll": -3.13
- },
- "age": 33,
- "expression": {
- "type": "none",
- "probability": 1
- },
- "face_shape": {
- "type": "square",
- "probability": 0.73
- },
- "gender": {
- "type": "male",
- "probability": 0.99
- },
- "glasses": {
- "type": "none",
- "probability": 1
- },
- "landmark150": {},
- "quality": {
- "occlusion": {
- "left_eye": 0,
- "right_eye": 0,
- "nose": 0,
- "mouth": 0,
- "left_cheek": 0.42,
- "right_cheek": 0.52,
- "chin_contour": 0.8
- },
- "blur": 0,
- "illumination": 129,
- "completeness": 1
- },
- "emotion": {
- "type": "neutral",
- "probability": 0.83
- },
- "mask": {
- "type": 0,
- "probability": 0.96
- }
- }
- ],
- "face_num": 1
-}
-```
-
-
-
-## Write LMDB with All-Age-Faces Dataset
-### Structure Dataset
-```
-├── aglined faces
-│ ├── 00000A02.jpg
-│ ├── 00001A02.jpg
-│ ├── ...
-├── example
-├── image sets
-│ ├── train.txt
-│ ├── val.txt
-├── key points
-├── original images
-│ ├── 00000A02.jpg
-│ ├── 00001A02.jpg
-│ ├── ...
-```
-Origin label of dataset get from "image sets/train.txt" and "image sets/val.txt"
-### CMD
-```shell
-bash cli/aaf.sh
-```
-
-## Write LMDB with AFAD
-### Structure Dataset
-```
-├── 15
-│ ├── 111
-│ | ├── 292943-1.jpg
-│ | ├── 292943-2.jpg
-│ ├── 112
-│ | ├── 671487-0.jpg
-│ | ├── 660728-0.jpg
-├── ...
-├── 75
-├── README.md
-├── AFAD-Full.txt
-```
-Origin label of dataset get from "AFAD-Full.txt"
-### CMD
-```shell
-bash cli/afad.sh
-```
diff --git a/cli/dataset_loaders.py b/cli/dataset_loaders.py
deleted file mode 100644
index 79b2909..0000000
--- a/cli/dataset_loaders.py
+++ /dev/null
@@ -1,397 +0,0 @@
-import os
-import re
-import warnings
-
-from glob import glob
-from typing import Any, Dict, Generator, List, Optional, Tuple
-
-from lmdbsystem.dataloader import DataLoader
-from lmdbsystem.utils import (
- csv_line_reader,
- dump_pickle,
- get_md5_file,
- get_relative_path,
- json_reader,
- normalize_path,
- raw_reader,
- removesuffix_path,
- str2bytes,
- text_line_reader,
- text_reader,
-)
-
-
-class ImageLoader(DataLoader):
- def __init__(
- self,
- directory: str,
- suffix: str,
- fn_md5_mode: str,
- fn_md5_path: str,
- ):
- self.directory = directory
- self.suffix = suffix
- self.fn_md5_mode = fn_md5_mode
- self.fn_md5_path = fn_md5_path
- if fn_md5_mode == "r":
- self.dict_filename_md5 = json_reader(fn_md5_path)
- elif fn_md5_mode == "w":
- self.dict_filename_md5 = {}
- else:
- raise ValueError(f"Don't support fn_md5_mode: {fn_md5_mode}")
- self.file_paths = sorted(glob(f"{directory}/**/*{suffix}", recursive=True))
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- yield self[file_path]
-
- def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
- filename = get_relative_path(self.directory, file_path).removesuffix(self.suffix)
- value = raw_reader(file_path)
- if self.fn_md5_mode == "r":
- md5_file = self.dict_filename_md5[filename]
- value = dump_pickle((str2bytes(get_md5_file(file_path)), value))
- else:
- md5_file = get_md5_file(file_path)
- self.dict_filename_md5[filename] = md5_file
- key = str2bytes(md5_file)
-
- return key, value
-
-
-class FaceppLoader(DataLoader):
- def __init__(
- self, directory: str, suffix: str, fn_md5_path: str, keys_extracted: List[str], values_map: Dict[str, str]
- ):
- self.directory = directory
- self.suffix = suffix
- self.keys_extracted = keys_extracted
- self.values_map = values_map
- self.dict_filename_md5 = json_reader(fn_md5_path)
- self.file_paths = sorted(glob(f"{directory}/**/*{suffix}", recursive=True))
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- yield self[file_path]
-
- def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
- md5_file = self.dict_filename_md5[get_relative_path(self.directory, file_path).removesuffix(self.suffix)]
- key = str2bytes(md5_file)
- sub_key = str2bytes(get_md5_file(file_path))
-
- data = json_reader(file_path)
- if data["face_num"] == 0:
- return None, None
-
- attribute = data["faces"][0]["attributes"]
- for _key in self.keys_extracted:
- attribute = attribute[_key]
-
- if self.values_map:
- attribute = self.values_map.get(attribute, attribute)
-
- value = dump_pickle((sub_key, str2bytes(str(attribute))))
- return key, value
-
-
-class BaiduLoader(DataLoader):
- def __init__(
- self,
- directory: str,
- suffix: str,
- fn_md5_path: str,
- keys_extracted: List[str],
- key_probability: Optional[float],
- values_map: Optional[Dict[str, str]],
- ):
- self.directory = directory
- self.suffix = suffix
- self.keys_extracted = keys_extracted
- self.key_probability = key_probability
- self.values_map = values_map
- self.dict_filename_md5 = json_reader(fn_md5_path)
- self.file_paths = sorted(glob(f"{directory}/**/*{suffix}", recursive=True))
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- yield self[file_path]
-
- def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
- md5_file = self.dict_filename_md5[get_relative_path(self.directory, file_path).removesuffix(self.suffix)]
- key = str2bytes(md5_file)
- sub_key = str2bytes(get_md5_file(file_path))
-
- data = json_reader(file_path)
- if data["face_num"] == 0:
- return None, None
-
- attribute = data["face_list"][0]
- for _key in self.keys_extracted:
- if "probability" in attribute and attribute["probability"] < self.key_probability:
- attribute = None
- break
- attribute = attribute[_key]
-
- if attribute is None:
- return None, None
-
- if self.values_map:
- attribute = self.values_map.get(attribute, attribute)
-
- value = dump_pickle((sub_key, str2bytes(str(attribute))))
- return key, value
-
-
-class VisageLoader(DataLoader):
- def __init__(
- self, directory: str, suffix: str, fn_md5_path: str, keys_extracted: List[str], values_map: Dict[str, str]
- ):
- self.directory = directory
- self.suffix = suffix
- self.keys_extracted = keys_extracted
- self.values_map = values_map
- self.dict_filename_md5 = json_reader(fn_md5_path)
- self.file_paths = sorted(glob(f"{directory}/**/*{suffix}", recursive=True))
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- yield self[file_path]
-
- def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
- md5_file = self.dict_filename_md5[get_relative_path(self.directory, file_path).removesuffix(self.suffix)]
- key = str2bytes(md5_file)
- sub_key = str2bytes(get_md5_file(file_path))
-
- data = json_reader(file_path)
- if not data:
- return None, None
-
- attribute = data[self.keys_extracted[0]]
-
- if self.values_map:
- attribute = self.values_map.get(attribute, attribute)
-
- value = dump_pickle((sub_key, str2bytes(str(attribute))))
- return key, value
-
-
-class LabelInFilenameLoader(DataLoader):
- def __init__(
- self,
- directory: str,
- suffix: str,
- fn_md5_path: str,
- values_index: List[int],
- values_map: Dict[str, str],
- delimiter: str,
- ):
- self.directory = directory
- self.suffix = suffix
- self.values_map = values_map
- self.delimiter = delimiter
- self.values_index = values_index
- self.dict_filename_md5 = json_reader(fn_md5_path)
- self.file_paths = sorted(glob(f"{directory}/**/*{suffix}", recursive=True))
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- yield self[file_path]
-
- def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
- md5_file = self.dict_filename_md5[get_relative_path(self.directory, file_path).removesuffix(self.suffix)]
- key = str2bytes(md5_file)
- sub_key = str2bytes(get_md5_file(file_path))
-
- line_values = os.path.basename(file_path).removesuffix(self.suffix).split(self.delimiter)
- labels = [value.strip() for index, value in enumerate(line_values) if index in self.values_index]
-
- if self.values_map:
- if "type" in self.values_map:
- value_type = self.values_map["type"]
- labels = [str(eval(value_type)(item)) for item in labels]
- else:
- labels = [self.values_map.get(item, item) for item in labels]
-
- value = dump_pickle((sub_key, str2bytes(" ".join(labels))))
- return key, value
-
-
-class LabelInTxtLoader(DataLoader):
- def __init__(
- self,
- directory: str,
- suffix: str,
- fn_md5_path: str,
- values_index: List[int],
- values_map: Dict[str, str],
- delimiter: str,
- ):
- self.directory = directory
- self.suffix = suffix
- self.values_map = values_map
- self.delimiter = delimiter
- self.values_index = values_index
- self.dict_filename_md5 = json_reader(fn_md5_path)
- self.file_paths = sorted(glob(f"{directory}/**/*{suffix}", recursive=True))
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- yield self[file_path]
-
- def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
- md5_file = self.dict_filename_md5[get_relative_path(self.directory, file_path).removesuffix(self.suffix)]
- key = str2bytes(md5_file)
- sub_key = str2bytes(get_md5_file(file_path))
-
- line_values = text_reader(file_path).split(self.delimiter)
- labels = [value.strip() for index, value in enumerate(line_values) if index in self.values_index]
-
- if self.values_map:
- if "type" in self.values_map:
- value_type = self.values_map["type"]
- labels = [str(eval(value_type)(item)) for item in labels]
- else:
- labels = [self.values_map.get(item, item) for item in labels]
-
- value = dump_pickle((sub_key, str2bytes(" ".join(labels))))
- return key, value
-
-
-class LabelInSomeCsvLoader(DataLoader):
- def __init__(
- self,
- file_paths: str,
- fn_md5_path: str,
- key_index: int,
- values_index: List[int],
- values_map: Dict[str, str],
- delimiter: str,
- skip_header: bool = False,
- ):
- self.file_paths = file_paths
- self.key_index = key_index
- self.values_index = values_index
- self.values_map = values_map
- self.delimiter = delimiter
- self.skip_header = skip_header
- self.dict_filename_md5 = json_reader(fn_md5_path)
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- sub_key = str2bytes(get_md5_file(file_path))
- for line_values in csv_line_reader(file_path, self.delimiter, self.skip_header):
- yield self[(sub_key, line_values)]
-
- def __getitem__(self, item: Any) -> Tuple[Optional[bytes], Optional[bytes]]:
- sub_key, line_values = item
- filename = removesuffix_path(normalize_path(line_values[self.key_index]))
- if filename not in self.dict_filename_md5:
- warnings.warn(f"File {filename} not in image folder")
- return None, None
- md5_file = self.dict_filename_md5[filename]
- key = str2bytes(md5_file)
-
- labels = [value.strip() for index, value in enumerate(line_values) if index in self.values_index]
-
- if self.values_map:
- if "type" in self.values_map:
- value_type = self.values_map["type"]
- labels = [str(eval(value_type)(item)) for item in labels]
- else:
- labels = [self.values_map.get(item, item) for item in labels]
-
- value = dump_pickle((sub_key, str2bytes(" ".join(labels))))
- return key, value
-
-
-class LabelInSomeTxtLoader(DataLoader):
- def __init__(
- self,
- file_paths: str,
- fn_md5_path: str,
- key_index: int,
- values_index: List[int],
- values_map: Dict[str, str],
- delimiter: str,
- ):
- self.file_paths = file_paths
- self.key_index = key_index
- self.values_index = values_index
- self.values_map = values_map
- self.delimiter = delimiter
- self.dict_filename_md5 = json_reader(fn_md5_path)
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- sub_key = str2bytes(get_md5_file(file_path))
- for line in text_line_reader(file_path):
- yield self[(sub_key, line)]
-
- def __getitem__(self, item: Any) -> Tuple[Optional[bytes], Optional[bytes]]:
- sub_key, line = item
- line_values = line.split(self.delimiter)
-
- filename = removesuffix_path(normalize_path(line_values[self.key_index]))
- if filename not in self.dict_filename_md5:
- warnings.warn(f"File {filename} not in image folder")
- return None, None
- md5_file = self.dict_filename_md5[filename]
- key = str2bytes(md5_file)
-
- labels = [value.strip() for index, value in enumerate(line_values) if index in self.values_index]
-
- if self.values_map:
- if "type" in self.values_map:
- value_type = self.values_map["type"]
- labels = [str(eval(value_type)(item)) for item in labels]
- else:
- labels = [self.values_map.get(item, item) for item in labels]
-
- value = dump_pickle((sub_key, str2bytes(" ".join(labels))))
- return key, value
-
-
-class LabelInKeyInSomeTxtLoader(DataLoader):
- def __init__(
- self,
- file_paths: str,
- fn_md5_path: str,
- key_index: int,
- pattern_value_in_key: str,
- type_value_in_key: str,
- values_map: Dict[str, str],
- delimiter: str,
- ):
- self.file_paths = file_paths
- self.key_index = key_index
- self.pattern = re.compile(pattern_value_in_key)
- self.value_type_of_key = type_value_in_key
- self.values_map = values_map
- self.delimiter = delimiter
- self.dict_filename_md5 = json_reader(fn_md5_path)
-
- def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
- for file_path in self.file_paths:
- sub_key = str2bytes(get_md5_file(file_path))
- for line in text_line_reader(file_path):
- yield self[(sub_key, line)]
-
- def __getitem__(self, item: Any) -> Tuple[Optional[bytes], Optional[bytes]]:
- sub_key, line = item
- line_values = line.split(self.delimiter)
-
- filename = removesuffix_path(normalize_path(line_values[self.key_index]))
- if filename not in self.dict_filename_md5:
- warnings.warn(f"File {filename} not in image folder")
- return None, None
- md5_file = self.dict_filename_md5[filename]
- key = str2bytes(md5_file)
-
- res = self.pattern.search(line_values[self.key_index])
- labels = [str(eval(self.value_type_of_key)(res.group(1)))]
-
- if self.values_map:
- labels = [self.values_map.get(item, item) for item in labels]
-
- value = dump_pickle((sub_key, str2bytes(" ".join(labels))))
- return key, value
diff --git a/cli/write_image.py b/cli/write_image.py
deleted file mode 100644
index 87f8297..0000000
--- a/cli/write_image.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import argparse
-
-from dataset_loaders import ImageLoader
-
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.image import ImageWriteAdapter
-
-
-def get_argument():
- parser = argparse.ArgumentParser(description="Convert pdf file to text detection and recognition label.")
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument("--folder", type=str, help="Directory to containing the image file", required=True)
-
- parser.add_argument("--suffix", default=".jpg", type=str, help="The suffix of image file")
-
- parser.add_argument(
- "--lmdb-map-size", default=32212254720, type=int, help="Map size to dump lmdb file, default 30GB" # 30GB
- )
-
- parser.add_argument(
- "--fn-md5-mode",
- type=str,
- help='The mode of handle with filename_to_md5 file. Only support ["r", "w"] mode',
- required=True,
- )
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
-
- lmdb_obj = Lmdb(ImageWriteAdapter(path=args.lmdb_file, map_size=args.lmdb_map_size))
- lmdb_obj.write_loader(
- ImageLoader(
- directory=args.folder,
- suffix=args.suffix,
- fn_md5_mode=args.fn_md5_mode,
- fn_md5_path=args.fn_md5_path,
- ),
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/cli/write_json_baidu.py b/cli/write_json_baidu.py
deleted file mode 100644
index 9dabbce..0000000
--- a/cli/write_json_baidu.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import argparse
-import os
-
-from dataset_loaders import BaiduLoader
-
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.text import TextWriteAdapter
-
-
-def get_argument():
- parser = argparse.ArgumentParser(
- description="Convert pdf file to text detection and recognition label.",
- )
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument("--folder", type=str, help="Directory to containing the label file", required=True)
-
- parser.add_argument("--suffix", default=".json", type=str, help="The suffix of label file")
-
- parser.add_argument(
- "--fn-md5-mode",
- type=str,
- help='The mode of handle with filename_to_md5 file. Only support ["r", "w"] mode',
- required=True,
- )
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- parser.add_argument(
- "--keys-extracted",
- type=str,
- choices=["angle", "age", "expression,type", "gender,type", "glasses,type", "emotion,type", "mask,type"],
- help="The key with multi level to extract from the label file",
- required=True,
- )
-
- parser.add_argument(
- "--key-probability",
- type=float,
- help="The minimum probability of value for attribute." 'Only using parameter when keys-extracted has "type"',
- )
-
- parser.add_argument(
- "--values-map",
- type=str,
- help="List of normalize the value." 'Ex: "Female:0,Male:1,female:0,male:1,111:1,112:0"',
- )
-
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
-
- if args.folder and not os.path.isdir(args.folder):
- raise ValueError("Folder not exists")
-
- if args.folder and not args.suffix:
- raise ValueError("Do not empty --suffix argument when handle with some folder")
-
- keys_extracted = args.keys_extracted.split(",") if args.keys_extracted else []
-
- values_map = (
- {value.split(":")[0]: value.split(":")[1] for value in args.values_map.split(",")} if args.values_map else None
- )
-
- lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
- lmdb_obj.write_loader(
- BaiduLoader(
- directory=args.folder,
- suffix=args.suffix,
- fn_md5_path=args.fn_md5_path,
- keys_extracted=keys_extracted,
- key_probability=args.key_probability,
- values_map=values_map,
- ),
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/cli/write_json_facepp.py b/cli/write_json_facepp.py
deleted file mode 100644
index 04ce5ca..0000000
--- a/cli/write_json_facepp.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import argparse
-import os
-
-from dataset_loaders import FaceppLoader
-
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.text import TextWriteAdapter
-
-
-def get_argument():
- parser = argparse.ArgumentParser(
- description="Convert pdf file to text detection and recognition label.",
- )
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument("--folder", type=str, help="Directory to containing the label file", required=True)
-
- parser.add_argument("--suffix", default=".json", type=str, help="The suffix of label file")
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- parser.add_argument(
- "--keys-extracted",
- type=str,
- choices=[
- "gender,value",
- "age,value",
- "headpose",
- "emotion",
- "facequality,value",
- "ethnicity,value",
- "beauty",
- "glass,value",
- ],
- help="The key with multi level to extract from the label file",
- required=True,
- )
-
- parser.add_argument(
- "--values-map",
- type=str,
- help="List of normalize the value." 'Ex: "Female:0,Male:1,female:0,male:1,111:1,112:0"',
- )
-
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
-
- if args.folder and not os.path.isdir(args.folder):
- raise ValueError("Folder not exists")
-
- if args.folder and not args.suffix:
- raise ValueError("Do not empty --suffix argument when handle with some folder")
-
- keys_extracted = args.keys_extracted.split(",") if args.keys_extracted else []
-
- values_map = (
- {value.split(":")[0]: value.split(":")[1] for value in args.values_map.split(",")} if args.values_map else None
- )
-
- lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
- lmdb_obj.write_loader(
- FaceppLoader(
- directory=args.folder,
- suffix=args.suffix,
- fn_md5_path=args.fn_md5_path,
- keys_extracted=keys_extracted,
- values_map=values_map,
- ),
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/cli/write_json_visage.py b/cli/write_json_visage.py
deleted file mode 100644
index f873f6a..0000000
--- a/cli/write_json_visage.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import argparse
-import os
-
-from dataset_loaders import VisageLoader
-
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.text import TextWriteAdapter
-
-
-def get_argument():
- parser = argparse.ArgumentParser(
- description="Convert pdf file to text detection and recognition label.",
- )
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument("--folder", type=str, help="Directory to containing the label file", required=True)
-
- parser.add_argument("--suffix", default=".json", type=str, help="The suffix of label file")
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- parser.add_argument(
- "--keys-extracted",
- type=str,
- choices=[
- "gender",
- "age",
- ],
- help="The key to extract from the label file",
- required=True,
- )
-
- parser.add_argument(
- "--values-map",
- type=str,
- help="List of normalize the value." 'Ex: "Female:0,Male:1,female:0,male:1,111:1,112:0"',
- )
-
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
-
- if args.folder and not os.path.isdir(args.folder):
- raise ValueError("Folder not exists")
-
- if args.folder and not args.suffix:
- raise ValueError("Do not empty --suffix argument when handle with some folder")
-
- keys_extracted = args.keys_extracted.split(",") if args.keys_extracted else []
-
- values_map = (
- {value.split(":")[0]: value.split(":")[1] for value in args.values_map.split(",")} if args.values_map else None
- )
-
- lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
- lmdb_obj.write_loader(
- VisageLoader(
- directory=args.folder,
- suffix=args.suffix,
- fn_md5_path=args.fn_md5_path,
- keys_extracted=keys_extracted,
- values_map=values_map,
- ),
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/cli/write_scripts/write_baidu.sh b/cli/write_scripts/write_baidu.sh
deleted file mode 100644
index a8c68a7..0000000
--- a/cli/write_scripts/write_baidu.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-set -eux
-
-# Write Label From Baidu Tool
-root_data_path="/tmp/facedata/mount"
-datasets=(
- # "aaf"
- # "afad"
- # "afd"
- "deepglint"
- # "fairface"
- "ffhq"
- # "rfw"
- # "utkface"
-)
-for dataset in "${datasets[@]}"; do
- echo "Handling with dataset: $dataset ..."
- echo "Building Gender LMDB File ..."
- ## Gender
- python cli/write_json_baidu.py \
- --lmdb-file "${root_data_path}/${dataset}/${dataset}_gender_baidu.lmdb" \
- --folder "${root_data_path}/${dataset}/label_baidu" \
- --suffix .json \
- --fn-md5-path "${root_data_path}/${dataset}/${dataset}_fn_md5.json" \
- --keys-extracted "gender,type" \
- --key-probability 0.9 \
- --values-map "Female:0,Male:1,female:0,male:1,111:1,112:0"
-
- echo "Building Age LMDB File ..."
- ## Age
- python cli/write_json_baidu.py \
- --lmdb-file "${root_data_path}/${dataset}/${dataset}_age_baidu.lmdb" \
- --folder "${root_data_path}/${dataset}/label_baidu" \
- --suffix .json \
- --fn-md5-path "${root_data_path}/${dataset}/${dataset}_fn_md5.json" \
- --keys-extracted "age"
-
-done
diff --git a/cli/write_scripts/write_facepp.sh b/cli/write_scripts/write_facepp.sh
deleted file mode 100644
index 91e822f..0000000
--- a/cli/write_scripts/write_facepp.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-set -eux
-
-# Write Label From Facepp Tool
-root_data_path="/tmp/facedata/mount"
-datasets=(
- "aaf"
- "afad"
- "afd"
- "deepglint"
- "fairface"
- "ffhq"
- "rfw"
- "utkface"
- # "vggface2_cleandata_p1"
- # "vggface2_cleandata_p2"
- # "vggface2_cleandata_p3"
- # "vggface2_cleandata_p4"
- # "vggface2_cleandata_p5"
- # "vggface2_cleandata_p6"
- # "vggface2_cleandata_p7"
- # "vggface2_cleandata_p8"
- # "vggface2_cleandata_p9"
- # "vggface2_cleandata_p10"
-)
-for dataset in "${datasets[@]}"; do
- echo "Handling with dataset: $dataset ..."
- echo "Building Gender LMDB File ..."
- ## Gender
- python cli/write_json_facepp.py \
- --lmdb-file "${root_data_path}/${dataset}/${dataset}_gender_facepp.lmdb" \
- --folder "${root_data_path}/${dataset}/label_facepp" \
- --suffix .json \
- --fn-md5-path "${root_data_path}/${dataset}/${dataset}_fn_md5.json" \
- --keys-extracted "gender,value" \
- --values-map "Female:0,Male:1,female:0,male:1,111:1,112:0"
-
- echo "Building Age LMDB File ..."
- ## Age
- python cli/write_json_facepp.py \
- --lmdb-file "${root_data_path}/${dataset}/${dataset}_age_facepp.lmdb" \
- --folder "${root_data_path}/${dataset}/label_facepp" \
- --suffix .json \
- --fn-md5-path "${root_data_path}/${dataset}/${dataset}_fn_md5.json" \
- --keys-extracted "age,value"
-
-done
diff --git a/cli/write_scripts/write_image.sh b/cli/write_scripts/write_image.sh
deleted file mode 100644
index 01b8570..0000000
--- a/cli/write_scripts/write_image.sh
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/bin/bash
-
-set -eux
-
-# Write Image
-root_data_path="/tmp/facedata/mount"
-datasets=(
- "aaf;AAF/All-Age-Faces Dataset/original images;jpg"
- "afad;AFAD;jpg"
- "afd;generated_yellow-stylegan2;png"
- "deepglint;DeepGlint_7000;jpg"
- "fairface;fairface-img-margin125-trainval;jpg"
- "rfw;test/data;jpg"
- "utkface;UTKFace;jpg"
- # "vggface2_cleandata_p1;VGGFACE2_Cleandata_p1;jpg"
- # "vggface2_cleandata_p2;VGGFACE2_Cleandata_p2;jpg"
- # "vggface2_cleandata_p3;VGGFACE2_Cleandata_p3;jpg"
- # "vggface2_cleandata_p4;VGGFACE2_Cleandata_p4;jpg"
- # "vggface2_cleandata_p5;VGGFACE2_Cleandata_p5;jpg"
- # "vggface2_cleandata_p6;VGGFACE2_Cleandata_p6;jpg"
- # "vggface2_cleandata_p7;VGGFACE2_Cleandata_p7;jpg"
- # "vggface2_cleandata_p8;VGGFACE2_Cleandata_p8;jpg"
- # "vggface2_cleandata_p9;VGGFACE2_Cleandata_p9;jpg"
- # "vggface2_cleandata_p10;VGGFACE2_Cleandata_p10;jpg"
-)
-for dataset in "${datasets[@]}"; do
- IFS=$';'
- dataset_item=($dataset)
- unset IFS
- echo "Handling with dataset: ${dataset_item[0]} ..."
-
- python cli/write_image.py \
- --lmdb-file "${root_data_path}/${dataset_item[0]}/${dataset_item[0]}_image.lmdb" \
- --folder "${root_data_path}/${dataset_item[0]}/${dataset_item[1]}" \
- --suffix ".${dataset_item[2]}" \
- --fn-md5-mode w \
- --fn-md5-path "${root_data_path}/${dataset_item[0]}/${dataset_item[0]}_fn_md5.json"
-
-done
-
-# Special case for too large dataset
-## FFHQ
-python cli/write_image.py \
- --lmdb-file "/media/ubuntu/My Passport/ffhq_image.lmdb" \
- --folder "/media/ubuntu/My Passport/ffhq_1024x1024" \
- --suffix .png \
- --lmdb-map-size 214748364800 \
- --fn-md5-mode w \
- --fn-md5-path "${root_data_path}/ffhq/ffhq_fn_md5.json"
-
-## Write Cleaned Image
-#python cli/write_image.py --lmdb-file "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/aaf_image.lmdb" \
-# --folder "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/AAF" \
-# --suffix .jpg \
-# --fn-md5-mode r \
-# --fn-md5-path "/tmp/facedata/lmdb/aaf_fn_md5.json"
-
-## Write Cleaned Image
-#python cli/write_image.py --lmdb-file "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/fairface_image.lmdb" \
-# --folder "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/FairFace" \
-# --suffix .jpg \
-# --fn-md5-mode r \
-# --fn-md5-path "/tmp/facedata/lmdb/fairface_fn_md5.json"
-
-## Write Cleaned Image
-#python cli/write_image.py --lmdb-file "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/ffhq_image.lmdb" \
-# --folder "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/FFHQ" \
-# --suffix .png \
-# --fn-md5-mode r \
-# --fn-md5-path "/tmp/facedata/lmdb/ffhq_fn_md5.json"
-
-## Write Cleaned Image
-#python cli/write_image.py --lmdb-file "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/rfw_image.lmdb" \
-# --folder "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/RFW" \
-# --suffix .jpg \
-# --fn-md5-mode r \
-# --fn-md5-path "/tmp/facedata/lmdb/rfw_fn_md5.json"
-
-## Write Cleaned Image
-#python cli/write_image.py --lmdb-file "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/utkface_image.lmdb" \
-# --folder "/tmp/facedata/SCRFD_500M_KPS_ALIGN_224x224/UTKFace" \
-# --suffix .jpg \
-# --fn-md5-mode r \
-# --fn-md5-path "/tmp/facedata/lmdb/utkface_fn_md5.json"
diff --git a/cli/write_scripts/write_origin_label.sh b/cli/write_scripts/write_origin_label.sh
deleted file mode 100644
index 2399ddd..0000000
--- a/cli/write_scripts/write_origin_label.sh
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/bin/bash
-
-set -eux
-
-## Write Label From Dataset
-root_data_path="/tmp/facedata/mount"
-# AAF
-## Gender
-python cli/write_text_label_in_some_txt.py \
- --lmdb-file "${root_data_path}/aaf/aaf_gender_origin.lmdb" \
- --files "${root_data_path}/aaf/AAF/All-Age-Faces Dataset/image sets/train.txt,${root_data_path}/aaf/AAF/All-Age-Faces Dataset/image sets/val.txt" \
- --delimiter " " \
- --key-index 0 \
- --values-index 1 \
- --fn-md5-path "${root_data_path}/aaf/aaf_fn_md5.json"
-## Age
-python cli/write_text_label_in_filename.py \
- --lmdb-file "${root_data_path}/aaf/aaf_age_origin.lmdb" \
- --folder "${root_data_path}/aaf/AAF/All-Age-Faces Dataset/original images" \
- --suffix .jpg \
- --delimiter "A" \
- --values-index 1 \
- --fn-md5-path "${root_data_path}/aaf/aaf_fn_md5.json" \
- --values-map "type:int"
-
-# AFAD
-## Gender
-python cli/write_text_label_in_key_in_some_txt.py \
- --lmdb-file "${root_data_path}/afad/afad_gender_origin.lmdb" \
- --files "${root_data_path}/afad/AFAD/AFAD-Full.txt" \
- --delimiter " " \
- --key-index 0 \
- --fn-md5-path "${root_data_path}/afad/afad_fn_md5.json" \
- --pattern-value-in-key "/(\d{3})/" \
- --type-value-in-key int \
- --values-map "Female:0,Male:1,female:0,male:1,111:1,112:0"
-## Age
-python cli/write_text_label_in_key_in_some_txt.py \
- --lmdb-file "${root_data_path}/afad/afad_age_origin.lmdb" \
- --files "${root_data_path}/afad/AFAD/AFAD-Full.txt" \
- --delimiter " " \
- --key-index 0 \
- --fn-md5-path "${root_data_path}/afad/afad_fn_md5.json" \
- --pattern-value-in-key "/(\d{2})/" \
- --type-value-in-key int
-
-# FairFace
-## Gender
-python cli/write_text_label_in_some_csv.py \
- --lmdb-file "${root_data_path}/fairface/fairface_gender_origin.lmdb" \
- --files "${root_data_path}/fairface/fairface_label_train.csv,${root_data_path}/facefair/fairface_label_val.csv" \
- --delimiter "," \
- --key-index 0 \
- --values-index 2 \
- --fn-md5-path "${root_data_path}/fairface/fairface_fn_md5.json" \
- --values-map "Female:0,Male:1,female:0,male:1,111:1,112:0" \
- --skip-header
-## Age
-python cli/write_text_label_in_some_csv.py \
- --lmdb-file "${root_data_path}/fairface/fairface_age_origin.lmdb" \
- --files "${root_data_path}/fairface/fairface_label_train.csv,${root_data_path}/facefair/fairface_label_val.csv" \
- --delimiter "," \
- --key-index 0 \
- --values-index 1 \
- --fn-md5-path "${root_data_path}/fairface/fairface_fn_md5.json" \
- --skip-header
-
-# FFHQ
-## Age
-python cli/write_text_label_in_some_txt.py \
- --lmdb-file "${root_data_path}/ffhq/ffhq_age_human.lmdb" \
- --files "${root_data_path}/ffhq/label_human.txt" \
- --delimiter " " \
- --key-index 0 \
- --values-index 1 \
- --fn-md5-path "${root_data_path}/ffhq/ffhq_fn_md5.json"
-
-# UTKFace
-## Gender
-python cli/write_text_label_in_filename.py \
- --lmdb-file "${root_data_path}/utkface/utkface_gender_origin.lmdb" \
- --folder "${root_data_path}/utkface/UTKFace" \
- --suffix .jpg \
- --delimiter "_" \
- --values-index 1 \
- --fn-md5-path "${root_data_path}/utkface/utkface_fn_md5.json" \
- --values-map "0:1,1:0"
-## Age
-python cli/write_text_label_in_filename.py \
- --lmdb-file "${root_data_path}/utkface/utkface_age_origin.lmdb" \
- --folder "${root_data_path}/utkface/UTKFace" \
- --suffix .jpg \
- --delimiter "_" \
- --values-index 0 \
- --fn-md5-path "${root_data_path}/utkface/utkface_fn_md5.json"
-
-# VGGFACE2_Cleandata_p1
-## Age
-python cli/write_text_label_in_some_txt.py \
- --lmdb-file "${root_data_path}/vggface2_cleandata_p1/vggface2_cleandata_p1_age_origin.lmdb" \
- --files "${root_data_path}/vggface2_cleandata_p1/label_origin.txt" \
- --delimiter "\t" \
- --key-index 0 \
- --values-index 1 \
- --fn-md5-path "${root_data_path}/vggface2_cleandata_p1/vggface2_cleandata_p1_fn_md5.json"
-
-# VGGFACE2_Cleandata_p2
-## Age
-python cli/write_text_label_in_some_txt.py \
- --lmdb-file "${root_data_path}/vggface2_cleandata_p2/vggface2_cleandata_p2_age_origin.lmdb" \
- --files "${root_data_path}/vggface2_cleandata_p2/label_origin.txt" \
- --delimiter "\t" \
- --key-index 0 \
- --values-index 1 \
- --fn-md5-path "${root_data_path}/vggface2_cleandata_p2/vggface2_cleandata_p2_fn_md5.json"
-
-# To p10
diff --git a/cli/write_scripts/write_visage.sh b/cli/write_scripts/write_visage.sh
deleted file mode 100644
index dd9cd2b..0000000
--- a/cli/write_scripts/write_visage.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-set -eux
-
-# Write Label From Visage Tool
-root_data_path="/tmp/facedata/mount"
-datasets=(
- "aaf"
- "afad"
- "afd"
- # "deepglint"
- "fairface"
- # "ffhq"
- "rfw"
- "utkface"
- # "vggface2_cleandata_p1"
- # "vggface2_cleandata_p2"
- # "vggface2_cleandata_p3"
- # "vggface2_cleandata_p4"
- # "vggface2_cleandata_p5"
- # "vggface2_cleandata_p6"
- # "vggface2_cleandata_p7"
- # "vggface2_cleandata_p8"
- # "vggface2_cleandata_p9"
- # "vggface2_cleandata_p10"
-)
-for dataset in "${datasets[@]}"; do
- echo "Handling with dataset: $dataset ..."
- echo "Building Gender LMDB File ..."
- ## Gender
- python cli/write_json_visage.py \
- --lmdb-file "${root_data_path}/${dataset}/${dataset}_gender_visage.lmdb" \
- --folder "${root_data_path}/${dataset}/label_visage" \
- --suffix .json \
- --fn-md5-path "${root_data_path}/${dataset}/${dataset}_fn_md5.json" \
- --keys-extracted "gender" \
- --values-map "Female:0,Male:1,female:0,male:1,111:1,112:0"
-
- echo "Building Age LMDB File ..."
- ## Age
- python cli/write_json_visage.py \
- --lmdb-file "${root_data_path}/${dataset}/${dataset}_age_visage.lmdb" \
- --folder "${root_data_path}/${dataset}/label_visage" \
- --suffix .json \
- --fn-md5-path "${root_data_path}/${dataset}/${dataset}_fn_md5.json" \
- --keys-extracted "age"
-done
diff --git a/cli/write_text_label_in_filename.py b/cli/write_text_label_in_filename.py
deleted file mode 100644
index 1ea7372..0000000
--- a/cli/write_text_label_in_filename.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-label of data in filename of image
-ex: 12_0_0_20170117190914091
-"""
-
-import argparse
-import os
-
-from cli.dataset_loaders import LabelInFilenameLoader
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.text import TextWriteAdapter
-
-
-def unescaped_str(arg_str):
- return arg_str.encode().decode("unicode_escape")
-
-
-def get_argument():
- parser = argparse.ArgumentParser(
- description="Convert pdf file to text detection and recognition label.",
- )
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument("--folder", type=str, help="Directory to containing the label file")
-
- parser.add_argument("--suffix", default=".txt", type=str, help="The suffix of label file")
-
- parser.add_argument(
- "--delimiter",
- type=unescaped_str,
- choices=["\t", "\n", " ", ",", "_", "A"],
- help="punctuation for split the label line",
- required=True,
- )
-
- parser.add_argument(
- "--values-index",
- type=str,
- help="The list of index to extract values from the label line, Except value: 0,1,2,3",
- required=True,
- )
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- parser.add_argument(
- "--values-map",
- type=str,
- help="List of normalize the value." 'Ex: "Female:0,Male:1,female:0,male:1,111:1,112:0"',
- )
-
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
- if args.folder and not os.path.isdir(args.folder):
- raise ValueError("Folder not exists")
-
- if args.folder and (not args.suffix or not args.values_index):
- raise ValueError("Do not empty --suffix or --values-index argument when handle with some folder")
-
- values_index = [int(value) for value in args.values_index.split(",")]
- values_map = (
- {value.split(":")[0]: value.split(":")[1] for value in args.values_map.split(",")} if args.values_map else None
- )
-
- lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
- lmdb_obj.write_loader(
- LabelInFilenameLoader(
- directory=args.folder,
- suffix=args.suffix,
- fn_md5_path=args.fn_md5_path,
- values_map=values_map,
- delimiter=args.delimiter,
- values_index=values_index,
- ),
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/cli/write_text_label_in_key_in_some_txt.py b/cli/write_text_label_in_key_in_some_txt.py
deleted file mode 100644
index 50b589a..0000000
--- a/cli/write_text_label_in_key_in_some_txt.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import argparse
-
-from cli.dataset_loaders import LabelInKeyInSomeTxtLoader
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.text import TextWriteAdapter
-
-
-def unescaped_str(arg_str):
- return arg_str.encode().decode("unicode_escape")
-
-
-def get_argument():
- parser = argparse.ArgumentParser(
- description="Convert pdf file to text detection and recognition label.",
- )
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument(
- "--files",
- type=str,
- help="The list of file path, Except value: /tmp/test1.txt,/tmp/test2.txt,/tmp/test3.txt",
- required=True,
- )
-
- parser.add_argument(
- "--delimiter",
- type=unescaped_str,
- choices=["\t", "\n", " ", ",", "_"],
- help="punctuation for split the label line",
- required=True,
- )
-
- parser.add_argument("--key-index", type=int, help="The index to extract key from the label line", required=True)
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- parser.add_argument(
- "--pattern-value-in-key",
- type=str,
- help="The pattern of value in key",
- required=True,
- )
-
- parser.add_argument(
- "--type-value-in-key",
- type=str,
- choices=["int", "str", "float"],
- help="The type of value in key",
- required=True,
- )
-
- parser.add_argument(
- "--values-map",
- type=str,
- help="List of normalize the value." 'Ex: "Female:0,Male:1,female:0,male:1,111:1,112:0"',
- )
-
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
-
- values_map = (
- {value.split(":")[0]: value.split(":")[1] for value in args.values_map.split(",")} if args.values_map else None
- )
-
- lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
- lmdb_obj.write_loader(
- LabelInKeyInSomeTxtLoader(
- file_paths=args.files.split(","),
- fn_md5_path=args.fn_md5_path,
- key_index=args.key_index,
- pattern_value_in_key=args.pattern_value_in_key,
- type_value_in_key=args.type_value_in_key,
- values_map=values_map,
- delimiter=args.delimiter,
- ),
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/cli/write_text_label_in_some_csv.py b/cli/write_text_label_in_some_csv.py
deleted file mode 100644
index 1feb4c1..0000000
--- a/cli/write_text_label_in_some_csv.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import argparse
-
-from cli.dataset_loaders import LabelInSomeCsvLoader
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.text import TextWriteAdapter
-
-
-def unescaped_str(arg_str):
- return arg_str.encode().decode("unicode_escape")
-
-
-def get_argument():
- parser = argparse.ArgumentParser(
- description="Convert pdf file to text detection and recognition label.",
- )
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument(
- "--files",
- type=str,
- help="The list of file path, Except value: /tmp/test1.txt,/tmp/test2.txt,/tmp/test3.txt",
- required=True,
- )
-
- parser.add_argument(
- "--delimiter",
- type=unescaped_str,
- choices=["\t", "\n", " ", ",", "_"],
- help="punctuation for split the label line",
- required=True,
- )
-
- parser.add_argument("--key-index", type=int, help="The index to extract key from the label line", required=True)
-
- parser.add_argument(
- "--values-index",
- type=str,
- help="The list of index to extract values from the label line, Except value: 0,1,2,3",
- required=True,
- )
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- parser.add_argument(
- "--values-map",
- type=str,
- help="List of normalize the value." 'Ex: "Female:0,Male:1,female:0,male:1,111:1,112:0"',
- )
-
- parser.add_argument(
- "--skip-header", action=argparse.BooleanOptionalAction, help="ignore header of csv if True, default False"
- )
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
-
- values_index = [int(value) for value in args.values_index.split(",")]
- values_map = (
- {value.split(":")[0]: value.split(":")[1] for value in args.values_map.split(",")} if args.values_map else None
- )
-
- lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
- lmdb_obj.write_loader(
- LabelInSomeCsvLoader(
- file_paths=args.files.split(","),
- fn_md5_path=args.fn_md5_path,
- key_index=args.key_index,
- values_index=values_index,
- values_map=values_map,
- delimiter=args.delimiter,
- skip_header=args.skip_header | False,
- ),
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/cli/write_text_label_in_some_txt.py b/cli/write_text_label_in_some_txt.py
deleted file mode 100644
index 7aa0bf8..0000000
--- a/cli/write_text_label_in_some_txt.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import argparse
-
-from cli.dataset_loaders import LabelInSomeTxtLoader
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.text import TextWriteAdapter
-
-
-def unescaped_str(arg_str):
- return arg_str.encode().decode("unicode_escape")
-
-
-def get_argument():
- parser = argparse.ArgumentParser(
- description="Convert pdf file to text detection and recognition label.",
- )
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument(
- "--files", type=str, help="The list of file path, Except value: /tmp/test1.txt,/tmp/test2.txt,/tmp/test3.txt"
- )
-
- parser.add_argument(
- "--delimiter",
- type=unescaped_str,
- choices=["\t", "\n", " ", ",", "_"],
- help="punctuation for split the label line",
- required=True,
- )
-
- parser.add_argument("--key-index", type=int, help="The index to extract key from the label line", required=True)
-
- parser.add_argument(
- "--values-index",
- type=str,
- help="The list of index to extract values from the label line, Except value: 0,1,2,3",
- required=True,
- )
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- parser.add_argument(
- "--values-map",
- type=str,
- help="List of normalize the value." 'Ex: "Female:0,Male:1,female:0,male:1,111:1,112:0"',
- )
-
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
-
- values_index = [int(value) for value in args.values_index.split(",")]
- values_map = (
- {value.split(":")[0]: value.split(":")[1] for value in args.values_map.split(",")} if args.values_map else None
- )
-
- lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
- lmdb_obj.write_loader(
- LabelInSomeTxtLoader(
- file_paths=args.files.split(","),
- fn_md5_path=args.fn_md5_path,
- key_index=args.key_index,
- values_index=values_index,
- values_map=values_map,
- delimiter=args.delimiter,
- ),
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/cli/write_text_label_in_txt.py b/cli/write_text_label_in_txt.py
deleted file mode 100644
index de07091..0000000
--- a/cli/write_text_label_in_txt.py
+++ /dev/null
@@ -1,81 +0,0 @@
-"""
-label of data in txt file, and map 1:1 with image
-ex: 12_0_0_20170117190914091
-"""
-
-import argparse
-import os
-
-from cli.dataset_loaders import LabelInTxtLoader
-from lmdbsystem.lmdb import Lmdb
-from lmdbsystem.write_adapters.text import TextWriteAdapter
-
-
-def unescaped_str(arg_str):
- return arg_str.encode().decode("unicode_escape")
-
-
-def get_argument():
- parser = argparse.ArgumentParser(
- description="Convert pdf file to text detection and recognition label.",
- )
-
- parser.add_argument("--lmdb-file", type=str, help="The path of lmdb file", required=True)
-
- parser.add_argument("--folder", type=str, help="Directory to containing the label file")
-
- parser.add_argument("--suffix", default=".txt", type=str, help="The suffix of label file", required=True)
-
- parser.add_argument(
- "--delimiter",
- type=unescaped_str,
- choices=["\t", "\n", " ", ",", "_"],
- help="punctuation for split the label line",
- required=True,
- )
-
- parser.add_argument(
- "--values-index",
- type=str,
- help="The list of index to extract values from the label line, Except value: 0,1,2,3",
- required=True,
- )
-
- parser.add_argument("--fn-md5-path", type=str, help="The path of filename_to_md5 file", required=True)
-
- parser.add_argument(
- "--values-map",
- type=str,
- help="List of normalize the value." 'Ex: "Female:0,Male:1,female:0,male:1,111:1,112:0"',
- )
-
- parser.add_argument("--from-filename", action=argparse.BooleanOptionalAction, help="Extract value from filename")
- args = parser.parse_args()
- return args
-
-
-def main():
- args = get_argument()
- if args.folder and not os.path.isdir(args.folder):
- raise ValueError("Folder not exists")
-
- values_index = [int(value) for value in args.values_index.split(",")]
- values_map = (
- {value.split(":")[0]: value.split(":")[1] for value in args.values_map.split(",")} if args.values_map else None
- )
-
- lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
- lmdb_obj.write_loader(
- LabelInTxtLoader(
- directory=args.folder,
- suffix=args.suffix,
- fn_md5_path=args.fn_md5_path,
- values_map=values_map,
- delimiter=args.delimiter,
- values_index=values_index,
- ),
- )
-
-
-if __name__ == "__main__":
- main()