-
Notifications
You must be signed in to change notification settings - Fork 27
/
preprocess.py
31 lines (24 loc) · 1.01 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import argparse
from multiprocessing import cpu_count
from pathlib import Path
from commu.preprocessor import PreprocessPipeline
def get_root_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the dataset preprocessing script.

    Registered options:
        --root_dir:  required string.
        --csv_path:  required string.
        --num_cores: integer; defaults to ``cpu_count() - 4``, clamped so it
            is never lower than 1.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # Default worker count: all cores minus four, but at least one.
    default_cores = max(1, cpu_count() - 4)

    parser = argparse.ArgumentParser(prog="dataset preprocessing", add_help=True)

    # (flag, keyword options) pairs, registered in declaration order.
    option_specs = (
        (
            "--root_dir",
            {"type": str, "required": True, "help": "root directory containing 'raw' directory"},
        ),
        (
            "--csv_path",
            {"type": str, "required": True, "help": "csv file path containing meta info"},
        ),
        (
            "--num_cores",
            {"type": int, "default": default_cores},
        ),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser
def main(args: argparse.Namespace) -> None:
    """Run the preprocessing pipeline with the parsed command-line options.

    Args:
        args: Parsed options. The attributes ``root_dir``, ``csv_path`` and
            ``num_cores`` are read and forwarded to ``PreprocessPipeline``.
    """
    import os  # local import: only needed for the ``~`` expansion below

    root_dir = Path(args.root_dir).expanduser()
    # Expand a leading ``~`` in csv_path too, so both path options are treated
    # alike (e.g. when the value was quoted and the shell did not expand it).
    # os.path.expanduser keeps the value a str and returns paths that do not
    # start with ``~`` unchanged.
    csv_path = os.path.expanduser(args.csv_path)

    pipeline = PreprocessPipeline()
    pipeline(
        root_dir=root_dir,
        csv_path=csv_path,
        num_cores=args.num_cores,
    )
if __name__ == "__main__":
    import warnings

    # Install a blanket "ignore" filter so no warnings are shown during the run.
    warnings.filterwarnings(action="ignore")

    # parse_known_args() returns (namespace, leftover_argv); arguments the
    # parser does not recognise are discarded instead of raising an error.
    cli_args, _unrecognised = get_root_parser().parse_known_args()
    main(cli_args)