-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmulti_small_files.py
50 lines (40 loc) · 1.52 KB
/
multi_small_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Split output across multiple small files when writing large amounts of data.
Usage: write a new class that inherits from the `MultiFileWriter` class and
call `get_file_path` to get the path of the file to write to.
"""
import os
import re
class MultiFileWriter(object):
    """Choose which file to write to so that no single file outgrows a cap.

    Output for a logical path ``P`` is spread over ``P``, ``P-1``, ``P-2``,
    ... in the same directory. This class only computes paths; it never
    opens, creates or writes any file itself.
    """

    def __init__(self, max_size=524288000):
        """Store the size cap.

        :param max_size: size in bytes at which a file is considered full;
            the default 524288000 is 500 * 1024 * 1024 bytes.
        """
        self.max_size = max_size

    def _get_current(self, base_dir, filename):
        """Find the highest-numbered chunk of ``filename`` in ``base_dir``.

        :param base_dir: directory to scan; an empty string means the
            current working directory.
        :param filename: base name of the logical file (no directory part).
        :returns: ``(index, path)`` of the newest chunk. When no numbered
            chunk exists this is ``(0, <base_dir>/<filename>)``.
        :raises OSError: if the directory cannot be listed.
        """
        # Escape the name so characters such as "." are matched literally,
        # and require "-<digits>" up to the end of the name so that siblings
        # like "name-", "name-old" or "name-3.bak" are not taken for chunks
        # (an empty digit group would make int() raise ValueError).
        pattern = re.compile(r"^%s-(\d+)$" % re.escape(filename))

        # os.path.dirname() yields "" for a bare file name and
        # os.listdir("") fails, so scan the current directory in that case.
        scan_dir = base_dir or os.curdir

        existing_idx_list = []
        for name in os.listdir(scan_dir):
            matched = pattern.match(name)
            if matched:
                existing_idx_list.append(int(matched.group(1)))

        if existing_idx_list:
            current_idx = max(existing_idx_list)
            current_file = os.path.join(base_dir,
                                        "%s-%s" % (filename, current_idx))
        else:
            current_idx = 0
            current_file = os.path.join(base_dir, filename)
        return current_idx, current_file

    def get_file_path(self, file_path):
        """Return the path that should receive the next write.

        :param file_path: logical path of the output file.
        :returns: ``file_path`` itself when it does not exist yet; otherwise
            the newest existing chunk while its size is below ``max_size``;
            otherwise ``file_path`` suffixed with the next chunk index.
        :raises OSError: if the directory cannot be listed or the newest
            chunk cannot be stat'ed.
        """
        if not os.path.exists(file_path):
            return file_path

        base_dir = os.path.dirname(file_path)
        filename = os.path.basename(file_path)
        current_idx, current_file = self._get_current(base_dir, filename)

        # Strict "<": a chunk that has reached max_size exactly is full.
        if os.stat(current_file).st_size < self.max_size:
            return current_file
        return "%s-%s" % (file_path, current_idx + 1)