From 4ad34867733a7ad7c8c9de15380c477db96d4b6a Mon Sep 17 00:00:00 2001 From: Ratchet Date: Tue, 30 Jan 2024 12:35:44 +0200 Subject: [PATCH] page now has their pid assigned to resolve racing conditions --- tablite/_nimlite/funcs/column_selector/collectinfo.nim | 2 +- tablite/_nimlite/funcs/column_selector/infos.nim | 2 +- tablite/_nimlite/funcs/column_selector/sliceconv.nim | 2 +- tablite/_nimlite/funcs/text_reader/text_reader.nim | 2 +- tablite/_nimlite/numpy.nim | 2 +- tablite/base.py | 5 ++--- tablite/joins.py | 3 +++ 7 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tablite/_nimlite/funcs/column_selector/collectinfo.nim b/tablite/_nimlite/funcs/column_selector/collectinfo.nim index 98ad61d4..50839398 100644 --- a/tablite/_nimlite/funcs/column_selector/collectinfo.nim +++ b/tablite/_nimlite/funcs/column_selector/collectinfo.nim @@ -112,7 +112,7 @@ proc collectColumnSelectInfo*(table: nimpy.PyObject, cols: nimpy.PyObject, dirPi var isCorrectType = initTable[string, bool]() - proc genpage(dirpid: string): ColSliceInfo {.inline.} = (dir_pid, tabliteBase().SimplePage.next_id(dir_pid).to(int)) + proc genpage(dirpid: string): ColSliceInfo {.inline.} = (dir_pid, tabliteBase().SimplePage.next_id(dir_pid).to(string)) discard pbar.update(5) discard pbar.display() diff --git a/tablite/_nimlite/funcs/column_selector/infos.nim b/tablite/_nimlite/funcs/column_selector/infos.nim index 8026a645..bc35180a 100644 --- a/tablite/_nimlite/funcs/column_selector/infos.nim +++ b/tablite/_nimlite/funcs/column_selector/infos.nim @@ -4,7 +4,7 @@ from std/sugar import collect from ../../pymodules import builtins from ../../pytypes import KindObjectND, str2ObjKind -type ColSliceInfo* = (string, int) +type ColSliceInfo* = (string, string) type ColInfo* = Table[string, ColSliceInfo] type DesiredColumnInfo* = object originalName*: string diff --git a/tablite/_nimlite/funcs/column_selector/sliceconv.nim b/tablite/_nimlite/funcs/column_selector/sliceconv.nim index ef08e311..b237f268 100644 --- a/tablite/_nimlite/funcs/column_selector/sliceconv.nim +++ b/tablite/_nimlite/funcs/column_selector/sliceconv.nim @@ -35,7 +35,7 @@ proc finalizeSlice(indices: var seq[int], columnNames: seq[string], infos: var T proc toColSliceInfo(path: Path): ColSliceInfo = let workdir = string path.parentDir.parentDir - let pid = parseInt(string path.extractFilename.changeFileExt("")) + let pid = string path.extractFilename.changeFileExt("") return (workdir, pid) diff --git a/tablite/_nimlite/funcs/text_reader/text_reader.nim b/tablite/_nimlite/funcs/text_reader/text_reader.nim index 23844e69..42a59833 100644 --- a/tablite/_nimlite/funcs/text_reader/text_reader.nim +++ b/tablite/_nimlite/funcs/text_reader/text_reader.nim @@ -67,7 +67,7 @@ proc textReaderTask*(task: TaskArgs): seq[nimpy.PyObject] = for i in 0.. None: - self.id = id self.path = Path(path) / "pages" / f"{id}.npy" self.len = len self.dtype = py_dtype @@ -97,7 +96,7 @@ def next_id(cls, path): path = Path(path) while True: - _id = next(cls.ids) + _id = f"{os.getpid()}-{next(cls.ids)}" _path = path / "pages" / f"{_id}.npy" if not _path.exists(): @@ -117,7 +116,7 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}({self.path}, <{e}>)" def __hash__(self) -> int: - return hash(self.id) + return hash(self.path) def owns(self): parts = self.path.parts diff --git a/tablite/joins.py b/tablite/joins.py index 8c85a6ae..8f5b9725 100644 --- a/tablite/joins.py +++ b/tablite/joins.py @@ -598,8 +598,11 @@ def _mp_reindex_page( if np.any(mask): nones = np.full(ix_arr.shape, fill_value=None) array = np.where(mask, nones, array) + + Constr = type(T) remapped_T = Constr({column_name: array}, _path=path) + return remapped_T