Skip to content

Commit

Permalink
Merge pull request #165 from realratchet/master
Browse files Browse the repository at this point in the history
Respect passed pbars
  • Loading branch information
realratchet authored Apr 22, 2024
2 parents 492be9c + e15191c commit 2d0ecae
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 42 deletions.
15 changes: 10 additions & 5 deletions nimlite/funcs/filter.nim
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ proc checkExpressions(row: seq[PY_ObjectND], exprCols: seq[string], expressions:
of FT_ANY: any(expressions, xpr => row.checkExpression(exprCols, xpr))
of FT_ALL: all(expressions, xpr => row.checkExpression(exprCols, xpr))

proc filter*(table: nimpy.PyObject, pyExpressions: seq[nimpy.PyObject], filterTypeName: string, tqdm: nimpy.PyObject): (nimpy.PyObject, nimpy.PyObject) =
proc filter*(table: nimpy.PyObject, pyExpressions: seq[nimpy.PyObject], filterTypeName: string, tqdm: nimpy.PyObject = nil, pbarInp: nimpy.PyObject = nil): (nimpy.PyObject, nimpy.PyObject) =
let m = modules()
let builtins = m.builtins
let tablite = m.tablite
Expand Down Expand Up @@ -267,10 +267,15 @@ proc filter*(table: nimpy.PyObject, pyExpressions: seq[nimpy.PyObject], filterTy
for (key, col) in tablePages.pairs():
col.dumpPage(passTablePages[key], failTablePages[key])

let tableLen = builtins.getLen(table)
let tqdmLen = int ceil(float(tableLen) / float(pageSize))
let TqdmClass = (if isNone(tqdm): m.tqdm.classes.TqdmClass else: tqdm)
let pbar = TqdmClass!(total: tqdmLen, desc = "filter")
var pbar: nimpy.PyObject

if pbarInp.isNone:
let tableLen = builtins.getLen(table)
let tqdmLen = int ceil(float(tableLen) / float(pageSize))
let TqdmClass = (if isNone(tqdm): m.tqdm.classes.TqdmClass else: tqdm)
pbar = TqdmClass!(total: tqdmLen, desc = "filter")
else:
pbar = pbarInp

for (i, row) in enumerate(exprCols.iterateRows(tablePages)):
bitmask[bitNum] = row.checkExpressions(exprCols, expressions, filterType)
Expand Down
17 changes: 12 additions & 5 deletions nimlite/funcs/groupby.nim
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ iterator iteratePages(paths: seq[string]): seq[PY_ObjectND] =
res.add(i())
finished = finished or finished(i)

proc groupby*(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, Accumulator)], tqdm: nimpy.PyObject = modules().tqdm.classes.TqdmClass): nimpy.PyObject =
proc groupby*(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, Accumulator)], tqdm: nimpy.PyObject = nil, pbarInp: nimpy.PyObject = nil): nimpy.PyObject =
let
m = modules()
tabliteBase = m.tablite.modules.base
Expand Down Expand Up @@ -626,12 +626,19 @@ proc groupby*(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, Accu
if cn notin columnNames:
columnNames.add(cn)

# var relevantT = T.slice(columnNames)
var columnsPaths: OrderedTable[string, seq[string]] = collect(initOrderedTable()):
let columnsPaths: OrderedTable[string, seq[string]] = collect(initOrderedTable()):
for cn in columnNames:
{cn: tabliteBase.collectPages(T[cn])}
var TqdmClass = if tqdm.isNone: m.tqdm.classes.TqdmClass else: tqdm
var pbar = TqdmClass!(desc: &"groupby", total: len(columnsPaths[toSeq(columnsPaths.keys)[0]]))


var pbar: nimpy.PyObject

if pbarInp.isNone:
let TqdmClass = if tqdm.isNone: m.tqdm.classes.TqdmClass else: tqdm
pbar = TqdmClass!(desc: &"groupby", total: len(columnsPaths[toSeq(columnsPaths.keys)[0]]))
else:
pbar = pbarInp

var aggregationFuncs = initOrderedTable[seq[PY_ObjectND], seq[(string, GroupByFunction)]]()
for pagesZipped in pageZipper(columnsPaths):
for row in iteratePages(pagesZipped):
Expand Down
21 changes: 14 additions & 7 deletions nimlite/funcs/imputation.nim
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ proc savePages(sliceData: seq[seq[PY_ObjectND]], columns: seq[nimpy.PyObject], p

proc nearestNeighbourImputation*(T: nimpy.PyObject, sources: seq[string],
missing: seq[PY_ObjectND], targets: seq[string],
tqdm: nimpy.PyObject = modules().tqdm.classes.TqdmClass): nimpy.PyObject =
tqdm: nimpy.PyObject = nil, pbarInp: nimpy.PyObject = nil): nimpy.PyObject =
let
m = modules()
tabliteBase = m.tablite.modules.base
Expand Down Expand Up @@ -153,13 +153,20 @@ proc nearestNeighbourImputation*(T: nimpy.PyObject, sources: seq[string],
for m in missing:
if m in k:
{k: v}
var
missingValsCounts = collect: (for v in missing_value_index.values(): len(v))
totalSteps = sum(missingValsCounts)
TqdmClass = if tqdm.isNone: m.tqdm.classes.TqdmClass else: tqdm


var pbar: nimpy.PyObject
if pbarInp.isNone:
let missingValsCounts = collect: (for v in missing_value_index.values(): len(v))
let totalSteps = sum(missingValsCounts)
let TqdmClass = if tqdm.isNone: m.tqdm.classes.TqdmClass else: tqdm

pbar = TqdmClass!(desc: &"imputation.nearest_neighbour", total: totalSteps)
ranks: seq[PY_ObjectND] = @[]
newOrder = initTable[seq[int], seq[PY_ObjectND]]()
else:
pbar = pbarInp

var ranks: seq[PY_ObjectND] = @[]
var newOrder = initTable[seq[int], seq[PY_ObjectND]]()

for k in missingValueIndex.keys():
for kk in k:
Expand Down
12 changes: 6 additions & 6 deletions nimlite/libnimlite.nim
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,14 @@ when isLib:

# -------- FILTER -----------
import funcs/filter as ff
proc filter(table: nimpy.PyObject, expressions: seq[nimpy.PyObject], `type`: string, tqdm: nimpy.PyObject): (nimpy.PyObject, nimpy.PyObject) {.exportpy.} =
return ff.filter(table, expressions, `type`, tqdm)
proc filter(table: nimpy.PyObject, expressions: seq[nimpy.PyObject], `type`: string, tqdm: nimpy.PyObject, pbar: nimpy.PyObject): (nimpy.PyObject, nimpy.PyObject) {.exportpy.} =
return ff.filter(table, expressions, `type`, tqdm, pbar)

# -------- FILTER -----------

# -------- IMPUTATION -----------
import funcs/imputation
proc nearest_neighbour(T: nimpy.PyObject, sources: seq[string], missing: seq[nimpy.PyObject], targets: seq[string], tqdm: nimpy.PyObject): nimpy.PyObject {.exportpy.} =
proc nearest_neighbour(T: nimpy.PyObject, sources: seq[string], missing: seq[nimpy.PyObject], targets: seq[string], tqdm: nimpy.PyObject, pbar: nimpy.PyObject): nimpy.PyObject {.exportpy.} =
var miss: seq[PY_ObjectND] = @[]
for m in missing:
case modules().builtins.getTypeName(m):
Expand All @@ -150,14 +150,14 @@ when isLib:
miss.add(newPY_Object(m.to(bool)))
else:
raise newException(ValueError, "unrecognized type.")
return nearestNeighbourImputation(T, sources, miss, targets, tqdm)
return nearestNeighbourImputation(T, sources, miss, targets, tqdm, pbar)
# -------- IMPUTATION -----------

# -------- GROUPBY -----------
import funcs/groupby as gb
proc groupby(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, string)], tqdm: nimpy.PyObject): nimpy.PyObject {. exportpy .} =
proc groupby(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, string)], tqdm: nimpy.PyObject, pbar: nimpy.PyObject): nimpy.PyObject {. exportpy .} =
var funcs = collect:
for (cn, fn) in functions:
(cn, str2Accumulator(fn))
return gb.groupby(T, keys, funcs, tqdm)
return gb.groupby(T, keys, funcs, tqdm, pbar)
# -------- GROUPBY -----------
10 changes: 6 additions & 4 deletions nimlite/libnimlite.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
def text_reader_task(path, encoding, dia_delimiter, dia_quotechar, dia_escapechar, dia_doublequote, dia_quoting, dia_skipinitialspace, dia_skiptrailingspace, dia_lineterminator, dia_strict, guess_dtypes, tsk_pages, tsk_offset, tsk_count, import_fields):
def text_reader_task(path, encoding, dia_delimiter, dia_quotechar, dia_escapechar, dia_doublequote, dia_quoting, dia_skipinitialspace, dia_skiptrailingspace, dia_lineterminator, dia_strict, guess_dtypes, tsk_pages, tsk_offset, tsk_count, import_fields):
pass


Expand Down Expand Up @@ -30,11 +30,13 @@ def collect_text_reader_page_info_task(task_info, task):
pass


def nearest_neighbour(T, sources, missing, targets, tqdm):
def nearest_neighbour(T, sources, missing, targets, tqdm, pbar):
pass

def groupby(T, keys, functions, tqdm):

def groupby(T, keys, functions, tqdm, pbar):
pass

def filter(table, expressions, type, tqdm):

def filter(table, expressions, type, tqdm, pbar):
pass
4 changes: 2 additions & 2 deletions tablite/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ def groupby(self, keys, functions, tqdm=_tqdm, pbar=None):
https://github.com/root-11/tablite/blob/master/tests/test_groupby.py
"""
return _groupby(self, keys, functions, tqdm)
return _groupby(self, keys, functions, tqdm, pbar)

def pivot(self, rows, columns, functions, values_as_rows=True, tqdm=_tqdm, pbar=None):
"""
Expand Down Expand Up @@ -730,7 +730,7 @@ def column_select(self, cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskManager=
first table contains the rows that were successfully cast to desired types
second table contains rows that failed to cast + rejection reason
"""
return _column_select(self, cols, tqdm, TaskManager)
return _column_select(self, cols, tqdm=tqdm, TaskManager=TaskManager)

def join(self, other, left_keys, right_keys, left_columns=None, right_columns=None, kind="inner", merge_keys=False, tqdm=_tqdm, pbar=None):
"""
Expand Down
41 changes: 31 additions & 10 deletions tablite/nimlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,19 @@ def text_reader(
guess_datatypes: bool =False,
newline: str='\n', delimiter: str=',', text_qualifier: str='"',
quoting: ValidQuoting, strip_leading_and_tailing_whitespace: bool=True, skip_empty: ValidSkipEmpty = "NONE",
tqdm=_tqdm
tqdm=_tqdm,
pbar:_tqdm = None
) -> K:
assert isinstance(path, Path)
assert isinstance(pid, Path)
with tqdm(total=10, desc=f"importing file") as pbar:

if pbar is None:
pbar = tqdm(total=10, desc=f"importing file")
pbar_close = True
else:
pbar_close = False

try:
table = nl.text_reader(
pid=str(pid),
path=str(path),
Expand Down Expand Up @@ -183,10 +191,14 @@ def next_task(task: Task, page_info):
pbar.update(pbar.total - pbar.n)

table = T(columns=table_dict)
finally:
if pbar_close:
pbar.close()

return table



# Return `str_` enclosed in double quotes, with four characters rewritten as
# backslash escape sequences: `"` -> `\"`, `'` -> `\'`, newline -> `\n` and
# tab -> `\t` (each a literal backslash followed by the character shown).
# NOTE(review): a backslash already present in `str_` is passed through
# unchanged, so the output for a real newline and for the two characters
# backslash + "n" is identical -- confirm the consumer of this string does
# not need to tell them apart.
def wrap(str_: str) -> str:
return '"' + str_.replace('"', '\\"').replace("'", "\\'").replace("\n", "\\n").replace("\t", "\\t") + '"'

Expand All @@ -203,8 +215,14 @@ def _collect_cs_info(i: int, columns: dict, res_cols_pass: list, res_cols_fail:
return el, col_pass, col_fail


def column_select(table: K, cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskManager=TaskManager) -> Tuple[K, K]:
with tqdm(total=100, desc="column select", bar_format='{desc}: {percentage:.1f}%|{bar}{r_bar}') as pbar:
def column_select(table: K, cols: list[ColumnSelectorDict], tqdm=_tqdm, pbar:_tqdm = None, TaskManager=TaskManager) -> Tuple[K, K]:
if pbar is None:
pbar = tqdm(total=100, desc="column select", bar_format='{desc}: {percentage:.1f}%|{bar}{r_bar}')
pbar_close = True
else:
pbar_close = False

try:
T = type(table)
dir_pid = Config.workdir / Config.pid

Expand Down Expand Up @@ -297,18 +315,21 @@ def extend_table(table, columns):
pbar.update(pbar.total - pbar.n)

return tbl_pass, tbl_fail
finally:
if pbar_close:
pbar.close()

# Thin Python-side wrapper around `nl.read_page`.
# `path` may be a `str` or a `Path`; it is converted with `str()` before the
# call, so `nl.read_page` always receives a plain string. The result of
# `nl.read_page` is returned as-is (annotated here as `np.ndarray`).
def read_page(path: Union[str, Path]) -> np.ndarray:
return nl.read_page(str(path))

# Thin Python-side wrapper that hands `column` to `nl.repaginate`.
# The result of `nl.repaginate` is discarded; this function has no `return`
# statement and therefore always returns None.
def repaginate(column: Column):
nl.repaginate(column)

def nearest_neighbour(T: BaseTable, sources: Union[list[str], None], missing: Union[list, None], targets: Union[list[str], None], tqdm=_tqdm):
return nl.nearest_neighbour(T, sources, list(missing), targets, tqdm)
# Forward a nearest-neighbour imputation request to `nl.nearest_neighbour`.
# All arguments are passed positionally in the order
# (T, sources, list(missing), targets, tqdm, pbar).
# `tqdm` defaults to `_tqdm`; `pbar` defaults to None and is forwarded
# unchanged.
# NOTE(review): `missing` is annotated as possibly None, but `list(missing)`
# raises TypeError for None -- confirm callers never pass None here.
def nearest_neighbour(T: BaseTable, sources: Union[list[str], None], missing: Union[list, None], targets: Union[list[str], None], tqdm=_tqdm, pbar: _tqdm = None):
return nl.nearest_neighbour(T, sources, list(missing), targets, tqdm, pbar)

def groupby(T, keys, functions, tqdm=_tqdm):
return nl.groupby(T, keys, functions, tqdm)
# Forward a group-by request to `nl.groupby` and return its result.
# Arguments are passed positionally in the order
# (T, keys, functions, tqdm, pbar). `tqdm` defaults to `_tqdm`; `pbar`
# defaults to None and is forwarded unchanged.
def groupby(T, keys, functions, tqdm=_tqdm, pbar: _tqdm=None):
return nl.groupby(T, keys, functions, tqdm, pbar)

def filter(table: BaseTable, expressions: list[FilterDict], type: FilterType, tqdm = _tqdm):
return nl.filter(table, expressions, type, tqdm)
# Forward a filter request to `nl.filter` and return its result.
# Arguments are passed positionally in the order
# (table, expressions, type, tqdm, pbar). `tqdm` defaults to `_tqdm`; `pbar`
# defaults to None and is forwarded unchanged.
# Note: the name `filter` and the parameter `type` shadow the Python
# builtins of the same names inside this module / function.
def filter(table: BaseTable, expressions: list[FilterDict], type: FilterType, tqdm = _tqdm, pbar: _tqdm = None):
return nl.filter(table, expressions, type, tqdm, pbar)
11 changes: 9 additions & 2 deletions tablite/sortation.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ def sort_index(T, mapping, sort_mode="excel", tqdm=_tqdm, pbar=None):

rank = {i: tuple() for i in range(len(T))} # create index and empty tuple for sortation.

_pbar = tqdm(total=len(mapping.items()), desc="creating sort index") if pbar is None else pbar
if pbar is None:
pbar = tqdm(total=len(mapping.items()), desc="creating sort index")
pbar_close = True
else:
pbar_close = False

for key, reverse in mapping.items():
col = T[key][:]
Expand All @@ -48,7 +52,10 @@ def sort_index(T, mapping, sort_mode="excel", tqdm=_tqdm, pbar=None):
v2 = numpy_to_python(v)
rank[ix] += (ranks[v2],) # add tuple for each sortation level.

_pbar.update(1)
pbar.update(1)

if pbar_close:
pbar.close()

del col
del ranks
Expand Down
2 changes: 1 addition & 1 deletion tablite/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
major, minor, patch = 2023, 11, 4
major, minor, patch = 2023, 11, 5
__version_info__ = (major, minor, patch)
__version__ = ".".join(str(i) for i in __version_info__)

0 comments on commit 2d0ecae

Please sign in to comment.