diff --git a/.github/actions/buildnim/action.yml b/.github/actions/buildnim/action.yml new file mode 100644 index 00000000..dca22951 --- /dev/null +++ b/.github/actions/buildnim/action.yml @@ -0,0 +1,32 @@ +name: Build Nimlite +description: "Builds Nimlite" +runs: + using: "composite" + steps: + - uses: jiro4989/setup-nim-action@v1 + with: + nim-version: "2.0.0" + - name: Nimble Install dependencies + shell: bash + run: | + nimble -y refresh + nimble -y install nimpy argparse + - name: Set Environment Variables + uses: allenevans/set-env@v2.0.0 + with: + NIMLITE_DIR: 'nimlite' + NIM_PACKAGE_NAME: 'libnimlite' + TABLITE_PACKAGE_NAME: 'tablite' + NIMC_FLAGS: '--gc:refc --app:lib --threads:on -d:release -d:danger' + - name: Compile Debug & import (Unix) + shell: bash + if: runner.os == 'Linux' || runner.os == 'macOS' + run: | + # compile the libnimlite + nim c ${{ env.NIMC_FLAGS }} --out:${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.so ${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.nim + - name: Compile Debug & import (Windows) + shell: bash + if: runner.os == 'Windows' + run: | + # compile the libnimlite + nim c ${{ env.NIMC_FLAGS }} --tlsEmulation:off --passL:-static --out:${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.pyd ${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.nim \ No newline at end of file diff --git a/.github/actions/validatenim/action.yaml b/.github/actions/validatenim/action.yaml new file mode 100644 index 00000000..d2efae0f --- /dev/null +++ b/.github/actions/validatenim/action.yaml @@ -0,0 +1,17 @@ +name: Validate Nimlite +description: "Validates Nimlite build" +runs: + using: "composite" + steps: + - name: Compile Debug & import (Unix) + shell: bash + if: runner.os == 'Linux' || runner.os == 'macOS' + run: | + # check if compiled successfully + python -c "print(__import__('${{ env.NIMLITE_DIR }}.${{ env.NIM_PACKAGE_NAME }}').__file__)" + - name: Compile Debug & import (Windows) + shell: bash + if: runner.os == 'Windows' + run: | + # check if compiled successfully + python -c "print(__import__('${{ env.NIMLITE_DIR }}.${{ env.NIM_PACKAGE_NAME }}').__file__)" \ No newline at end of file diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index e5569c68..aa6d2605 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -13,21 +13,6 @@ jobs: PYTHON: '3.10' steps: - uses: actions/checkout@v3 - - uses: jiro4989/setup-nim-action@v1 - with: - nim-version: "2.0.0" - - name: Nimble Install dependencies - run: | - nimble -y refresh - nimble -y install nimpy argparse - - name: Set Environment Variables - uses: allenevans/set-env@v2.0.0 - with: - NIMLITE_DIR: 'tablite/_nimlite' - NIM_PACKAGE_NAME: 'nimlite' - TABLITE_PACKAGE_NAME: 'tablite' - NIMC_FLAGS: '--app:lib --threads:on -d:release -d:danger' - - name: Setup Python uses: actions/setup-python@v3 with: @@ -38,12 +23,7 @@ jobs: python -m pip install pytest python -m pip install pytest-cov python -m pip install -r requirements.txt - - name: Compile Debug & import (Unix) - run: | - # compile the nimlite - nim c ${{ env.NIMC_FLAGS }} --out:${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.so ${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.nim - # check if compiled successfully - python -c "print(__import__('${{ env.TABLITE_PACKAGE_NAME }}.${{ env.NIM_PACKAGE_NAME }}').__file__)" + - uses: ./.github/actions/buildnim - name: Generate Report run: | pytest --cov=tablite tests/ --cov-report=xml diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 474d6bb1..908e79fe 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -48,14 +48,6 @@ jobs: cache-dependency-path: | requirements.txt requirements_mkdocs.txt - - name: Install Nim Lang - uses: jiro4989/setup-nim-action@v1 - with: - nim-version: "2.0.0" - - name: Nimble Install dependencies - run: | - nimble -y refresh - nimble -y install nimpy argparse - name: Install PIPs run: | python -m pip install -r requirements.txt @@ -67,16 +59,8 @@ jobs: - name: Set Env for tablite Import uses: allenevans/set-env@v3.0.0 with: - USE_NIMPORTER: 'false' PYTHONPATH: '.:tablite' - NIMLITE_DIR: 'tablite/_nimlite' - NIM_PACKAGE_NAME: 'nimlite' - TABLITE_PACKAGE_NAME: 'tablite' - NIMC_FLAGS: '--app:lib --threads:on -d:release -d:danger' - - name: Compile NimLite - run: | - nim c ${{ env.NIMC_FLAGS }} --out:${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.so ${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.nim - python -c "print(__import__('${{ env.TABLITE_PACKAGE_NAME }}.${{ env.NIM_PACKAGE_NAME }}').__file__)" + - uses: ./.github/actions/buildnim - name: Configure Git user run: | git config --local user.email "github-actions[bot]@users.noreply.github.com" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b4f5503e..8e5ef960 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -11,30 +11,7 @@ jobs: os: [ubuntu-latest, windows-latest] steps: - uses: actions/checkout@v3 - - uses: jiro4989/setup-nim-action@v1 - with: - nim-version: "2.0.0" - - name: Nimble Install dependencies - run: | - nimble -y refresh - nimble -y install nimpy argparse - - name: Set Environment Variables - uses: allenevans/set-env@v2.0.0 - with: - NIMLITE_DIR: 'tablite/_nimlite' - NIM_PACKAGE_NAME: 'nimlite' - TABLITE_PACKAGE_NAME: 'tablite' - NIMC_FLAGS: '--app:lib --threads:on -d:release -d:danger' - - name: Compile Debug & import (Unix) - if: runner.os == 'Linux' || runner.os == 'macOS' - run: | - # compile the nimlite - nim c ${{ env.NIMC_FLAGS }} --out:${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.so ${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.nim - - name: Compile Debug & import (Windows) - if: runner.os == 'Windows' - run: | - # compile the nimlite - nim c ${{ env.NIMC_FLAGS }} --tlsEmulation:off --passL:-static --out:${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.pyd ${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.nim + - uses: ./.github/actions/buildnim - name: Cache Unix if: runner.os == 'Linux' || runner.os == 'macOS' uses: actions/upload-artifact@v3 @@ -60,8 +37,8 @@ jobs: - name: Set Environment Variables uses: allenevans/set-env@v2.0.0 with: - NIMLITE_DIR: 'tablite/_nimlite' - NIM_PACKAGE_NAME: 'nimlite' + NIMLITE_DIR: 'nimlite' + NIM_PACKAGE_NAME: 'libnimlite' - name: Cache Restore Unix uses: actions/download-artifact@v3 with: @@ -76,7 +53,7 @@ jobs: ${{ env.NIMLITE_DIR }} - name: install python run: | - python3 -m pip install --upgrade build + python3 -m pip install --upgrade build - name: build wheel run: | python3 -m build --wheel diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index e1cc5fd5..8905c135 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -24,39 +24,14 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - uses: jiro4989/setup-nim-action@v1 - with: - nim-version: "2.0.0" - - name: Nimble Install dependencies - run: | - nimble -y refresh - nimble -y install nimpy argparse - - name: Set Environment Variables - uses: allenevans/set-env@v2.0.0 - with: - NIMLITE_DIR: 'tablite/_nimlite' - NIM_PACKAGE_NAME: 'nimlite' - TABLITE_PACKAGE_NAME: 'tablite' - NIMC_FLAGS: '--app:lib --threads:on -d:release -d:danger' - name: Install dependencies + shell: bash run: | python -m pip install --upgrade pip python -m pip install flake8 pytest python -m pip install -r requirements.txt - - name: Compile Debug & import (Unix) - if: runner.os == 'Linux' || runner.os == 'macOS' - run: | - # compile the nimlite - nim c ${{ env.NIMC_FLAGS }} --out:${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.so ${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.nim - # check if compiled successfully - python -c "print(__import__('${{ env.TABLITE_PACKAGE_NAME }}.${{ env.NIM_PACKAGE_NAME }}').__file__)" - - name: Compile Debug & import (Windows) - if: runner.os == 'Windows' - run: | - # compile the nimlite - nim c ${{ env.NIMC_FLAGS }} --tlsEmulation:off --passL:-static --out:${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.pyd ${{ env.NIMLITE_DIR }}/${{ env.NIM_PACKAGE_NAME }}.nim - # check if compiled successfully - python -c "print(__import__('${{ env.TABLITE_PACKAGE_NAME }}.${{ env.NIM_PACKAGE_NAME }}').__file__)" + - uses: ./.github/actions/buildnim + - uses: ./.github/actions/validatenim - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names diff --git a/.gitignore b/.gitignore index 9f60013d..849483e1 100644 --- a/.gitignore +++ b/.gitignore @@ -9,14 +9,8 @@ *.pyc tests/new.zip __pycache__ -*/_nimlite/* -!*/_nimlite/**.nim -!*/_nimlite/funcs/ -!*/_nimlite/includes/ -!*/_nimlite/includes/**.nim -!*/_nimlite/funcs/column_selector/**.nim -!*/_nimlite/**.py -!*/_nimlite/**.pyi +/nimlite/*.so +/nimlite/nimtest # Notebook checkpoints .ipynb_checkpoints/ diff --git a/build_nim.sh b/build_nim.sh index 5599b4c8..39d55941 100755 --- a/build_nim.sh +++ b/build_nim.sh @@ -19,9 +19,9 @@ fi if [ $is_release = true ] then - nim c --app:lib -d:release -d:danger --out:tablite/_nimlite/nimlite.so tablite/_nimlite/nimlite.nim + nim c --app:lib --gc:refc -d:release -d:danger --out:nimlite/libnimlite.so nimlite/libnimlite.nim echo "Built release." else - nim c --app:lib -d:debug --out:tablite/_nimlite/nimlite.so tablite/_nimlite/nimlite.nim + nim c --app:lib --gc:refc -d:debug --out:nimlite/libnimlite.so nimlite/libnimlite.nim echo "Built debug." fi \ No newline at end of file diff --git a/nimlite.nim b/nimlite.nim new file mode 100644 index 00000000..2ca2fea9 --- /dev/null +++ b/nimlite.nim @@ -0,0 +1 @@ +# This is a stub file for other modules to import \ No newline at end of file diff --git a/nimlite.nimble b/nimlite.nimble new file mode 100644 index 00000000..f53b8d4e --- /dev/null +++ b/nimlite.nimble @@ -0,0 +1,13 @@ +# Package + +version = "0.1.0" +author = "Ratchet" +description = "Utilities for tablite to work with nim" +license = "MIT" +# srcDir = "nimlite" + + +# Dependencies + +requires "nim >= 2.0.0" +requires "nimpy >= 0.2.0" \ No newline at end of file diff --git a/tablite/_nimlite/__init__.py b/nimlite/__init__.py similarity index 100% rename from tablite/_nimlite/__init__.py rename to nimlite/__init__.py diff --git a/tablite/_nimlite/dateutils.nim b/nimlite/dateutils.nim similarity index 96% rename from tablite/_nimlite/dateutils.nim rename to nimlite/dateutils.nim index 2db18254..a28509b7 100644 --- a/tablite/_nimlite/dateutils.nim +++ b/nimlite/dateutils.nim @@ -1,5 +1,6 @@ import std/times from utils import divmod +from std/hashes import Hash, hash from std/math import splitDecimal const DAYS_PER_MONTH_TABLE* = [ @@ -118,7 +119,7 @@ proc date2NimDateTime*(year: int, month: int, day: int): DateTime {.inline.} = return dateTime(year, Month(month), MonthdayRange(day), zone=utc()) proc datetime2NimDatetime*(year: int, month: int, day: int, hour: int, minute: int, second: int, microsecond: int): DateTime {.inline.} = - return dateTime(year, Month(month), MonthdayRange(day), hour, second, microsecond * 1000, zone=utc()) + return dateTime(year, Month(month), MonthdayRange(day), hour, minute, second, microsecond * 1000, zone=utc()) proc time2NimDuration*(hour: int, minute: int, second: int, microsecond: int): Duration {.inline.} = return initDuration(hours=hour, minutes=minute, seconds=second, microseconds=microsecond) @@ -134,4 +135,6 @@ proc duration2Seconds*(dur: Duration): float {.inline.} = dur.inMicroseconds / 1 proc duration2Date*(dur: Duration): DateTime {.inline.} = dateTime(1970, mJan, 1, zone=utc()) + dur proc seconds2Date*(seconds: float): DateTime {.inline.} = duration2Date(seconds2Duration(seconds)) -proc datetime2Date*(self: DateTime): DateTime {.inline.} = dateTime(self.year, self.month, self.monthday, zone=utc()) \ No newline at end of file +proc datetime2Date*(self: DateTime): DateTime {.inline.} = dateTime(self.year, self.month, self.monthday, zone=utc()) + +proc hash*(self: DateTime): Hash = hash(self.toTime.toUnixFloat) \ No newline at end of file diff --git a/tablite/_nimlite/funcs/column_selector.nim b/nimlite/funcs/column_selector.nim similarity index 64% rename from tablite/_nimlite/funcs/column_selector.nim rename to nimlite/funcs/column_selector.nim index 548fad1a..1579c274 100644 --- a/tablite/_nimlite/funcs/column_selector.nim +++ b/nimlite/funcs/column_selector.nim @@ -11,15 +11,16 @@ export fromPyObjToDesiredInfos when isMainModule and appType != "lib": import std/[os, tables, sugar, sets, sequtils, paths, macros] - import nimpy as nimpy + import nimpy from ../nimpyext import `!` import std/options as opt - import ../pymodules as pymodules + import ../pymodules import ../numpy import typetraits proc columnSelect(table: nimpy.PyObject, cols: nimpy.PyObject, tqdm: nimpy.PyObject, dir_pid: Path, TaskManager: nimpy.PyObject): (nimpy.PyObject, nimpy.PyObject) = # this is nim-only implementation, the library build doesn't need it because we need TaskManager to be used for slices + let TableClass = modules().getType(table) var pbar = tqdm!(total: 100, desc: "column select") var (columns, page_count, is_correct_type, desired_column_map, passed_column_data, failed_column_data, res_cols_pass, res_cols_fail, column_names, reject_reason_name) = collectColumnSelectInfo(table, cols, string dir_pid, pbar) @@ -32,21 +33,21 @@ when isMainModule and appType != "lib": for desired_name in failed_column_data: {desired_name: newSeq[nimpy.PyObject]()} - let tbl_pass = tablite().Table(columns = tbl_pass_columns) - let tbl_fail = tablite().Table(columns = tbl_fail_columns) + let tbl_pass = TableClass!(columns = tbl_pass_columns) + let tbl_fail = TableClass!(columns = tbl_fail_columns) return (tbl_pass, tbl_fail) template ordered2PyDict(keys: seq[string]): nimpy.PyObject = - let dict = pymodules.builtins().dict() + let dict = modules().builtins.classes.DictClass!() for k in keys: dict[k] = newSeq[nimpy.PyObject]() dict - var tbl_pass = tablite().Table(columns = passed_column_data.ordered2PyDict()) - var tbl_fail = tablite().Table(columns = failed_column_data.ordered2PyDict()) + var tbl_pass = TableClass!(columns = passed_column_data.ordered2PyDict()) + var tbl_fail = TableClass!(columns = failed_column_data.ordered2PyDict()) var task_list_inp = collect: for i in 0.. 0 and builtins().len(nameOutStripped).to(int) == 0: + if modules().getLen(rename) > 0 and modules().getLen(nameOutStripped) == 0: raise newException(ValueError, "Validating 'column_select' failed, '" & nameInp & "' cannot be whitespace.") rename = nameOutStripped @@ -54,12 +54,12 @@ proc collectColumnSelectInfo*(table: nimpy.PyObject, cols: nimpy.PyObject, dirPi else: collisions[nameOut] = 1 - let desiredType = c.get("type", builtins().None) + let desiredType = c.get("type", nil) desiredColumnMap[nameOut] = DesiredColumnInfo( # collect the information about the column, fill in any defaults originalName: nameInp, `type`: if desiredType.isNone(): K_NONETYPE else: toPageType(desiredType.to(string)), - allowEmpty: c.get("allow_empty", builtins().False).to(bool) + allowEmpty: c.get("allow_empty", false).to(bool) ) discard pbar.update(3) @@ -77,7 +77,7 @@ proc collectColumnSelectInfo*(table: nimpy.PyObject, cols: nimpy.PyObject, dirPi let pyColPages = table[colName].pages let pages = collect: for pyPage in pyColPages: - builtins().str(pyPage.path.absolute()).to(string) + modules().toStr(pyPage.path.absolute()) failedColumnData.add(colName) @@ -112,7 +112,7 @@ proc collectColumnSelectInfo*(table: nimpy.PyObject, cols: nimpy.PyObject, dirPi var isCorrectType = initTable[string, bool]() - proc genpage(dirpid: string): ColSliceInfo {.inline.} = (dir_pid, tabliteBase().SimplePage.next_id(dir_pid).to(string)) + proc genpage(dirpid: string): ColSliceInfo {.inline.} = (dir_pid, modules().tablite.modules.base.classes.SimplePageClass.next_id(dir_pid).to(string)) discard pbar.update(5) discard pbar.display() diff --git a/tablite/_nimlite/funcs/column_selector/infos.nim b/nimlite/funcs/column_selector/infos.nim similarity index 92% rename from tablite/_nimlite/funcs/column_selector/infos.nim rename to nimlite/funcs/column_selector/infos.nim index bc35180a..c99d6611 100644 --- a/tablite/_nimlite/funcs/column_selector/infos.nim +++ b/nimlite/funcs/column_selector/infos.nim @@ -1,7 +1,8 @@ import std/[tables] import nimpy as nimpy from std/sugar import collect -from ../../pymodules import builtins +import ../../pymodules +import ../../nimpyext from ../../pytypes import KindObjectND, str2ObjKind type ColSliceInfo* = (string, string) @@ -23,7 +24,7 @@ proc toPyObj*(infos: var OrderedTable[string, DesiredColumnInfo]): nimpy.PyObjec for (name, info) in infos.pairs: (name, (info.originalName, $info.`type`, info.allowEmpty)) - let res = builtins().dict(elems) + let res = modules().builtins.classes.DictClass!(elems) return res diff --git a/tablite/_nimlite/funcs/column_selector/makepage.nim b/nimlite/funcs/column_selector/makepage.nim similarity index 96% rename from tablite/_nimlite/funcs/column_selector/makepage.nim rename to nimlite/funcs/column_selector/makepage.nim index 26b05939..9e0882c1 100644 --- a/tablite/_nimlite/funcs/column_selector/makepage.nim +++ b/nimlite/funcs/column_selector/makepage.nim @@ -172,10 +172,10 @@ template makePage*[T: typed](dt: typedesc[T], page: BaseNDArray, mask: var seq[M continue buf[i * longest].addr.copyMem(addr str[0], str.len * sizeof(Rune)) - let res = T(shape: @[strCount], buf: buf, size: longest, kind: T.pageKind) + let res = T(shape: @[strCount], buf: buf, size: longest) elif T is ObjectNDArray: - let res = T(shape: @[buf.len], dtypes: dtypes, buf: buf, kind: T.pageKind) + let res = T(shape: @[buf.len], dtypes: dtypes, buf: buf) else: - let res = T(shape: @[buf.len], buf: buf, kind: T.pageKind) + let res = T(shape: @[buf.len], buf: buf) BaseNDArray res diff --git a/tablite/_nimlite/funcs/column_selector/mask.nim b/nimlite/funcs/column_selector/mask.nim similarity index 100% rename from tablite/_nimlite/funcs/column_selector/mask.nim rename to nimlite/funcs/column_selector/mask.nim diff --git a/tablite/_nimlite/funcs/column_selector/pagecasters.nim b/nimlite/funcs/column_selector/pagecasters.nim similarity index 99% rename from tablite/_nimlite/funcs/column_selector/pagecasters.nim rename to nimlite/funcs/column_selector/pagecasters.nim index daba08bd..2030c7f1 100644 --- a/tablite/_nimlite/funcs/column_selector/pagecasters.nim +++ b/nimlite/funcs/column_selector/pagecasters.nim @@ -6,7 +6,6 @@ from ../../pytypes import PY_ObjectND, KindObjectND import casters from mask import Mask from makepage import makePage, canBeNone -from ../../utils import corrupted, implement macro mkPageCaster(nBaseType: typedesc, overrides: untyped) = expectKind(nBaseType, nnkSym) diff --git a/tablite/_nimlite/funcs/column_selector/sliceconv.nim b/nimlite/funcs/column_selector/sliceconv.nim similarity index 99% rename from tablite/_nimlite/funcs/column_selector/sliceconv.nim rename to nimlite/funcs/column_selector/sliceconv.nim index b237f268..0890b740 100644 --- a/tablite/_nimlite/funcs/column_selector/sliceconv.nim +++ b/nimlite/funcs/column_selector/sliceconv.nim @@ -1,6 +1,5 @@ import std/[os, tables, paths, enumerate, sequtils] from std/sugar import collect -from std/strutils import parseInt import nimpy as nimpy import ../../numpy import ../../pytypes diff --git a/tablite/_nimlite/funcs/text_reader.nim b/nimlite/funcs/text_reader.nim similarity index 100% rename from tablite/_nimlite/funcs/text_reader.nim rename to nimlite/funcs/text_reader.nim diff --git a/tablite/_nimlite/funcs/text_reader/cli.nim b/nimlite/funcs/text_reader/cli.nim similarity index 100% rename from tablite/_nimlite/funcs/text_reader/cli.nim rename to nimlite/funcs/text_reader/cli.nim diff --git a/tablite/_nimlite/funcs/text_reader/csvparse.nim b/nimlite/funcs/text_reader/csvparse.nim similarity index 82% rename from tablite/_nimlite/funcs/text_reader/csvparse.nim rename to nimlite/funcs/text_reader/csvparse.nim index a27a90b7..6c6a6953 100644 --- a/tablite/_nimlite/funcs/text_reader/csvparse.nim +++ b/nimlite/funcs/text_reader/csvparse.nim @@ -3,7 +3,7 @@ import encfile # const NOT_SET = uint32.high const EOL = uint32.high - 1 -const field_limit: uint = 128 * 1024 +const fieldLimit: uint = 128 * 1024 type Quoting* {.pure.} = enum QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, @@ -27,16 +27,16 @@ type Dialect* = object type ReaderObj* = object - numeric_field: bool - line_num: uint + numericField: bool + lineNum: uint dialect: Dialect - field_len: uint - field_size: uint + fieldLen: uint + fieldSize: uint field: seq[char] fields: seq[string] - field_count: uint + fieldCount: uint proc newDialect*(delimiter: char = ',', quotechar: char = '"', escapechar: char = '\\', doublequote: bool = true, quoting: Quoting = QUOTE_MINIMAL, skipinitialspace: bool = false, skiptrailingspace: bool = false, lineterminator: char = '\n'): Dialect = Dialect(delimiter: delimiter, quotechar: quotechar, escapechar: escapechar, doublequote: doublequote, quoting: quoting, skipinitialspace: skipinitialspace, skiptrailingspace: skiptrailingspace, lineterminator: lineterminator) @@ -45,37 +45,37 @@ proc newReaderObj*(dialect: Dialect): ReaderObj = ReaderObj(dialect: dialect, fields: newSeq[string](1024)) proc parseGrowBuff(self: var ReaderObj): bool = - let field_size_new: uint = (if self.field_size > 0: 2u * self.field_size else: 4096u) + let field_size_new: uint = (if self.fieldSize > 0: 2u * self.fieldSize else: 4096u) self.field.setLen(field_size_new) - self.field_size = field_size_new + self.fieldSize = field_size_new return true proc parseAddChar(self: var ReaderObj, state: var ParserState, c: char): bool = - if self.field_len >= field_limit: + if self.fieldLen >= fieldLimit: return false - if unlikely(self.field_len == self.field_size and not self.parseGrowBuff()): + if unlikely(self.fieldLen == self.fieldSize and not self.parseGrowBuff()): return false - self.field[self.field_len] = c - inc self.field_len + self.field[self.fieldLen] = c + inc self.fieldLen return true proc parseSaveField(self: var ReaderObj, dia: Dialect): bool = - if self.numeric_field: - self.numeric_field = false + if self.numericField: + self.numericField = false - raise newException(Exception, "not yet implemented: parseSaveField numeric_field") + raise newException(Exception, "not yet implemented: parseSaveField numericField") - var field = newString(self.field_len) + var field = newString(self.fieldLen) - if likely(self.field_len > 0): - copyMem(field[0].addr, self.field[0].addr, self.field_len) + if likely(self.fieldLen > 0): + copyMem(field[0].addr, self.field[0].addr, self.fieldLen) - if unlikely(self.field_count + 1 >= (uint self.field.high)): + if unlikely(self.fieldCount + 1 >= (uint self.field.high)): self.field.setLen(self.field.len() * 2) if dia.skiptrailingspace: @@ -84,11 +84,11 @@ proc parseSaveField(self: var ReaderObj, dia: Dialect): bool = if dia.quoting != Quoting.QUOTE_NONE: field = field.multiReplace(("\n", "\\n"), ("\t", "\\t")) - self.fields[self.field_count] = field + self.fields[self.fieldCount] = field - inc self.field_count + inc self.fieldCount - self.field_len = 0 + self.fieldLen = 0 return true @@ -124,7 +124,7 @@ proc parseProcessChar(self: var ReaderObj, state: var ParserState, cc: uint32): return false else: if dia.quoting == QUOTE_NONNUMERIC: - self.numeric_field = true + self.numericField = true if unlikely(not self.parseAddChar(state, c)): return false state = IN_FIELD @@ -214,17 +214,17 @@ iterator parseCSV*(self: var ReaderObj, fh: BaseEncodedFile): (uint, ptr seq[str let dia = self.dialect var state: ParserState = START_RECORD - var line_num: uint = 0 + var lineNum: uint = 0 var line = newStringOfCap(80) var pos: uint var linelen: uint; - self.field_len = 0 - self.field_count = 0 + self.fieldLen = 0 + self.fieldCount = 0 while likely(not fh.endOfFile): if not fh.readLine(line): - if self.field_len != 0 and state == IN_QUOTED_FIELD: + if self.fieldLen != 0 and state == IN_QUOTED_FIELD: if dia.strict: raise newException(Exception, "unexpected end of data") elif self.parseSaveField(dia): @@ -246,21 +246,21 @@ iterator parseCSV*(self: var ReaderObj, fh: BaseEncodedFile): (uint, ptr seq[str raise newException(Exception, "illegal") if state == START_RECORD: - yield (line_num, addr self.fields, self.field_count) + yield (lineNum, addr self.fields, self.fieldCount) - self.field_count = 0 + self.fieldCount = 0 - inc line_num + inc lineNum -proc readColumns*(path: string, encoding: FileEncoding, dialect: Dialect, row_offset: uint): seq[string] = +proc readColumns*(path: string, encoding: FileEncoding, dialect: Dialect, rowOffset: uint): seq[string] = let fh = newFile(path, encoding) var obj = newReaderObj(dialect) try: - fh.setFilePos(int64 row_offset, fspSet) + fh.setFilePos(int64 rowOffset, fspSet) - for (row_idx, fields, field_count) in obj.parseCSV(fh): - return fields[0..field_count-1] + for (idxRow, fields, fieldCount) in obj.parseCSV(fh): + return fields[0..fieldCount-1] finally: fh.close() @@ -284,32 +284,32 @@ proc str2quoting*(quoting: string): Quoting {.inline.} = else: raise newException(Exception, "invalid quoting: " & quoting) proc findNewlinesNoQualifier*(fh: BaseEncodedFile): (seq[uint], uint) = - var newline_offsets = newSeq[uint](1) - var total_lines: uint = 0 + var newlineOffsets = newSeq[uint](1) + var totalLines: uint = 0 var str: string - newline_offsets[0] = fh.getFilePos() + newlineOffsets[0] = fh.getFilePos() while likely(fh.readLine(str)): - inc total_lines + inc totalLines - newline_offsets.add(fh.getFilePos()) + newlineOffsets.add(fh.getFilePos()) - return (newline_offsets, total_lines) + return (newlineOffsets, totalLines) proc findNewlinesQualifier*(fh: BaseEncodedFile, dia: Dialect): (seq[uint], uint) = - var newline_offsets = newSeq[uint](1) - var total_lines: uint = 0 + var newlineOffsets = newSeq[uint](1) + var totalLines: uint = 0 var obj = newReaderObj(dia) - newline_offsets[0] = fh.getFilePos() + newlineOffsets[0] = fh.getFilePos() - for (row_idx, fields, field_count) in obj.parseCSV(fh): - inc total_lines + for (idxRow, fields, fieldCount) in obj.parseCSV(fh): + inc totalLines - newline_offsets.add(fh.getFilePos()) + newlineOffsets.add(fh.getFilePos()) - return (newline_offsets, total_lines) + return (newlineOffsets, totalLines) proc findNewlines*(fh: BaseEncodedFile, dia: Dialect): (seq[uint], uint) {.inline.} = if dia.quoting == Quoting.QUOTE_NONE: diff --git a/tablite/_nimlite/funcs/text_reader/encfile.nim b/nimlite/funcs/text_reader/encfile.nim similarity index 81% rename from tablite/_nimlite/funcs/text_reader/encfile.nim rename to nimlite/funcs/text_reader/encfile.nim index d198fce6..524008cb 100644 --- a/tablite/_nimlite/funcs/text_reader/encfile.nim +++ b/nimlite/funcs/text_reader/encfile.nim @@ -43,49 +43,49 @@ proc readLine(f: FileConvertable, str: var string): bool = return res proc readLine(f: FileUTF16, str: var string): bool = - var ch_arr: array[2048, uint8] # must be divisible by 2 + var chArr: array[2048, uint8] # must be divisible by 2 var ch: uint16 - let newline_char: uint16 = 0x000a - var wchar_seq = newSeqOfCap[uint16](80) + const nlChar: uint16 = 0x000a + var wchSeq = newSeqOfCap[uint16](80) var file_offset = f.fh.getFilePos() - var elements = f.fh.readBuffer(addr ch_arr, 2) - var el_iter = 0 + var elements = f.fh.readBuffer(addr chArr, 2) + var itElem = 0 if (elements mod 2) != 0: raise newException(Exception, "malformed file") - while likely(el_iter < elements): + while likely(itElem < elements): if f.endianness == bigEndian: # big if true - (ch_arr[el_iter], ch_arr[el_iter+1]) = (ch_arr[el_iter+1], ch_arr[el_iter]) + (chArr[itElem], chArr[itElem+1]) = (chArr[itElem+1], chArr[itElem]) - ch = cast[uint16](ch_arr) + ch = cast[uint16](chArr) - el_iter = el_iter + 2 + itElem = itElem + 2 - if newline_char == ch: - if wchar_seq.len == 0: + if nlChar == ch: + if wchSeq.len == 0: str = "" # empty line return true break - wchar_seq.add(ch) + wchSeq.add(ch) - if el_iter >= elements: + if itElem >= elements: file_offset = f.fh.getFilePos() - elements = f.fh.readBuffer(addr ch_arr, 2) - el_iter = 0 + elements = f.fh.readBuffer(addr chArr, 2) + itElem = 0 if (elements mod 2) != 0: raise newException(Exception, "malformed file") - f.fh.setFilePos(file_offset + el_iter, fspSet) + f.fh.setFilePos(file_offset + itElem, fspSet) - var wstr = newWideCString(wchar_seq.len) + var wstr = newWideCString(wchSeq.len) - if wchar_seq.len > 0: - copyMem(wstr[0].addr, wchar_seq[0].addr, wchar_seq.len * 2) + if wchSeq.len > 0: + copyMem(wstr[0].addr, wchSeq[0].addr, wchSeq.len * 2) else: return false @@ -109,12 +109,12 @@ proc newFileUTF16(filename: string): FileUTF16 = if fh.getFileSize() mod 2 != 0: raise newException(Exception, "invalid size") - var bom_bytes: array[2, uint16] + var bomBytes: array[2, uint16] - if fh.readBuffer(addr bom_bytes, bom_bytes.len) != bom_bytes.len: + if fh.readBuffer(addr bomBytes, bomBytes.len) != bomBytes.len: raise newException(Exception, "cannot find bom") - var bom = cast[uint16](bom_bytes) + var bom = cast[uint16](bomBytes) var endianness: Endianness; if bom == 0xfeff: @@ -130,10 +130,10 @@ proc newFileUTF8(filename: string): FileUTF8 = let fh = open(filename, fmRead) var bom: array[3, uint8] - var bom_bytes = fh.readBytes(bom, 0, 3) + var bomBytes = fh.readBytes(bom, 0, 3) # detect bom - if bom_bytes != 3: + if bomBytes != 3: fh.setFilePos(0, FileSeekPos.fspSet) elif bom[0] != 0xEF or bom[1] != 0xBB or bom[2] != 0xBF: fh.setFilePos(0, FileSeekPos.fspSet) diff --git a/tablite/_nimlite/funcs/text_reader/paging.nim b/nimlite/funcs/text_reader/paging.nim similarity index 77% rename from tablite/_nimlite/funcs/text_reader/paging.nim rename to nimlite/funcs/text_reader/paging.nim index 3071cbb3..00080cfe 100644 --- a/tablite/_nimlite/funcs/text_reader/paging.nim +++ b/nimlite/funcs/text_reader/paging.nim @@ -14,37 +14,35 @@ type PageType = enum PG_DATETIME PG_DATE_SHORT -var none_str = "" +var noneStr = "" proc collectPageInfo*( obj: var ReaderObj, fh: var BaseEncodedFile, - guess_dtypes: bool, n_pages: int, row_count: int, - import_fields: var seq[uint] + guessDtypes: bool, nPages: int, rowCount: int, + importFields: var seq[uint] ): (uint, seq[uint], seq[Rank]) = var ranks: seq[Rank] - var longest_str = collect(newSeqOfCap(n_pages)): - for _ in 0..= 0 and row_idx >= (uint row_count): + for (idxRow, fields, fieldCount) in obj.parseCSV(fh): + if rowCount >= 0 and idxRow >= (uint rowCount): break var fidx = -1 - for idx in 0..= n_pages: + if fidx < 0 or fidx >= nPages: raise newException(Exception, "what") - if not guess_dtypes: - longest_str[fidx] = max(uint field.runeLen, longest_str[fidx]) + if not guessDtypes: + longestStr[fidx] = max(uint field.runeLen, longestStr[fidx]) else: let rank = addr ranks[fidx] let dt = rank[].updateRank(field.addr) if dt == DataTypes.DT_STRING: - longest_str[fidx] = max(uint field.runeLen, longest_str[fidx]) + longestStr[fidx] = max(uint field.runeLen, longestStr[fidx]) - for idx in (fidx+1)..= 0 and row_idx >= (uint row_count): + for (idxRow, fields, fieldCount) in obj.parseCSV(fh): + if rowCount >= 0 and idxRow >= (uint rowCount): break var fidx = -1 - for idx in 0.. 1: ")" else: ", )") & " buf: [" & buf & "])" +template simplePrint(T: typed, name: string): string = self.fullPrint(name, self.buf.join(", ")) + +method `$`(self: BaseNDArray): string {.base.} = implement("BaseNDArray.`$` must be implemented by inheriting class: " & $self.kind) +method `$`*(self: BooleanNDArray): string = self.simplePrint("boolean") +method `$`*(self: Int8NDArray): string = self.simplePrint("int8") +method `$`*(self: Int16NDArray): string = self.simplePrint("int16") +method `$`*(self: Int32NDArray): string = self.simplePrint("int32") +method `$`*(self: Int64NDArray): string = self.simplePrint("int64") +method `$`*(self: Float32NDArray): string = self.simplePrint("float32") +method `$`*(self: Float64NDArray): string = self.simplePrint("float64") +method `$`*(self: UnicodeNDArray): string = self.fullPrint("string<" & $self.size & ">", toSeq(self.pgIter).join(", ")) +method `$`*(self: DateNDArray): string = + let v = collect: + for v in self.buf: + v.format(fmtDate) + self.fullPrint("date", v.join(", ")) +method `$`*(self: DateTimeNDArray): string = + let v = collect: + for v in self.buf: + v.format(fmtDateTime) + self.fullPrint("datetime", v.join(", ")) +method `$`*(self: ObjectNDArray): string = + let v = collect: + for v in self.buf: + v.toRepr + self.fullPrint("object", v.join(", ")) + + proc `[]`(self: UnicodeNDArray, index: int): string = var chars = newSeqOfCap[Rune](self.size) let offset = self.size * index @@ -195,7 +237,7 @@ proc `[]`(self: UnicodeNDArray, slice: seq[int] | openArray[int]): UnicodeNDArra for (i, j) in enumerate(slice): buf[i * self.size].addr.copyMem(addr self.buf[j * self.size], self.size * sizeof(Rune)) - return UnicodeNDArray(shape: @[slice.len], size: self.size, buf: buf, kind: K_STRING) + return UnicodeNDArray(shape: @[slice.len], size: self.size, buf: buf) proc `[]`(self: ObjectNDArray, slice: seq[int] | openArray[int]): BaseNDArray = var dtypes = initTable[KindObjectND, int]() @@ -221,33 +263,33 @@ proc `[]`(self: ObjectNDArray, slice: seq[int] | openArray[int]): BaseNDArray = case baseType: of K_BOOLEAN: let newBuf = collect: (for v in buf: PY_Boolean(v).value) - return BooleanNDArray(shape: shape, buf: newBuf, kind: K_BOOLEAN) + return BooleanNDArray(shape: shape, buf: newBuf) of K_INT: let newBuf = collect: (for v in buf: int64 PY_Int(v).value) - return Int64NDArray(shape: shape, buf: newBuf, kind: K_INT64) + return Int64NDArray(shape: shape, buf: newBuf) of K_FLOAT: let newBuf = collect: (for v in buf: float64 PY_Float(v).value) - return Float64NDArray(shape: shape, buf: newBuf, kind: K_FLOAT64) + return Float64NDArray(shape: shape, buf: newBuf) of K_DATE: let newBuf = collect: (for v in buf: PY_Date(v).value) - return DateNDArray(shape: shape, buf: newBuf, kind: K_DATE) + return DateNDArray(shape: shape, buf: newBuf) of K_DATETIME: let newBuf = collect: (for v in buf: PY_DateTime(v).value) - return DateTimeNDArray(shape: shape, buf: newBuf, kind: K_DATETIME) + return DateTimeNDArray(shape: shape, buf: newBuf) of K_STRING: let newBuf = collect: (for v in buf: PY_String(v).value) return newBuf.newNDArray - of K_NONETYPE, K_TIME: + of K_NONETYPE, K_TIME: # nones and times are always treated as objects discard - return ObjectNDArray(shape: shape, buf: buf, kind: self.kind, dtypes: dtypes) + return ObjectNDArray(shape: shape, buf: buf, dtypes: dtypes) proc primitiveSlice[T: BooleanNDArray | Int8NDArray | Int16NDArray | Int32NDArray | Int64NDArray | Float32NDArray | Float64NDArray | DateNDArray | DateTimeNDArray](self: T, slice: seq[int] | openArray[int]): T = let buf = collect: for i in slice: self.buf[i] - return T(shape: @[buf.len], buf: buf, kind: self.kind) + return T(shape: @[buf.len], buf: buf) proc `[]`*[T: BaseNDArray](self: T, slice: seq[int] | openArray[int]): T = case self.kind: @@ -330,21 +372,6 @@ proc writeNumpyBool*(fh: var File, str: var string): void {.inline.} = proc writeNumpyBool*(fh: var File, value: var bool): void {.inline.} = fh.write(if value: '\x01' else: '\x00') -proc repr(self: ObjectNDArray): string = - let elems = collect: (for e in self.buf: $e) - return "ObjectNDArray(buf: @[" & elems.join(", ") & "], shape: " & $self.shape & ")" - -proc `$`*(self: BaseNDArray): string = - case self.kind: - of K_BOOLEAN: return repr(BooleanNDArray self) - of K_INT64: return repr(Int64NDArray self) - of K_FLOAT64: return repr(Float64NDArray self) - of K_STRING: return repr(UnicodeNDArray self) - of K_OBJECT: return repr(ObjectNDArray self) - of K_DATE: return repr(DateNDArray self) - of K_DATETIME: return repr(DateTimeNDArray self) - else: implement("BaseNDArray.`$`" & $self.kind) - proc validateHeader(fh: File, buf: var array[NUMPY_MAGIC_LEN, uint8], header: string, header_len: int): void {.inline.} = if fh.readBytes(buf, 0, header_len) != header_len: @@ -583,28 +610,27 @@ template readPrimitiveBuffer[T: typed](fh: var File, shape: var Shape): seq[T] = proc newBooleanNDArray(fh: var File, shape: var Shape): BooleanNDArray = return BooleanNDArray( buf: readPrimitiveBuffer[bool](fh, shape), - shape: shape, - kind: K_BOOLEAN + shape: shape ) template newIntNDArray(fh: var File, endianness: Endianness, size: int, shape: var Shape) = case size: - of 1: Int8NDArray(buf: readPrimitiveBuffer[int8](fh, shape), shape: shape, kind: K_INT8) - of 2: Int16NDArray(buf: readPrimitiveBuffer[int16](fh, shape), shape: shape, kind: K_INT16) - of 4: Int32NDArray(buf: readPrimitiveBuffer[int32](fh, shape), shape: shape, kind: K_INT32) - of 8: Int64NDArray(buf: readPrimitiveBuffer[int64](fh, shape), shape: shape, kind: K_INT64) + of 1: Int8NDArray(buf: readPrimitiveBuffer[int8](fh, shape), shape: shape) + of 2: Int16NDArray(buf: readPrimitiveBuffer[int16](fh, shape), shape: shape) + of 4: Int32NDArray(buf: readPrimitiveBuffer[int32](fh, shape), shape: shape) + of 8: Int64NDArray(buf: readPrimitiveBuffer[int64](fh, shape), shape: shape) else: raise newException(IOError, "unsupported int size: " & $size) proc newDateArray_Days(fh: var File, endianness: Endianness, shape: var Shape): DateNDArray {.inline.} = let buf = collect: (for v in readPrimitiveBuffer[int64](fh, shape): days2Date(v)) - return DateNDArray(buf: buf, shape: shape, kind: K_DATE) + return DateNDArray(buf: buf, shape: shape) proc newDateTimeArray_Seconds(fh: var File, endianness: Endianness, shape: var Shape): DateTimeNDArray {.inline.} = let data = readPrimitiveBuffer[int64](fh, shape) let buf = collect: (for v in data: initTime(v, 0).utc()) - return DateTimeNDArray(buf: buf, shape: shape, kind: K_DATETIME) + return DateTimeNDArray(buf: buf, shape: shape) proc newDateTimeArray_Miliseconds(fh: var File, endianness: Endianness, shape: var Shape): DateTimeNDArray {.inline.} = let data = readPrimitiveBuffer[int64](fh, shape) @@ -613,7 +639,7 @@ proc newDateTimeArray_Miliseconds(fh: var File, endianness: Endianness, shape: v let (s, m) = divmod(v, 1000) initTime(s, m * 1000).utc() - return DateTimeNDArray(buf: buf, shape: shape, kind: K_DATETIME) + return DateTimeNDArray(buf: buf, shape: shape) proc newDateTimeArray_Microseconds(fh: var File, endianness: Endianness, shape: var Shape): DateTimeNDArray {.inline.} = let data = readPrimitiveBuffer[int64](fh, shape) @@ -622,12 +648,12 @@ proc newDateTimeArray_Microseconds(fh: var File, endianness: Endianness, shape: let (s, u) = divmod(v, 1_000_000) initTime(s, u).utc() - return DateTimeNDArray(buf: buf, shape: shape, kind: K_DATETIME) + return DateTimeNDArray(buf: buf, shape: shape) template newFloatNDArray(fh: var File, endianness: Endianness, size: int, shape: var Shape) = case size: - of 4: Float32NDArray(buf: readPrimitiveBuffer[float32](fh, shape), shape: shape, kind: K_FLOAT32) - of 8: Float64NDArray(buf: readPrimitiveBuffer[float64](fh, shape), shape: shape, kind: K_FLOAT64) + of 4: Float32NDArray(buf: readPrimitiveBuffer[float32](fh, shape), shape: shape) + of 8: Float64NDArray(buf: readPrimitiveBuffer[float64](fh, shape), shape: shape) else: raise newException(IOError, "unsupported float size: " & $size) proc newUnicodeNDArray(fh: var File, endianness: Endianness, size: int, shape: var Shape): UnicodeNDArray = @@ -639,7 +665,7 @@ proc newUnicodeNDArray(fh: var File, endianness: Endianness, size: int, shape: v if fh.readBuffer(addr buf[0], buf_size) != buf_size: raise newException(IOError, "malformed unicode buffer") - return UnicodeNDArray(buf: buf, shape: shape, size: size, kind: K_STRING) + return UnicodeNDArray(buf: buf, shape: shape, size: size) proc newObjectNDArray(fh: var File, endianness: Endianness, shape: var Shape): ObjectNDArray = var elements = calcShapeElements(shape) @@ -648,7 +674,7 @@ proc newObjectNDArray(fh: var File, endianness: Endianness, shape: var Shape): O if calcShapeElements(shape) != elements: raise newException(IOError, "invalid object array shape " & $shape & "(" & $calcShapeElements(shape) & ") != " & $elements) - return ObjectNDArray(shape: shape, buf: buf, dtypes: dtypes, kind: K_OBJECT) + return ObjectNDArray(shape: shape, buf: buf, dtypes: dtypes) proc readPageInfo(fh: var File): (NDArrayDescriptor, bool, Shape) = var header_bytes: array[NUMPY_MAGIC_LEN, uint8] @@ -726,87 +752,59 @@ proc toNumpyPrimitive[T: bool | int8 | int16 | int32 | int64 | float32 | float64 else: raise newException(IOError, "invalid primitive type: " & T.name) -proc toPython(self: BooleanNDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[bool](self.shape, addr self.buf[0]) +method toNimpy(self: PY_ObjectND): nimpy.PyObject {.base.} = implement("must be implemented by inheriting class") +method toNimpy(self: PY_NoneType): nimpy.PyObject = nil +method toNimpy(self: PY_Boolean): nimpy.PyObject = modules().builtins.classes.BoolClass!(self.value) +method toNimpy(self: PY_Int): nimpy.PyObject = modules().builtins.classes.IntClass!(self.value) +method toNimpy(self: PY_Float): nimpy.PyObject = modules().builtins.classes.FloatClass!(self.value) +method toNimpy(self: PY_String): nimpy.PyObject = modules().builtins.classes.StrClass!(self.value) +method toNimpy(self: PY_Date): nimpy.PyObject = modules().datetime.classes.DateClass!(self.value.year, self.value.month, self.value.monthday) +method toNimpy(self: PY_Time): nimpy.PyObject = + let hour = self.getHour() + let minute = self.getMinute() + let second = self.getSecond() + let microsecond = self.getMicrosecond() -proc toPython(self: Int8NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[int8](self.shape, addr self.buf[0]) -proc toPython(self: Int16NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[int16](self.shape, addr self.buf[0]) -proc toPython(self: Int32NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[int32](self.shape, addr self.buf[0]) -proc toPython(self: Int64NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[int64](self.shape, addr self.buf[0]) + return modules().datetime.classes.TimeClass!( + hour: hour, minute: minute, second: second, microsecond: microsecond + ) -proc toPython(self: Float32NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[float32](self.shape, addr self.buf[0]) -proc toPython(self: Float64NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[float64](self.shape, addr self.buf[0]) +method toNimpy(self: PY_DateTime): nimpy.PyObject = + return modules().datetime.classes.DateTimeClass!( + self.value.year, self.value.month, self.value.monthday, + self.value.hour, self.value.minute, self.value.second, int(self.value.nanosecond / 1000) + ) -proc toPython(self: UnicodeNDArray): nimpy.PyObject = toNumpyPrimitive(gendtypeStr(self.size), self.shape, self.size * sizeof(Rune), addr self.buf[0]) -proc toPython(self: DateNDArray): nimpy.PyObject = +method toPython*(self: BaseNDArray): nimpy.PyObject {.inline, base.} = implement("must be implemented by inheriting class") +method toPython*(self: BooleanNDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[bool](self.shape, addr self.buf[0]) +method toPython*(self: Int8NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[int8](self.shape, addr self.buf[0]) +method toPython*(self: Int16NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[int16](self.shape, addr self.buf[0]) +method toPython*(self: Int32NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[int32](self.shape, addr self.buf[0]) +method toPython*(self: Int64NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[int64](self.shape, addr self.buf[0]) +method toPython*(self: Float32NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[float32](self.shape, addr self.buf[0]) +method toPython*(self: Float64NDArray): nimpy.PyObject {.inline.} = toNumpyPrimitive[float64](self.shape, addr self.buf[0]) +method toPython*(self: UnicodeNDArray): nimpy.PyObject = toNumpyPrimitive(gendtypeStr(self.size), self.shape, self.size * sizeof(Rune), addr self.buf[0]) +method toPython*(self: DateNDArray): nimpy.PyObject = var buf = collect: for el in self.buf: el.toTime.time2Duration.inDays return toNumpyPrimitive(self.dtype, self.shape, sizeof(int64), addr buf[0]) -proc toPython(self: DateTimeNDArray): nimpy.PyObject = +method toPython*(self: DateTimeNDArray): nimpy.PyObject = var buf = collect: for el in self.buf: el.toTime.time2Duration.inMicroseconds return toNumpyPrimitive(self.dtype, self.shape, sizeof(int64), addr buf[0]) -proc toNimpy(self: PY_NoneType): nimpy.PyObject = pymodules.builtins().None -proc toNimpy(self: PY_Boolean): nimpy.PyObject = pymodules.builtins().bool(self.value) -proc toNimpy(self: PY_Int): nimpy.PyObject = pymodules.builtins().int(self.value) -proc toNimpy(self: PY_Float): nimpy.PyObject = pymodules.builtins().float(self.value) -proc toNimpy(self: PY_String): nimpy.PyObject = pymodules.builtins().str(self.value) -proc toNimpy(self: PY_Date): nimpy.PyObject = - return pymodules.datetime().date(self.value.year, self.value.month, self.value.monthday) -proc toNimpy(self: PY_Time): nimpy.PyObject = - let hour = self.getHour() - let minute = self.getMinute() - let second = self.getSecond() - let microsecond = self.getMicrosecond() - - return pymodules.datetime().time( - hour = hour, minute = minute, second = second, microsecond = microsecond - ) - -proc toNimpy(self: PY_DateTime): nimpy.PyObject = - return pymodules.datetime().datetime( - self.value.year, self.value.month, self.value.monthday, - self.value.hour, self.value.minute, self.value.second, int(self.value.nanosecond / 1000) - ) - -proc toNimpy(self: PY_ObjectND): nimpy.PyObject = - case self.kind: - of K_NONETYPE: return PY_NoneType(self).toNimpy() - of K_BOOLEAN: return PY_Boolean(self).toNimpy() - of K_INT: return PY_Int(self).toNimpy() - of K_FLOAT: return PY_Float(self).toNimpy() - of K_STRING: return PY_String(self).toNimpy() - of K_DATE: return PY_Date(self).toNimpy() - of K_TIME: return PY_Time(self).toNimpy() - of K_DATETIME: return PY_DateTime(self).toNimpy() - -proc toPython(self: ObjectNDArray): nimpy.PyObject = +method toPython*(self: ObjectNDArray): nimpy.PyObject = let buf = collect: for el in self.buf: el.toNimpy() - return numpy().array(buf) - - -proc toPython*(self: BaseNDArray): nimpy.PyObject = - case self.kind: - of K_BOOLEAN: return BooleanNDArray(self).toPython() - of K_INT8: return Int8NDArray(self).toPython() - of K_INT16: return Int16NDArray(self).toPython() - of K_INT32: return Int32NDArray(self).toPython() - of K_INT64: return Int64NDArray(self).toPython() - of K_FLOAT32: return Float32NDArray(self).toPython() - of K_FLOAT64: return Float64NDArray(self).toPython() - of K_DATE: return DateNDArray(self).toPython() - of K_DATETIME: return DateTimeNDArray(self).toPython() - of K_STRING: return UnicodeNDArray(self).toPython() - of K_OBJECT: return ObjectNDArray(self).toPython() + return modules().numpy.classes.NdArrayClass!(buf) proc getPageLen*(fh: var File): int = var (_, _, shape) = readPageInfo(fh) @@ -946,7 +944,7 @@ proc saveAsUnicode(self: ObjectNDArray, path: string): void = let elements = uint calcShapeElements(self.shape) var fh = open(path, fmWrite) - + fh.writeNumpyHeader(gendtypeStr(longest), elements) for v in self.pgIter: @@ -959,9 +957,9 @@ proc save(self: ObjectNDArray, path: string): void = for (k, v) in self.dtypes.pairs: if v == 0: continue {k: v} - + var hasNones = K_NONETYPE in dtypes - + if not hasNones: # we have no nones, we may be able to save as primitive var colDtypes = toSeq(dtypes.keys) @@ -979,8 +977,8 @@ proc save(self: ObjectNDArray, path: string): void = of K_STRING: self.saveAsUnicode(path); return of K_DATE: self.saveAsPrimitive(path, DateNDArray.headerType, writeDate); return of K_DATETIME: self.saveAsPrimitive(path, DateTimeNDArray.headerType, writeDateTime); return - of K_NONETYPE: discard # can't happen - of K_TIME: discard # time is always an object + of K_NONETYPE: discard # can't happen + of K_TIME: discard # time is always an object let dtype = self.dtype let elements = uint calcShapeElements(self.shape) @@ -1007,14 +1005,14 @@ proc newNDArray*(arr: seq[string] | openArray[string] | iterator(): string): Uni longest = max(longest, res.len) page_len = page_len + 1 res - + let shape = @[page_len] let buf = newSeq[Rune](longest * page_len) for (i, str) in enumerate(runes): buf[i * longest].addr.copyMem(addr str[0], str.len * sizeof(Rune)) - return UnicodeNDArray(shape: shape, buf: buf, size: longest, kind: K_STRING) + return UnicodeNDArray(shape: shape, buf: buf, size: longest) proc save*(self: BaseNDArray, path: string): void = case self.kind: @@ -1030,28 +1028,55 @@ proc save*(self: BaseNDArray, path: string): void = of K_DATETIME: DateTimeNDArray(self).save(path) of K_OBJECT: ObjectNDArray(self).save(path) +proc save*(self: BaseNDArray, page: nimpy.PyObject): void = + let m = modules() + + if not m.isinstance(page, m.tablite.modules.base.classes.SimplePageClass): + raise newException(ValueError, "must be a page") + + let path = m.toStr(page.path) + + self.save(path) + proc type2PyType(`type`: KindObjectND): nimpy.PyObject = + let m = modules() + case `type`: - of K_BOOLEAN: return pymodules.builtins().getattr("bool") # nim's word reservation behaviour is stupid - of K_INT: return pymodules.builtins().getattr("int") # ditto - of K_FLOAT: return pymodules.builtins().getattr("float") # ditto - of K_STRING: return pymodules.builtins().str - of K_NONETYPE: return pymodules.PyNoneClass - of K_DATE: return pymodules.datetime().date - of K_TIME: return pymodules.datetime().time - of K_DATETIME: return pymodules.datetime().datetime + of K_BOOLEAN: return m.builtins.classes.BoolClass + of K_INT: return m.builtins.classes.IntClass + of K_FLOAT: return m.builtins.classes.FloatClass + of K_STRING: return m.builtins.classes.StrClass + of K_NONETYPE: return m.builtins.classes.NoneTypeClass + of K_DATE: return m.datetime.classes.DateClass + of K_TIME: return m.datetime.classes.TimeClass + of K_DATETIME: return m.datetime.classes.DateTimeClass proc newPyPage*(id: string, path: string, len: int, dtypes: Table[KindObjectND, int]): nimpy.PyObject = - let pyDtypes = pymodules.builtins().dict() - + let pyDtypes = modules().builtins.classes.DictClass!() + for (dt, n) in dtypes.pairs: let obj = dt.type2PyType() pyDtypes[obj] = n - let pg = pymodules.tabliteBase().SimplePage(id, path, len, pyDtypes) + let pg = modules().tablite.modules.base.classes.SimplePageClass!(id, path, len, pyDtypes) return pg +proc newPyPage*(self: BaseNDArray, workdir: string, pid: string): nimpy.PyObject = newPyPage(pid, workdir, self.len, self.getPageTypes) +proc newPyPage*(self: BaseNDArray, workdir: string): nimpy.PyObject = + let pid = modules().tablite.modules.base.classes.SimplePageClass.next_id(workdir).to(string) + + return self.newPyPage(workdir, pid) + +proc newPyPage*(self: BaseNDArray): nimpy.PyObject = + let tabliteConfig = modules().tablite.modules.config.classes.Config + let wpid = tabliteConfig.pid.to(string) + let tablitDir = Path(modules().builtins.toStr(tabliteConfig.workdir)) + let workdir = string (tablitDir / Path(wpid)) + let pid = modules().tablite.modules.base.classes.SimplePageClass.next_id(workdir).to(string) + + return self.newPyPage(workdir, pid) + proc calcRepaginationSteps(shapes: seq[int] | var seq[int] | ptr seq[int], pageSize: int): (seq[seq[(int, int, int)]], int) = var colLen = 0 @@ -1061,7 +1086,7 @@ proc calcRepaginationSteps(shapes: seq[int] | var seq[int] | ptr seq[int], pageS else: for v in shapes: colLen = colLen + v - + let resultPages = int ceil(colLen / pageSize) var steps = collect: (for _ in 0.. 0: + raise newException(ValueError, "invalid page kind, expected only '" & $expected & "' but got: " & $types) + +iterator iterateIntPage(page: BaseNDArray): int = + template collectValues(page: typed) = + for v in page.pgIter: + yield int v + + case page.kind: + of K_INT8: Int8NDArray(page).collectValues + of K_INT16: Int16NDArray(page).collectValues + of K_INT32: Int32NDArray(page).collectValues + of K_INT64: Int64NDArray(page).collectValues + of K_OBJECT: + page.validatePageKind(K_INT) + for v in ObjectNDArray(page).pgIter: + yield PY_Int(v).value + + else: raise newException(ValueError, "invalid page type: " & $page.kind) + +iterator iterateFloatPage(page: BaseNDArray): float = + template collectValues(page: typed) = + for v in page.pgIter: + yield float v + + case page.kind: + of K_FLOAT32: Float32NDArray(page).collectValues + of K_FLOAT64: Float64NDArray(page).collectValues + of K_OBJECT: + page.validatePageKind(K_FLOAT) + for v in ObjectNDArray(page).pgIter: + yield PY_Float(v).value + + else: raise newException(ValueError, "invalid page type: " & $page.kind) + +iterator iterateBooleanPage(page: BaseNDArray): bool = + case page.kind: + of K_BOOLEAN: + for v in BooleanNDArray(page).pgIter: + yield v + of K_OBJECT: + page.validatePageKind(K_BOOLEAN) + for v in ObjectNDArray(page).pgIter: + yield PY_Boolean(v).value + else: raise newException(ValueError, "invalid page type: " & $page.kind) + + +iterator iterateStringPage(page: BaseNDArray): string = + case page.kind: + of K_STRING: + for v in UnicodeNDArray(page).pgIter: + yield v + of K_OBJECT: + page.validatePageKind(K_STRING) + for v in ObjectNDArray(page).pgIter: + yield PY_String(v).value + else: raise newException(ValueError, "invalid page type: " & $page.kind) + +iterator iterateObjectPage(page: BaseNDArray): PY_ObjectND = + case page.kind: + of K_BOOLEAN: (for v in BooleanNDArray(page).pgIter: yield newPY_Object(v)) + of K_INT8: (for v in Int8NDArray(page).pgIter: yield newPY_Object(v)) + of K_INT16: (for v in Int16NDArray(page).pgIter: yield newPY_Object(v)) + of K_INT32: (for v in Int32NDArray(page).pgIter: yield newPY_Object(v)) + of K_INT64: (for v in Int64NDArray(page).pgIter: yield newPY_Object(v)) + of K_FLOAT32: (for v in Float32NDArray(page).pgIter: yield newPY_Object(v)) + of K_FLOAT64: (for v in Float64NDArray(page).pgIter: yield newPY_Object(v)) + of K_STRING: (for v in UnicodeNDArray(page).pgIter: yield newPY_Object(v)) + of K_DATE: (for v in DateNDArray(page).pgIter: yield newPY_Object(v, K_DATE)) + of K_DATETIME: (for v in DateTimeNDArray(page).pgIter: yield newPY_Object(v, K_DATETIME)) + of K_OBJECT: (for v in ObjectNDArray(page).pgIter: yield v) + +iterator iterateColumn*[T: bool | int | float | string | PY_ObjectND](column: seq[string]): T = + + for pgPath in column: + let page = readNumpy(pgPath) + for v in ( + when T is bool: page.iterateBooleanPage + elif T is int: page.iterateIntPage + elif T is float: page.iterateFloatPage + elif T is string: page.iterateStringPage + elif T is PY_ObjectND: page.iterateObjectPage + else: + raise newException(FieldDefect, "unsupported column type: " & T.name) + ): + yield v + +iterator iterateColumn*(column: seq[string], kind: KindObjectND): DateTime = + for pgPath in column: + let page = readNumpy(pgPath) + page.validatePageKind(kind) + + case page.kind: + of K_DATE: (for v in DateNDArray(page).pgIter: yield v) + of K_DATETIME: (for v in DateTimeNDArray(page).pgIter: yield v) + of K_OBJECT: + case kind: + of K_DATE: (for v in ObjectNDArray(page).pgIter: yield PY_Date(v).value) + of K_DATETIME: (for v in ObjectNDArray(page).pgIter: yield PY_DateTime(v).value) + else: discard + else: discard + +iterator iterateColumn*[T: bool | int | float | string | PY_ObjectND](column: nimpy.PyObject): T = + for v in iterateColumn[T](modules().tablite.modules.base.collectPages(column)): + yield v + +iterator iterateColumn*(column: nimpy.PyObject, kind: KindObjectND): DateTime = + for v in iterateColumn(modules().tablite.modules.base.collectPages(column), kind): + yield v when isMainModule and appType != "lib": - let workdir = Path(pymodules.builtins().str(pymodules.tabliteConfig().Config.workdir).to(string)) + let tabliteConfig = modules().tablite.modules.config.classes.Config + # let workdir = Path(modules().toStr(tabliteConfig.workdir)) let pid = "nim" - let pagedir = workdir / Path(pid) / Path("pages") + # let pagedir = workdir / Path(pid) / Path("pages") - createDir(string pagedir) + #[ + getCurrentDir() is missing + I'M GOING TO SHOOT UP A WALLMART. In Minecraft. + ]# - pymodules.tabliteConfig().Config.pid = pid - pymodules.tabliteConfig().Config.PAGE_SIZE = 2 + echo readNumpy("tests/data/pages/scalar.npy").len - let columns = pymodules.builtins().dict({"A": @[1, 22, 333, 4444, 55555, 666666, 7777777]}.toTable) - let table = pymodules.tablite().Table(columns = columns) - let pages = collect: (for p in table["A"].pages: pymodules.builtins().str(p.path).to(string)) + # createDir(string pagedir) + + tabliteConfig.pid = pid + tabliteConfig.PAGE_SIZE = 2 + + let columns = modules().builtins.classes.DictClass!({"A": @["1", "22", "333", "4444", "55555", "666666", "7777777"]}.toTable) + let table = modules().tablite.classes.TableClass!(columns = columns) + let pages = collect: (for p in table["A"].pages: modules().toStr(p.path)) let newPages = repaginate(pages) - echo newPages \ No newline at end of file + echo newPages + + for i in toSeq(iterateColumn[string](table["A"])): + echo i + + echo newNDArray[DateNDArray](@[now().utc]) + echo newNDArray[DateTimeNDArray](@[now().utc]) + echo newNDArray(@[false, false, true]) + echo newNDArray(@[1, 2, 3]) + echo newNDArray(@[1.0, 2.0, 3.0]) + echo newNDArray(@["a", "bb", "ccc"]) + echo newNDArray(@[newPY_Object()]) \ No newline at end of file diff --git a/tablite/_nimlite/pickleproto.nim b/nimlite/pickleproto.nim similarity index 100% rename from tablite/_nimlite/pickleproto.nim rename to nimlite/pickleproto.nim diff --git a/tablite/_nimlite/pickling.nim b/nimlite/pickling.nim similarity index 100% rename from tablite/_nimlite/pickling.nim rename to nimlite/pickling.nim diff --git a/nimlite/pymodules.nim b/nimlite/pymodules.nim new file mode 100644 index 00000000..41ab6ef3 --- /dev/null +++ b/nimlite/pymodules.nim @@ -0,0 +1,168 @@ +from std/os import getEnv +from std/strutils import split +from std/sugar import collect +import nimpy +import std/options + +type PyModule[T] {.requiresInit.} = ref object of RootObj + module*: nimpy.PyObject + classes*: T + +type PyDeepModule[T, K] {.requiresInit.} = ref object of PyModule[T] + modules*: K + +type PyEmpty = object +type PyEmptyModule = ref object of PyModule[PyEmpty] + +type PyNumpy {.requiresInit.} = object + NdArrayClass*: nimpy.PyObject + +type PyBuiltins {.requiresInit.} = object + NoneTypeClass*: nimpy.PyObject + DictClass*: nimpy.PyObject + ListClass*: nimpy.PyObject + BoolClass*: nimpy.PyObject + IntClass*: nimpy.PyObject + FloatClass*: nimpy.PyObject + StrClass*: nimpy.PyObject + +type PyDatetime {.requiresInit.} = object + DateClass*: nimpy.PyObject + TimeClass*: nimpy.PyObject + DateTimeClass*: nimpy.PyObject + +type PyTablite {.requiresInit.} = object + TableClass*: nimpy.PyObject + +type PyMplite {.requiresInit.} = object + TaskManager*: nimpy.PyObject + +type PyTabliteConfig {.requiresInit.} = object + Config*: nimpy.PyObject + +type PyTabliteBase {.requiresInit.} = object + ColumnClass*: nimpy.PyObject + SimplePageClass*: nimpy.PyObject + +type PyTabliteSubModules {.requiresInit.} = object + config*: PyModule[PyTabliteConfig] + base*: PyModule[PyTabliteBase] + +type PyTqdm {.requiresInit.} = object + TqdmClass*: nimpy.PyObject + +type PyModules {.requiresInit.} = object + sys*: PyEmptyModule + builtins*: PyModule[PyBuiltins] + datetime*: PyModule[PyDatetime] + tablite*: PyDeepModule[PyTablite, PyTabliteSubModules] + numpy*: PyModule[PyNumpy] + mplite*: PyModule[PyMplite] + nimlite*: PyEmptyModule + tqdm*: PyModule[PyTqdm] + +proc newModule[K, T](Class: typedesc[K], module: nimpy.PyObject, classes: T): K {.inline.} = Class(module: module, classes: classes) +proc newModule[K, T1, T2](Class: typedesc[K], module: nimpy.PyObject, classes: T1, modules: T2): K {.inline.} = Class(module: module, classes: classes, modules: modules) +proc newEmptyModule(module: nimpy.PyObject): PyEmptyModule {.inline.} = PyEmptyModule.newModule(module, PyEmpty()) + +var py = none[PyModules]() + +proc importPy(): void = + if py.isSome: + return + + let envs = getEnv("NIM_PYTHON_MODULES", "").split(":") + let iSys = nimpy.pyImport("sys") + + discard iSys.path.extend(envs) + + let iBuiltins = nimpy.pyBuiltinsModule() + + let iDateTime = nimpy.pyImport("datetime") + let iTablite = nimpy.pyImport("tablite") + let iTabliteBase = nimpy.pyImport("tablite.base") + let iTabliteConfig = nimpy.pyImport("tablite.config") + let iMplite = nimpy.pyImport("mplite") + let iNumpy = nimpy.pyImport("numpy") + let iTqdm = nimpy.pyImport("tqdm") + let iNimlite = nimpy.pyImport("tablite.nimlite") + + let iPyBuiltins = PyBuiltins( + NoneTypeClass: iBuiltins.None.getattr("__class__"), + DictClass: iBuiltins.getattr("dict"), + ListClass: iBuiltins.getattr("list"), + BoolClass: iBuiltins.getattr("bool"), + IntClass: iBuiltins.getattr("int"), + FloatClass: iBuiltins.getattr("float"), + StrClass: iBuiltins.getattr("str"), + ) + + let iPyDateTime = PyDatetime( + DateClass: iDateTime.date, + TimeClass: iDateTime.time, + DateTimeClass: iDateTime.datetime, + ) + + let iPyTablite = PyTablite(TableClass: iTablite.Table) + let iPyTabliteConf = PyTabliteConfig(Config: iTabliteConfig.Config) + let iPyTabliteBase = PyTabliteBase(ColumnClass: iTabliteBase.Column, SimplePageClass: iTabliteBase.SimplePage) + let iPyTabliteSub = PyTabliteSubModules( + config: PyModule[PyTabliteConfig].newModule(iTabliteConfig, iPyTabliteConf), + base: PyModule[PyTabliteBase].newModule(iTabliteBase, iPyTabliteBase) + ) + + let iPyNumpy = PyNumpy(NdArrayClass: iBuiltins.getattr(iNumpy, "array")) + let iPyMplite = PyMplite(TaskManager: iBuiltins.getattr(iMplite, "TaskManager")) + let iPyTqdm = PyTqdm(TqdmClass: iBuiltins.getattr(iTqdm, "tqdm")) + + let pyModules = PyModules( + sys: newEmptyModule(iSys), + builtins: PyModule[PyBuiltins].newModule(iBuiltins, iPyBuiltins), + numpy: PyModule[PyNumpy].newModule(iNumpy, iPyNumpy), + datetime: PyModule[PyDatetime].newModule(iDateTime, iPyDateTime), + tablite: PyDeepModule[PyTablite, PyTabliteSubModules].newModule(iTablite, iPyTablite, iPyTabliteSub), + mplite: PyModule[PyMplite].newModule(iMplite, iPyMplite), + nimlite: newEmptyModule(iNimlite), + tqdm: PyModule[PyTqdm].newModule(iTqdm, iPyTqdm), + ) + + py = some(pyModules) + + +proc modules*(): PyModules = + importPy() + + return py.get + + +proc isinstance*(inst: PyModule[PyBuiltins], obj: PyObject, other: nimpy.PyObject): bool {.inline.} = inst.module.isinstance(obj, other).to(bool) +proc getAttr*(inst: PyModule[PyBuiltins], attr: string): PyObject {.inline.} = inst.module.getattr(inst.module, attr) +proc getAttr*(inst: PyModule[PyBuiltins], obj: PyObject, attr: string): PyObject {.inline.} = inst.module.getattr(obj, attr) +proc getType*(inst: PyModule[PyBuiltins], obj: PyObject): PyObject {.inline.} = inst.module.type(obj) +proc getTypeName*(inst: PyModule[PyBuiltins], obj: PyObject): string {.inline.} = inst.getAttr(inst.getType(obj), "__name__").to(string) +proc toStr*(inst: PyModule[PyBuiltins], obj: PyObject): string {.inline.} = inst.module.str(obj).to(string) +proc toRepr*(inst: PyModule[PyBuiltins], obj: PyObject): string {.inline.} = inst.module.repr(obj).to(string) +proc getLen*(inst: PyModule[PyBuiltins], obj: PyObject): int {.inline.} = inst.module.len(obj).to(int) + +proc fromFile*(inst: PyModule[PyTablite], path: string): PyObject {.inline.} = inst.classes.TableClass.from_file(path) +proc collectPages*(inst: PyModule[PyTabliteBase], column: PyObject): seq[string] {.inline.} = + let builtins = modules().builtins + + if not builtins.isinstance(column, inst.classes.ColumnClass): + raise newException(ValueError, "not a column") + + return collect: + for p in column.pages: + builtins.toStr(p.path) + +proc isinstance*(self: PyModules, obj: PyObject, other: nimpy.PyObject): bool {.inline.} = self.builtins.isinstance(obj, other) +proc getAttr*(self: PyModules, obj: PyObject, attr: string): PyObject {.inline.} = self.builtins.getAttr(obj, attr) +proc getAttr*(self: PyModules, attr: string): PyObject {.inline.} = self.builtins.getAttr(attr) +proc getType*(self: PyModules, obj: PyObject): PyObject {.inline.} = self.builtins.getType(obj) +proc getTypeName*(self: PyModules, obj: PyObject): string {.inline.} = self.builtins.getTypeName(obj) +proc toStr*(self: PyModules, obj: PyObject): string {.inline.} = self.builtins.toStr(obj) +proc toRepr*(self: PyModules, obj: PyObject): string {.inline.} = self.builtins.toRepr(obj) +proc getLen*(self: PyModules, obj: PyObject): int {.inline.} = self.builtins.getLen(obj) + + +proc isNone*(obj: PyObject): bool {.inline.} = modules().builtins.isinstance(obj, py.get.builtins.classes.NoneTypeClass) diff --git a/tablite/_nimlite/pytypes.nim b/nimlite/pytypes.nim similarity index 84% rename from tablite/_nimlite/pytypes.nim rename to nimlite/pytypes.nim index aa92d766..87f6e61f 100644 --- a/tablite/_nimlite/pytypes.nim +++ b/nimlite/pytypes.nim @@ -1,6 +1,7 @@ from std/tables import Table import std/times import dateutils +from ./utils import implement const fmtDate* = initTimeFormat("yyyy-MM-dd") const fmtDateTime* = initTimeFormat("yyyy-MM-dd HH:mm:ss") @@ -59,16 +60,15 @@ type Py_Dict* = ref object of Py_Object let PY_None* = PY_NoneType(kind: KindObjectND.K_NONETYPE) -proc toRepr*(self: PY_ObjectND): string {.inline.} = - case self.kind: - of K_NONETYPE: "None" - of K_BOOLEAN: $PY_Boolean(self).value - of K_INT: $PY_Int(self).value - of K_FLOAT: $PY_Float(self).value - of K_STRING: $PY_String(self).value - of K_DATE: PY_Date(self).value.format(fmtDate) - of K_TIME: $PY_Time(self).value.duration2Date.format(fmtTime) - of K_DATETIME: PY_DateTime(self).value.format(fmtDateTime) +method toRepr*(self: PY_ObjectND): string {.base, inline.} = implement("PY_ObjectND.`$` must be implemented by inheriting class: " & $self.kind) +method toRepr*(self: PY_NoneType): string = "None" +method toRepr*(self: PY_Boolean): string = $self.value +method toRepr*(self: PY_Int): string = $self.value +method toRepr*(self: PY_Float): string = $self.value +method toRepr*(self: PY_String): string = "'" & self.value & "'" +method toRepr*(self: PY_Date): string = self.value.format(fmtDate) +method toRepr*(self: PY_Time): string = self.value.duration2Date.format(fmtTime) +method toRepr*(self: PY_DateTime): string = self.value.format(fmtDateTime) proc newPY_Date*(year: uint16, month, day: uint8): PY_Date {.inline.} = PY_Date(value: date2NimDatetime(int year, int month, int day), kind: K_DATE) @@ -100,10 +100,10 @@ proc newPY_Time*(hour, minute, second: uint8, microsecond: uint32): PY_Time {.in proc newPY_Time*(date: DateTime): PY_Time = PY_Time(value: date.toTime.time2Duration, kind: K_TIME) -proc `$`*(self: PY_ObjectND): string {.inline.} = "PY_ObjectND" -proc `$`*(self: PY_Date): string {.inline.} = "Date(" & self.toRepr & ")" -proc `$`*(self: PY_Time): string {.inline.} = "Time(" & self.toRepr & ")" -proc `$`*(self: PY_DateTime): string {.inline.} = "DateTime(" & self.toRepr & ")" +method `$`*(self: PY_ObjectND): string {.inline, base.} = "PY_ObjectND" +method `$`*(self: PY_Date): string {.inline.} = "Date(" & self.toRepr & ")" +method `$`*(self: PY_Time): string {.inline.} = "Time(" & self.toRepr & ")" +method `$`*(self: PY_DateTime): string {.inline.} = "DateTime(" & self.toRepr & ")" proc calcShapeElements*(shape: var Shape): int {.inline.} = var elements = 1 diff --git a/tablite/_nimlite/ranking.nim b/nimlite/ranking.nim similarity index 100% rename from tablite/_nimlite/ranking.nim rename to nimlite/ranking.nim diff --git a/tablite/_nimlite/unpickling.nim b/nimlite/unpickling.nim similarity index 99% rename from tablite/_nimlite/unpickling.nim rename to nimlite/unpickling.nim index f19be584..80f8c5cc 100644 --- a/tablite/_nimlite/unpickling.nim +++ b/nimlite/unpickling.nim @@ -389,7 +389,7 @@ proc newReducePickle(fn: var GlobalPickle, args: var TuplePickle): PY_Object = let dtypeName = PY_NpDType(args.elems[0]).dtype let bytes = BinBytesPickle(args.elems[1]).value let byteCount = bytes.len - let np = pymodules.numpy() + let np = pymodules.modules().numpy.module let dtypePy = np.dtype(dtypeName) let bytesPy = np.empty(byteCount, dtype="bytes") # can't do it we builtins.bytes() because it creates readonly buffer @@ -405,7 +405,7 @@ proc newReducePickle(fn: var GlobalPickle, args: var TuplePickle): PY_Object = let pyBytes = bytesPy.data.tobytes() let valPy = np.core.multiarray.scalar(dtypePy, pyBytes).tolist() - let typeName = pymodules.builtins().getattr(pymodules.builtins().type(valPy), "__name__").to(string) + let typeName = pymodules.modules().getTypeName(valPy) case typeName: # construct the nim native python object of "float": return newPY_Object(valPy.to(float)) diff --git a/tablite/_nimlite/utils.nim b/nimlite/utils.nim similarity index 59% rename from tablite/_nimlite/utils.nim rename to nimlite/utils.nim index bec5af69..b021c74e 100644 --- a/tablite/_nimlite/utils.nim +++ b/nimlite/utils.nim @@ -1,9 +1,12 @@ -from std/random import randomize, sample +import std/options +from std/random import Rand, initRand, sample from std/math import floor +from std/enumerate import enumerate +from std/unicode import Rune, toRunes -randomize() +var rng = none[Rand]() -const rand_chars = {'a'..'z','A'..'Z', '0'..'9'} +const randChars = {'a'..'z', 'A'..'Z', '0'..'9'} template corrupted*(d: typedesc = IOError): void = raise newException(d, "file corrupted") template implement*(name: string = ""): void = raise newException(Exception, if name.len == 0: "not yet imlemented" else: "'" & name & "' not yet imlemented") @@ -38,10 +41,33 @@ proc divmod*(x: int, y: int): (int, int) {.inline.} = return (z, x - y * z) proc generateRandomString*(len: int): string {.inline.} = + if rng.isNone: + rng = some(initRand()) + ## generates a random string of len var str = newString(len) for i in 0..