diff --git a/arraymancer.nimble b/arraymancer.nimble index b8a3178b4..b7ef792d1 100644 --- a/arraymancer.nimble +++ b/arraymancer.nimble @@ -229,88 +229,28 @@ task test_mkl_omp, "Run all tests - Intel MKL + OpenMP": task test_release, "Run all tests - Release mode": test "tests_cpu", " -d:release" -task gen_doc, "Generate Arraymancer documentation": - # TODO: Industrialize: something more robust that only check nim files (and not .DS_Store ...) - for filePath in listFiles("src/tensor/"): - let modName = filePath[11..^5] # Removing src/tensor/ (11 chars) and .nim (4 chars) # TODO: something more robust - # Cuda doc is broken https://github.com/nim-lang/Nim/issues/6910 - # Delete doc comment from nimcuda before using this - exec r"nim doc -o:docs/build/tensor." & modName & ".html " & filePath - - for filePath in listFiles("src/nn_primitives/"): - let modName = filePath[18..^5] # Removing src/nn_primitives/ (18 chars) and .nim (4 chars) # TODO: something more robust - # Cuda doc is broken https://github.com/nim-lang/Nim/issues/6910 - # Delete doc comment from nimcuda before using this - exec r"nim doc -o:docs/build/nnp." & modName & ".html " & filePath - - for filePath in listFiles("src/autograd/"): - let modName = filePath[13..^5] # Removing src/autograd/ (13 chars) and .nim (4 chars) # TODO: something more robust - exec r"nim doc -o:docs/build/ag." & modName & ".html " & filePath - - for filePath in listFiles("src/nn/"): - let modName = filePath[7..^5] # Removing src/nn_primitives/ (18 chars) and .nim (4 chars) # TODO: something more robust - exec r"nim doc -o:docs/build/nn." & modName & ".html " & filePath - - # TODO auto check subdir - for filePath in listFiles("src/nn/activation/"): - let modName = filePath[18..^5] - exec r"nim doc -o:docs/build/nn_activation." & modName & ".html " & filePath - - for filePath in listFiles("src/nn/layers/"): - let modName = filePath[14..^5] - exec r"nim doc -o:docs/build/nn_layers." 
& modName & ".html " & filePath - - for filePath in listFiles("src/nn/loss/"): - let modName = filePath[12..^5] - exec r"nim doc -o:docs/build/nn_loss." & modName & ".html " & filePath - - for filePath in listFiles("src/nn/optimizers/"): - let modName = filePath[18..^5] - exec r"nim doc -o:docs/build/nn_optimizers." & modName & ".html " & filePath - - for filePath in listFiles("src/nn_dsl/"): - let modName = filePath[11..^5] - exec r"nim doc -o:docs/build/nn_dsl." & modName & ".html " & filePath - - for filePath in listFiles("src/linear_algebra/"): - let modName = filePath[19..^5] - exec r"nim doc -o:docs/build/la." & modName & ".html " & filePath - - for filePath in listFiles("src/stats/"): - let modName = filePath[10..^5] - exec r"nim doc -o:docs/build/stats." & modName & ".html " & filePath - - for filePath in listFiles("src/ml/clustering/"): - let modName = filePath[18..^5] - exec r"nim doc -o:docs/build/ml." & modName & ".html " & filePath - - for filePath in listFiles("src/ml/dimensionality_reduction/"): - let modName = filePath[32..^5] - exec r"nim doc -o:docs/build/ml." & modName & ".html " & filePath - - for filePath in listFiles("src/ml/metrics/"): - let modName = filePath[15..^5] - exec r"nim doc -o:docs/build/ml." & modName & ".html " & filePath - - block: - let filePath = "src/nlp/tokenizers.nim" - let modName = filePath[8..^5] - exec r"nim doc -o:docs/build/nlp." & modName & ".html " & filePath - - for filePath in listFiles("src/io/"): - let modName = filePath[7..^5] - exec r"nim doc -o:docs/build/io." & modName & ".html " & filePath - - for filePath in listFiles("src/datasets/"): - let modName = filePath[13..^5] - exec r"nim doc -o:docs/build/datasets." 
& modName & ".html " & filePath - - # Process the rst - for filePath in listFiles("docs/"): - if filePath[^4..^1] == ".rst": - let modName = filePath[5..^5] - exec r"nim rst2html -o:docs/build/" & modName & ".html " & filePath - - # Copy stylesheets - cpFile("docs/docutils.css", "docs/build/docutils.css") - cpFile("docs/nav.css", "docs/build/nav.css") + +template canImport(x: untyped): untyped = + compiles: + import x + +when canImport(docs / docs): + # can define the `gen_docs` task (docs already imported now) + # this is to hack around weird nimble + nimscript behavior. + # when overwriting an install nimble will try to parse the generated + # nimscript file and for some reason then it won't be able to import + # the module (even if it's put into `src/`). + task gen_docs, "Generate Arraymancer documentation": + # generate nimdoc.cfg file so we can generate the correct header for the + # index.html page without having to mess with the HTML manually. + genNimdocCfg("src/") + # build the actual docs and the index + buildDocs("src/", "docs/build") + # Copy our stylesheets + cpFile("docs/docutils.css", "docs/build/docutils.css") + cpFile("docs/nav.css", "docs/build/nav.css") + # Process the rst + for filePath in listFiles("docs/"): + if filePath[^4..^1] == ".rst": + let modName = filePath[5..^5] + exec r"nim rst2html -o:docs/build/" & modName & ".html " & filePath diff --git a/docs/docs.nim b/docs/docs.nim new file mode 100644 index 000000000..5bbbdedeb --- /dev/null +++ b/docs/docs.nim @@ -0,0 +1,309 @@ +import macros, strformat, strutils, sequtils, sets, tables, algorithm + +from os import parentDir, getCurrentCompilerExe, DirSep, extractFilename, `/`, setCurrentDir + +when defined(nimdoc): + from os import getCurrentDir, paramCount, paramStr + +#[ +This file is a slightly modified version of the same file of `nimterop`: +https://github.com/nimterop/nimterop/blob/master/nimterop/docs.nim +]# + + +proc getNimRootDir(): string = + #[ + hack, but works + alternatively 
(but more complex), use (from a nim file, not nims otherwise + you get Error: ambiguous call; both system.fileExists): + import "$nim/testament/lib/stdtest/specialpaths.nim" + nimRootDir + ]# + fmt"{currentSourcePath}".parentDir.parentDir.parentDir + +const + DirSep = when defined(windows): '\\' else: '/' + +proc execAction(cmd: string): string = + var + ccmd = "" + ret = 0 + when defined(Windows): + ccmd = "cmd /c " & cmd + elif defined(posix): + ccmd = cmd + else: + doAssert false + + (result, ret) = gorgeEx(ccmd) + doAssert ret == 0, "Command failed: " & $ret & "\ncmd: " & ccmd & "\nresult:\n" & result + +template genRemove(name: untyped): untyped = + proc `name`(s, toRemove: string): string = + result = s + result.`name`(toRemove) +genRemove(removePrefix) +genRemove(removeSuffix) + +proc getFiles*(path: string): seq[string] = + # Add files and dirs here, which should be skipped. + #const excludeDirs = [] + #let ExcludeDirSet = toSet(excludeDirs) + #if path.extractFilename in ExcludeDirSet: return + # The files below are not valid by themselves, they are only included + # from other files + const excludeFiles = [ "blas_l3_gemm_aux.nim", + "blas_l3_gemm_data_structure.nim", + "blas_l3_gemm_macro_kernel.nim", + "blas_l3_gemm_micro_kernel.nim", + "blas_l3_gemm_packing.nim", + "p_checks_cuda.nim", + "p_checks_opencl.nim", + "blis_api.nim" ] + let ExcludeFileSet = toSet(excludeFiles) + + for file in listFiles(path): + if file.endsWith(".nim") and file.extractFilename notin ExcludeFileSet: + result.add file + for dir in listDirs(path): + result.add getFiles(dir) + +import nimDocTemplates + +proc buildDocs*(path: string, docPath: string, baseDir = getProjectPath() & $DirSep, + masterBranch = "master", + defines: openArray[string] = @[]) = + ## Generate docs for all nim files in `path` and output all HTML files to the + ## `docPath` in a flattened form (subdirectories are removed). + ## + ## If duplicate filenames are detected, they will be printed at the end. 
+ ## + ## `baseDir` is the project path by default and `files` and `path` are relative + ## to that directory. Set to "" if using absolute paths. + ## + ## `masterBranch` is the name of the default branch to which the docs should link + ## when clicking the `Source` button below a procedure etc. + ## + ## `defines` is a list of `-d:xxx` define flags (the `xxx` part) that should be passed + ## to `nim doc` so that `getHeader()` is invoked correctly. + ## + ## Use the `--publish` flag with nimble to publish docs contained in + ## `path` to Github in the `gh-pages` branch. This requires the ghp-import + ## package for Python: `pip install ghp-import` + ## + ## WARNING: `--publish` will destroy any existing content in this branch. + ## + ## NOTE: `buildDocs()` only works correctly on Windows with Nim 1.0+ since + ## https://github.com/nim-lang/Nim/pull/11814 is required. + when defined(windows) and (NimMajor, NimMinor, NimPatch) < (1, 0, 0): + echo "buildDocs() unsupported on Windows for Nim < 1.0 - requires PR #11814" + else: + let + baseDir = + if baseDir == $DirSep: + getCurrentDir() & $DirSep + else: + baseDir + docPath = baseDir & docPath + path = baseDir & path + defStr = block: + var defStr = "" + for def in defines: + defStr &= " -d:" & def + defStr + nim = getCurrentCompilerExe() + + # now we walk the whole `path` and build the documentation for each `.nim` file. + # While doing that we flatten the directory structure for the generated HTML files. + # `src/foo/bar/baz.nim` just becomes + # `docPath/baz.html`. + # This allows for all files to be in the `docPath` directory, which means each + # file will be able to find the `dochack.js` file, which will be put into + # the `docPath` directory, too (the inclusion of the `dochack.js` is done statically + # via our generated nimdoc.cfg file and is fixed for each generated HTML). 
+ let files = getFiles(path) + var idx = 0 + var fileSet = initHashSet[string]() + var duplSet = initHashSet[string]() + for file in files: + let baseName = file.extractFilename() + let relPath = file.removePrefix(path).removeSuffix(baseName) + let prefix = relPath.strip(chars = {'/'}) # remove possible trailing `/` + .split('/') # split path parts + .join(".") # concat by `.` instead + var outfile = baseName.replace(".nim", ".html") + if outfile in fileSet: + duplSet.incl outfile + else: + fileSet.incl outfile + outfile = docPath / outfile + echo "Processing: ", outfile, " [", idx, "/", files.len, "]" + # NOTE: Changing the current working directory to the project path is required in order for + # `git.commit:` to work! Otherwise we sit in `docs` and for some reason the relative path + # will eat one piece of the resulting `source` links and thereby removing the actual branch + # and we end up with a broken link! + echo execAction(&"cd {getProjectPath()} && {nim} doc {defStr} --git.commit:{masterBranch} -o:{outfile} --index:on {file}") + inc idx + ## now build the index + echo execAction(&"{nim} buildIndex -o:{docPath}/theindex.html {docPath}") + when declared(getNimRootDir): + #[ + NOTE: running it locally doesn't work anymore on modern chromium browser, + because they block "access from origin 'null' due to CORS policy". + this enables doc search, works at least locally with: + cd {docPath} && python -m SimpleHTTPServer 9009 + ]# + echo execAction(&"{nim} js -o:{docPath}/dochack.js {getNimRootDir()}/tools/dochack/dochack.nim") + + for i in 0 .. 
paramCount(): + if paramStr(i) == "--publish": + echo execAction(&"cd {docPath} && ghp-import --no-jekyll -fp {docPath}") + break + + # echo "Processed files: ", fileSet + if duplSet.card > 0: + echo "WARNING: Duplicate filenames detected: ", duplSet + + +let nameMap = { + "dsl_core" : "Neural network: Declaration", + "relu" : "Activation: Relu (Rectified linear Unit)", + "sigmoid" : "Activation: Sigmoid", + "tanh" : "Activation: Tanh", + "conv2D" : "Layers: Convolution 2D", + "embedding" : "Layers: Embedding", + "gru" : "Layers: GRU (Gated Linear Unit)", + "linear" : "Layers: Linear/Dense", + "maxpool2D" : "Layers: Maxpool 2D", + "cross_entropy_losses" : "Loss: Cross-Entropy losses", + "mean_square_error_loss" : "Loss: Mean Square Error", + "softmax" : "Softmax", + "optimizers" : "Optimizers", + "init" : "Layers: Initializations", + + "reshape_flatten" : "Reshape & Flatten", + + "decomposition" : "Eigenvalue decomposition", + "decomposition_rand" : "Randomized Truncated SVD", + "least_squares" : "Least squares solver", + "linear_systems" : "Linear systems solver", + "special_matrices" : "Special linear algebra matrices", + "stats" : "Statistics", + "pca" : "Principal Component Analysis (PCA)", + "accuracy_score" : "Accuracy score", + "common_error_functions" : "Common errors, MAE and MSE (L1, L2 loss)", + "kmeans" : "K-Means", + + "mnist" : "MNIST", + "imdb" : "IMDB", + "io_csv" : "CSV reading and writing", + "io_hdf5" : "HDF5 files reading and writing", + "io_image" : "Images reading and writing", + "io_npy" : "Numpy files reading and writing", + + "autograd_common" : "Data structure", + "gates_basic" : "Basic operations", + "gates_blas" : "Linear algebra operations", + "gates_hadamard" : "Hadamard product (elementwise matrix multiply)", + "gates_reduce" : "Reduction operations", + "gates_shapeshifting_concat_split" : "Concatenation, stacking, splitting, chunking operations", + "gates_shapeshifting_views" : "Linear algebra operations", + + "nnp_activation" : 
"Activations", + "nnp_convolution" : "Convolution 2D", + "nnp_conv2d_cudnn" : "Convolution 2D - CuDNN", + "nnp_embedding" : "Embeddings", + "nnp_gru" : "Gated Recurrent Unit (GRU)", + "nnp_linear" : "Linear / Dense layer", + "nnp_maxpooling" : "Maxpooling", + "nnp_numerical_gradient" : "Numerical gradient", + "nnp_sigmoid_cross_entropy" : "Sigmoid Cross-Entropy loss", + "nnp_softmax_cross_entropy" : "Softmax Cross-Entropy loss", + "nnp_softmax" : "Softmax" +}.toTable + +proc wrap(name: string): string = + const tmpl = """
  • $#
  • """ + if name in nameMap: + result = tmpl % [name & ".html", nameMap[name]] + else: + result = tmpl % [name & ".html", name] + +proc getHeaderMap(path: string): seq[seq[string]] = + ## returns a nesteed seq where each element is a `seq[string]` containing + ## all elements to be added to the header at the index. The index + ## corresponds to the `$N` of the `nimDocTemplates.headerTmpl` field. + const excludeFiles = [ "nn", # only imports and exports `NN` files + "nn_dsl", # only imports and exports `NN DSL` files + "ml", # only imports and exports `ML` files + "io", # only imports and exports `io` files + "autograd", # only imports and exports `autograd` files + "blis" # doesn't import or export anything + ] + let ExcludeFileSet = toSet(excludeFiles) + # map of the different header categories + let catMap = { "tensor" : 1, + "nn" : 2, + "nn_dsl" : 2, + "linear_algebra" : 3, + "stats" : 3, + "ml" : 3, + "datasets" : 4, + "io" : 4, + "autograd" : 5 , + "nn_primitives" : 6, + "nlp" : 7, + "math_ops_fusion" : 7, + "laser" : 7, + "private" : 7}.toTable + + # `indexOverride` is used to override the index of the header the file + # is added to. Some files may be part of e.g. `tensor` but shouldn't be + # listed there, since they aren't that important. + # NOTE: the elements here are ``filenames`` and ``not`` directories! + let indexOverride = { "global_config" : 7 }.toTable + let files = getFiles(path) + + result = newSeq[seq[string]](7) + for file in files: + let baseName = file.extractFilename() + let outfile = baseName.replace(".nim", "") + if outfile in ExcludeFileSet: continue + let subDir = file.removePrefix(path).split('/')[0] + if subDir in catMap: + var idx: int + if outfile notin indexOverride: + idx = catMap[subDir] - 1 + else: + idx = indexOverride[outfile] - 1 + result[idx].add outfile + +proc genNimdocCfg*(path: string) = + ## This proc generates the `nimdoc.cfg`, which sits at the root of the + ## arraymancer repository. 
We generate it so that we can combine the + ## front page template derived from flyx's NimYaml: https://github.com/flyx/NimYAML + ## with the standard Nim document generation. We generate the fields for + ## the header links from the actual files found in each diretory. + ## + ## NOTE: manual intervention is required for each directory that is added + ## and should show up as its own tab in the header. Essentially look at the + ## `$` spans in the `docFileTmpl` above to see what to do. + let headerMap = getHeaderMap(path) + # create the strings based on the header map for each span + var spans = newSeq[string](7) + for idx in 0 ..< spans.len: + spans[idx] = headerMap[idx].sorted.mapIt(wrap(it)).join("\n") + # fill the HTML generation template from the filenames + let htmlTmpl = headerTmpl % [ spans[0], spans[1], spans[2], + spans[3], spans[4], spans[5], + spans[6]] + # first "header" + var fdata = "" + fdata.add("# Arraymancer documentation generation\n\n") + fdata.add(&"git.url = \"{gitUrl}\"\n\n") + fdata.add(&"doc.item.seesrc = \"\"\"{docItemSeeSrc}\"\"\"\n\n") + # finally write the HTML document template + fdata.add(&"doc.file = \"\"\"{docFileTmpl}{htmlTmpl}\"\"\"\n") + + # now build the content for the spans + writeFile(getProjectPath() & $DirSep & "nimdoc.cfg", fdata) diff --git a/docs/index.rst b/docs/index.rst index b0ac76120..9354d1e44 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -144,7 +144,7 @@ Installation: Nim is available in some Linux repositories and on Homebrew for macOS. I however recommend installing Nim in your user profile via -```choosenim`` `__. Once choosenim +`choosenim `_. Once choosenim installed Nim, you can ``nimble install arraymancer`` which will pull arraymancer and all its dependencies. 
diff --git a/docs/nav.css b/docs/nav.css index 7efa28b2c..0a35153b0 100644 --- a/docs/nav.css +++ b/docs/nav.css @@ -77,6 +77,12 @@ header span ul.monospace a { font-family: "Source Code Pro", Menlo, "Courier New", Courier, monospace; } +header span ul span ul { + max-height: 800px;/* you can change as you need it */ + overflow:auto;/* to get scroll */ +} + + header a:link, header a:visited { background: inherit; diff --git a/docs/nimDocTemplates.nim b/docs/nimDocTemplates.nim new file mode 100644 index 000000000..e3e463cac --- /dev/null +++ b/docs/nimDocTemplates.nim @@ -0,0 +1,194 @@ +const gitUrl* = "https://github.com/mratsim/arraymancer" + +const docItemSeeSrc* = """  Source +Edit +""" + +# TODO: industrialize similar to Nim website: https://github.com/nim-lang/Nim/blob/e758b9408e8fe935117f7f793164f1c9b74cec06/tools/nimweb.nim#L45 +# And: https://github.com/nim-lang/Nim/blob/d3f966922ef4ddd05c137f82e5b2329b3d5dc485/web/website.ini#L31 + +# TODO: move the technical reference to the end (need some CSS so that elements are properly placed) + +const docFileTmpl* = """ + + + + + + + + + + + + + + + + + +$title + + + + + + + + + +Arraymancer - $title + + + + + + + +Fork me on GitHub + + +
    +
    +

    $title

    + $content +
    + +
    +
    +
    +$analytics +""" + +const headerTmpl* = """ +
    + Arraymancer + + Technical reference + + + + Tutorial + + + + Spellbook (How-To's) + + + + Under the hood + + +
    + + +""" diff --git a/nimdoc.cfg b/nimdoc.cfg index 740838201..5d9d3f5f6 100644 --- a/nimdoc.cfg +++ b/nimdoc.cfg @@ -1,5 +1,4 @@ -## Arraymancer documentation generation -# Inspiration from flyx's NimYaml: https://github.com/flyx/NimYAML +# Arraymancer documentation generation git.url = "https://github.com/mratsim/arraymancer" @@ -9,26 +8,103 @@ class="link-seesrc" target="_blank">Source Edit """ -# TODO: industrialize similar to Nim website: https://github.com/nim-lang/Nim/blob/e758b9408e8fe935117f7f793164f1c9b74cec06/tools/nimweb.nim#L45 -# And: https://github.com/nim-lang/Nim/blob/d3f966922ef4ddd05c137f82e5b2329b3d5dc485/web/website.ini#L31 - -# TODO: move the technical reference to the end (need some CSS so that elements are properly placed) - -doc.file = """ - +doc.file = """ + + - - Arraymancer - $title + + + + + + + + + + + + + +$title + + + + + - - - + + +Arraymancer - $title + + + + + + + Fork me on GitHub + + +
    +
    +

    $title

    + $content +
    + +
    +
    +
    +$analytics
    Arraymancer @@ -37,115 +113,206 @@ doc.file = """ Core tensor API Neural network API Linear algebra, stats, ML IO & Datasets Autograd Neuralnet primitives + + + Other docs + @@ -179,19 +346,6 @@ doc.file = """
    -
    -
    -

    $title

    - $content -
    - -
    -
    -
    """ diff --git a/src/laser/primitives/matrix_multiplication/gemm_prepacked.nim b/src/laser/primitives/matrix_multiplication/gemm_prepacked.nim index 1f3a90584..9e8338d4c 100644 --- a/src/laser/primitives/matrix_multiplication/gemm_prepacked.nim +++ b/src/laser/primitives/matrix_multiplication/gemm_prepacked.nim @@ -298,8 +298,9 @@ proc gemm_packed*[T: SomeNumber]( # # ############################################################ -when isMainModule: - +when false: + ## these tests don't work in arraymancer, since the imported files are not + ## part of arraymancer's repository. import ../../tensor/[allocator, datatypes, initialization], strformat diff --git a/src/laser/primitives/matrix_multiplication/gemm_tiling.nim b/src/laser/primitives/matrix_multiplication/gemm_tiling.nim index 0ccf3ecf4..a29cd3177 100644 --- a/src/laser/primitives/matrix_multiplication/gemm_tiling.nim +++ b/src/laser/primitives/matrix_multiplication/gemm_tiling.nim @@ -218,7 +218,7 @@ func x86_ukernel*(cpu: CPUFeatureX86, T: typedesc, c_unit_stride: bool): MicroKe result.nb_vecs_nr = NbVecs[cpu] # SIMD vectors of B result.nr = result.nb_vecs_nr * result.nb_scalars -############################################# +# ############################################# # Workaround "undeclared identifier mr or nr" # for some reason the compiler cannot access fields in # the static MicroKernel. 
diff --git a/src/laser/primitives/matrix_multiplication/gemm_ukernel_generator.nim b/src/laser/primitives/matrix_multiplication/gemm_ukernel_generator.nim index f6dc50058..a99afe5a0 100644 --- a/src/laser/primitives/matrix_multiplication/gemm_ukernel_generator.nim +++ b/src/laser/primitives/matrix_multiplication/gemm_ukernel_generator.nim @@ -188,10 +188,10 @@ macro ukernel_simd_impl*( var declBody = newStmtList() for a in rA: declBody.add quote do: - var `a`{.noinit.}: `V` + var `a`{.noInit.}: `V` for b in rB: declBody.add quote do: - var `b`{.noinit.}: `V` + var `b`{.noInit.}: `V` for i in 0 ..< MR: for j in 0 ..< NbVecs: let ab = rAB[i][j] diff --git a/src/laser/primitives/matrix_multiplication/gemm_ukernel_sse2.nim b/src/laser/primitives/matrix_multiplication/gemm_ukernel_sse2.nim index c6f844d5a..2ec32e034 100644 --- a/src/laser/primitives/matrix_multiplication/gemm_ukernel_sse2.nim +++ b/src/laser/primitives/matrix_multiplication/gemm_ukernel_sse2.nim @@ -25,11 +25,11 @@ ukernel_generator( simd_fma = float64x2_muladd_unfused ) -####################################### +# ####################################### # # Int32: hack to unroll scalar code # -####################################### +# ####################################### # This is faster than using the fallback for mm_mullo_epi32 # in laser/primitives/private/sse2_utils @@ -80,11 +80,11 @@ ukernel_generator( ) -####################################### +# ####################################### # # Int64: hack to unroll scalar code # -####################################### +# ####################################### type Int64x2 = array[2, int64] diff --git a/src/linear_algebra/helpers/auxiliary_lapack.nim b/src/linear_algebra/helpers/auxiliary_lapack.nim index 8ad74c913..e198105bd 100644 --- a/src/linear_algebra/helpers/auxiliary_lapack.nim +++ b/src/linear_algebra/helpers/auxiliary_lapack.nim @@ -167,6 +167,7 @@ proc ormqr*[T: SomeFloat](C: var Tensor[T], Q: Tensor[T], tau: openarray[T], sid when 
isMainModule: import ./decomposition_lapack import ../../ml/metrics/common_error_functions + import ../../private/sequninit let a = [[12.0, -51.0, 4.0], [ 6.0, 167.0, -68.0], diff --git a/src/linear_algebra/helpers/solve_lapack.nim b/src/linear_algebra/helpers/solve_lapack.nim index 68ab1198c..3d38566c5 100644 --- a/src/linear_algebra/helpers/solve_lapack.nim +++ b/src/linear_algebra/helpers/solve_lapack.nim @@ -7,15 +7,15 @@ import ./overload, ../../tensor/tensor -# Wrappers for Fortran LAPACK linear equation driver routines *SV -# Currently only *GESV is wrapped +# Wrappers for Fortran LAPACK linear equation driver routines `*SV` +# Currently only `*GESV` is wrapped # TODO: Implement GBSV, GTSV, POSV, PBSV, PTSV, SYSV overload(gesv, sgesv) overload(gesv, dgesv) proc gesv*[T: SomeFloat](a, b: var Tensor[T], pivot_indices: var seq[int32]) = - ## Wrapper for LAPACK *gesv routines + ## Wrapper for LAPACK `*gesv` routines ## Solve AX = B for general matrix ## ## In-place version, this will overwrite a and b diff --git a/src/nn_primitives/backend/cudnn.nim b/src/nn_primitives/backend/cudnn.nim index 211f8f49b..a57aa7a10 100644 --- a/src/nn_primitives/backend/cudnn.nim +++ b/src/nn_primitives/backend/cudnn.nim @@ -56,7 +56,7 @@ template asCudnnType*[T: SomeFloat](typ: typedesc[T]): cudnnDataType_t = # ##################################################################### # Tensor descriptor -proc newCudnn4DTensorDesc*[T: SomeFloat](t: CudaTensor[T]): cudnnTensorDescriptor_t {.inline, noinit.}= +proc newCudnn4DTensorDesc*[T: SomeFloat](t: CudaTensor[T]): cudnnTensorDescriptor_t {.inline, noInit.}= # TODO: destroy descriptor automatically # TODO: generalize with the NDTensor Desc check cudnnCreateTensorDescriptor(result.addr) diff --git a/src/nn_primitives/backend/nnpack_interface.nim b/src/nn_primitives/backend/nnpack_interface.nim index 1fd8fdf08..2defc6532 100644 --- a/src/nn_primitives/backend/nnpack_interface.nim +++ b/src/nn_primitives/backend/nnpack_interface.nim @@ 
-12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import ../../tensor/tensor, ../types +import ../../tensor/tensor, ../private/p_nnp_types import ./nnpack proc nnpack_conv2d*(input, weight, bias: Tensor[float32], padding, stride: Size2D): Tensor[float32] {.noInit.}= # TODO use a single convention, return value or var result diff --git a/src/nn_primitives/nnp_maxpooling.nim b/src/nn_primitives/nnp_maxpooling.nim index a6ff1e5d6..654d42a4c 100644 --- a/src/nn_primitives/nnp_maxpooling.nim +++ b/src/nn_primitives/nnp_maxpooling.nim @@ -21,7 +21,7 @@ proc maxpool2d*[T](input: Tensor[T], kernel: Size2D, padding: Size2D = (0,0), stride: Size2D = (1,1) - ): tuple[max_indices: Tensor[int], maxpooled: Tensor[T]] {.noinit.}= + ): tuple[max_indices: Tensor[int], maxpooled: Tensor[T]] {.noInit.}= ## MaxPool 2D forward pass assert input.rank == 4 and input.is_C_contiguous diff --git a/src/tensor/einsum.nim b/src/tensor/einsum.nim index b414a407a..ad7cf6a37 100644 --- a/src/tensor/einsum.nim +++ b/src/tensor/einsum.nim @@ -4,163 +4,162 @@ import ./shapeshifting # Note: importing shapeshifting_cuda will trigger a Nim inference bug # in genContiguous with no workaround -#[ -This module provides Einstein summation for an arbitrary number of tensors. +## This module provides Einstein summation for an arbitrary number of tensors. +## +## Einstein summation describes a special application of +## `index notation `_ +## in which indices that appear more than once are implicitly summed over. +## This allows for a concise notation of many vector / matrix / tensor calculations, +## while exactly representing the required calculation. +## +## In general Einstein summation is a subset of +## `Ricci calculus `_. +## +## The implementation of `einsum` in different languages however, typically goes +## above and beyond actual Einstein summation, allowing for many aspects of +## Ricci calculus. 
+## +## Simple Einstein summation examples +## ================================== +## +## Typical examples include matrix-vector multiplication, matrix-matrix multiplication +## or the cross product. The examples below use the `einsum` / notation for the +## elements of tensors, namely `m[i,j]` for element `i,j` of the matrix ``m``, instead of +## the more mathematical notation `m_ij`. +## +## Matrix-vector multiplication +## ---------------------------- +## +## Let ``m`` be an `NxM` matrix and ``v`` an `M` vector. Then matrix-vector multiplication +## `m * v` is defined as: +## `w[i] = \sum_j m[i,j] * v[j]`. +## The result is an `N` vector ``w`` consisting of elements `w[i]`. +## Since `j` appears twice on the RHS of the equation, Einstein summation implies that +## the sum over `j` is implicit, hence we can write: +## +## `w[i] = m[i,j] * v[j]`. +## +## Matrix-matrix multiplication +## ---------------------------- +## +## The same can be applied to matrix-matrix multiplication. Let ``m``, ``n`` be two +## compatible matrices (both `NxN` or `NxM` and `MxN`) with elements `m[i,j]` and +## `n[i,j]`. Matrix-matrix multiplication is defined as +## +## `a[i,k] = \sum_j m[i,j] * n[j,k]` +## +## and thus in Einstein summation: +## +## `a[i,k] = m[i,j] * n[j,k]`. +## +## Cross-product of two vectors +## ---------------------------- +## +## The cross product of two 3 vectors ``v``, ``w`` can be conveniently defined using +## the `Levi-Civita symbol `_ +## `\epsilon_{ijk}`: +## +## `a[i] = \epsilon_{ijk} v[j] * w[k]`, +## +## which implies `j` and `k` are summed over, while `i` is kept for the resulting tensor. +## +## More complex examples +## ===================== +## +## In this implementation of `einsum` (similar to other `einsum` implementations), +## it's also possible to explicitly keep different dimensions of the multiplied +## tensors or even perform calculations without a single index appearing multiple +## times, for instance to transpose a tensor. 
For these cases the explicit form +## of the `einsum` macro has to be used, see below. +## +## Transposition of a matrix +## ------------------------- +## +## Transposition of a matrix can be expressed in index notation simply as an +## exchange of indices, namely let ``m`` be an `NxM` matrix, the transposed +## `MxN` matrix ``m^T`` is written as: +## +## `m[j,i] = m[i,j]`. +## +## Hadamard product +## ---------------- +## +## The Hadamard product defines the product of two `NxM` matrices ``n``, ``m`` +## in which the matrices are multiplied element wise. It is a good example +## of the extension of `einsum` over standard Einstein summation: +## +## `a[i,j] = m[i,j] * n[i,j]`. +## +## Naive Einstein summation would demand a sum over both `i` and `j`, resulting +## in a scalar on the LHS instead of another `NxM` matrix. +## +## Contracting a whole matrix +## -------------------------- +## +## Contraction of a full matrix describes summing all elements of a matrix +## ``m``, resulting in a scalar `a`. It is expressed by: +## +## `a = m[i,i]`. +## +## The `einsum` macro +## ================== +## +## The `einsum` macro provides two different usage paradigms. +## * implicit <- normal Einstein summation +## * explicit <- potential extended Einstein summation +## +## The macro takes a `varargs[Tensor]` and a single statement. It +## returns a `Tensor[T]`, where `T` is deduced from the subtype of the +## given tensors, if the result is not a scalar. For a scalar result +## the return value is of type `T`. Note that the type of all given tensors +## must match! +## +## The statement given to the macro is just a single line making use of +## Einstein summation as in all the examples above. As a matter of fact +## all examples above are valid statements for the `einsum` macro! +## +## Of course only tensors, which are given to the macro in the `varargs` +## may be used in the statement. 
+## +## If only the `RHS` of the examples above is given, the required indices +## for the resulting tensor are automatically calculated using pure Einstein +## summation. Assuming `a`, `b` are two 2D arraymancer tensors, we could +## express their matrix multiplication as +## +## .. code:: nim +## let c = einsum(a, b): +## a[i,j] * b[j,k] +## +## Of course the same can be written in explicit form: +## +## .. code:: nim +## let c = einsum(a, b): +## c[i,k] = a[i,j] * b[j,k] +## +## A few things must be noted here for the explicit case: +## * the indices on the LHS are taken as "the truth"! Any index appearing here +## will ``not`` be summed over. +## * the order on the LHS is taken into account, allowing for transposing +## dimensions. +## * the identifier used on the LHS is arbitrary. It can match what the user assigns +## to, but need not. +## +## For many more examples for typical applications, take a look at the test case +## `<../../tests/tensor/test_einsum.nim>`_. +## +## Implementation details +## ---------------------- +## +## The macro calculates which indices must be contracted and which remain in the +## final tensor. For each appearing index (of either case) we create a for loop, +## while the contracting for loops appear within the non contracting indices. +## +## The macro creates a `block`, in which the code is produced and returns the +## temporary tensor used in it. +## +## It also forces the tensors into contiguous, row major form by creating +## local copies with `asContiguous`. -Einstein summation describes a special application of -`index notation `_ -in which indices that appear more than once are implicitly summed over. -This allows for a concise notation of many vector / matrix / tensor calculations, -while exactly representing the required calculation. - -In general Einstein summation is a subset of -`Ricci calculus `_. 
- -The implementation of `einsum` in different languages however, typically goes -above and beyond actual Einstein summation, allowing for many aspects of -Ricci calculus. - -Simple Einstein summation examples -================================== - -Typical examples include matrix-vector multiplcation, matrix-matrix multiplication -or the cross product. The examples below use the `einsum` / notation for the -elements of tensors, namely `m[i,j]` for element `i,j` of the matrix ``m``, instead of -the more mathematical notation `m_ij`. - -Matrix-vector multiplication ----------------------------- - -Let ``m`` be an `NxM` matrix and ``v`` a `M` vector. Then matrix-vector multiplication -`m * v` is defined as: -`w[i] = \sum_j m[i,j] * v[j]`. -The result is an `N` vector ``w`` consisting of elements `w[i]`. -Since `j` appears twice on the RHS of the equation, Einstein summation implies that -the sum over `j` is implicit, hence we can write: - -`w[i] = m[i,j] * v[j]`. - -Matrix-matrix multiplication ----------------------------- - -The same can be applied to matrix-matrix multiplication. Let ``m``, ``n`` be two -compatible matrices (both `NxN` or `NxM` and `MxN`) with elements `m[i,j]` and -`n[i,j]`. Matrix-matrix multiplication is defined as - -`a[i,k] = \sum_j m[i,j] * n[j,k]` - -and thus in Einstein summation: - -`a[i,k] = m[i,j] * n[j,k]`. - -Cross-product of two vectors ----------------------------- - -The cross product of two 3 vectors ``v``, ``w`` can be conveniently defined using -the `Levi-Civita symbol `_ -`\epsilon_{ijk}`: - -`a[i] = \epsilon_{ijk} v[j] * w[k]`, - -which implies `j` and `k` are summed over, while `i` is kept for the resulting tensor. 
- -More complex examples -===================== - -In this implementation of `einsum` (similar to other `einsum` implementations), -it's also possible to explicitly keep different dimensions of the multiplied -tensors or even perform calculations without a single index appearing mutliple -times, for instance to transpose a tensor. For these cases the explicit form -of the `einsum` macro has to be used, see below. - -Transposition of a matrix -------------------------- - -Transposition of a matrix can be expressed in index notation simply as an -exchange of indices, namely let ``m`` be an `NxM` matrix, the transposed -`MxN` matrix ``m^T`` is written as: - -`m[j,i] = m[i,j]`. - -Hadamard product ----------------- - -The Hadamard product defines the product of two `NxM` matrices ``n``, ``m`` -in which the matrices are multiplied element wise. It is a good example -of the extension of `einsum` over standard Einstein summation: - -`a[i,j] = m[i,j] * n[i,j]`. - -Naive Einstein summation would demand a sum over both `i` and `j`, resulting -in a scalar on the LHS instead of another `NxM` matrix. - -Contracting a whole matrix --------------------------- - -Contraction of a full matrix describes summing all elements of a matrix -``m``, resulting in a scalar `a`. It is expressed by: - -`a = m[i,i]`. - -The `einsum` macro -================== - -The `einsum` macro provides two different usage paradigms. -* implicit <- normal Einstein summation -* explicit <- potential extended Einstein summation - -The macro takes a `varargs[Tensor]` and a single statement. It -returns a `Tensor[T]`, where `T` is deduced from the subtype of the -given tensors, if the result is not a scalar. For a scalar result -the return value is of type `T`. Note that the type of all given tensors -must match! - -The statement given to the macro is just a single line making use of -Einstein summation as in all the examples above. 
As a matter of fact -all examples above are valid statements for the `einsum` macro! - -Of course only tensors, which are given to the macro in the `varargs` -may be used in the statement. - -If only the `RHS` of the examples above are given, the required indices -for the resulting tensor are automatically calculated using pure Einstein -summation. Assuming `a`, `b` are two 2D arraymancer tensors , we could -express their matrix mutliplcation as - -.. code:: nim - let c = einsum(a, b): - a[i,j] * b[j,k] - -Of course the same can be written in explicit form: - -.. code:: nim - let c = einsum(a, b): - c[i,k] = a[i,j] * b[j,k] - -A few things must be noted here for the explicit case: -* the indices on the LHS are taken as "the truth"! Any index appearing here - will ``not`` be summed over. -* the order on the LHS is taken into account, allowing for transposing - dimensions. -* the identifier used on the LHS is arbitrary. It can match what the user assigns - to, but need not. - -For many more examples for typical applications, take a look at the test case -`<../../tests/tensor/test_einsum.nim>`_. - -Implementation details ----------------------- - -The macro calculates, which indices must be contracted and which remain in the -final tensor. For each appearing index (of either case) we create a for loop, -while the contracting for loops appear within the non contracting indices. - -The macro creates a `block`, in which the code is produced and returns the -temporary tensor used in it. - -It also forces the tensors into contiguous, row major form by creating -local copies with `asContiguous`. 
-]# type # enum which stores whether an `einsum` call is explicit `skAssign` (statement diff --git a/src/tensor/fallback/legacy/blas_l3_gemm_macro_kernel.nim b/src/tensor/fallback/legacy/blas_l3_gemm_macro_kernel.nim index eff49f9f4..572e0fe75 100644 --- a/src/tensor/fallback/legacy/blas_l3_gemm_macro_kernel.nim +++ b/src/tensor/fallback/legacy/blas_l3_gemm_macro_kernel.nim @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import ../backend/openmp - proc gemm_macro_kernel[T](mc, nc, kc: int, alpha: T, beta: T, @@ -58,4 +56,4 @@ proc gemm_macro_kernel[T](mc, nc, kc: int, buffer_C, 1, MR, C, i*MR*incRowC+j*NR*incColC + offC, - incRowC, incColC) \ No newline at end of file + incRowC, incColC) diff --git a/src/tensor/fallback/legacy/blas_l3_gemm_micro_kernel.nim b/src/tensor/fallback/legacy/blas_l3_gemm_micro_kernel.nim index 2932c9e20..b21fbd20a 100644 --- a/src/tensor/fallback/legacy/blas_l3_gemm_micro_kernel.nim +++ b/src/tensor/fallback/legacy/blas_l3_gemm_micro_kernel.nim @@ -13,7 +13,7 @@ # limitations under the License. 
import macros, - ../backend/memory_optimization_hints + ../../backend/memory_optimization_hints macro unroll_ukernel[MRNR, T](AB: array[MRNR, T], a: ptr UncheckedArray[T], offA: int, diff --git a/src/tensor/private/p_kernels_interface_opencl.nim b/src/tensor/private/p_kernels_interface_opencl.nim index 736fef7a4..4b9f7e5d7 100644 --- a/src/tensor/private/p_kernels_interface_opencl.nim +++ b/src/tensor/private/p_kernels_interface_opencl.nim @@ -126,11 +126,11 @@ template genClInfixOp*( T: typedesc, export procName template gen_cl_apply2*(kern_name, ctype, op: string): string = - ## Generates an OpenCL kernel for an elementwise in-place binary infix operation (like +=, -=, *.= or /.=) + ## Generates an OpenCL kernel for an elementwise in-place binary infix operation (like `+=, -=, *.= or /.=`) ## Input: ## - The C type ## - The C kernel name (this only helps debugging the C code) - ## - The C operation (+=, -=, *.= or /.=) + ## - The C operation (`+=, -=, *.= or /.=`) opencl_getIndexOfElementID() & """ __kernel @@ -165,13 +165,13 @@ template genClInPlaceOp*( T: typedesc, cInfixOp: string, exported: static[bool] = true): untyped = ## Generates an OpenCL kernel for an elementwise in-place binary - ## infix operation (like +=, -=, *.= or /.=) + ## infix operation (like `+=, -=, *.= or /.=`) ## Input: ## - The Nim type of the elements of the input tensors ## - The equivalent C type ## - The Nim identifier of the resulting proc ## - The C kernel name (this only helps debugging the C code) - ## - The C operation (+=, -=, *.= or /.=) + ## - The C operation (`+=, -=, *.= or /.=`) proc procName(dst: var ClTensor[T], src: ClTensor[T]) = when compileOption("boundChecks"):