diff --git a/README.md b/README.md index 37adbe4d..6e776ec0 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,16 @@ # ARCtrl -> **ARCtrl** the easy way to read, manipulate and write ARCs in __.NET__ -and __JavaScript__! ❤️ +> **ARCtrl** the easy way to read, manipulate and write ARCs in __.NET__, __JavaScript__ and __Python__! ❤️ | Version | Downloads | | :--------|-----------:| |Nuget|Nuget| |NPM|NPM| - +|PyPI|PyPI| ## Install -(currently only prereleases available, check the [nuget page](https://www.nuget.org/packages/ARCtrl) or [npm page](https://www.npmjs.com/package/@nfdi4plants/arctrl) respectively) - -### .NET +#### .NET ```fsharp #r "nuget: ARCtrl" @@ -23,12 +20,18 @@ and __JavaScript__! ❤️ ``` -### JavaScript +#### JavaScript ```bash npm i @nfdi4plants/arctrl ``` +#### Python + +```bash +pip install arctrl +``` + ## Docs Currently we provide some documentation in form of markdown files in the `/docs` folder of this repository! @@ -37,7 +40,7 @@ Currently we provide some documentation in form of markdown files in the `/docs` ## Development -### Requirements +#### Requirements - [nodejs and npm](https://nodejs.org/en/download) - verify with `node --version` (Tested with v18.16.1) @@ -47,7 +50,7 @@ Currently we provide some documentation in form of markdown files in the `/docs` - [Python](https://www.python.org/downloads/) - verify with `py --version` (Tested with 3.12.2, known to work only for >=3.11) -### Local Setup +#### Local Setup 1. Setup dotnet tools @@ -68,3 +71,18 @@ Currently we provide some documentation in form of markdown files in the `/docs` 3. 
`.\.venv\Scripts\python.exe -m poetry install --no-root` Verify correct setup with `./build.cmd runtests` ✨ + +## Performance + +Measured on 13th Gen Intel(R) Core(TM) i7-13800H + +| Name | Description | FSharp Time (ms) | JavaScript Time (ms) | Python Time (ms) | +| --- | --- | --- | --- | --- | +| Table_GetHashCode | From a table with 1 column and 10000 rows, retrieve the Hash Code | 5 | 21 | 226 | +| Table_AddRows | Add 10000 rows to a table with 4 columns. | 15 | 22 | 289 | +| Table_fillMissingCells | For a table 6 columns and 20000 rows, where each row has one missing value, fill those values with default values. | 49 | 108 | 4813 | +| Table_ToJson | Serialize a table with 5 columns and 10000 rows to json. | 1099 | 481 | 6833 | +| Table_ToCompressedJson | Serialize a table with 5 columns and 10000 rows to compressed json. | 261 | 2266 | 717334 | +| Assay_toJson | Parse an assay with one table with 10000 rows and 6 columns to json | 915 | 2459 | 28799 | +| Study_FromWorkbook | Parse a workbook with one study with 10000 rows and 6 columns to an ArcStudy | 97 | 87 | 1249 | +| Investigation_ToWorkbook_ManyStudies | Parse an investigation with 1500 studies to a workbook | 621 | 379 | 9974 | diff --git a/build/BasicTasks.fs b/build/BasicTasks.fs index d24953d6..449e4f9f 100644 --- a/build/BasicTasks.fs +++ b/build/BasicTasks.fs @@ -83,6 +83,17 @@ module Helper = let dotnet = createProcess "dotnet" + let node = + let nodePath = + match ProcessUtils.tryFindFileOnPath "node" with + | Some path -> path + | None -> + "node was not found in path. Please install it and make sure it's available from your path. 
" + + "See https://safe-stack.github.io/docs/quickstart/#install-pre-requisites for more info" + |> failwith + + createProcess nodePath + let npx = let npmPath = match ProcessUtils.tryFindFileOnPath "npx" with diff --git a/build/Build.fs b/build/Build.fs index bf3cc6d5..0ffa83f7 100644 --- a/build/Build.fs +++ b/build/Build.fs @@ -30,6 +30,7 @@ let _preRelease = [setPrereleaseTag; clean; build; runTests; packPrerelease; createPrereleaseTag; publishNugetPrerelease; publishNPMPrerelease] ReleaseNotesTasks.updateReleaseNotes |> ignore +PerformanceTasks.performanceReport |> ignore [] let main args = diff --git a/build/Build.fsproj b/build/Build.fsproj index ecbd44dd..7c5011fd 100644 --- a/build/Build.fsproj +++ b/build/Build.fsproj @@ -13,6 +13,7 @@ +
diff --git a/build/PerformanceTasks.fs b/build/PerformanceTasks.fs
new file mode 100644
index 00000000..2685ae1f
--- /dev/null
+++ b/build/PerformanceTasks.fs
@@ -0,0 +1,47 @@
+module PerformanceTasks
+
+open BlackFox.Fake
+open Fake.DotNet
+
+open ProjectInfo
+open BasicTasks
+open Fake.Core
+
+/// Build tasks that run the `tests/Speedtest` project once per target (Python, JavaScript, .NET).
+module PerformanceReport =
+
+    /// CPU name passed to the benchmark program; empty until `promptCpu` has asked for it.
+    let mutable cpu = ""
+
+    /// Asks for the CPU name on the console, but only while `cpu` is still empty.
+    let promptCpu() =
+        if cpu = "" then
+            printfn "Please enter your cpu name"
+            cpu <- System.Console.ReadLine()
+
+    let testPerformancePy = BuildTask.create "testPerformancePy" [clean; build] {
+        promptCpu()
+        let path = "tests/Speedtest"
+        // transpile py files from fsharp code
+        run dotnet $"fable {path} -o {path}/py --lang python" ""
+        // run the transpiled program with python, passing the cpu name as argument
+        run python $"{path}/py/program.py \"{cpu}\"" ""
+    }
+    let testPerformanceJs = BuildTask.create "testPerformanceJS" [clean; build] {
+        promptCpu()
+        let path = "tests/Speedtest"
+        // transpile js files from fsharp code
+        run dotnet $"fable {path} -o {path}/js" ""
+        // run the transpiled program with node, passing the cpu name as argument
+        run node $"{path}/js/program.js \"{cpu}\"" ""
+    }
+    let testPerformanceDotnet = BuildTask.create "testPerformanceDotnet" [clean; build] {
+        promptCpu()
+        let path = "tests/Speedtest"
+        run dotnet $"run --project {path} \"{cpu}\"" ""
+    }
+
+/// Aggregate task that depends on the Python, JavaScript and .NET performance tasks.
+let performanceReport = BuildTask.create "PerformanceReport" [PerformanceReport.testPerformancePy; PerformanceReport.testPerformanceJs; PerformanceReport.testPerformanceDotnet] {
+    ()
+}
\ No newline at end of file
diff --git a/docs/scripts_fsharp/ArcInvestigation.fsx b/docs/scripts_fsharp/ArcInvestigation.fsx index 9d39b58e..ea3d0d71 100644 --- a/docs/scripts_fsharp/ArcInvestigation.fsx +++ b/docs/scripts_fsharp/ArcInvestigation.fsx @@ -20,7 +20,7 @@ open FsSpreadsheet.Net let fswb = ArcInvestigation.toFsWorkbook investigation_comments -fswb.ToFile("test2.isa.investigation.xlsx") +fswb.ToXlsxFile("test2.isa.investigation.xlsx") // ## Json - Write diff --git a/tests/ISA/ISA.Json.Tests/ArcTypes.Tests.fs b/tests/ISA/ISA.Json.Tests/ArcTypes.Tests.fs index 8c9d1dfc..8ac184f0 100644 --- a/tests/ISA/ISA.Json.Tests/ArcTypes.Tests.fs +++ b/tests/ISA/ISA.Json.Tests/ArcTypes.Tests.fs @@ -179,16 +179,6 @@ let tests_ArcTable = testList "ArcTable" [ let decoded = ArcTable.fromCompressedJsonString encoded Expect.equal decoded filled "empty table is wrong after compressed encoding and decoding" // Set to pTest in Fable, as compressed parsing is around 10times slower than uncompressed parsing. 
This is weird, since in dotnet it is around 10times faster - #if FABLE_COMPILER - ptestCase "Performance" <| fun _ -> - #else - testCase "Performance" <| fun _ -> - #endif - let t = TestObjects.Spreadsheet.Study.LargeFile.table - Expect.isFasterThan (t.ToCompressedJsonString >> ignore) (t.ToJsonString >> ignore) "toCompressedJsonString is slower than to uncompressed" - let json = t.ToJsonString() - let compressed = Expect.wantFaster (t.ToCompressedJsonString) 1000 "toCompressedJsonString should be faster" - Expect.isTrue (compressed.Length*5 < json.Length) $"compressed should be more than 10 times smaller than uncompressed, but was only {float json.Length / float compressed.Length}x smaller" testCase "rangeColumnSize" <| fun _ -> // testTable column should be saved as range column, this should make it smaller than the IO column even though it has more cells let testTable = ArcTable.init("Test") @@ -258,44 +248,6 @@ let tests_ArcAssay = testList "ArcAssay" [ let decoded = ArcAssay.fromCompressedJsonString encoded Expect.equal decoded filled "empty table is wrong after compressed encoding and decoding" ] - testList "performance" [ - testCase "SingleLongTable_JsonAssay" <| fun _ -> - let a = ArcAssay.init("MyAssay") - let t = a.InitTable("MyTable") - t.AddColumn(CompositeHeader.Input IOType.Source) - t.AddColumn(CompositeHeader.Parameter (OntologyAnnotation.fromString("MyParameter1"))) - t.AddColumn(CompositeHeader.Parameter (OntologyAnnotation.fromString("MyParameter2"))) - t.AddColumn(CompositeHeader.Parameter (OntologyAnnotation.fromString("MyParameter3"))) - t.AddColumn(CompositeHeader.Characteristic (OntologyAnnotation.fromString("MyCharacteristic"))) - t.AddColumn(CompositeHeader.Output IOType.Sample) - let rowCount = 10000 - for i = 0 to rowCount - 1 do - let cells = - [| - CompositeCell.FreeText $"Source{i}" - CompositeCell.FreeText $"Parameter1_value" - CompositeCell.FreeText $"Parameter2_value" - CompositeCell.FreeText $"Parameter3_value{i - i % 10}" - 
CompositeCell.FreeText $"Characteristic_value" - CompositeCell.FreeText $"Sample{i}" - |] - for j = 0 to cells.Length - 1 do - t.Values.[(j,i)] <- cells.[j] - let f() = ArcAssay.toJsonString a - #if FABLE_COMPILER_JAVASCRIPT - let expectedMs = 5000 - #endif - #if FABLE_COMPILER_PYTHON - let expectedMs = 100000 - #endif - #if !FABLE_COMPILER - let expectedMs = 2500 - #endif - // 1200ms in Dotnet on i7-13800H - // 3412ms in Javascript on i7-13800H - // 24562ms in Javascript on i7-13800H - Expect.wantFaster f expectedMs "toJsonString should be faster" |> ignore - ] ] let tests_ArcStudy = testList "ArcStudy" [ diff --git a/tests/ISA/ISA.Spreadsheet.Tests/ISA.Spreadsheet.Tests.fsproj b/tests/ISA/ISA.Spreadsheet.Tests/ISA.Spreadsheet.Tests.fsproj index 8728b2bf..6ee34c7a 100644 --- a/tests/ISA/ISA.Spreadsheet.Tests/ISA.Spreadsheet.Tests.fsproj +++ b/tests/ISA/ISA.Spreadsheet.Tests/ISA.Spreadsheet.Tests.fsproj @@ -17,7 +17,6 @@ - diff --git a/tests/ISA/ISA.Spreadsheet.Tests/Main.fs b/tests/ISA/ISA.Spreadsheet.Tests/Main.fs index 77b328f5..3c8d206b 100644 --- a/tests/ISA/ISA.Spreadsheet.Tests/Main.fs +++ b/tests/ISA/ISA.Spreadsheet.Tests/Main.fs @@ -12,7 +12,6 @@ let all = testSequenced <| testList "ISA.Spreadsheet" [ ArcStudyTests.main SparseTableTests.main IdentifierTests.main - Tests.Performance.Main ] [] diff --git a/tests/ISA/ISA.Spreadsheet.Tests/Performance.Tests.fs b/tests/ISA/ISA.Spreadsheet.Tests/Performance.Tests.fs deleted file mode 100644 index e344213f..00000000 --- a/tests/ISA/ISA.Spreadsheet.Tests/Performance.Tests.fs +++ /dev/null @@ -1,45 +0,0 @@ -module Tests.Performance - -open ARCtrl.ISA -open FsSpreadsheet -open TestingUtils -open ARCtrl.ISA.Spreadsheet - -let private tests_Study = testList "Study" [ - testCase "LargeWorkbook" <| fun _ -> - let fswb = TestObjects.Spreadsheet.Study.LargeFile.Workbook - let convertToArcFile(fswb:FsWorkbook) = - let timer_start = System.DateTime.Now - let s,_ = ArcStudy.fromFsWorkbook fswb - let timer_end = 
System.DateTime.Now - let runtime = (timer_end - timer_start).Milliseconds - #if FABLE_COMPILER_PYTHON - let expectedMs = 1500 - #else - let expectedMs = 300 // this is too high and should be reduced - #endif - Expect.equal s.TableCount 1 "Table count" - Expect.isTrue (runtime <= expectedMs) $"Expected conversion to be finished in under {expectedMs}, but it took {runtime}" - convertToArcFile fswb -] - -let private tests_Investigation = testList "Investigation" [ - testCase "WriteManyStudies" <| fun _ -> - let inv = ArcInvestigation.init("MyInvestigation") - for i = 0 to 1500 do - let s = ArcStudy.init($"Study{i}") - inv.AddRegisteredStudy(s) - let testF = fun () -> ArcInvestigation.toFsWorkbook inv - #if FABLE_COMPILER_PYTHON - let expectedMs = 50000 - #else - let expectedMs = 1000 - #endif - let wb = Expect.wantFaster testF expectedMs "Parsing investigation to Workbook is too slow" - Expect.equal (wb.GetWorksheets().Count) 1 "Worksheet count" -] - -let Main = testList "Performance" [ - tests_Study - tests_Investigation -] \ No newline at end of file diff --git a/tests/ISA/ISA.Tests/ArcTable.Tests.fs b/tests/ISA/ISA.Tests/ArcTable.Tests.fs index d7a40565..cca40753 100644 --- a/tests/ISA/ISA.Tests/ArcTable.Tests.fs +++ b/tests/ISA/ISA.Tests/ArcTable.Tests.fs @@ -108,18 +108,6 @@ let private tests_GetHashCode = testList "GetHashCode" [ let notActual = create_testTable() Expect.notEqual actual notActual "equal" Expect.notEqual (actual.GetHashCode()) (notActual.GetHashCode()) "Hash" - testCase "Performance" <| fun _ -> - let testTable = ArcTable.init("Test") - let values = Array.init 10000 (fun i -> CompositeCell.createFreeText (string i)) - testTable.AddColumn(CompositeHeader.FreeText "Header", values) - let f1 () = testTable.GetHashCode() - #if FABLE_COMPILER_PYTHON - let expectedMs = 500 - #else - let expectedMs = 50 - #endif - // On i7-13800H, 2ms in Dotnet and 18ms in javascript - Expect.wantFaster f1 expectedMs "GetHashCode is too slow" |> ignore ] let private 
tests_validate = @@ -483,29 +471,6 @@ let private tests_UpdateCell = let eval() = table.UpdateCellAt(0,0,cell) Expect.throws eval "" ) - - // Commented this test out, as the behaviour is different in dotnet and js, but both implementations are very close together performance-wise - - //testCase "performance" (fun () -> - // // Test, that for most cases (because of performance), setter should be used - // let f1 = fun () -> - // let table = ArcTable.init("Table") - // for i = 0 to 10 do - // table.Headers.Insert(i,CompositeHeader.FreeText $"Header_{i}") - // for j = 0 to 5000 do - // ArcTableAux.Unchecked.setCellAt(i,j,CompositeCell.createFreeText $"Cell_{i}_{j}") table.Values - // let f2 = fun () -> - // let table = ArcTable.init("Table") - // for i = 0 to 10 do - // table.Headers.Insert(i,CompositeHeader.FreeText $"Header_{i}") - // for j = 0 to 5000 do - // ArcTableAux.Unchecked.addCellAt(i,j,CompositeCell.createFreeText $"Cell_{i}_{j}") table.Values - // Expect.isFasterThan f1 f2 "SetCell Implementation should be faster than reference" - - - - //) - ] let private tests_UpdateColumn = @@ -2023,19 +1988,6 @@ let private tests_AddRows = else Expect.equal table.Values.[columnIndex, rowIndex] newTable.Values.[columnIndex, rowIndex-newColumnCount] $"Cell {columnIndex},{rowIndex}" ) - testCase "performance" (fun () -> - let table = ArcTable("MyTable",ResizeArray [CompositeHeader.Input IOType.Sample;CompositeHeader.FreeText "Freetext1" ; CompositeHeader.FreeText "Freetext2"; CompositeHeader.Output IOType.Sample], System.Collections.Generic.Dictionary()) - let rows = - Array.init 10000 (fun i -> - [|CompositeCell.FreeText $"Source_{i}"; CompositeCell.FreeText $"FT1_{i}"; CompositeCell.FreeText $"FT2_{i}"; CompositeCell.FreeText $"Sample_{i}"; |]) - let testF = fun () -> table.AddRows(rows) - #if FABLE_COMPILER_PYTHON - let expectedMs = 1000 - #else - let expectedMs = 100 - #endif - Expect.wantFaster testF expectedMs $"AddRows is too slow." 
|> ignore - ) ] let private tests_UpdateRefWithSheet = @@ -2316,34 +2268,9 @@ let private tests_equality = testList "equality" [ ] -let private tests_fillMissing = testList "fillMissing" [ - testCase "performance" <| fun _ -> - - let headers = ResizeArray [CompositeHeader.Input IOType.Sample;CompositeHeader.FreeText "Freetext1" ; CompositeHeader.FreeText "Freetext2"; CompositeHeader.Output IOType.Sample] - let values = System.Collections.Generic.Dictionary() - for i = 0 to 20000 do - if i%2 = 0 then - ArcTableAux.Unchecked.setCellAt(0,i,(CompositeCell.FreeText $"Source_{i}")) values - ArcTableAux.Unchecked.setCellAt(1,i,(CompositeCell.FreeText $"FT1_{i}")) values - ArcTableAux.Unchecked.setCellAt(2,i,(CompositeCell.FreeText $"FT2_{i}")) values - ArcTableAux.Unchecked.setCellAt(3,i,(CompositeCell.FreeText $"FT3_{i}")) values - ArcTableAux.Unchecked.setCellAt(6,i,(CompositeCell.FreeText $"Sample_{i}")) values - else - ArcTableAux.Unchecked.setCellAt(0,i,(CompositeCell.FreeText $"Source_{i}")) values - ArcTableAux.Unchecked.setCellAt(3,i,(CompositeCell.FreeText $"FT3_{i}")) values - ArcTableAux.Unchecked.setCellAt(4,i,(CompositeCell.FreeText $"FT4_{i}")) values - ArcTableAux.Unchecked.setCellAt(5,i,(CompositeCell.FreeText $"FT5_{i}")) values - ArcTableAux.Unchecked.setCellAt(6,i,(CompositeCell.FreeText $"Sample_{i}")) values - let testF = fun () -> ArcTableAux.Unchecked.fillMissingCells headers values - #if FABLE_COMPILER_PYTHON - let expectedMs = 10000 - #else - let expectedMs = 220 - #endif - //4800ms in python on i7-13800H - Expect.wantFaster testF expectedMs "fillMissing is too slow." 
|> ignore // 130ms in javascript, dotnet faster than 100ms - ] +//let private tests_fillMissing = testList "fillMissing" [ +// ] let main = testList "ArcTable" [ @@ -2368,5 +2295,5 @@ let main = tests_IterColumns tests_GetHashCode tests_equality - tests_fillMissing + //tests_fillMissing ] \ No newline at end of file
diff --git a/tests/Speedtest/PerformanceReport.fs b/tests/Speedtest/PerformanceReport.fs
new file mode 100644
index 00000000..cf71e576
--- /dev/null
+++ b/tests/Speedtest/PerformanceReport.fs
@@ -0,0 +1,238 @@
+module PerformanceReport
+
+open ARCtrl.ISA
+open ARCtrl.ISA.Json
+open ARCtrl.ISA.Spreadsheet
+open Fable.Core
+
+#if FABLE_COMPILER_JAVASCRIPT
+open Node.Api
+#endif
+
+#if FABLE_COMPILER_PYTHON
+open Fable.Python.Builtins
+#endif
+
+/// Writes `content` to the file at `path` using the file API of the current compilation target.
+let writeFile (path : string) (content : string) =
+    #if FABLE_COMPILER_JAVASCRIPT
+    fs.writeFileSync(path,content)
+    #endif
+    #if FABLE_COMPILER_PYTHON
+    // NOTE(review): the handle returned by `open` is never closed explicitly here;
+    // confirm the content is reliably flushed before the process exits.
+    let file = builtins.``open``(path, OpenTextMode.Write)
+    file.write(content) |> ignore
+    #endif
+    #if !FABLE_COMPILER
+    System.IO.File.WriteAllText(path,content)
+    #endif
+
+/// A named benchmark together with its measured time in milliseconds once it has been run.
+type PerformanceTest =
+    {
+        Name : string
+        Description : string
+        Test : unit -> unit
+        Time : int option
+    }
+
+    /// Runs `Test` once, logs start and end, and returns a copy with `Time` set.
+    member this.Run() =
+        printfn "%A: Running test: %s" System.DateTime.Now this.Name
+        let time = TestingUtils.Stopwatch.measure(this.Test) |> int
+        printfn "\t%A: Completed in %ims" System.DateTime.Now time
+        {this with Time = Some time}
+
+    static member create name description test = {Name = name; Description = description; Test = test; Time = None}
+
+/// A list of benchmarks for one language target, labelled with the CPU they are measured on.
+type PerformanceReport =
+    {
+        CPU : string
+        Lang : string
+        Tests : PerformanceTest list
+    }
+
+    static member create cpu lang tests = {CPU = cpu; Lang = lang; Tests = tests}
+
+    member this.RunTests() =
+        {this with Tests = this.Tests |> List.map (fun t -> t.Run())}
+
+    /// Renders the tests as a markdown table; a test without a measured time is shown as "n/a".
+    member this.ToMarkdown() =
+        let header = $"| Name | Description | CPU | {this.Lang} Time (ms) |"
+        let separator = "| --- | --- | --- | --- |"
+        let formatTime (time : int option) =
+            match time with
+            | Some ms -> string ms
+            | None -> "n/a"
+        let tests = this.Tests |> List.map (fun t -> $"| {t.Name} | {t.Description} | {this.CPU} | {formatTime t.Time} |")
+        String.concat "\n" [header; separator; tests |> String.concat "\n"]
+
+
+let table_GetHashCode =
+    let testTable = ArcTable.init("Test")
+    let values = Array.init 10000 (fun i -> CompositeCell.createFreeText (string i))
+    testTable.AddColumn(CompositeHeader.FreeText "Header", values)
+    PerformanceTest.create
+        "Table_GetHashCode"
+        "From a table with 1 column and 10000 rows, retrieve the Hash Code"
+        (fun _ ->
+            testTable.GetHashCode()
+            |> ignore
+        )
+
+
+// // Commented this test out, as the behaviour is different in dotnet and js, but both implementations are very close together performance-wise
+// //testCase "performance" (fun () ->
+// //    // Test, that for most cases (because of performance), setter should be used
+// //    let f1 = fun () ->
+// //        let table = ArcTable.init("Table")
+// //        for i = 0 to 10 do
+// //            table.Headers.Insert(i,CompositeHeader.FreeText $"Header_{i}")
+// //            for j = 0 to 5000 do
+// //                ArcTableAux.Unchecked.setCellAt(i,j,CompositeCell.createFreeText $"Cell_{i}_{j}") table.Values
+// //    let f2 = fun () ->
+// //        let table = ArcTable.init("Table")
+// //        for i = 0 to 10 do
+// //            table.Headers.Insert(i,CompositeHeader.FreeText $"Header_{i}")
+// //            for j = 0 to 5000 do
+// //                ArcTableAux.Unchecked.addCellAt(i,j,CompositeCell.createFreeText $"Cell_{i}_{j}") table.Values
+// //    Expect.isFasterThan f1 f2 "SetCell Implementation should be faster than reference"
+
+
+let table_AddRows =
+
+    let table = ArcTable("MyTable",ResizeArray [CompositeHeader.Input IOType.Sample;CompositeHeader.FreeText "Freetext1" ; CompositeHeader.FreeText "Freetext2"; CompositeHeader.Output IOType.Sample], System.Collections.Generic.Dictionary())
+    let rows =
+        Array.init 10000 (fun i ->
+            [|CompositeCell.FreeText $"Source_{i}"; CompositeCell.FreeText $"FT1_{i}"; CompositeCell.FreeText $"FT2_{i}"; CompositeCell.FreeText $"Sample_{i}"; |])
+    PerformanceTest.create
+        "Table_AddRows"
+        "Add 10000 rows to a table with 4 columns."
+        (fun _ ->
+            table.AddRows(rows)
+            |> ignore
+        )
+
+let table_fillMissingCells =
+    // NOTE(review): `headers` lists 4 columns while cells are written at column indices 0 to 6,
+    // and the description below speaks of 6 columns - confirm the intended table shape.
+    let headers = ResizeArray [CompositeHeader.Input IOType.Sample;CompositeHeader.FreeText "Freetext1" ; CompositeHeader.FreeText "Freetext2"; CompositeHeader.Output IOType.Sample]
+    let values = System.Collections.Generic.Dictionary()
+    for i = 0 to 20000 do
+        if i%2 = 0 then
+            ArcTableAux.Unchecked.setCellAt(0,i,(CompositeCell.FreeText $"Source_{i}")) values
+            ArcTableAux.Unchecked.setCellAt(1,i,(CompositeCell.FreeText $"FT1_{i}")) values
+            ArcTableAux.Unchecked.setCellAt(2,i,(CompositeCell.FreeText $"FT2_{i}")) values
+            ArcTableAux.Unchecked.setCellAt(3,i,(CompositeCell.FreeText $"FT3_{i}")) values
+            ArcTableAux.Unchecked.setCellAt(6,i,(CompositeCell.FreeText $"Sample_{i}")) values
+        else
+            ArcTableAux.Unchecked.setCellAt(0,i,(CompositeCell.FreeText $"Source_{i}")) values
+            ArcTableAux.Unchecked.setCellAt(3,i,(CompositeCell.FreeText $"FT3_{i}")) values
+            ArcTableAux.Unchecked.setCellAt(4,i,(CompositeCell.FreeText $"FT4_{i}")) values
+            ArcTableAux.Unchecked.setCellAt(5,i,(CompositeCell.FreeText $"FT5_{i}")) values
+            ArcTableAux.Unchecked.setCellAt(6,i,(CompositeCell.FreeText $"Sample_{i}")) values
+    PerformanceTest.create
+        "Table_fillMissingCells"
+        "For a table 6 columns and 20000 rows, where each row has one missing value, fill those values with default values."
+        (fun () -> ArcTableAux.Unchecked.fillMissingCells headers values |> ignore)
+
+let table_toJson =
+    let t = TestObjects.Spreadsheet.Study.LargeFile.table
+    PerformanceTest.create
+        "Table_ToJson"
+        "Serialize a table with 5 columns and 10000 rows to json."
+        (fun _ -> t.ToJsonString() |> ignore)
+
+let table_toCompressedJson =
+    let t = TestObjects.Spreadsheet.Study.LargeFile.table
+    PerformanceTest.create
+        "Table_ToCompressedJson"
+        "Serialize a table with 5 columns and 10000 rows to compressed json."
+        (fun _ -> t.ToCompressedJsonString() |> ignore)
+
+let assay_toJson =
+    let a = ArcAssay.init("MyAssay")
+    let t = a.InitTable("MyTable")
+    t.AddColumn(CompositeHeader.Input IOType.Source)
+    t.AddColumn(CompositeHeader.Parameter (OntologyAnnotation.fromString("MyParameter1")))
+    t.AddColumn(CompositeHeader.Parameter (OntologyAnnotation.fromString("MyParameter2")))
+    t.AddColumn(CompositeHeader.Parameter (OntologyAnnotation.fromString("MyParameter3")))
+    t.AddColumn(CompositeHeader.Characteristic (OntologyAnnotation.fromString("MyCharacteristic")))
+    t.AddColumn(CompositeHeader.Output IOType.Sample)
+    let rowCount = 10000
+    for i = 0 to rowCount - 1 do
+        let cells =
+            [|
+                CompositeCell.FreeText $"Source{i}"
+                CompositeCell.FreeText $"Parameter1_value"
+                CompositeCell.FreeText $"Parameter2_value"
+                CompositeCell.FreeText $"Parameter3_value{i - i % 10}"
+                CompositeCell.FreeText $"Characteristic_value"
+                CompositeCell.FreeText $"Sample{i}"
+            |]
+        for j = 0 to cells.Length - 1 do
+            t.Values.[(j,i)] <- cells.[j]
+    PerformanceTest.create
+        "Assay_toJson"
+        "Parse an assay with one table with 10000 rows and 6 columns to json"
+        (fun _ -> ArcAssay.toJsonString a |> ignore)
+
+let study_fromWorkbook =
+    let fswb = TestObjects.Spreadsheet.Study.LargeFile.Workbook
+    PerformanceTest.create
+        "Study_FromWorkbook"
+        "Parse a workbook with one study with 10000 rows and 6 columns to an ArcStudy"
+        (fun _ -> ArcStudy.fromFsWorkbook fswb |> ignore)
+
+let investigation_toWorkbook_ManyStudies =
+    let inv = ArcInvestigation.init("MyInvestigation")
+    for i = 0 to 1500 do
+        let s = ArcStudy.init($"Study{i}")
+        inv.AddRegisteredStudy(s)
+    PerformanceTest.create
+        "Investigation_ToWorkbook_ManyStudies"
+        "Parse an investigation with 1500 studies to a workbook"
+        (fun _ -> ArcInvestigation.toFsWorkbook inv |> ignore)
+
+
+let allPerformanceTests =
+    [
+        table_GetHashCode
+        table_AddRows
+        table_fillMissingCells
+        table_toJson
+        table_toCompressedJson
+        assay_toJson
+        study_fromWorkbook
+        investigation_toWorkbook_ManyStudies
+    ]
+
+
+/// Runs every test in `allPerformanceTests` and returns the resulting markdown table.
+let createMarkdownPerformanceReport lang cpu =
+    let report = PerformanceReport.create cpu lang allPerformanceTests
+    report.RunTests().ToMarkdown()
+
+/// Name of the compilation target, used in the report header and in the output file name.
+let lang =
+    #if FABLE_COMPILER_JAVASCRIPT
+    "JavaScript"
+    #endif
+    #if FABLE_COMPILER_PYTHON
+    "Python"
+    #endif
+    #if !FABLE_COMPILER
+    "FSharp"
+    #endif
+
+/// Runs the report for `cpu`, writes it to `PerformanceReport_{lang}.md`, prints it and returns 0.
+/// The output path is relative to the current working directory.
+let runReport cpu =
+    let report = createMarkdownPerformanceReport lang cpu
+    let outFile = $"tests/Speedtest/PerformanceReport/PerformanceReport_{lang}.md"
+    writeFile outFile report
+    printfn "%s" report
+    0
diff --git a/tests/Speedtest/PerformanceReport/PerformanceReport_Combined.md b/tests/Speedtest/PerformanceReport/PerformanceReport_Combined.md new file mode 100644 index 00000000..1b9a6684 --- /dev/null +++ b/tests/Speedtest/PerformanceReport/PerformanceReport_Combined.md @@ -0,0 +1,14 @@ +## 2024_15_03 + +Measured on 13th Gen Intel(R) Core(TM) i7-13800H + +| Name | Description | FSharp Time (ms) | JavaScript Time (ms) | Python Time (ms) | +| --- | --- | --- | --- | --- | +| Table_GetHashCode | From a table with 1 column and 10000 rows, retrieve the Hash Code | 5 | 21 | 226 | +| Table_AddRows | Add 10000 rows to a table with 4 columns. | 15 | 22 | 289 | +| Table_fillMissingCells | For a table 6 columns and 20000 rows, where each row has one missing value, fill those values with default values. | 49 | 108 | 4813 | +| Table_ToJson | Serialize a table with 5 columns and 10000 rows to json. | 1099 | 481 | 6833 | +| Table_ToCompressedJson | Serialize a table with 5 columns and 10000 rows to compressed json. 
| 261 | 2266 | 717334 | +| Assay_toJson | Parse an assay with one table with 10000 rows and 6 columns to json | 915 | 2459 | 28799 | +| Study_FromWorkbook | Parse a workbook with one study with 10000 rows and 6 columns to an ArcStudy | 97 | 87 | 1249 | +| Investigation_ToWorkbook_ManyStudies | Parse an investigation with 1500 studies to a workbook | 621 | 379 | 9974 | \ No newline at end of file diff --git a/tests/Speedtest/PerformanceReport/PerformanceReport_FSharp.md b/tests/Speedtest/PerformanceReport/PerformanceReport_FSharp.md new file mode 100644 index 00000000..ff21c915 --- /dev/null +++ b/tests/Speedtest/PerformanceReport/PerformanceReport_FSharp.md @@ -0,0 +1,10 @@ +| Name | Description | CPU | FSharp Time (ms) | +| --- | --- | --- | --- | +| Table_GetHashCode | From a table with 1 column and 10000 rows, retrieve the Hash Code | 13th Gen Intel(R) Core(TM) i7-13800H | 5 | +| Table_AddRows | Add 10000 rows to a table with 4 columns. | 13th Gen Intel(R) Core(TM) i7-13800H | 15 | +| Table_fillMissingCells | For a table 6 columns and 20000 rows, where each row has one missing value, fill those values with default values. | 13th Gen Intel(R) Core(TM) i7-13800H | 47 | +| Table_ToJson | Serialize a table with 5 columns and 10000 rows to json. | 13th Gen Intel(R) Core(TM) i7-13800H | 1298 | +| Table_ToCompressedJson | Serialize a table with 5 columns and 10000 rows to compressed json. 
| 13th Gen Intel(R) Core(TM) i7-13800H | 289 | +| Assay_toJson | Parse an assay with one table with 10000 rows and 6 columns to json | 13th Gen Intel(R) Core(TM) i7-13800H | 927 | +| Study_FromWorkbook | Parse a workbook with one study with 10000 rows and 6 columns to an ArcStudy | 13th Gen Intel(R) Core(TM) i7-13800H | 129 | +| Investigation_ToWorkbook_ManyStudies | Parse an investigation with 1500 studies to a workbook | 13th Gen Intel(R) Core(TM) i7-13800H | 556 | \ No newline at end of file diff --git a/tests/Speedtest/PerformanceReport/PerformanceReport_JavaScript.md b/tests/Speedtest/PerformanceReport/PerformanceReport_JavaScript.md new file mode 100644 index 00000000..d58be2db --- /dev/null +++ b/tests/Speedtest/PerformanceReport/PerformanceReport_JavaScript.md @@ -0,0 +1,10 @@ +| Name | Description | CPU | JavaScript Time (ms) | +| --- | --- | --- | --- | +| Table_GetHashCode | From a table with 1 column and 10000 rows, retrieve the Hash Code | 13th Gen Intel(R) Core(TM) i7-13800H | 11 | +| Table_AddRows | Add 10000 rows to a table with 4 columns. | 13th Gen Intel(R) Core(TM) i7-13800H | 23 | +| Table_fillMissingCells | For a table 6 columns and 20000 rows, where each row has one missing value, fill those values with default values. | 13th Gen Intel(R) Core(TM) i7-13800H | 111 | +| Table_ToJson | Serialize a table with 5 columns and 10000 rows to json. | 13th Gen Intel(R) Core(TM) i7-13800H | 521 | +| Table_ToCompressedJson | Serialize a table with 5 columns and 10000 rows to compressed json. 
| 13th Gen Intel(R) Core(TM) i7-13800H | 2368 | +| Assay_toJson | Parse an assay with one table with 10000 rows and 6 columns to json | 13th Gen Intel(R) Core(TM) i7-13800H | 1673 | +| Study_FromWorkbook | Parse a workbook with one study with 10000 rows and 6 columns to an ArcStudy | 13th Gen Intel(R) Core(TM) i7-13800H | 92 | +| Investigation_ToWorkbook_ManyStudies | Parse an investigation with 1500 studies to a workbook | 13th Gen Intel(R) Core(TM) i7-13800H | 428 | \ No newline at end of file diff --git a/tests/Speedtest/PerformanceReport/PerformanceReport_Python.md b/tests/Speedtest/PerformanceReport/PerformanceReport_Python.md new file mode 100644 index 00000000..61bf76b1 --- /dev/null +++ b/tests/Speedtest/PerformanceReport/PerformanceReport_Python.md @@ -0,0 +1,10 @@ +| Name | Description | CPU | Python Time (ms) | +| --- | --- | --- | --- | +| Table_GetHashCode | From a table with 1 column and 10000 rows, retrieve the Hash Code | 13th Gen Intel(R) Core(TM) i7-13800H | 200 | +| Table_AddRows | Add 10000 rows to a table with 4 columns. | 13th Gen Intel(R) Core(TM) i7-13800H | 322 | +| Table_fillMissingCells | For a table 6 columns and 20000 rows, where each row has one missing value, fill those values with default values. | 13th Gen Intel(R) Core(TM) i7-13800H | 4973 | +| Table_ToJson | Serialize a table with 5 columns and 10000 rows to json. | 13th Gen Intel(R) Core(TM) i7-13800H | 7866 | +| Table_ToCompressedJson | Serialize a table with 5 columns and 10000 rows to compressed json. 
| 13th Gen Intel(R) Core(TM) i7-13800H | 792683 | +| Assay_toJson | Parse an assay with one table with 10000 rows and 6 columns to json | 13th Gen Intel(R) Core(TM) i7-13800H | 28535 | +| Study_FromWorkbook | Parse a workbook with one study with 10000 rows and 6 columns to an ArcStudy | 13th Gen Intel(R) Core(TM) i7-13800H | 1755 | +| Investigation_ToWorkbook_ManyStudies | Parse an investigation with 1500 studies to a workbook | 13th Gen Intel(R) Core(TM) i7-13800H | 6138 | \ No newline at end of file diff --git a/tests/Speedtest/Program.fs b/tests/Speedtest/Program.fs index bbff80cc..0d55aec0 100644 --- a/tests/Speedtest/Program.fs +++ b/tests/Speedtest/Program.fs @@ -29,6 +29,7 @@ let main argv = FillMissing.newF t3 FillMissing.newSeqF t4 1 + #if !FABLE_COMPILER elif Array.contains "--bigJson" argv then let createAssay() = let a = ArcAssay.init("MyAssay")
@@ -66,5 +67,12 @@ let main argv =
         |> toJson
         |> toFS
         1
+    #endif
     else
-        0
+        // The build tasks pass the CPU name as first argument. Without it, report the
+        // problem and return an error code instead of failing on `argv.[0]`.
+        match Array.tryHead argv with
+        | Some cpu -> PerformanceReport.runReport cpu
+        | None ->
+            printfn "Missing argument: pass the CPU name as first argument to run the performance report."
+            1
\ No newline at end of file
diff --git a/tests/Speedtest/Speedtest.fsproj b/tests/Speedtest/Speedtest.fsproj index e81ed5a4..16d21c1a 100644 --- a/tests/Speedtest/Speedtest.fsproj +++ b/tests/Speedtest/Speedtest.fsproj @@ -6,16 +6,26 @@ + + + + + + + + + +
diff --git a/tests/TestingUtils/Library.fs b/tests/TestingUtils/Library.fs
index 3240f2d8..80b76242 100644
--- a/tests/TestingUtils/Library.fs
+++ b/tests/TestingUtils/Library.fs
@@ -59,6 +59,15 @@ type Stopwatch() =
         | Some start, Some stop -> stop - start
         | _, _ -> failwith "Error. Unable to call `Elapsed` without calling `Start` and `Stop` before."
 
+    /// Runs `f` once between `Start` and `Stop` and returns `Elapsed.TotalMilliseconds`.
+    /// The value returned by `f` is discarded.
+    static member measure f =
+        let stopwatch = Stopwatch()
+        stopwatch.Start()
+        f() |> ignore
+        stopwatch.Stop()
+        stopwatch.Elapsed.TotalMilliseconds
+
 /// Fable compatible Expecto/Mocha/Pyxpecto unification
 module Expect =
     open Utils
diff --git a/tests/TestingUtils/TestingUtils.fsproj b/tests/TestingUtils/TestingUtils.fsproj index 8c6d504a..e5ee8fb3 100644 --- a/tests/TestingUtils/TestingUtils.fsproj +++ b/tests/TestingUtils/TestingUtils.fsproj @@ -24,6 +24,7 @@ +