diff --git a/src/ARCTokenization/ARCTokenization.fsproj b/src/ARCTokenization/ARCTokenization.fsproj
index d6c79b2..b44b1dd 100644
--- a/src/ARCTokenization/ARCTokenization.fsproj
+++ b/src/ARCTokenization/ARCTokenization.fsproj
@@ -12,6 +12,10 @@ snupkg + + + +
@@ -34,6 +38,7 @@ +
diff --git a/src/ARCTokenization/FileSystem.fs b/src/ARCTokenization/FileSystem.fs
new file mode 100644
index 0000000..1ecf7bf
--- /dev/null
+++ b/src/ARCTokenization/FileSystem.fs
@@ -0,0 +1,82 @@
+namespace ARCTokenization
+
+open ControlledVocabulary
+open FSharpAux
+open FsSpreadsheet
+open ARCTokenization.Terms
+open ARCTokenization.StructuralOntology
+
+open System.IO
+open System
+open ControlledVocabulary
+
+/// Tokenizers that walk a directory tree and annotate every directory or file path as an AFSO CvParam.
+module internal FS =
+
+    /// Returns `path` with a trailing directory separator appended unless it already ends with one.
+    /// A base URI without trailing separator treats its last segment as a sibling, which would leak
+    /// the root folder name into every relative path (e.g. "root/1" instead of "1").
+    let private ensureTrailingSeparator (path: string) =
+        if System.String.IsNullOrEmpty path then
+            // left untouched so that the Uri constructor reports the invalid input exactly as before
+            path
+        else
+            let last = path.[path.Length - 1]
+            if last = System.IO.Path.DirectorySeparatorChar || last = System.IO.Path.AltDirectorySeparatorChar then
+                path
+            else
+                path + string System.IO.Path.DirectorySeparatorChar
+
+    /// Returns `path` expressed relative to `root`, separated by forward slashes.
+    /// `MakeRelativeUri` yields a percent-encoded string (e.g. "my%20dir"), so it is unescaped to recover the real path.
+    let private relativeTo (root: System.Uri) (path: string) =
+        let escaped = root.MakeRelativeUri(System.Uri(path)).ToString()
+        System.Uri.UnescapeDataString(escaped)
+
+    /// Lazily enumerates all directories below `rootPath` (recursively) and yields one
+    /// `Directory Path` CvParam per directory, holding the path relative to `rootPath`.
+    /// `rootPath` must be an absolute path; a missing trailing separator is tolerated.
+    let tokenizeRelativeDirectoryPaths (rootPath:string) =
+        let root = System.Uri(ensureTrailingSeparator rootPath)
+        seq {
+            for dir in Directory.EnumerateDirectories(rootPath, "*", SearchOption.AllDirectories) do
+                yield CvParam(
+                    cvTerm = AFSO.``Directory Path``,
+                    v = relativeTo root dir
+                )
+        }
+
+    /// Lazily enumerates all directories below `rootPath` (recursively) and yields one
+    /// `Directory Path` CvParam per directory, holding the enumerated path with backslashes replaced by forward slashes.
+    let tokenizeAbsoluteDirectoryPaths (rootPath:string) =
+        seq {
+            for dir in Directory.EnumerateDirectories(rootPath, "*", SearchOption.AllDirectories) do
+                yield CvParam(
+                    cvTerm = AFSO.``Directory Path``,
+                    v = dir.Replace("\\","/")
+                )
+        }
+
+    /// Lazily enumerates all files below `rootPath` (recursively) and yields one
+    /// `File Path` CvParam per file, holding the path relative to `rootPath`.
+    /// `rootPath` must be an absolute path; a missing trailing separator is tolerated.
+    let tokenizeRelativeFilePaths (rootPath:string) =
+        let root = System.Uri(ensureTrailingSeparator rootPath)
+        seq {
+            for file in Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories) do
+                yield CvParam(
+                    cvTerm = AFSO.``File Path``,
+                    v = relativeTo root file
+                )
+        }
+
+    /// Lazily enumerates all files below `rootPath` (recursively) and yields one
+    /// `File Path` CvParam per file, holding the enumerated path with backslashes replaced by forward slashes.
+    let tokenizeAbsoluteFilePaths (rootPath:string) =
+        seq {
+            for file in Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories) do
+                yield CvParam(
+                    cvTerm = AFSO.``File Path``,
+                    v = file.Replace("\\","/")
+                )
+        }
\ No newline at end of file
diff --git a/src/ARCTokenization/Tokenization.fs b/src/ARCTokenization/Tokenization.fs index
456d526..9b855c1 100644
--- a/src/ARCTokenization/Tokenization.fs
+++ b/src/ARCTokenization/Tokenization.fs
@@ -7,7 +7,7 @@ open ARCTokenization.Terms module Tokenization = - let convertTokens (keyParser: IParam list -> string -> (ParamValue -> IParam)) (line : FsCell seq) = + let convertMetadataTokens (keyParser: IParam list -> string -> (ParamValue -> IParam)) (line : FsCell seq) = match line |> Seq.toList with | [] -> failwith "Cannot convert nothin" | key :: [] ->
diff --git a/src/ARCTokenization/TopLevelParsers.fs b/src/ARCTokenization/TopLevelParsers.fs
index 91d2602..b73e421 100644
--- a/src/ARCTokenization/TopLevelParsers.fs
+++ b/src/ARCTokenization/TopLevelParsers.fs
@@ -5,6 +5,52 @@ open FSharpAux open FsSpreadsheet open FsSpreadsheet.ExcelIO
+type FileSystem =
+
+    /// <summary>
+    /// Tokenizes every directory found recursively below rootPath as a sequence of CvParams annotated with the directory path.
+    ///
+    /// The paths are reported as enumerated from rootPath (with forward slashes), so rootPath must be absolute for them to be absolute.
+    /// </summary>
+    /// <param name="rootPath">absolute path of the directory to scan</param>
+    /// <returns>one `Directory Path` CvParam per directory below rootPath</returns>
+    static member parseAbsoluteDirectoryPaths(rootPath: string) =
+        rootPath
+        |> FS.tokenizeAbsoluteDirectoryPaths
+
+    /// <summary>
+    /// Tokenizes every file found recursively below rootPath as a sequence of CvParams annotated with the file path.
+    ///
+    /// The paths are reported as enumerated from rootPath (with forward slashes), so rootPath must be absolute for them to be absolute.
+    /// </summary>
+    /// <param name="rootPath">absolute path of the directory to scan</param>
+    /// <returns>one `File Path` CvParam per file below rootPath</returns>
+    static member parseAbsoluteFilePaths(rootPath: string) =
+        rootPath
+        |> FS.tokenizeAbsoluteFilePaths
+
+    /// <summary>
+    /// Tokenizes every directory found recursively below rootPath as a sequence of CvParams annotated with the directory path relative to rootPath.
+    ///
+    /// Note that rootPath must be an absolute path ending with a trailing slash.
+    /// </summary>
+    /// <param name="rootPath">absolute path ending with a trailing slash</param>
+    /// <returns>one `Directory Path` CvParam per directory below rootPath</returns>
+    static member parseRelativeDirectoryPaths(rootPath: string) =
+        rootPath
+        |> FS.tokenizeRelativeDirectoryPaths
+
+    /// <summary>
+    /// Tokenizes every file found recursively below rootPath as a sequence of CvParams annotated with the file path relative to rootPath.
+    ///
+    /// Note that rootPath must be an absolute path ending with a trailing slash.
+    /// </summary>
+    /// <param name="rootPath">absolute path ending with a trailing slash</param>
+    /// <returns>one `File Path` CvParam per file below rootPath</returns>
+    static member parseRelativeFilePaths(rootPath: string) =
+        rootPath
+        |> FS.tokenizeRelativeFilePaths
+
 type Investigation = ///
@@ -21,7 +67,7 @@ type Investigation = FsWorkbook.fromXlsxFile path |> Workbook.getInvestigationMetadataSheet useLastSheetOnIncorrectName - |> Worksheet.parseRowsWith (Tokenization.convertTokens MetadataSheet.parseInvestigationKey) + |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseInvestigationKey) /// /// Parses the metadata sheet from an ISA Study XLSX file as a flat list of `IParam`s.
@@ -54,7 +100,7 @@ type Study = FsWorkbook.fromXlsxFile path |> Workbook.getStudyMetadataSheet useLastSheetOnIncorrectName - |> Worksheet.parseRowsWith (Tokenization.convertTokens MetadataSheet.parseStudyKey) + |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseStudyKey) /// /// Parses the metadata sheet from an ISA Study XLSX file as a flat list of `IParam`s.
@@ -95,7 +141,7 @@ type Assay = FsWorkbook.fromXlsxFile path |> Workbook.getAssayMetadataSheet useLastSheetOnIncorrectName - |> Worksheet.parseRowsWith (Tokenization.convertTokens MetadataSheet.parseAssayKey) + |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseAssayKey) /// /// Parses the metadata sheet from an ISA Assay XLSX file as a flat list of `IParam`s.
diff --git a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj
index 308ee53..9f588d5 100644
--- a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj
+++ b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj
@@ -7,7 +7,7 @@ - +
@@ -36,7 +36,12 @@ + + + + +
diff --git a/tests/ARCTokenization.Tests/Fixtures/testPaths/1/1_1/.gitkeep b/tests/ARCTokenization.Tests/Fixtures/testPaths/1/1_1/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/tests/ARCTokenization.Tests/Fixtures/testPaths/2/2_1/.gitkeep b/tests/ARCTokenization.Tests/Fixtures/testPaths/2/2_1/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/tests/ARCTokenization.Tests/Fixtures/testPaths/2/2_2/2_2_1/.gitkeep b/tests/ARCTokenization.Tests/Fixtures/testPaths/2/2_2/2_2_1/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/tests/ARCTokenization.Tests/ReferenceObjects.fs b/tests/ARCTokenization.Tests/ReferenceObjects.fs
index 4bd1373..c16956c 100644
--- a/tests/ARCTokenization.Tests/ReferenceObjects.fs
+++ b/tests/ARCTokenization.Tests/ReferenceObjects.fs
@@ -96,7 +96,7 @@ module Tokenization = lmaoooo", ParamValue.Value 1) ] - module ConvertTokens = + module ConvertMetadataTokens = let referenceTerms = [ CvTerm.create(accession = "1", name = "ReferenceTerm1", ref = "1")
@@ -150,4 +150,48 @@ module Tokenization = UserParam("fk u lmaooooo", ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey) UserParam("fk u lmaooooo", ParamValue.Value "some value") UserParam("fk u lmaooooo", ParamValue.Value "another value") - ] \ No newline at end of file + ]
+
+    module FileSystem =
+
+        // Directories below Fixtures/testPaths, given relative to that folder.
+        let private relativeDirectories =
+            [
+                @"1"
+                @"2"
+                @"1/1_1"
+                @"2/2_1"
+                @"2/2_2"
+                @"2/2_2/2_2_1"
+            ]
+
+        // Files below Fixtures/testPaths, given relative to that folder.
+        let private relativeFiles =
+            [
+                @"1/1_1/.gitkeep"
+                @"2/2_1/.gitkeep"
+                @"2/2_2/2_2_1/.gitkeep"
+            ]
+
+        let private directoryPathTerm = CvTerm.create("AFSO:00000010","Directory Path","AFSO")
+        let private filePathTerm = CvTerm.create("AFSO:00000009","File Path","AFSO")
+
+        // Wraps every path into a CvParam annotated with the given term.
+        let private annotate (term: CvTerm) (paths: string list) =
+            paths |> List.map (fun p -> CvParam(cvTerm = term, v = p))
+
+        // Joins every relative path onto root and normalizes the separators to forward slashes.
+        let private toAbsolute (root: string) (paths: string list) =
+            paths |> List.map (fun p -> System.IO.Path.Combine(root, p).Replace("\\", "/"))
+
+        let referenceRelativeDirectoryPaths =
+            relativeDirectories |> annotate directoryPathTerm
+
+        let referenceAbsoluteDirectoryPaths(root) =
+            relativeDirectories |> toAbsolute root |> annotate directoryPathTerm
+
+        let referenceRelativeFilePaths =
+            relativeFiles |> annotate filePathTerm
+
+        let referenceAbsoluteFilePaths(root) =
+            relativeFiles |> toAbsolute root |> annotate filePathTerm
\ No newline at end of file
diff --git a/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs b/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs
index cff94fb..de52deb 100644
--- a/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs
+++ b/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs
@@ -4,6 +4,48 @@ open ControlledVocabulary open ARCTokenization open Xunit
+module FileSystem =
+
+    open ReferenceObjects.Tokenization.FileSystem
+    open System.IO
+
+    // Absolute path of the fixture tree; used both to tokenize and to build the absolute reference paths.
+    let private fixtureRoot = Path.GetFullPath("Fixtures/testPaths/")
+
+    // Directory.EnumerateDirectories / EnumerateFiles make no ordering guarantee, so the tokens are compared
+    // as unordered collections: both lists must have the same length and every expected token must occur
+    // among the actual ones.
+    let private assertSameTokens (expected: CvParam list) (actual: CvParam list) =
+        Assert.Equal(List.length expected, List.length actual)
+        Assert.All(
+            expected,
+            fun e -> Assert.True(actual |> List.exists (fun a -> e.Equals(a)))
+        )
+
+    let parsedRelativeDirectoryPaths = FS.tokenizeRelativeDirectoryPaths fixtureRoot |> List.ofSeq
+
+    [<Fact>]
+    let ``Relative directory paths are tokenized correctly`` () =
+        assertSameTokens referenceRelativeDirectoryPaths parsedRelativeDirectoryPaths
+
+    let parsedRelativeFilePaths = FS.tokenizeRelativeFilePaths fixtureRoot |> List.ofSeq
+
+    [<Fact>]
+    let ``Relative file paths are tokenized correctly`` () =
+        assertSameTokens referenceRelativeFilePaths parsedRelativeFilePaths
+
+    let parsedAbsoluteDirectoryPaths = FS.tokenizeAbsoluteDirectoryPaths fixtureRoot |> List.ofSeq
+
+    [<Fact>]
+    let ``Absolute directory paths are tokenized correctly`` () =
+        assertSameTokens (referenceAbsoluteDirectoryPaths fixtureRoot) parsedAbsoluteDirectoryPaths
+
+    let parsedAbsoluteFilePaths = FS.tokenizeAbsoluteFilePaths fixtureRoot |> List.ofSeq
+
+    [<Fact>]
+    let ``Absolute file paths are tokenized correctly`` () =
+        assertSameTokens (referenceAbsoluteFilePaths fixtureRoot) parsedAbsoluteFilePaths
+
 module ParseKeyWithTerms = open ReferenceObjects.Tokenization.KeyParser
@@ -75,12 +117,12 @@ module ParseKeyWithTerms = fun (e, a) -> Assert.True(Param.equals e a) ) -module ConvertTokens = +module ConvertMetadataTokens = - open ReferenceObjects.Tokenization.ConvertTokens + open ReferenceObjects.Tokenization.ConvertMetadataTokens open FsSpreadsheet - let tokenizer : FsCell seq -> IParam list = Tokenization.convertTokens (MetadataSheet.parseKeyWithTerms referenceTerms) + let tokenizer : FsCell seq -> IParam list = Tokenization.convertMetadataTokens (MetadataSheet.parseKeyWithTerms referenceTerms) let parsedCvParams = tokenizer referenceRow
@@ -148,4 +190,4 @@ module ConvertTokens = let ``UserParam row has metadata section key as value of first token`` () = let actual = parsedUserParams.[0] |> Param.getValueAsTerm let expected = Terms.StructuralTerms.metadataSectionKey - Assert.Equal(expected, actual) \ No newline at end of file + Assert.Equal(expected, actual)