diff --git a/src/ARCTokenization/ARCTokenization.fsproj b/src/ARCTokenization/ARCTokenization.fsproj
index d6c79b2..b44b1dd 100644
--- a/src/ARCTokenization/ARCTokenization.fsproj
+++ b/src/ARCTokenization/ARCTokenization.fsproj
@@ -12,6 +12,10 @@
snupkg
+
+
+
+
@@ -34,6 +38,7 @@
+
diff --git a/src/ARCTokenization/FileSystem.fs b/src/ARCTokenization/FileSystem.fs
new file mode 100644
index 0000000..1ecf7bf
--- /dev/null
+++ b/src/ARCTokenization/FileSystem.fs
@@ -0,0 +1,54 @@
+namespace ARCTokenization
+
+open ControlledVocabulary
+open FSharpAux
+open FsSpreadsheet
+open ARCTokenization.Terms
+open ARCTokenization.StructuralOntology
+
+open System.IO
+open System
+open ControlledVocabulary
+
+module internal FS =
+
+    /// Relative, forward-slash path from `root` to `path`. `root` must end with a slash, or
+    /// its last segment is kept in the result. Uri.MakeRelativeUri percent-encodes its output
+    /// (a space becomes "%20"), so it is unescaped here to recover the name as stored on disk.
+    let private relativeTo (root: System.Uri) (path: string) =
+        let escaped = root.MakeRelativeUri(System.Uri(path)).ToString()
+        System.Uri.UnescapeDataString(escaped)
+
+    /// Lazily yields a `Directory Path` CvParam for every directory found recursively below
+    /// rootPath, valued with the path relative to rootPath. rootPath must be absolute.
+    let tokenizeRelativeDirectoryPaths (rootPath:string) =
+        let root = System.Uri(rootPath)
+        seq {
+            for dir in Directory.EnumerateDirectories(rootPath, "*", SearchOption.AllDirectories) do
+                yield CvParam(cvTerm = AFSO.``Directory Path``, v = relativeTo root dir)
+        }
+
+    /// Lazily yields a `Directory Path` CvParam for every directory found recursively below
+    /// rootPath, valued with the enumerated path using forward slashes as separators.
+    let tokenizeAbsoluteDirectoryPaths (rootPath:string) =
+        seq {
+            for dir in Directory.EnumerateDirectories(rootPath, "*", SearchOption.AllDirectories) do
+                yield CvParam(cvTerm = AFSO.``Directory Path``, v = dir.Replace("\\","/"))
+        }
+
+    /// Lazily yields a `File Path` CvParam for every file found recursively below
+    /// rootPath, valued with the path relative to rootPath. rootPath must be absolute.
+    let tokenizeRelativeFilePaths (rootPath:string) =
+        let root = System.Uri(rootPath)
+        seq {
+            for file in Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories) do
+                yield CvParam(cvTerm = AFSO.``File Path``, v = relativeTo root file)
+        }
+
+    /// Lazily yields a `File Path` CvParam for every file found recursively below
+    /// rootPath, valued with the enumerated path using forward slashes as separators.
+    let tokenizeAbsoluteFilePaths (rootPath:string) =
+        seq {
+            for file in Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories) do
+                yield CvParam(cvTerm = AFSO.``File Path``, v = file.Replace("\\","/"))
+        }
\ No newline at end of file
diff --git a/src/ARCTokenization/Tokenization.fs b/src/ARCTokenization/Tokenization.fs
index 456d526..9b855c1 100644
--- a/src/ARCTokenization/Tokenization.fs
+++ b/src/ARCTokenization/Tokenization.fs
@@ -7,7 +7,7 @@ open ARCTokenization.Terms
module Tokenization =
- let convertTokens (keyParser: IParam list -> string -> (ParamValue -> IParam)) (line : FsCell seq) =
+ let convertMetadataTokens (keyParser: IParam list -> string -> (ParamValue -> IParam)) (line : FsCell seq) =
match line |> Seq.toList with
| [] -> failwith "Cannot convert nothin"
| key :: [] ->
diff --git a/src/ARCTokenization/TopLevelParsers.fs b/src/ARCTokenization/TopLevelParsers.fs
index 91d2602..b73e421 100644
--- a/src/ARCTokenization/TopLevelParsers.fs
+++ b/src/ARCTokenization/TopLevelParsers.fs
@@ -5,6 +5,52 @@ open FSharpAux
open FsSpreadsheet
open FsSpreadsheet.ExcelIO
+type FileSystem =
+
+    /// Tokenizes every directory found recursively below rootPath.
+    ///
+    /// Returns a lazily evaluated sequence of CvParams annotated with the AFSO
+    /// `Directory Path` term. Each value is the directory path as enumerated from
+    /// rootPath, with backslashes replaced by forward slashes.
+    ///
+    /// rootPath: directory to search; must be an absolute path ending with a trailing slash.
+    static member parseAbsoluteDirectoryPaths(rootPath:string) =
+        rootPath |> FS.tokenizeAbsoluteDirectoryPaths
+
+    /// Tokenizes every file found recursively below rootPath.
+    ///
+    /// Returns a lazily evaluated sequence of CvParams annotated with the AFSO
+    /// `File Path` term. Each value is the file path as enumerated from rootPath,
+    /// with backslashes replaced by forward slashes.
+    ///
+    /// rootPath: directory to search; must be an absolute path ending with a trailing slash.
+    static member parseAbsoluteFilePaths(rootPath:string) =
+        rootPath |> FS.tokenizeAbsoluteFilePaths
+
+    /// Tokenizes every directory found recursively below rootPath.
+    ///
+    /// Returns a lazily evaluated sequence of CvParams annotated with the AFSO
+    /// `Directory Path` term. Each value is the directory path relative to rootPath,
+    /// which is why rootPath has to end with a trailing slash.
+    ///
+    /// rootPath: directory to search; must be an absolute path ending with a trailing slash.
+    static member parseRelativeDirectoryPaths(rootPath:string) =
+        rootPath |> FS.tokenizeRelativeDirectoryPaths
+
+    /// Tokenizes every file found recursively below rootPath.
+    ///
+    /// Returns a lazily evaluated sequence of CvParams annotated with the AFSO
+    /// `File Path` term. Each value is the file path relative to rootPath,
+    /// which is why rootPath has to end with a trailing slash.
+    ///
+    /// rootPath: directory to search; must be an absolute path ending with a trailing slash.
+    static member parseRelativeFilePaths(rootPath:string) =
+        rootPath |> FS.tokenizeRelativeFilePaths
+
type Investigation =
///
@@ -21,7 +67,7 @@ type Investigation =
FsWorkbook.fromXlsxFile path
|> Workbook.getInvestigationMetadataSheet useLastSheetOnIncorrectName
- |> Worksheet.parseRowsWith (Tokenization.convertTokens MetadataSheet.parseInvestigationKey)
+ |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseInvestigationKey)
///
/// Parses the metadata sheet from an ISA Study XLSX file as a flat list of `IParam`s.
@@ -54,7 +100,7 @@ type Study =
FsWorkbook.fromXlsxFile path
|> Workbook.getStudyMetadataSheet useLastSheetOnIncorrectName
- |> Worksheet.parseRowsWith (Tokenization.convertTokens MetadataSheet.parseStudyKey)
+ |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseStudyKey)
///
/// Parses the metadata sheet from an ISA Study XLSX file as a flat list of `IParam`s.
@@ -95,7 +141,7 @@ type Assay =
FsWorkbook.fromXlsxFile path
|> Workbook.getAssayMetadataSheet useLastSheetOnIncorrectName
- |> Worksheet.parseRowsWith (Tokenization.convertTokens MetadataSheet.parseAssayKey)
+ |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseAssayKey)
///
/// Parses the metadata sheet from an ISA Assay XLSX file as a flat list of `IParam`s.
diff --git a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj
index 308ee53..9f588d5 100644
--- a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj
+++ b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj
@@ -7,7 +7,7 @@
-
+
@@ -36,7 +36,12 @@
+
+
+
+
+
diff --git a/tests/ARCTokenization.Tests/Fixtures/testPaths/1/1_1/.gitkeep b/tests/ARCTokenization.Tests/Fixtures/testPaths/1/1_1/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/tests/ARCTokenization.Tests/Fixtures/testPaths/2/2_1/.gitkeep b/tests/ARCTokenization.Tests/Fixtures/testPaths/2/2_1/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/tests/ARCTokenization.Tests/Fixtures/testPaths/2/2_2/2_2_1/.gitkeep b/tests/ARCTokenization.Tests/Fixtures/testPaths/2/2_2/2_2_1/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/tests/ARCTokenization.Tests/ReferenceObjects.fs b/tests/ARCTokenization.Tests/ReferenceObjects.fs
index 4bd1373..c16956c 100644
--- a/tests/ARCTokenization.Tests/ReferenceObjects.fs
+++ b/tests/ARCTokenization.Tests/ReferenceObjects.fs
@@ -96,7 +96,7 @@ module Tokenization =
lmaoooo", ParamValue.Value 1)
]
- module ConvertTokens =
+ module ConvertMetadataTokens =
let referenceTerms = [
CvTerm.create(accession = "1", name = "ReferenceTerm1", ref = "1")
@@ -150,4 +150,66 @@ module Tokenization =
UserParam("fk u lmaooooo", ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey)
UserParam("fk u lmaooooo", ParamValue.Value "some value")
UserParam("fk u lmaooooo", ParamValue.Value "another value")
- ]
\ No newline at end of file
+ ]
+
+    module FileSystem =
+
+        // Expected tokens for the Fixtures/testPaths tree. The name lists below are shared
+        // by the relative and the absolute reference values, so both variants are always
+        // built from the same set of fixture entries.
+
+        /// Directories of the fixture tree, relative to its root.
+        /// The order matters to callers that compare these values position by position
+        /// against tokenizer output.
+        let private directoryNames =
+            [
+                "1"
+                "2"
+                "1/1_1"
+                "2/2_1"
+                "2/2_2"
+                "2/2_2/2_2_1"
+            ]
+
+        /// Files of the fixture tree, relative to its root.
+        let private fileNames =
+            [
+                "1/1_1/.gitkeep"
+                "2/2_1/.gitkeep"
+                "2/2_2/2_2_1/.gitkeep"
+            ]
+
+        /// Wraps a path in a CvParam annotated with the AFSO `Directory Path` term
+        /// (accession AFSO:00000010).
+        let private directoryToken (value: string) =
+            CvParam(cvTerm = CvTerm.create("AFSO:00000010","Directory Path","AFSO"), v = value)
+
+        /// Wraps a path in a CvParam annotated with the AFSO `File Path` term
+        /// (accession AFSO:00000009).
+        let private fileToken (value: string) =
+            CvParam(cvTerm = CvTerm.create("AFSO:00000009","File Path","AFSO"), v = value)
+
+        /// Joins a relative name onto root, then replaces every backslash with a
+        /// forward slash.
+        let private rooted (root: string) (name: string) =
+            System.IO.Path.Combine(root, name).Replace("\\", "/")
+
+        /// Reference tokens for the relative directory paths: one `Directory Path`
+        /// CvParam per fixture directory, valued with the path relative to the root.
+        let referenceRelativeDirectoryPaths =
+            directoryNames |> List.map directoryToken
+
+        /// Reference tokens for the absolute directory paths: each fixture directory
+        /// name is combined with root before being wrapped in a `Directory Path` CvParam.
+        let referenceAbsoluteDirectoryPaths(root) =
+            directoryNames |> List.map (rooted root >> directoryToken)
+
+        /// Reference tokens for the relative file paths: one `File Path` CvParam per
+        /// fixture file, valued with the path relative to the root.
+        let referenceRelativeFilePaths =
+            fileNames |> List.map fileToken
+
+        /// Reference tokens for the absolute file paths: each fixture file name is
+        /// combined with root before being wrapped in a `File Path` CvParam.
+        let referenceAbsoluteFilePaths(root) =
+            fileNames |> List.map (rooted root >> fileToken)
\ No newline at end of file
diff --git a/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs b/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs
index cff94fb..de52deb 100644
--- a/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs
+++ b/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs
@@ -4,6 +4,55 @@ open ControlledVocabulary
open ARCTokenization
open Xunit
+module FileSystem =
+
+    open ReferenceObjects.Tokenization.FileSystem
+    open System.IO
+
+    // One fixture root feeds both the tokenizers and the expected absolute paths, so the
+    // two sides of every comparison are derived from the same directory.
+    let private fixtureRoot = Path.GetFullPath("Fixtures/testPaths/")
+
+    let parsedRelativeDirectoryPaths = FS.tokenizeRelativeDirectoryPaths fixtureRoot |> List.ofSeq
+
+    [<Fact>]
+    let ``Relative directory paths are tokenized correctly`` () =
+        let actual = parsedRelativeDirectoryPaths
+        let expected = referenceRelativeDirectoryPaths
+        // Enumeration order is not guaranteed: check the count, then membership.
+        Assert.Equal(expected.Length, actual.Length)
+        Assert.All(expected, fun e -> Assert.True(actual |> List.exists (fun a -> e.Equals(a))))
+
+    let parsedRelativeFilePaths = FS.tokenizeRelativeFilePaths fixtureRoot |> List.ofSeq
+
+    [<Fact>]
+    let ``Relative file paths are tokenized correctly`` () =
+        let actual = parsedRelativeFilePaths
+        let expected = referenceRelativeFilePaths
+        // Enumeration order is not guaranteed: check the count, then membership.
+        Assert.Equal(expected.Length, actual.Length)
+        Assert.All(expected, fun e -> Assert.True(actual |> List.exists (fun a -> e.Equals(a))))
+
+    let parsedAbsoluteDirectoryPaths = FS.tokenizeAbsoluteDirectoryPaths fixtureRoot |> List.ofSeq
+
+    [<Fact>]
+    let ``Absolute directory paths are tokenized correctly`` () =
+        let actual = parsedAbsoluteDirectoryPaths
+        let expected = referenceAbsoluteDirectoryPaths(fixtureRoot)
+        // Enumeration order is not guaranteed: check the count, then membership.
+        Assert.Equal(expected.Length, actual.Length)
+        Assert.All(expected, fun e -> Assert.True(actual |> List.exists (fun a -> e.Equals(a))))
+
+    let parsedAbsoluteFilePaths = FS.tokenizeAbsoluteFilePaths fixtureRoot |> List.ofSeq
+
+    [<Fact>]
+    let ``Absolute file paths are tokenized correctly`` () =
+        let actual = parsedAbsoluteFilePaths
+        let expected = referenceAbsoluteFilePaths(fixtureRoot)
+        // Enumeration order is not guaranteed: check the count, then membership.
+        Assert.Equal(expected.Length, actual.Length)
+        Assert.All(expected, fun e -> Assert.True(actual |> List.exists (fun a -> e.Equals(a))))
+
module ParseKeyWithTerms =
open ReferenceObjects.Tokenization.KeyParser
@@ -75,12 +124,12 @@ module ParseKeyWithTerms =
fun (e, a) -> Assert.True(Param.equals e a)
)
-module ConvertTokens =
+module ConvertMetadataTokens =
- open ReferenceObjects.Tokenization.ConvertTokens
+ open ReferenceObjects.Tokenization.ConvertMetadataTokens
open FsSpreadsheet
- let tokenizer : FsCell seq -> IParam list = Tokenization.convertTokens (MetadataSheet.parseKeyWithTerms referenceTerms)
+ let tokenizer : FsCell seq -> IParam list = Tokenization.convertMetadataTokens (MetadataSheet.parseKeyWithTerms referenceTerms)
let parsedCvParams = tokenizer referenceRow
@@ -148,4 +197,4 @@ module ConvertTokens =
let ``UserParam row has metadata section key as value of first token`` () =
let actual = parsedUserParams.[0] |> Param.getValueAsTerm
let expected = Terms.StructuralTerms.metadataSectionKey
- Assert.Equal(expected, actual)
\ No newline at end of file
+ Assert.Equal(expected, actual)