nfdi4plants · kMutagene · Mar 2, 2024 · Feb 23, 2024 · Feb 23, 2024 · Feb 23, 2024
diff --git a/src/ARCTokenization/TopLevelParsers.fs b/src/ARCTokenization/TopLevelParsers.fs
@@ -9,11 +9,11 @@ module internal ISA =
 
     open System.IO
 
-    let tryParseMetadataSheetFromToken (rootPath:string) (isaCvTerm: CvTerm) (isaMdsParsingF: string -> IParam list) (refFileToken: IParam) =
+    let tryParseMetadataSheetFromToken (rootPath:string) (isaCvTerm: CvTerm) (isaMdsParsingF: string -> IParam list) (relFileToken: IParam) =
 
-        let cvpStr = Param.getValueAsString refFileToken
+        let cvpStr = Param.getValueAsString relFileToken
         let path = Path.Combine(rootPath, cvpStr)
-        let containsToken = refFileToken|> (fun x -> x.Name = isaCvTerm.Name) 
+        let containsToken = relFileToken|> (fun x -> x.Name = isaCvTerm.Name) 
 
         if containsToken then
             try 
@@ -22,12 +22,12 @@ module internal ISA =
                 None
         else None
 
-    let parseMetadataSheetsFromTokens (rootPath:string) (isaCvTerm: CvTerm) (isaMdsParsingF: string -> IParam list) (refFileTokens: #IParam seq) =
-        refFileTokens
+    let parseMetadataSheetsFromTokens (rootPath:string) (isaCvTerm: CvTerm) (isaMdsParsingF: string -> IParam list) (relFileTokens: #IParam seq) =
+        relFileTokens
         |> Seq.choose (fun token ->  tryParseMetadataSheetFromToken rootPath isaCvTerm isaMdsParsingF token)
 
-    let parseProcessGraphColumnsFromToken (rootPath:string) (refFileToken: IParam) =
-        let cvpStr = Param.getValueAsString refFileToken
+    let parseProcessGraphColumnsFromToken (rootPath:string) (relFileToken: IParam) =
+        let cvpStr = Param.getValueAsString relFileToken
         let path = System.IO.Path.Combine(rootPath, cvpStr)
         (FsWorkbook.fromXlsxFile path)
             .GetWorksheets()
@@ -42,16 +42,16 @@ module internal ISA =
             )
             |> Map.ofSeq
 
-    let parseProcessGraphColumnsFromTokens (rootPath:string) (isaCvTerm: CvTerm) (refFileTokens: #IParam seq) =
-        refFileTokens
-        |> Seq.choose (fun token ->  
-            match token.Name = isaCvTerm.Name with 
-            | true -> Some (parseProcessGraphColumnsFromToken rootPath token)
-            | false -> None
+    let parseProcessGraphColumnsFromTokens (rootPath:string) (isaCvTerm: CvTerm) (relFileTokens: #IParam seq) =
+        relFileTokens
+        |> Seq.choose (fun (token :#IParam) ->  
+            match token |> Param.equalsTerm isaCvTerm with 
+            | true  -> Some (parseProcessGraphColumnsFromToken rootPath token)
+            | false -> None 
         )
         |> fun x -> 
             match Seq.length x with
-            | 0 -> failwith "No token found"
+            | 0 -> Seq.empty
             | _ -> x 
 
 
@@ -103,7 +103,7 @@ type FileSystem =
 
     /// <summary>
     /// Returns all files in the given rootPath as a list of CvParams containing the annotated relative file paths.
-    ///
+    /// Uses the ARC file system structure to parse the files to tokens.
     /// Note that rootPath must be an absolute path ending with a trailing slash.
     /// </summary>
     /// <param name="rootPath">absolute path ending with a trailing slash</param>
@@ -144,10 +144,9 @@ type Investigation =
             |> List.concat
 
     /// <summary>
-    /// Returns a function that returns Some flat IParam list representing the investigation metadata if the given token contains a filepath with the standard investigation file name ("isa.investigation.xlsx") or None otherwise.
+    /// Returns a function that returns Some flat IParam list representing the investigation metadata if the given token contains an investigation file tied to an filepath with the standard investigation file name ("isa.investigation.xlsx") or None otherwise.
     /// </summary>
     /// <param name="UseLastSheetOnIncorrectName">Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook</param>
-    /// <param name="FileName">The name of the investigation file, note that this should not be set if the file follows spec (as "isa.investigation.xlsx" is the default)</param>
     static member tryParseMetadataSheetFromToken(
         ?UseLastSheetOnIncorrectName: bool
     ) =
@@ -162,13 +161,12 @@ type Investigation =
 
 
     /// <summary>
-    /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard investigation file name ("isa.investigation.xlsx")
-    /// in a given collection of tokens as a 2D list containing the individual Investigation metadata as a flat list of `IParam`s.
-    ///
-    /// if no tokens contain such a file path, the result will be an empty list.
+    /// Returns a function that parses all metadata sheets from all the tokens containing an `Investigation File' tied to an filepath with the 
+    /// standard investigation file name ("isa.investigation.xlsx") in a given collection of tokens as a 2D list containing the individual 
+    /// Investigation metadata as a flat list of `IParam`s.
+    /// If no tokens contain such a file path, the result will be an empty list.
     /// </summary>
     /// <param name="UseLastSheetOnIncorrectName">Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook</param>
-    /// <param name="FileName">The name of the investigation file, note that this should not be set if the file follows spec (as "isa.investigation.xlsx" is the default)</param>
     static member parseMetadataSheetsFromTokens(
         ?UseLastSheetOnIncorrectName: bool
     ) =
@@ -214,10 +212,9 @@ type Study =
             |> List.concat
 
     /// <summary>
-    /// Returns a function that returns Some flat IParam list representing the study metadata if the given token contains a filepath with the standard study file name ("isa.study.xlsx") or None otherwise.
+    /// Returns a function that returns Some flat IParam list representing the study metadata if the given token contains a 'Study File' tied to an filepath with the the standard study file name ("isa.study.xlsx") or None otherwise.
     /// </summary>
     /// <param name="UseLastSheetOnIncorrectName">Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook</param>
-    /// <param name="FileName">The name of the study file, note that this should not be set if the file follows spec (as "isa.study.xlsx" is the default)</param>
     static member tryParseMetadataSheetFromToken(
         ?UseLastSheetOnIncorrectName: bool
     ) =
@@ -233,13 +230,12 @@ type Study =
 
 
     /// <summary>
-    /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard study file name ("isa.study.xlsx")
+    /// Returns a function that parses all metadata sheets from all the tokens containing a 'Study File' with the standard study file name ("isa.study.xlsx")
     /// in a given collection of tokens as a 2D list containing the individual study metadata as a flat list of `IParam`s.
     ///
     /// if no tokens contain such a file path, the result will be an empty list.
     /// </summary>
     /// <param name="UseLastSheetOnIncorrectName">Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook</param>
-    /// <param name="FileName">The name of the study file, note that this should not be set if the file follows spec (as "isa.study.xlsx" is the default)</param>
     static member parseMetadataSheetsFromTokens(
         ?UseLastSheetOnIncorrectName: bool
     ) =
@@ -275,27 +271,27 @@ type Study =
             |> Map.ofSeq
 
     /// <summary>
-    /// Returns an annotation tables from an IParam if the given token contains a filepath with the standard study file name ("isa.study.xlsx").
-    /// Map of string * `IParam` 2D List representing the individual parts parts of the Process graph, 
-    /// where the string is the name of the worksheet that contained the table, 
-    /// and the 2D lists represent a single table in which the inner 1D lists represent a single column.
+    /// Returns an annotation table from an IParam if the given token is annotated with the term 'Study File'.
+    /// Returns a map of string * `IParam` 2D list representing the individual parts of the process graph.
+    /// The string is the name of the worksheet that contained the table, and the 2D lists represent a single table where the inner 1D lists represent a single column.
     /// </summary>
-    /// <param name="rootPath">ARC root path</param>
-    /// <param name="refFileToken">IParam of the ARC Tokens</param>
-    static member parseProcessGraphColumnsFromToken (rootPath:string) (refFileToken: IParam) =
-        ISA.parseProcessGraphColumnsFromToken rootPath refFileToken
+    /// <param name="rootPath">The root path of the ARC</param>
+    /// <param name="relFileToken">IParam that may be a relevant token</param>
+    /// <returns>A map of string * `IParam` 2D list representing the individual parts of the process graph</returns>
+    static member parseProcessGraphColumnsFromToken (rootPath:string) (relFileToken: IParam) =
+        ISA.parseProcessGraphColumnsFromToken rootPath relFileToken
 
 
     /// <summary>
-    /// Returns a seq of annotation tables from an IParam seq if the given tokens contains a filepath with the standard study file name ("isa.study.xlsx").
-    /// Map of string * `IParam` 2D List representing the individual parts parts of the Process graph, 
-    /// where the string is the name of the worksheet that contained the table, 
-    /// and the 2D lists represent a single table in which the inner 1D lists represent a single column.
+    /// Returns a seq of annotation tables from an IParam seq for each contained token that is annotated with the term 'Study File'.
+    /// Returns a map of string * `IParam` 2D list representing the individual parts of the process graph.
+    /// The string is the name of the worksheet that contained the table, and the 2D lists represent a single table where the inner 1D lists represent a single column.
     /// </summary>
-    /// <param name="rootPath">ARC root path</param>
-    /// <param name="refFileToken">IParam seq of the ARC Tokens</param>
-    static member parseProcessGraphColumnsFromTokens (rootPath:string) (refFileTokens: #IParam seq) =
-        ISA.parseProcessGraphColumnsFromTokens rootPath (StructuralOntology.AFSO.``Study File``) refFileTokens
+    /// <param name="rootPath">The root path of the ARC</param>
+    /// <param name="relFileTokens">A seq of IParams that may contain relevant tokens</param>
+    /// <returns>A Seq of maps of string * `IParam` 2D list representing the individual parts of the process graph</returns>
+    static member parseProcessGraphColumnsFromTokens (rootPath:string) (relFileTokens: #IParam seq) =
+        ISA.parseProcessGraphColumnsFromTokens rootPath (StructuralOntology.AFSO.``Study File``) relFileTokens
 
 type Assay =
 
@@ -329,7 +325,7 @@ type Assay =
             |> List.concat
 
     /// <summary>
-    /// Returns a function that returns Some flat IParam list representing the assay metadata if the given token contains a filepath with the standard assay file name ("isa.assay.xlsx") or None otherwise.
+    /// Returns a function that returns Some flat IParam list representing the assay metadata if the given token contains an 'Assay File' with the standard assay file name ("isa.assay.xlsx") or None otherwise.
     /// </summary>
     /// <param name="UseLastSheetOnIncorrectName">Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook</param>
     static member tryParseMetadataSheetFromToken(
@@ -346,7 +342,7 @@ type Assay =
 
 
     /// <summary>
-    /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard assay file name ("isa.assay.xlsx")
+    /// Returns a function that parses all metadata sheets from all the tokens containing an 'Assay File' with the standard assay file name ("isa.assay.xlsx")
     /// in a given collection of tokens as a 2D list containing the individual assay metadata as a flat list of `IParam`s.
     ///
     /// if no tokens contain such a file path, the result will be an empty list.
@@ -387,23 +383,23 @@ type Assay =
             |> Map.ofSeq
 
     /// <summary>
-    /// Returns an annotation tables from an IParam if the given token contains a filepath with the standard assay file name ("isa.assay.xlsx").
-    /// Map of string * `IParam` 2D List representing the individual parts parts of the Process graph, 
-    /// where the string is the name of the worksheet that contained the table, 
-    /// and the 2D lists represent a single table in which the inner 1D lists represent a single column.
-    /// </summary>
-    /// <param name="rootPath">ARC root path</param>
-    /// <param name="refFileToken">IParam of the ARC Tokens</param>
-    static member parseProcessGraphColumnsFromToken (rootPath:string) (refFileToken: IParam) =
-        ISA.parseProcessGraphColumnsFromToken rootPath refFileToken
-
-    /// <summary>
-    /// Returns a seq of annotation tables from an IParam seq if the given tokens contains a filepath with the standard assay file name ("isa.assay.xlsx").
-    /// Map of string * `IParam` 2D List representing the individual parts parts of the Process graph, 
-    /// where the string is the name of the worksheet that contained the table, 
-    /// and the 2D lists represent a single table in which the inner 1D lists represent a single column.
+    /// Returns an annotation table from an IParam if the given token is annotated with the term 'Assay File'.
+    /// Returns a map of string * `IParam` 2D list representing the individual parts of the process graph.
+    /// The string is the name of the worksheet that contained the table, and the 2D lists represent a single table where the inner 1D lists represent a single column.
     /// </summary>
-    /// <param name="rootPath">ARC root path</param>
-    /// <param name="refFileToken">IParam seq of the ARC Tokens</param>
-    static member parseProcessGraphColumnsFromTokens (rootPath:string) (refFileTokens: #IParam seq) =
-        ISA.parseProcessGraphColumnsFromTokens rootPath (StructuralOntology.AFSO.``Assay File``) refFileTokens
+    /// <param name="rootPath">The root path of the ARC</param>
+    /// <param name="relFileToken">IParam that may be a relevant token</param>
+    /// <returns>A map of string * `IParam` 2D list representing the individual parts of the process graph</returns>
+    static member parseProcessGraphColumnsFromToken (rootPath:string) (relFileToken: IParam) =
+        ISA.parseProcessGraphColumnsFromToken rootPath relFileToken
+
+/// <summary>
+/// Returns a seq of annotation tables from an IParam seq for each contained token that is annotated with the term 'Assay File'.
+/// Returns a map of string * `IParam` 2D list representing the individual parts of the process graph.
+/// The string is the name of the worksheet that contained the table, and the 2D lists represent a single table where the inner 1D lists represent a single column.
+/// </summary>
+/// <param name="rootPath">The root path of the ARC</param>
+/// <param name="relFileTokens">A seq of IParams that may contain relevant tokens</param>
+/// <returns>A Seq of maps of string * `IParam` 2D list representing the individual parts of the process graph</returns>
+    static member parseProcessGraphColumnsFromTokens (rootPath:string) (relFileTokens: #IParam seq) =
+        ISA.parseProcessGraphColumnsFromTokens rootPath (StructuralOntology.AFSO.``Assay File``) relFileTokens