CDRH · wkdewey · Jul 17, 2024 · Jul 17, 2024 · Jul 18, 2024 · Jul 18, 2024
diff --git a/Gemfile b/Gemfile
@@ -1,4 +1,5 @@
 source "https://rubygems.org"
 gem 'byebug'
-gem 'datura', git: 'https://github.com/CDRH/datura', branch: "release/v1.0.0"
-gem 'fileutils'
+gem 'datura', git: 'https://github.com/CDRH/datura', branch: "whitman-fixes"
+gem 'fileutils'
+gem "pdf-reader"
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -1,7 +1,7 @@
 GIT
   remote: https://github.com/CDRH/datura
-  revision: f286ab4d4a3510fe88dafa5f759f2cb5f0290ef2
-  branch: release/v1.0.0
+  revision: 0d393796eb76d5f6e2bb1ce722b43f1f84943f2f
+  branch: whitman-fixes
   specs:
     datura (0.2.0.pre.beta)
       colorize (~> 0.8.1)
@@ -11,38 +11,51 @@ GIT
 GEM
   remote: https://rubygems.org/
   specs:
+    Ascii85 (1.1.1)
+    afm (0.2.2)
+    bigdecimal (3.1.8)
     byebug (11.1.3)
     colorize (0.8.1)
-    domain_name (0.5.20190701)
-      unf (>= 0.0.5, < 1.0.0)
+    domain_name (0.6.20240107)
     fileutils (1.7.0)
+    hashery (2.1.2)
     http-accept (1.7.0)
-    http-cookie (1.0.5)
+    http-cookie (1.0.6)
       domain_name (~> 0.5)
-    mime-types (3.4.1)
+    mime-types (3.5.2)
       mime-types-data (~> 3.2015)
-    mime-types-data (3.2023.0218.1)
+    mime-types-data (3.2024.0702)
     netrc (0.11.0)
-    nokogiri (1.14.3-x86_64-darwin)
+    nokogiri (1.16.6-arm64-darwin)
       racc (~> 1.4)
-    racc (1.6.2)
+    nokogiri (1.16.6-x86_64-darwin)
+      racc (~> 1.4)
+    pdf-reader (2.12.0)
+      Ascii85 (~> 1.0)
+      afm (~> 0.2.1)
+      hashery (~> 2.0)
+      ruby-rc4
+      ttfunk
+    racc (1.8.0)
     rest-client (2.1.0)
       http-accept (>= 1.7.0, < 2.0)
       http-cookie (>= 1.0.2, < 2.0)
       mime-types (>= 1.16, < 4.0)
       netrc (~> 0.8)
-    unf (0.1.4)
-      unf_ext
-    unf_ext (0.0.8.2)
+    ruby-rc4 (0.1.5)
+    ttfunk (1.8.0)
+      bigdecimal (~> 3.1)
 
 PLATFORMS
+  arm64-darwin-23
   x86_64-darwin-19
   x86_64-darwin-22
 
 DEPENDENCIES
   byebug
   datura!
   fileutils
+  pdf-reader
 
 BUNDLED WITH
    2.4.3
diff --git a/scripts/overrides/csv_to_es.rb b/scripts/overrides/csv_to_es.rb
@@ -1,22 +1,26 @@
 class CsvToEs
 
   def assemble_collection_specific
+    # should be changed for baserow
     @json["count_k"] = rdf.select { |i| i["predicate"] != "sameAs" }.count.to_s
   end
 
   def get_id
-    "fig_" + @row["id"]
+    # TODO: verify this works with baserow (it is expected to)
+    @row["id 2"]
   end
 
   def category
     "Religious figures"
   end
 
   def title
+    # should work for baserow
     @row["name"]
   end
 
   def date_not_before
+    #should work with baserow
     if @row["birth_date"] && !@row["birth_date"].empty?
       Datura::Helpers.date_standardize(@row["birth_date"], false)
     else
@@ -25,6 +29,7 @@ def date_not_before
   end
 
   def date_not_after
+    #should work with baserow
     if @row["death_date"] && !@row["death_date"].empty?
       Datura::Helpers.date_standardize(@row["death_date"], false)
     else
@@ -39,6 +44,7 @@ def date_display
   end
 
   def type
+    #should work with baserow
     @row["religious_tradition"]
   end
 
@@ -47,28 +53,31 @@ def type
   # end
 
   def rdf
+    #I think this needs to be constructed for baserow
     items = []
-    if @row["monasteries"]
+    if @row["Associated Monasteries"]
       # each monastery should be in the format id|role|associated_teaching|story
-      JSON.parse(@row["monasteries"]).each do |monastery|
-        monastery_data = monastery.split("|")
+      @row["Associated Monasteries"].split("\",\"").each do |monastery|
+        monastery_data = monastery.tr("\"", "").split("|")
         items << {
           "subject" => title, #name of the current figure
-          "predicate" => monastery_data[1], #role
-          "object" => monastery_data[0], #monastery id and name
-          "source" => monastery_data[2], #associated teaching
-          "note" => monastery_data[3] #story
+          "predicate" => monastery_data[2], #role
+          "object" => monastery_data[1], #monastery id and name
+          "source" => monastery_data[3], #associated teaching
+          "note" => monastery_data[4] #story
         }
       end
     end
     if relation
+      #this part should still work, although need to add the uri
       items << {
         "subject" => uri,
         "predicate" => "sameAs",
         "object" => "https://library.bdrc.io/show/bdr:#{relation}",
         "source" => "Buddhist Digital Resource Center",
         "note" => "link"
       }
+      #
       #TODO Treasury of Lives
       items << {
         "subject" => uri,
@@ -82,10 +91,12 @@ def rdf
   end
 
   def description
+    #same as baserow
     @row["description"]
   end
 
   def relation
+    #same as baserow
     @row["BDRC number"]
   end
 

diff --git a/scripts/overrides/csv_to_es_monasteries.rb b/scripts/overrides/csv_to_es_monasteries.rb
@@ -5,7 +5,8 @@ def assemble_collection_specific
   end
 
   def get_id
-    "mon_" + @row["id"]
+    #should work with baserow
+    @row["id 2"]
   end
 
   def category
@@ -22,9 +23,12 @@ def person
     # how to get the associated figures back in to here?
     # two-way relationships in Orchid and Elasticsearch
     # it should it least
+    # how to change for baserow? I'm not sure it is really different from the rdf field
+    # could record the figures somewhere
   end
 
   def date_not_before
+    #should work with baserow
     if @row["founding date"] && !@row["founding date"].empty?
       Datura::Helpers.date_standardize(@row["founding date"], false)
     end
@@ -38,24 +42,26 @@ def date_display
 
 
   def rdf
+    # need to construct a markdown type field
     items = []
-    if @row["figures"]
+    if @row["Associated Figures"]
       # each figure should be in the format id|role|associated_teaching|story
-      JSON.parse(@row["figures"]).each do |figure|
-        figure_data = figure.split("|")
+      @row["Associated Figures"].split("\",\"").each do |figure|
+        figure_data = figure.tr("\"", "").split("|")
         if figure_data[2] == "nan"
           figure_data[2] = nil
         end
         items << {
           "subject" => figure_data[0], #figure id and name
-          "predicate" => figure_data[1], #role
+          "predicate" => figure_data[2], #role
           "object" => title, #name of current monastery
-          "source" => figure_data[2], #associated teaching
-          "note" => figure_data[3] #story
+          "source" => figure_data[3], #associated teaching
+          "note" => figure_data[4] #story
         }
       end
     end
     if relation
+      #this should work in baserow but I need to figure out the uri part
       items << {
         "subject" => uri,
         "predicate" => "sameAs",

diff --git a/scripts/overrides/file_csv.rb b/scripts/overrides/file_csv.rb
@@ -26,7 +26,7 @@ def read_csv(file_location, encoding="utf-8")
 
   def row_to_es(headers, row, table)
     # process the cases and people tables with different overrides
-    puts "processing " + row["id"]
+    puts "processing " + row["id 2"]
     if table == "figures"
       CsvToEs.new(row, options, @csv, self.filename(false)).json
     elsif table == "monasteries"

diff --git a/source/csv/Figures.csv b/source/csv/Figures.csv