From 4a1c478a8e771a891bb80849b4b24ee382ab6702 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 15 Apr 2024 16:08:58 -0500 Subject: [PATCH 01/10] make sure uri_html is blank so html is not loaded in rails adding this fix to base datura fixes https://github.com/whitmanarchive/whitman-issues/issues/598 --- lib/datura/to_es/pdf_to_es/fields.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/datura/to_es/pdf_to_es/fields.rb b/lib/datura/to_es/pdf_to_es/fields.rb index b8ce90f52..8a5269fd4 100644 --- a/lib/datura/to_es/pdf_to_es/fields.rb +++ b/lib/datura/to_es/pdf_to_es/fields.rb @@ -319,4 +319,7 @@ def has_relation def has_source end + def uri_html + end + end From e0bb62e7c340322e78edac0d0942e7f96178821c Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 15 Apr 2024 16:30:35 -0500 Subject: [PATCH 02/10] don't hardcode collection name for ead_to_es_items related to https://github.com/whitmanarchive/whitman-issues/issues/666 --- lib/datura/to_es/ead_to_es_items/fields.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datura/to_es/ead_to_es_items/fields.rb b/lib/datura/to_es/ead_to_es_items/fields.rb index 6eaf9c718..449b706e8 100644 --- a/lib/datura/to_es/ead_to_es_items/fields.rb +++ b/lib/datura/to_es/ead_to_es_items/fields.rb @@ -39,7 +39,7 @@ def creator_sort end def collection - "whitman-finding_aid_manuscripts" + "#{@options["collection"]}_items" end def collection_desc From 0bb4c079722effdb644ee193bc1ad01ca7bac8f4 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 5 Jun 2024 16:10:21 -0500 Subject: [PATCH 03/10] fix nested fields in vra that were making the schema not validate --- lib/datura/to_es/vra_to_es/fields.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/datura/to_es/vra_to_es/fields.rb b/lib/datura/to_es/vra_to_es/fields.rb index 0f62b94d0..3c3f6187c 100644 --- a/lib/datura/to_es/vra_to_es/fields.rb +++ b/lib/datura/to_es/vra_to_es/fields.rb @@ -116,16 +116,16 @@ def person # subject element if get_text("@type", xml: p) == "personalName" { - id: nil, - name: get_text(".", xml: p), - role: nil + "id" => nil, + "name" => get_text(".", xml: p), + "role" => nil } # agent element else { - id: nil, - name: get_text("name", xml: p), - role: get_text("role", xml: p) + "id" => nil, + "name" => get_text("name", xml: p), + "role" => get_text("role", xml: p) } end end From 0d393796eb76d5f6e2bb1ce722b43f1f84943f2f Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 22 Jul 2024 14:16:06 -0500 Subject: [PATCH 04/10] add keyword5 for default htmltoes overrides --- lib/datura/to_es/html_to_es/fields.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datura/to_es/html_to_es/fields.rb b/lib/datura/to_es/html_to_es/fields.rb index 725ac4c4d..261296ca7 100644 --- a/lib/datura/to_es/html_to_es/fields.rb +++ b/lib/datura/to_es/html_to_es/fields.rb @@ -278,7 +278,7 @@ def keywords4 get_text(@xpaths["keywords4"]) end - def keywords4 + def keywords5 get_text(@xpaths["keywords5"]) end From 7ae15837344821392d40f1d953c7a61fb4a74d57 Mon Sep 17 00:00:00 2001 From: wkdewey Date: Wed, 7 Aug 2024 17:06:56 -0500 Subject: [PATCH 05/10] check for nil values when variables depende on nokogiri methods --- lib/datura/to_es/ead_to_es/fields.rb | 8 ++++++-- lib/datura/to_es/ead_to_es_items/fields.rb | 14 +++++++++----- lib/datura/to_es/html_to_es/fields.rb | 4 +++- lib/datura/to_es/tei_to_es/fields.rb | 4 +++- .../to_es/tei_to_es/tei_to_es_personography.rb | 4 +++- lib/datura/to_es/vra_to_es/fields.rb | 8 ++++++-- .../to_es/vra_to_es/vra_to_es_personography.rb | 4 +++- lib/datura/to_es/webs_to_es/fields.rb | 4 +++- 8 files changed, 36 insertions(+), 14 deletions(-) diff --git a/lib/datura/to_es/ead_to_es/fields.rb b/lib/datura/to_es/ead_to_es/fields.rb index dfca070f7..95b89976e 100644 --- a/lib/datura/to_es/ead_to_es/fields.rb +++ b/lib/datura/to_es/ead_to_es/fields.rb @@ -77,7 +77,9 @@ def data_type def date(before=true) datestr = get_text(@xpaths["date"]) - return Datura::Helpers.date_standardize(datestr, before) + if datestr + return Datura::Helpers.date_standardize(datestr, before) + end end def date_display @@ -210,7 +212,9 @@ def text text = [] @xpaths.keys.each do |xpath| body = get_text(@xpaths[xpath]) - text << body + if body + text << body + end end text # TODO: do we need to preserve tags like in text? if so, turn get_text to true diff --git a/lib/datura/to_es/ead_to_es_items/fields.rb b/lib/datura/to_es/ead_to_es_items/fields.rb index 449b706e8..dda00bed1 100644 --- a/lib/datura/to_es/ead_to_es_items/fields.rb +++ b/lib/datura/to_es/ead_to_es_items/fields.rb @@ -104,9 +104,11 @@ def format def get_id # doc = id doc = get_text(@xpaths["identifier"]) - if doc == "" + if !doc title = get_text(@xpaths["file"]) - return "#{@filename}_#{title}" + if title + return "#{@filename}_#{title}" + end end return "#{@filename}_#{doc}" end @@ -203,8 +205,8 @@ def subjects end def subcategory - subcategory = get_text(@xpaths["subcategory"]) - subcategory.length > 0 ? subcategory : "none" + # subcategory = get_text(@xpaths["subcategory"]) + # subcategory.length > 0 ? subcategory : "none" end def text @@ -212,7 +214,9 @@ def text # means no worrying about handling spacing between words text = [] body = get_text(@xpaths["text"]) - text << body + if body + text << body + end # TODO: do we need to preserve tags like in text? if so, turn get_text to true # text << CommonXml.convert_tags_in_string(body) text += text_additional diff --git a/lib/datura/to_es/html_to_es/fields.rb b/lib/datura/to_es/html_to_es/fields.rb index 261296ca7..925809916 100644 --- a/lib/datura/to_es/html_to_es/fields.rb +++ b/lib/datura/to_es/html_to_es/fields.rb @@ -152,7 +152,9 @@ def text # means no worrying about handling spacing between words text = [] body = get_text(@xpaths["text"]) - text << body + if body + text << body + end text += text_additional Datura::Helpers.normalize_space(text.join(" ")) end diff --git a/lib/datura/to_es/tei_to_es/fields.rb b/lib/datura/to_es/tei_to_es/fields.rb index 9b435cb72..9fe9b7f96 100644 --- a/lib/datura/to_es/tei_to_es/fields.rb +++ b/lib/datura/to_es/tei_to_es/fields.rb @@ -205,7 +205,9 @@ def text # means no worrying about handling spacing between words text_all = [] body = get_text(@xpaths["text"], keep_tags: false, delimiter: '') - text_all << body + if body + text_all << body + end # TODO: do we need to preserve tags like in text? if so, turn get_text to true # text_all << CommonXml.convert_tags_in_string(body) text_all += text_additional diff --git a/lib/datura/to_es/tei_to_es/tei_to_es_personography.rb b/lib/datura/to_es/tei_to_es/tei_to_es_personography.rb index 7e4ff79be..4d3d43de2 100644 --- a/lib/datura/to_es/tei_to_es/tei_to_es_personography.rb +++ b/lib/datura/to_es/tei_to_es/tei_to_es_personography.rb @@ -16,7 +16,9 @@ def category def creator creators = get_list(@xpaths["creators"], false, @parent_xml) - creators.map { |c| { "name" => c } } + if creators + creators.map { |c| { "name" => c } } + end end def creators diff --git a/lib/datura/to_es/vra_to_es/fields.rb b/lib/datura/to_es/vra_to_es/fields.rb index 3c3f6187c..581d18f9c 100644 --- a/lib/datura/to_es/vra_to_es/fields.rb +++ b/lib/datura/to_es/vra_to_es/fields.rb @@ -52,7 +52,9 @@ def data_type def date(before=true) datestr = get_list(@xpaths["date"]).first - Datura::Helpers.date_standardize(datestr, before) + if datestr + Datura::Helpers.date_standardize(datestr, before) + end end def date_display @@ -191,7 +193,9 @@ def text # handling separate fields in array # means no worrying about handling spacing between words text_all = [] - text_all << get_text(@xpaths["text"]) + if get_text(@xpaths["text"]) + text_all << get_text(@xpaths["text"]) + end # TODO: do we need to preserve tags like in text? if so, turn get_text to true # text_all << CommonXml.convert_tags_in_string(body) text_all += text_additional diff --git a/lib/datura/to_es/vra_to_es/vra_to_es_personography.rb b/lib/datura/to_es/vra_to_es/vra_to_es_personography.rb index 8d8f904b1..cab0c5591 100644 --- a/lib/datura/to_es/vra_to_es/vra_to_es_personography.rb +++ b/lib/datura/to_es/vra_to_es/vra_to_es_personography.rb @@ -13,7 +13,9 @@ def category def creator creators = get_list(@xpaths["creators"], xml: @parent_xml) - creators.map { |c| { "name" => c } } + if creators + creators.map { |c| { "name" => c } } + end end def creator_sort diff --git a/lib/datura/to_es/webs_to_es/fields.rb b/lib/datura/to_es/webs_to_es/fields.rb index 90e91d70d..dbccc9f82 100644 --- a/lib/datura/to_es/webs_to_es/fields.rb +++ b/lib/datura/to_es/webs_to_es/fields.rb @@ -162,7 +162,9 @@ def text # means no worrying about handling spacing between words text = [] body = get_text(@xpaths["text"]) - text << body + if body + text << body + end text += text_additional Datura::Helpers.normalize_space(text.join(" ")) end From 5088c141bb71de257f6093548539a256aa32f559 Mon Sep 17 00:00:00 2001 From: wkdewey Date: Thu, 8 Aug 2024 13:54:16 -0500 Subject: [PATCH 06/10] add character limit and truncate text fields --- lib/config/public.yml | 24 +++++++++++----------- lib/datura/to_es/csv_to_es/fields.rb | 2 +- lib/datura/to_es/ead_to_es_items/fields.rb | 2 +- lib/datura/to_es/html_to_es/fields.rb | 2 +- lib/datura/to_es/pdf_to_es/fields.rb | 2 +- lib/datura/to_es/tei_to_es/fields.rb | 2 +- lib/datura/to_es/vra_to_es/fields.rb | 2 +- lib/datura/to_es/webs_to_es/fields.rb | 2 +- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/config/public.yml b/lib/config/public.yml index 11b42716c..7fefc6c85 100644 --- a/lib/config/public.yml +++ b/lib/config/public.yml @@ -9,22 +9,20 @@ # the collection specific configuration files: # (config/public.yml and config/private.yml) - ################### # Defaults # ################### default: - # SCRIPT POWER # recommend this be increased in private.yml # on more powerful systems to improve runtime threads: 5 # LOGGING - log_old_number: 1 # number of log files before beginning to erase - log_size: 32768000 # size of log file in bytes - log_level: Logger::INFO # available levels: UNKNOWN, FATAL, ERROR, WARN, INFO, DEBUG + log_old_number: 1 # number of log files before beginning to erase + log_size: 32768000 # size of log file in bytes + log_level: Logger::INFO # available levels: UNKNOWN, FATAL, ERROR, WARN, INFO, DEBUG # ELASTICSEARCH SCHEMA CONFIGURATION # if es_schema_override is false, datura is base directory @@ -40,14 +38,17 @@ default: api_version: "1.0" # NOTE: es_schema option is set later as combination of above # es_schema_override, es_schema_path, and api_version + # ES currently has a limited character size for keyword fields of 1000000 + # exceeding this limit (generally in text field) will cause errors when searching + text_limit: 900000 # RESOURCE LOCATIONS - data_base: https://cdrhmedia.unl.edu # xml, csv, html snippets, etc - media_base: https://cdrhmedia.unl.edu # images, audio, video - es_index: override_to_set_index # elasticsearch index name - es_path: http://localhost:9200 # elasticsearch path (recommend override) - solr_core: override_to_set_core # solr core name - solr_path: http://localhost:8983/solr # solr path (recommend override) + data_base: https://cdrhmedia.unl.edu # xml, csv, html snippets, etc + media_base: https://cdrhmedia.unl.edu # images, audio, video + es_index: override_to_set_index # elasticsearch index name + es_path: http://localhost:9200 # elasticsearch path (recommend override) + solr_core: override_to_set_core # solr core name + solr_path: http://localhost:8983/solr # solr path (recommend override) # OUTPUT LOCATION # default is [environment]/output/[file_type] @@ -92,7 +93,6 @@ default: development: data_base: https://cdrhdev1.unl.edu/media - ################## # Production # ################## diff --git a/lib/datura/to_es/csv_to_es/fields.rb b/lib/datura/to_es/csv_to_es/fields.rb index 9adc59692..616470280 100644 --- a/lib/datura/to_es/csv_to_es/fields.rb +++ b/lib/datura/to_es/csv_to_es/fields.rb @@ -187,7 +187,7 @@ def text text_all += text_additional text_all = text_all.compact - Datura::Helpers.normalize_space(text_all.join(" ")) + Datura::Helpers.normalize_space(text_all.join(" "))[0..@options["text_limit"]] end # override and add by collection as needed diff --git a/lib/datura/to_es/ead_to_es_items/fields.rb b/lib/datura/to_es/ead_to_es_items/fields.rb index dda00bed1..8a297ca91 100644 --- a/lib/datura/to_es/ead_to_es_items/fields.rb +++ b/lib/datura/to_es/ead_to_es_items/fields.rb @@ -220,7 +220,7 @@ def text # TODO: do we need to preserve tags like in text? if so, turn get_text to true # text << CommonXml.convert_tags_in_string(body) text += text_additional - return Datura::Helpers.normalize_space(text.join(" ")) + return Datura::Helpers.normalize_space(text.join(" "))[0..@options["text_limit"]] end def text_additional diff --git a/lib/datura/to_es/html_to_es/fields.rb b/lib/datura/to_es/html_to_es/fields.rb index 925809916..babe8adfc 100644 --- a/lib/datura/to_es/html_to_es/fields.rb +++ b/lib/datura/to_es/html_to_es/fields.rb @@ -156,7 +156,7 @@ def text text << body end text += text_additional - Datura::Helpers.normalize_space(text.join(" ")) + Datura::Helpers.normalize_space(text.join(" "))[0..@options["text_limit"]] end def text_additional diff --git a/lib/datura/to_es/pdf_to_es/fields.rb b/lib/datura/to_es/pdf_to_es/fields.rb index 8a5269fd4..1c3bfe4f2 100644 --- a/lib/datura/to_es/pdf_to_es/fields.rb +++ b/lib/datura/to_es/pdf_to_es/fields.rb @@ -187,7 +187,7 @@ def text end text_all += text_additional text_all = text_all.compact - Datura::Helpers.normalize_space(text_all.join(" "))[0..999999] + Datura::Helpers.normalize_space(text_all.join(" "))[0..@options["text_limit"]] end # override and add by collection as needed diff --git a/lib/datura/to_es/tei_to_es/fields.rb b/lib/datura/to_es/tei_to_es/fields.rb index 9fe9b7f96..960fd1929 100644 --- a/lib/datura/to_es/tei_to_es/fields.rb +++ b/lib/datura/to_es/tei_to_es/fields.rb @@ -211,7 +211,7 @@ def text # TODO: do we need to preserve tags like in text? if so, turn get_text to true # text_all << CommonXml.convert_tags_in_string(body) text_all += text_additional - Datura::Helpers.normalize_space(text_all.join(" ")) + Datura::Helpers.normalize_space(text_all.join(" "))[0..@options["text_limit"]] end def text_additional diff --git a/lib/datura/to_es/vra_to_es/fields.rb b/lib/datura/to_es/vra_to_es/fields.rb index 581d18f9c..bdd3c4fd3 100644 --- a/lib/datura/to_es/vra_to_es/fields.rb +++ b/lib/datura/to_es/vra_to_es/fields.rb @@ -199,7 +199,7 @@ def text # TODO: do we need to preserve tags like in text? if so, turn get_text to true # text_all << CommonXml.convert_tags_in_string(body) text_all += text_additional - Datura::Helpers.normalize_space(text_all.join(" ")) + Datura::Helpers.normalize_space(text_all.join(" "))[0..@options["text_limit"]] end def text_additional diff --git a/lib/datura/to_es/webs_to_es/fields.rb b/lib/datura/to_es/webs_to_es/fields.rb index dbccc9f82..8721e4462 100644 --- a/lib/datura/to_es/webs_to_es/fields.rb +++ b/lib/datura/to_es/webs_to_es/fields.rb @@ -166,7 +166,7 @@ def text text << body end text += text_additional - Datura::Helpers.normalize_space(text.join(" ")) + Datura::Helpers.normalize_space(text.join(" "))[0..@options["text_limit"]] end def text_additional From 10e45d0ee91bf1e63fd7178e5c7eb04fc332e4be Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 21 Aug 2024 16:11:06 -0500 Subject: [PATCH 07/10] update changelog --- CHANGELOG.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8861fe0f5..c760f99b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,12 +34,12 @@ Versioning](https://semver.org/spec/v2.0.0.html). - documentation for adding new ingest formats to Datura - byebug gem for debugging - instructions for installing Javascript Runtime files for Saxon -- API schema can either be 1.0 or 2.0 (which includes nested fields); 1.0 will be run by default unless 2.0 is specified. Add the following to `public.yml` or `private.yml` in the data repo: +- API schema can either be the original 1.0 or the newly updated 2.0 (which includes new fields including nested fields); 1.0 will be run by default unless 2.0 is specified. Add the following to `public.yml` or `private.yml` in the data repo: ``` api_version: '2.0' ``` See new schema (2.0) documentation [here](https://github.com/CDRH/datura/docs/schema_v2.md) -- schema validation with API version 2.0, invalidly constructed documents will not post +- schema validation with API version 2.0: invalidly constructed documents will not post - authentication with Elasticesarch 8.5; add the following to `public.yml` or `private.yml` in the data repo: ``` es_user: username @@ -47,24 +47,32 @@ See new schema (2.0) documentation [here](https://github.com/CDRH/datura/docs/sc ``` - field overrides for new fields in the new API schema - functionality to transform EAD files and post them to elasticsearch +- functionality to transform PDF files (including text and metadata) and post them to elasticsearch +- limiting `text` field to a specific limit: `text_field` in `public.yml` or `private.yml` +- configuration options related to Elasticsearch, including `text_limit` and `es_schema_override` and `es_schema_path` to change the location of the Elasticsearch schema +- more detailed errors including a stack trace ### Changed - update ruby to 3.1.2 - date_standardize now relies on strftime instead of manual zero padding for month, day - minor corrections to documentation - XPath: "text" is now ingested as an array and will be displayed delimitted by spaces +- "text" field now includes "notes" XPath +- refactored posting script (`Datura.run`) - refactored command line methods into elasticsearch library - refactored and moved date_standardize and date_display helper methods -- Nokogiri methods `get_text` and `get_list` on TEI now return nil rather than empty strings or arrays if there are no matches +- Nokogiri methods `get_text` and `get_list` on TEI now return nil rather than empty strings or arrays if there are no matches. fields have been changed to check for these nil values ### Migration - check to make sure "text" xpath is doing desired behavior - use Elasticsearch 8.5 or higher and add authentication as described above if security is enabled. See [dev docs instructions](https://github.com/CDRH/cdrh_dev_docs/blob/update_elasticsearch_documentation/publishing/2_basic_requirements.md#downloading-elasticsearch). - upgrade data repos to Ruby 3.1.2 +- - add api version to config as described above - make sure fields are consistent with the api schema, many have been renamed or changed in format -- add nil checks with get_text and get_list methods +- add nil checks with get_text and get_list methods as needed - add EadToES overrides if ingesting EAD files +- add `byebug` and `pdf-reader` to Gemfile in repos based on Datura - if overriding the `read_csv` method in `lib/datura/file_type.rb`, the hash must be prefixed with ** (`**{}`). ## [v0.2.0-beta](https://github.com/CDRH/datura/compare/v0.1.6...v0.2.0-beta) - 2020-08-17 - Altering field and xpath behavior, adds get_elements From 4fcb329cfaabb69198497755c403d6f47667b7d1 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 21 Aug 2024 16:18:26 -0500 Subject: [PATCH 08/10] add ead to list of possible formats --- lib/datura/parser_options/post.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datura/parser_options/post.rb b/lib/datura/parser_options/post.rb index c3dd1218d..b6e48880f 100644 --- a/lib/datura/parser_options/post.rb +++ b/lib/datura/parser_options/post.rb @@ -22,12 +22,12 @@ def self.post_params # default to no restricted format options["format"] = nil - opts.on( '-f', '--format [input]', 'Supported formats (csv, html, pdf, tei, vra, webs)') do |input| + opts.on( '-f', '--format [input]', 'Supported formats (csv, html, ead, pdf, tei, vra, webs)') do |input| if %w[authority annotations].include?(input) puts "'authority' and 'annotations' are invalid formats".red puts "Please select a supported format or rename your custom format" exit - elsif !%w[csv html pdf tei vra webs].include?(input) + elsif !%w[csv ead html pdf tei vra webs].include?(input) puts "Caution: Requested custom format #{input}.".red puts "See FileCustom class for implementation instructions" end From 22b2d3271e04d27d759f80d413a9d0d5b0419387 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 28 Aug 2024 09:40:46 -0500 Subject: [PATCH 09/10] fix incorrect field label and remove redundancy --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c760f99b7..81b874be9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,8 +48,8 @@ See new schema (2.0) documentation [here](https://github.com/CDRH/datura/docs/sc - field overrides for new fields in the new API schema - functionality to transform EAD files and post them to elasticsearch - functionality to transform PDF files (including text and metadata) and post them to elasticsearch -- limiting `text` field to a specific limit: `text_field` in `public.yml` or `private.yml` -- configuration options related to Elasticsearch, including `text_limit` and `es_schema_override` and `es_schema_path` to change the location of the Elasticsearch schema +- limiting `text` field to a specific limit: `text_limit` in `public.yml` or `private.yml` +- configuration options related to Elasticsearch, including `es_schema_override` and `es_schema_path` to change the location of the Elasticsearch schema - more detailed errors including a stack trace ### Changed From 40393d8d4de8243a4876ed751d4b9d6c90844144 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 28 Aug 2024 09:41:07 -0500 Subject: [PATCH 10/10] revert accidental commenting of subcategory override --- lib/datura/to_es/ead_to_es_items/fields.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datura/to_es/ead_to_es_items/fields.rb b/lib/datura/to_es/ead_to_es_items/fields.rb index 8a297ca91..7764e083b 100644 --- a/lib/datura/to_es/ead_to_es_items/fields.rb +++ b/lib/datura/to_es/ead_to_es_items/fields.rb @@ -205,8 +205,8 @@ def subjects end def subcategory - # subcategory = get_text(@xpaths["subcategory"]) - # subcategory.length > 0 ? subcategory : "none" + subcategory = get_text(@xpaths["subcategory"]) + subcategory.length > 0 ? subcategory : "none" end def text