diff --git a/lib/datura/helpers.rb b/lib/datura/helpers.rb index 6e64557e2..bcc245fff 100644 --- a/lib/datura/helpers.rb +++ b/lib/datura/helpers.rb @@ -26,44 +26,7 @@ def self.date_display(date, nd_text="N.D.") def self.date_standardize(date, before=true) if date y, m, d = date.split(/-|\//) - if y && y.length == 4 - # use -1 to indicate that this will be the last possible - m_default = before ? "01" : "-1" - d_default = before ? "01" : "-1" - m = m_default if !m - d = d_default if !d - if Date.valid_date?(y.to_i, m.to_i, d.to_i) - date = Date.new(y.to_i, m.to_i, d.to_i) - date.strftime("%Y-%m-%d") - end - end - end - end - - # get_directory_files - # Note: do not end with / - # params: directory (string) - # returns: returns array of all files found ([] if none), - # returns nil if no directory by that name exists - def self.date_display(date, nd_text="N.D.") - date_hyphen = self.date_standardize(date) - if date_hyphen - y, m, d = date_hyphen.split("-").map { |s| s.to_i } - date_obj = Date.new(y, m, d) - date_obj.strftime("%B %-d, %Y") - else - nd_text - end - end - - # date_standardize - # automatically defaults to setting incomplete dates to the earliest - # date (2016-07 becomes 2016-07-01) but pass in "false" in order - # to set it to the latest available date - def self.date_standardize(date, before=true) - if date - y, m, d = date.split(/-|\//) - if y && y.length == 4 + if y && y.length == 4 && y.to_i.to_s == y # use -1 to indicate that this will be the last possible m_default = before ? "01" : "-1" d_default = before ? "01" : "-1" diff --git a/lib/datura/to_es/ead_to_es/fields.rb b/lib/datura/to_es/ead_to_es/fields.rb index 95b89976e..9ea1559a1 100644 --- a/lib/datura/to_es/ead_to_es/fields.rb +++ b/lib/datura/to_es/ead_to_es/fields.rb @@ -94,9 +94,6 @@ def date_not_before date(true) end - def date_updated - end - def description get_text(@xpaths["description"]) end @@ -309,16 +306,6 @@ def citation # nested end - def container_box - end - - def container_folder - end - - def abstract - get_text(@xpaths["abstract"]) - end - def keywords2 get_text(@xpaths["keywords2"]) end diff --git a/lib/datura/to_es/ead_to_es/xpaths.rb b/lib/datura/to_es/ead_to_es/xpaths.rb index 989d5122c..97212256f 100644 --- a/lib/datura/to_es/ead_to_es/xpaths.rb +++ b/lib/datura/to_es/ead_to_es/xpaths.rb @@ -4,23 +4,23 @@ class EadToEs < XmlToEs # the specific collection's TeiToEs file or create a new method # in that file which returns a different value def xpaths_list - { - "abstract" => "/ead/archdesc/did/abstract", - "creator" => ["/ead/archdesc/did/origination/persname", "/ead/eadheader/filedesc/titlestmt/creator"], - "date" => "/ead/eadheader/filedesc/publicationstmt/date", - "description" => "/ead/archdesc/scopecontent/p", - "formats" => "/ead/archdesc/did/physdesc/genreform", - "identifier" => "/ead/archdesc/did/unitid", - "language" => "/ead/eadheader/profiledesc/langusage/language", - "publisher" => "/ead/eadheader/filedesc/publicationstmt/publisher", - "repository_contact" => "/ead/archdesc/did/repository/address/*", - "rights" => "/ead/archdesc/descgrp/accessrestrict/p", - "rights_holder" => "/ead/archdesc/did/repository/corpname", - "source" => "/ead/archdesc/descgrp/prefercite/p", - "subjects" => "/ead/archdesc/controlaccess/*[not(name()='head')]", - "title" => "/ead/archdesc/did/unittitle", - "text" => "/ead/eadheader/filedesc/titlestmt/*", - "items" => "//*[@level='item']/did/unitid" - }.merge(override_xpaths) - end + { + "abstract" => "/ead/archdesc/did/abstract", + "creator" => ["/ead/archdesc/did/origination/persname", "/ead/eadheader/filedesc/titlestmt/creator"], + "date" => "/ead/eadheader/filedesc/publicationstmt/date", + "description" => "/ead/archdesc/scopecontent/p", + "formats" => "/ead/archdesc/did/physdesc/genreform", + "identifier" => "/ead/archdesc/did/unitid", + "language" => "/ead/eadheader/profiledesc/langusage/language", + "publisher" => "/ead/eadheader/filedesc/publicationstmt/publisher", + "repository_contact" => "/ead/archdesc/did/repository/address/*", + "rights" => "/ead/archdesc/descgrp/accessrestrict/p", + "rights_holder" => "/ead/archdesc/did/repository/corpname", + "source" => "/ead/archdesc/descgrp/prefercite/p", + "subjects" => "/ead/archdesc/controlaccess/*[not(name()='head')]", + "title" => "/ead/archdesc/did/unittitle", + "text" => "/ead/eadheader/filedesc/titlestmt/*", + "items" => "//*[@level='item']/did/unitid" + }.merge(override_xpaths) end +end diff --git a/lib/datura/to_es/html_to_es/fields.rb b/lib/datura/to_es/html_to_es/fields.rb index babe8adfc..9d40c3371 100644 --- a/lib/datura/to_es/html_to_es/fields.rb +++ b/lib/datura/to_es/html_to_es/fields.rb @@ -39,7 +39,9 @@ def data_type end def date(before=true) - datestr = get_list(@xpaths["date"]).first + if get_list(@xpaths["date"]) + datestr = get_list(@xpaths["date"]).first + end if datestr Datura::Helpers.date_standardize(datestr, true) end @@ -80,7 +82,9 @@ def format end def image_id - get_list(@xpaths["image_id"]).first + if get_list(@xpaths["image_id"]) + get_list(@xpaths["image_id"]).first + end end def keywords @@ -223,7 +227,9 @@ def works # new/moved fields for API 2.0 def cover_image - get_list(@xpaths["image_id"]).first + if @xpaths["image_id"] + get_list(@xpaths["image_id"]).first + end end def date_updated diff --git a/lib/datura/to_es/tei_to_es/fields.rb b/lib/datura/to_es/tei_to_es/fields.rb index 960fd1929..5fac96f5c 100644 --- a/lib/datura/to_es/tei_to_es/fields.rb +++ b/lib/datura/to_es/tei_to_es/fields.rb @@ -23,6 +23,8 @@ def creator creators = get_list(@xpaths["creator"]) if creators creators.map { |c| { "name" => Datura::Helpers.normalize_space(c) } } + else + [] end end @@ -359,12 +361,11 @@ def event # nested end - def rdf - # nested - end - def has_source # nested + { + "title" => source + } end def has_relation diff --git a/lib/datura/to_es/vra_to_es/fields.rb b/lib/datura/to_es/vra_to_es/fields.rb index bdd3c4fd3..d83666ea8 100644 --- a/lib/datura/to_es/vra_to_es/fields.rb +++ b/lib/datura/to_es/vra_to_es/fields.rb @@ -51,7 +51,9 @@ def data_type end def date(before=true) - datestr = get_list(@xpaths["date"]).first + if get_list(@xpaths["date"]) + datestr = get_list(@xpaths["date"]).first + end if datestr Datura::Helpers.date_standardize(datestr, before) end @@ -215,7 +217,9 @@ def title end def title_sort - Datura::Helpers.normalize_name(title) + if title + Datura::Helpers.normalize_name(title) + end end def topics @@ -265,7 +269,9 @@ def works # new/moved fields for API 2.0 def cover_image - get_list(@xpaths["image_id"]).first + if get_list(@xpaths["image_id"]) + get_list(@xpaths["image_id"]).first + end end def date_updated @@ -345,10 +351,6 @@ def next_item def event # nested end - - def rdf - # nested - end def has_source # nested diff --git a/lib/datura/to_es/webs_to_es/fields.rb b/lib/datura/to_es/webs_to_es/fields.rb index 8721e4462..83dc6b5ef 100644 --- a/lib/datura/to_es/webs_to_es/fields.rb +++ b/lib/datura/to_es/webs_to_es/fields.rb @@ -308,10 +308,6 @@ def next_item def event # nested end - - def rdf - # nested - end def has_source # nested diff --git a/test/csv_to_es_test.rb b/test/csv_to_es_test.rb index ba4cd35db..6f3d93d03 100644 --- a/test/csv_to_es_test.rb +++ b/test/csv_to_es_test.rb @@ -4,26 +4,28 @@ class CsvToEsTest < Minitest::Test def setup path = File.join($fixture_path, "csv", "testing.csv") - csv = CSV.read(path, headers: true) - + csv = CSV.read(path, **{ + encoding: "utf-8", + headers: true, + }) + $options["api_version"] = "2.0" @test1 = CsvToEs.new(csv[0], $options, csv) @test2 = CsvToEs.new(csv[1], $options, csv) end def test_assemble_json json = @test1.assemble_json - assert_equal 42, json.length + assert_equal 58, json.length assert_equal "test.001", json["identifier"] json = @test2.assemble_json - assert_equal 42, json.length + assert_equal 58, json.length assert_equal "test.002", json["identifier"] end def test_csv_to_es_fields json = @test1.assemble_json assert_equal "1887-01-01", json["date"] - contributors = [{"name"=>"Jessica Dussault"}, {"name"=>"Greg Tunink"}, {"name"=>"Karin Dalziel"}] assert_equal contributors, json["contributor"] diff --git a/test/es_index_test.rb b/test/es_index_test.rb index 5cee19c27..6cc1f088b 100644 --- a/test/es_index_test.rb +++ b/test/es_index_test.rb @@ -14,13 +14,12 @@ class Datura::ElasticsearchIndexTest < Minitest::Test # stub in get_schema so that we can test get_schema_mapping without # worrying about integration with actual index - class Datura::Elasticsearch::Index def get_schema - raw = File.read( - File.join( - File.expand_path(File.dirname(__FILE__)), - "fixtures/es_mapping_2.0.json" + raw = File.read( + File.join( + File.expand_path(File.dirname(__FILE__)), + "fixtures/es_mapping_2.0.json" ) ) JSON.parse(raw) @@ -44,9 +43,9 @@ def test_get_schema_mapping es = Datura::Elasticsearch::Index.new(@@options) es.get_schema_mapping assert es.schema_mapping["fields"] - assert_equal 46, es.schema_mapping["fields"].length + assert_equal 60, es.schema_mapping["fields"].length assert_equal( - /^.*_d$|^.*_i$|^.*_k$|^.*_n$|^.*_t$|^.*_t_en$|^.*_t_es$/, + /^(?:.*_d|.*_i|.*_k|.*_n|.*_t|.*_t_en|.*_t_es)$/, es.schema_mapping["dynamic"] ) end @@ -76,7 +75,7 @@ def test_valid_document? assert es.valid_document?({ "creator" => [ { - "subcategory" => "a", + "category2" => "a", "data_type" => "a", "keyword_k" => "a" } diff --git a/test/fixtures/es_mapping_2.0.json b/test/fixtures/es_mapping_2.0.json index f82189503..1071e0e14 100644 --- a/test/fixtures/es_mapping_2.0.json +++ b/test/fixtures/es_mapping_2.0.json @@ -1,343 +1,735 @@ { - "fake_index" : { - "mappings" : { - "_doc" : { - "dynamic_templates" : [ - { - "date_fields" : { - "match" : "*_d", - "mapping" : { - "format" : "yyyy-MM-dd||epoch_millis", - "type" : "date" - } + "fake_index": { + "mappings": { + "dynamic_templates": [ + { + "date_fields": { + "match": "*_d", + "mapping": { + "format": "yyyy-MM-dd||epoch_millis", + "type": "date" } - }, - { - "integer_fields" : { - "match" : "*_i", - "mapping" : { - "type" : "integer" - } + } + }, + { + "integer_fields": { + "match": "*_i", + "mapping": { + "type": "integer" } - }, - { - "keyword_fields" : { - "match" : "*_k", - "mapping" : { - "normalizer" : "keyword_normalized", - "type" : "keyword" - } + } + }, + { + "keyword_fields": { + "match": "*_k", + "mapping": { + "normalizer": "keyword_normalized", + "type": "keyword" } - }, - { - "nested_fields" : { - "match" : "*_n", - "mapping" : { - "type" : "nested" - } + } + }, + { + "nested_fields": { + "match": "*_n", + "mapping": { + "type": "nested" } - }, - { - "text_fields" : { - "match" : "*_t", - "mapping" : { - "analyzer" : "english", - "type" : "text" - } + } + }, + { + "text_fields": { + "match": "*_t", + "mapping": { + "analyzer": "english", + "type": "text" } - }, - { - "text_english" : { - "match" : "*_t_en", - "mapping" : { - "analyzer" : "english", - "type" : "text" - } + } + }, + { + "text_english": { + "match": "*_t_en", + "mapping": { + "analyzer": "english", + "type": "text" } - }, - { - "text_spanish" : { - "match" : "*_t_es", - "mapping" : { - "analyzer" : "spanish", - "type" : "text" - } + } + }, + { + "text_spanish": { + "match": "*_t_es", + "mapping": { + "analyzer": "spanish", + "type": "text" } } - ], - "properties" : { - "abstract" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "alternative" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "annotations_text" : { - "type" : "text", - "analyzer" : "english" - }, - "category" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "collection" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "collection_desc" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "contributor" : { - "type" : "nested", - "properties" : { - "id" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "name" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "role" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" + } + ], + "properties": { + "abstract": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "alternative": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "annotations_text": { + "type": "text", + "analyzer": "english" + }, + "category": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "category2": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "category3": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "category4": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "category5": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "citation": { + "type": "nested", + "properties": { + "date": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "issue": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } } + }, + "page_end": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "page_start": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "place": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "publisher": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "section": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title_a": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title_j": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title_m": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "volume": { + "type": "keyword", + "normalizer": "keyword_normalized" } - }, - "coverage-spatial" : { - "type" : "nested", - "properties" : { - "city" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "coordinates" : { - "type" : "geo_point" - }, - "country" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "county" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "id" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "place_name" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "postal_code" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "region" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "state" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "street" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - } + } + }, + "collection": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "collection_desc": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "container_box": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "container_folder": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "contributor": { + "type": "nested", + "properties": { + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" } - }, - "creator" : { - "type" : "nested", - "properties" : { - "id" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "name" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - } + } + }, + "count_k": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "cover_image": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "creator": { + "type": "nested", + "properties": { + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name": { + "type": "keyword", + "normalizer": "keyword_normalized" } - }, - "creator_sort" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "data_type" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "date" : { - "type" : "date", - "format" : "yyyy-MM-dd||epoch_millis" - }, - "date_display" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "date_not_after" : { - "type" : "date", - "format" : "yyyy-MM-dd||epoch_millis" - }, - "date_not_before" : { - "type" : "date", - "format" : "yyyy-MM-dd||epoch_millis" - }, - "description" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "extent" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "format" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "identifier" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "image_id" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "image_location" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "keywords" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "language" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "languages" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "medium" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "people" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "person" : { - "type" : "nested", - "properties" : { - "id" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "name" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "role" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - } + } + }, + "data_type": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "date": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "date_display": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "date_not_after": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "date_not_before": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "date_updated": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "description": { + "type": "text", + "analyzer": "english" + }, + "event": { + "type": "nested", + "properties": { + "agent": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "date_begin": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "date_end": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "factor": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "notes": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "product": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait1": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait2": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait3": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait4": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait5": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "type": { + "type": "keyword", + "normalizer": "keyword_normalized" } - }, - "places" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "publisher" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "recipient" : { - "type" : "nested", - "properties" : { - "id" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "name" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "role" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - } + } + }, + "extent": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "fig_location": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "format": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "has_part": { + "type": "nested", + "properties": { + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "order": { + "type": "integer" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title": { + "type": "keyword", + "normalizer": "keyword_normalized" + } + } + }, + "has_relation": { + "type": "nested", + "properties": { + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "order": { + "type": "integer" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title": { + "type": "keyword", + "normalizer": "keyword_normalized" + } + } + }, + "has_source": { + "type": "nested", + "properties": { + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "order": { + "type": "integer" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title": { + "type": "keyword", + "normalizer": "keyword_normalized" + } + } + }, + "identifier": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "is_part_of": { + "type": "nested", + "properties": { + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "order": { + "type": "integer" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title": { + "type": "keyword", + "normalizer": "keyword_normalized" + } + } + }, + "keywords": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "keywords2": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "keywords3": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "keywords4": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "keywords5": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "language": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "medium": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "next_item": { + "type": "nested", + "properties": { + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "order": { + "type": "integer" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title": { + "type": "keyword", + "normalizer": "keyword_normalized" + } + } + }, + "notes": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "person": { + "type": "nested", + "properties": { + "age_category": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "birth_date": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "death_date": { + "type": "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "gender": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name_alternate": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name_given": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name_last": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name_previous": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "nationality": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "note": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "order": { + "type": "integer" + }, + "race": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "sex": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait1": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait2": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait3": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait4": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait5": { + "type": "keyword", + "normalizer": "keyword_normalized" + } + } + }, + "places": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "previous_item": { + "type": "nested", + "properties": { + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "order": { + "type": "integer" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title": { + "type": "keyword", + "normalizer": "keyword_normalized" + } + } + }, + "rdf": { + "type": "nested", + "properties": { + "note": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "object": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "predicate": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "source": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "subject": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "type": { + "type": "keyword", + "normalizer": "keyword_normalized" + } + } + }, + "relation": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "rights": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "rights_holder": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "rights_uri": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "spatial": { + "type": "nested", + "properties": { + "city": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "coordinates": { + "type": "geo_point" + }, + "country": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "county": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "description": { + "type": "text", + "analyzer": "english" + }, + "id": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "name": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "note": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "postal_code": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "region": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "role": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "short_name": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "state": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "street": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "township": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait1": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait2": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait3": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait4": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "trait5": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "type": { + "type": "keyword", + "normalizer": "keyword_normalized" } - }, - "relation" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "rights" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "rights_holder" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "rights_uri" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "source" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "subcategory" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "subjects" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "text" : { - "type" : "text", - "analyzer" : "english" - }, - "title" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "title_sort" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "topics" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "type" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "uri" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "uri_data" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "uri_html" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" - }, - "works" : { - "type" : "keyword", - "normalizer" : "keyword_normalized" } + }, + "subjects": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "text": { + "type": "text", + "analyzer": "english" + }, + "title": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "title_sort": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "topics": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "type": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "uri": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "uri_data": { + "type": "keyword", + "normalizer": "keyword_normalized" + }, + "uri_html": { + "type": "keyword", + "normalizer": "keyword_normalized" } } } diff --git a/test/helpers_test.rb b/test/helpers_test.rb index 88d740ecf..0fa3ca13f 100644 --- a/test/helpers_test.rb +++ b/test/helpers_test.rb @@ -35,7 +35,7 @@ def test_date_standardize def test_get_directory_files # real directory files = Datura::Helpers.get_directory_files("#{File.dirname(__FILE__)}/fixtures") - assert_equal 6, files.length + assert_equal 7, files.length # not a real directory files = Datura::Helpers.get_directory_files("/fake") diff --git a/test/html_to_es_test.rb b/test/html_to_es_test.rb index e0f5b9281..d15dff342 100644 --- a/test/html_to_es_test.rb +++ b/test/html_to_es_test.rb @@ -7,7 +7,7 @@ def setup def test_assemble_json json = @test.assemble_json - assert_equal 42, json.length + assert_equal 58, json.length assert_equal "testing", json["identifier"] end diff --git a/test/tei_to_es_test.rb b/test/tei_to_es_test.rb index 19d59c9d1..bc510963a 100644 --- a/test/tei_to_es_test.rb +++ b/test/tei_to_es_test.rb @@ -10,7 +10,7 @@ def setup def test_assemble_json json = @neihardt.assemble_json - assert_equal 42, json.length + assert_equal 58, json.length assert_equal "nei.j4c.12.52", json["identifier"] end @@ -26,11 +26,11 @@ def test_fields creator = [{"name"=>"Neihardt, John Gneisenau, 1881-1973"}] assert_equal creator, neihardt["creator"] - contributor = [{"id"=>"lkw", "name"=>"Weakly, Laura K.", "role"=>""}, - {"id"=>"swa", "name"=>"Adrales, Samantha W.", "role"=>""}, - {"id"=>"az", "name"=>"Zeljkovic, Arman", "role"=>""}, - {"id"=>"ep", "name"=>"Pedigo, Erin", "role"=>""}, - {"id"=>"", "name"=>"Gossin, Pamela", "role"=>""} + contributor = [{"id"=>"lkw", "name"=>"Weakly, Laura K.", "role"=>nil}, + {"id"=>"swa", "name"=>"Adrales, Samantha W.", "role"=>nil}, + {"id"=>"az", "name"=>"Zeljkovic, Arman", "role"=>nil}, + {"id"=>"ep", "name"=>"Pedigo, Erin", "role"=>nil}, + {"id"=>nil, "name"=>"Gossin, Pamela", "role"=>nil} ] assert_equal contributor, neihardt["contributor"] @@ -40,18 +40,19 @@ def test_fields assert_equal "late 1865 (?)", whitman["date_display"] assert_equal "1865-12-31", whitman["date_not_after"] assert_equal "1865-07-01", whitman["date_not_before"] - # source - source = "Track and stable talk, Aberdeen, South Dakota, 1888-02-24" - assert_equal source, cody["source"] - - source = "Neihardt, John Gneisenau, 1881-1973, Letter from John G. Neihardt to Julius T. House, 1927-11-05" - assert_equal source, neihardt["source"] - - # NOTE: this document does have publisher information in the biblStruct and will - # need to override the default xpaths to obtain that information for the source field - source = "Walt Whitman, Walt Whitman to a Soldier, late 1865 (?)" - assert_equal source, whitman["source"] + # note that source has been replaced by has_source + # TeiToES at this point does not define default behavior for this field + # source = "Track and stable talk, Aberdeen, South Dakota, 1888-02-24" + # assert_equal source, cody["has_source"] + + # source = "Neihardt, John Gneisenau, 1881-1973, Letter from John G. Neihardt to Julius T. House, 1927-11-05" + # assert_equal source, neihardt["has_source"] + + # # NOTE: this document does have publisher information in the biblStruct and will + # # need to override the default xpaths to obtain that information for the source field + # source = "Walt Whitman, Walt Whitman to a Soldier, late 1865 (?)" + # assert_equal source, whitman["has_source"] end end diff --git a/test/test_helper.rb b/test/test_helper.rb index 77226fc69..85e0db7ba 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -7,7 +7,8 @@ "collection" => "test_collection", "data_base" => "cdrhtest.unl.edu/media", "environment" => "test", - "site_url" => "cdrhtest.unl.edu" + "site_url" => "cdrhtest.unl.edu", + "api_version" => "2.0" } current_dir = File.expand_path(File.dirname(__FILE__)) diff --git a/test/vra_to_es_test.rb b/test/vra_to_es_test.rb index 60b56f23a..1b815af5e 100644 --- a/test/vra_to_es_test.rb +++ b/test/vra_to_es_test.rb @@ -7,7 +7,7 @@ def setup def test_assemble_json json = @cody.assemble_json - assert_equal 42, json.length + assert_equal 58, json.length assert_equal "wfc.img.pho.69.236.82", json["identifier"] end @@ -22,8 +22,8 @@ def test_vra_to_es_fields assert_equal keywords, json["keywords"] people = [ - {id: nil, name: "Käsebier, Gertrude, 1852-1934", role: "photographer"}, - {id: nil, name: "Spotted Horse, Willie", role: nil} + {"id" => nil, "name" => "Käsebier, Gertrude, 1852-1934", "role" => "photographer"}, + {"id" => nil, "name" => "Spotted Horse, Willie", "role" => nil} ] assert_equal people, json["person"] end diff --git a/test/webs_to_es_test.rb b/test/webs_to_es_test.rb index 44d48d35e..2bd576bdf 100644 --- a/test/webs_to_es_test.rb +++ b/test/webs_to_es_test.rb @@ -7,7 +7,7 @@ def setup def test_assemble_json json = @test.assemble_json - assert_equal 42, json.length + assert_equal 58, json.length assert_equal "testing", json["identifier"] end