Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix test failures #234

Open
wants to merge 5 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 1 addition & 38 deletions lib/datura/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,44 +26,7 @@ def self.date_display(date, nd_text="N.D.")
# date_standardize
# Normalizes a complete or partial date string to "YYYY-MM-DD".
# params: date (string using "-" or "/" separators, e.g. "2016-07" or
#   "2016/07/04"), before (true fills a missing month / day with the
#   earliest value, false fills it with the latest)
# returns: the standardized string, or nil when date is nil, the year is
#   not a four digit number, or the pieces do not form a valid date
def self.date_standardize(date, before=true)
  return unless date

  y, m, d = date.split(/-|\//)
  # length alone is not enough: "abcd".to_i is 0, which Date accepts as a year
  return unless y && y.length == 4 && y.to_i.to_s == y

  # use -1 to indicate that this will be the last possible month / day
  fallback = before ? "01" : "-1"
  m ||= fallback
  d ||= fallback
  return unless Date.valid_date?(y.to_i, m.to_i, d.to_i)

  Date.new(y.to_i, m.to_i, d.to_i).strftime("%Y-%m-%d")
end

# date_display
# Formats a date for display as "Month D, YYYY" (for example "July 4, 2016").
# params: date (string, run through date_standardize first),
#   nd_text (string returned when the date cannot be standardized)
# returns: the formatted date, or nd_text
def self.date_display(date, nd_text="N.D.")
  date_hyphen = self.date_standardize(date)
  return nd_text unless date_hyphen

  # the standardized form is "YYYY-MM-DD", so each piece converts to an integer
  y, m, d = date_hyphen.split("-").map(&:to_i)
  Date.new(y, m, d).strftime("%B %-d, %Y")
end

# date_standardize
# automatically defaults to setting incomplete dates to the earliest
# date (2016-07 becomes 2016-07-01) but pass in "false" in order
# to set it to the latest available date
def self.date_standardize(date, before=true)
if date
y, m, d = date.split(/-|\//)
if y && y.length == 4
if y && y.length == 4 && y.to_i.to_s == y
# use -1 to indicate that this will be the last possible
m_default = before ? "01" : "-1"
d_default = before ? "01" : "-1"
Expand Down
13 changes: 0 additions & 13 deletions lib/datura/to_es/ead_to_es/fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ def date_not_before
date(true)
end

# date_updated
# Placeholder: the body is empty, so this always returns nil.
def date_updated
end

# description
# Returns whatever get_text yields for the "description" entry of @xpaths.
def description
get_text(@xpaths["description"])
end
Expand Down Expand Up @@ -309,16 +306,6 @@ def citation
# nested
end

# container_box
# Placeholder: the body is empty, so this always returns nil.
def container_box
end

# container_folder
# Placeholder: the body is empty, so this always returns nil.
def container_folder
end

# abstract
# Returns whatever get_text yields for the "abstract" entry of @xpaths.
def abstract
get_text(@xpaths["abstract"])
end

# keywords2
# Returns whatever get_text yields for the "keywords2" entry of @xpaths.
def keywords2
get_text(@xpaths["keywords2"])
end
Expand Down
38 changes: 19 additions & 19 deletions lib/datura/to_es/ead_to_es/xpaths.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@ class EadToEs < XmlToEs
# the specific collection's TeiToEs file or create a new method
# in that file which returns a different value
def xpaths_list
  # defaults come first; any key also present in override_xpaths is replaced
  # by the override's value
  {
    "abstract" => "/ead/archdesc/did/abstract",
    "creator" => ["/ead/archdesc/did/origination/persname", "/ead/eadheader/filedesc/titlestmt/creator"],
    "date" => "/ead/eadheader/filedesc/publicationstmt/date",
    "description" => "/ead/archdesc/scopecontent/p",
    "formats" => "/ead/archdesc/did/physdesc/genreform",
    "identifier" => "/ead/archdesc/did/unitid",
    "language" => "/ead/eadheader/profiledesc/langusage/language",
    "publisher" => "/ead/eadheader/filedesc/publicationstmt/publisher",
    "repository_contact" => "/ead/archdesc/did/repository/address/*",
    "rights" => "/ead/archdesc/descgrp/accessrestrict/p",
    "rights_holder" => "/ead/archdesc/did/repository/corpname",
    "source" => "/ead/archdesc/descgrp/prefercite/p",
    "subjects" => "/ead/archdesc/controlaccess/*[not(name()='head')]",
    "title" => "/ead/archdesc/did/unittitle",
    "text" => "/ead/eadheader/filedesc/titlestmt/*",
    "items" => "//*[@level='item']/did/unitid"
  }.merge(override_xpaths)
end
end
12 changes: 9 additions & 3 deletions lib/datura/to_es/html_to_es/fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ def data_type
end

def date(before=true)
datestr = get_list(@xpaths["date"]).first
if get_list(@xpaths["date"])
datestr = get_list(@xpaths["date"]).first
end
if datestr
Datura::Helpers.date_standardize(datestr, true)
end
Expand Down Expand Up @@ -80,7 +82,9 @@ def format
end

# image_id
# Returns the first entry get_list finds for the "image_id" xpath, or nil
# when get_list returns nil. The lookup runs once and its result is reused.
def image_id
  ids = get_list(@xpaths["image_id"])
  ids.first if ids
end

def keywords
Expand Down Expand Up @@ -223,7 +227,9 @@ def works
# new/moved fields for API 2.0

# cover_image
# Returns the first entry get_list finds for the "image_id" xpath.
# Returns nil when no "image_id" xpath is configured or when get_list
# returns nil.
def cover_image
  # skip the lookup entirely when there is no xpath to search with
  return unless @xpaths["image_id"]

  ids = get_list(@xpaths["image_id"])
  ids.first if ids
end

def date_updated
Expand Down
9 changes: 5 additions & 4 deletions lib/datura/to_es/tei_to_es/fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def creator
creators = get_list(@xpaths["creator"])
if creators
creators.map { |c| { "name" => Datura::Helpers.normalize_space(c) } }
else
[]
end
end

Expand Down Expand Up @@ -359,12 +361,11 @@ def event
# nested
end

# rdf
# Placeholder marked "nested": there is no implementation, so this returns nil.
def rdf
# nested
end

# has_source
# Wraps the value of `source` in a single-entry hash under the "title" key.
def has_source
  # nested
  title_value = source
  { "title" => title_value }
end

def has_relation
Expand Down
16 changes: 9 additions & 7 deletions lib/datura/to_es/vra_to_es/fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ def data_type
end

def date(before=true)
datestr = get_list(@xpaths["date"]).first
if get_list(@xpaths["date"])
datestr = get_list(@xpaths["date"]).first
end
if datestr
Datura::Helpers.date_standardize(datestr, before)
end
Expand Down Expand Up @@ -215,7 +217,9 @@ def title
end

# title_sort
# Returns the title passed through Datura::Helpers.normalize_name, or nil
# when there is no title. The helper is never called with nil.
def title_sort
  current_title = title
  Datura::Helpers.normalize_name(current_title) if current_title
end

def topics
Expand Down Expand Up @@ -265,7 +269,9 @@ def works
# new/moved fields for API 2.0

# cover_image
# Returns the first entry get_list finds for the "image_id" xpath, or nil
# when get_list returns nil. The lookup runs once and its result is reused.
def cover_image
  ids = get_list(@xpaths["image_id"])
  ids.first if ids
end

def date_updated
Expand Down Expand Up @@ -345,10 +351,6 @@ def next_item
# event
# Placeholder marked "nested": there is no implementation, so this returns nil.
def event
# nested
end

# rdf
# Placeholder marked "nested": there is no implementation, so this returns nil.
def rdf
# nested
end

def has_source
# nested
Expand Down
4 changes: 0 additions & 4 deletions lib/datura/to_es/webs_to_es/fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -308,10 +308,6 @@ def next_item
# event
# Placeholder marked "nested": there is no implementation, so this returns nil.
def event
# nested
end

# rdf
# Placeholder marked "nested": there is no implementation, so this returns nil.
def rdf
# nested
end

def has_source
# nested
Expand Down
12 changes: 7 additions & 5 deletions test/csv_to_es_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,28 @@ class CsvToEsTest < Minitest::Test

# Reads the CSV fixture once (UTF-8, first row as headers), sets the API
# version option to "2.0", and builds a CsvToEs for each of the first two rows.
def setup
  path = File.join($fixture_path, "csv", "testing.csv")
  csv = CSV.read(path, encoding: "utf-8", headers: true)
  $options["api_version"] = "2.0"
  @test1 = CsvToEs.new(csv[0], $options, csv)
  @test2 = CsvToEs.new(csv[1], $options, csv)
end

# Each fixture row should assemble into a document with 58 top-level keys
# and the identifier taken from that row.
def test_assemble_json
  json = @test1.assemble_json
  assert_equal 58, json.length
  assert_equal "test.001", json["identifier"]

  json = @test2.assemble_json
  assert_equal 58, json.length
  assert_equal "test.002", json["identifier"]
end

def test_csv_to_es_fields
json = @test1.assemble_json
assert_equal "1887-01-01", json["date"]

contributors = [{"name"=>"Jessica Dussault"}, {"name"=>"Greg Tunink"}, {"name"=>"Karin Dalziel"}]
assert_equal contributors, json["contributor"]

Expand Down
15 changes: 7 additions & 8 deletions test/es_index_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,12 @@ class Datura::ElasticsearchIndexTest < Minitest::Test

# stub in get_schema so that we can test get_schema_mapping without
# worrying about integration with actual index

class Datura::Elasticsearch::Index
def get_schema
raw = File.read(
File.join(
File.expand_path(File.dirname(__FILE__)),
"fixtures/es_mapping_2.0.json"
raw = File.read(
File.join(
File.expand_path(File.dirname(__FILE__)),
"fixtures/es_mapping_2.0.json"
)
)
JSON.parse(raw)
Expand All @@ -44,9 +43,9 @@ def test_get_schema_mapping
es = Datura::Elasticsearch::Index.new(@@options)
es.get_schema_mapping
assert es.schema_mapping["fields"]
assert_equal 46, es.schema_mapping["fields"].length
assert_equal 60, es.schema_mapping["fields"].length
assert_equal(
/^.*_d$|^.*_i$|^.*_k$|^.*_n$|^.*_t$|^.*_t_en$|^.*_t_es$/,
/^(?:.*_d|.*_i|.*_k|.*_n|.*_t|.*_t_en|.*_t_es)$/,
es.schema_mapping["dynamic"]
)
end
Expand Down Expand Up @@ -76,7 +75,7 @@ def test_valid_document?
assert es.valid_document?({
"creator" => [
{
"subcategory" => "a",
"category2" => "a",
"data_type" => "a",
"keyword_k" => "a"
}
Expand Down
Loading