Skip to content

Commit

Permalink
Merge pull request #4924 from sul-dlss/t4915-update-reports-2
Browse files Browse the repository at this point in the history
Update more reports to use RepositoryObject and RepositoryObjectVersion
  • Loading branch information
jcoyne authored Apr 24, 2024
2 parents 4515e2b + 3bde485 commit 5313b70
Show file tree
Hide file tree
Showing 15 changed files with 133 additions and 104 deletions.
16 changes: 9 additions & 7 deletions app/reports/invalid_edtf_dates.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ class InvalidEdtfDates
DATE_VALUE_JSON_PATH = JsonPath.new('$..value').freeze

SQL = <<~SQL.squish.freeze
SELECT jsonb_path_query_array(description, '#{DATE_JSONB_PATH}') ->> 0 as dates,
external_identifier,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
jsonb_path_exists(description, '#{DATE_JSONB_PATH}')
SELECT jsonb_path_query_array(rov.description, '#{DATE_JSONB_PATH}') ->> 0 as dates,
ro.external_identifier,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND jsonb_path_exists(rov.description, '#{DATE_JSONB_PATH}')
SQL

def self.report
Expand All @@ -48,7 +50,7 @@ def self.rows(sql)
next if invalid_values.blank?

collection_druid = rows.first['collection_id']
collection_name = Collection.find_by(external_identifier: collection_druid)&.label
collection_name = RepositoryObject.collections.find_by(external_identifier: collection_druid)&.label

[
id,
Expand Down
4 changes: 2 additions & 2 deletions app/reports/invalid_edtf_structured_dates.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class InvalidEdtfStructuredDates
# Prints the CSV header row, then builds a reporter for every DRO whose head
# version's description contains a date with an "edtf" encoding code and asks
# it to report.
#
# The jsonb test must be passed as a SQL string condition and evaluated against
# the joined repository_object_versions row. Supplying it as a hash condition
# (`where(head_version: "...")`) hands the SQL text to Active Record as a value
# to match the association against, so the path expression is never executed.
# NOTE(review): assumes `head_version` is an Active Record association on
# RepositoryObject backed by the repository_object_versions table — confirm
# against the model.
def self.report
  puts "item_druid,catalogRecordId,collection_druid,invalid_values,reason\n"

  edtf_encoded = "jsonb_path_exists(repository_object_versions.description, '$.**.date.**.encoding.code ? (@ == \"edtf\")')"
  RepositoryObject.dros.joins(:head_version).where(edtf_encoded).find_each do |dro|
    new(dro:).report
  end
end
Expand Down Expand Up @@ -110,6 +110,6 @@ def collection_id
end

# Returns the catalogRecordId of the first catalog link whose catalog is
# "folio", read from the identification of the DRO's head version.
# Returns nil when there is no folio link or no catalogLinks entry at all.
def catalog_record_id
  # Array() turns a missing (nil) catalogLinks into an empty list instead of
  # letting #find blow up on nil.
  catalog_links = Array(dro.head_version.identification['catalogLinks'])
  catalog_links.find { |link| link['catalog'] == 'folio' }&.fetch('catalogRecordId')
end
end
16 changes: 9 additions & 7 deletions app/reports/invalid_form_source_codes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ class InvalidFormSourceCodes
].freeze
REGEX = "^(?!#{VALID_CODES.map { |code| "#{code}$" }.join('|')})".freeze
SQL = <<~SQL.squish.freeze
SELECT jsonb_path_query(description, '#{JSON_PATH} ? (@ like_regex "#{REGEX}")') ->> 0 as value,
external_identifier,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
jsonb_path_exists(description, '#{JSON_PATH} ? (@ like_regex "#{REGEX}")')
SELECT jsonb_path_query(rov.description, '#{JSON_PATH} ? (@ like_regex "#{REGEX}")') ->> 0 as value,
ro.external_identifier,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND jsonb_path_exists(rov.description, '#{JSON_PATH} ? (@ like_regex "#{REGEX}")')
SQL

def self.report
Expand All @@ -45,7 +47,7 @@ def self.rows(sql)
.group_by { |row| row['external_identifier'] }
.map do |id, rows|
collection_druid = rows.first['collection_id']
collection_name = Collection.find_by(external_identifier: collection_druid)&.label
collection_name = RepositoryObject.collections.find_by(external_identifier: collection_druid)&.label

[
id,
Expand Down
18 changes: 10 additions & 8 deletions app/reports/invalid_form_uris.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@
class InvalidFormUris
JSON_PATH = '$.**.form.uri'
SQL = <<~SQL.squish.freeze
SELECT (jsonb_path_query_array(description, '#{JSON_PATH} ? (@ like_regex "^.*\.html$")') ||
jsonb_path_query_array(description, '#{JSON_PATH} ? (@ like_regex "^(?!https?://).*$")')) ->> 0 as value,
external_identifier,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
(jsonb_path_exists(description, '#{JSON_PATH} ? (@ like_regex "^(?!https?://).*$")') OR
jsonb_path_exists(description, '#{JSON_PATH} ? (@ like_regex "^.*\.html$")'))
SELECT (jsonb_path_query_array(rov.description, '#{JSON_PATH} ? (@ like_regex "^.*\.html$")') ||
jsonb_path_query_array(rov.description, '#{JSON_PATH} ? (@ like_regex "^(?!https?://).*$")')) ->> 0 as value,
ro.external_identifier,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND (jsonb_path_exists(rov.description, '#{JSON_PATH} ? (@ like_regex "^(?!https?://).*$")') OR
jsonb_path_exists(rov.description, '#{JSON_PATH} ? (@ like_regex "^.*\.html$")'))
SQL

def self.report
Expand Down
16 changes: 9 additions & 7 deletions app/reports/invalid_iso8601_dates.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ class InvalidIso8601Dates
DATE_VALUE_JSON_PATH = JsonPath.new('$..value').freeze

SQL = <<~SQL.squish.freeze
SELECT jsonb_path_query_array(description, '#{DATE_JSONB_PATH}') ->> 0 as dates,
external_identifier,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
jsonb_path_exists(description, '#{DATE_JSONB_PATH}')
SELECT jsonb_path_query_array(rov.description, '#{DATE_JSONB_PATH}') ->> 0 as dates,
ro.external_identifier,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND jsonb_path_exists(rov.description, '#{DATE_JSONB_PATH}')
SQL

def self.report
Expand All @@ -45,7 +47,7 @@ def self.rows(sql)
next if invalid_values.blank?

collection_druid = rows.first['collection_id']
collection_name = Collection.find_by(external_identifier: collection_druid)&.label
collection_name = RepositoryObject.collections.find_by(external_identifier: collection_druid)&.label

[
id,
Expand Down
28 changes: 15 additions & 13 deletions app/reports/invalid_language_uris.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,21 @@ class InvalidLanguageUris
JSON_PATH2 = '$.**.valueLanguage.**.uri' # both valueLanguage.uri and valueLanguage.valueScript.uri

SQL = <<~SQL.squish.freeze
SELECT (jsonb_path_query_array(description, '#{JSON_PATH1} ? (@ like_regex "^.*\.html$")') ||
jsonb_path_query_array(description, '#{JSON_PATH1} ? (@ like_regex "^(?!https?://).*$")') ||
jsonb_path_query_array(description, '#{JSON_PATH2} ? (@ like_regex "^.*\.html$")') ||
jsonb_path_query_array(description, '#{JSON_PATH2} ? (@ like_regex "^(?!https?://).*$")')) ->> 0 as contrib,
external_identifier,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
(jsonb_path_exists(description, '#{JSON_PATH1} ? (@ like_regex "^(?!https?://).*$")') OR
jsonb_path_exists(description, '#{JSON_PATH1} ? (@ like_regex "^.*\.html$")') OR
jsonb_path_exists(description, '#{JSON_PATH2} ? (@ like_regex "^(?!https?://).*$")') OR
jsonb_path_exists(description, '#{JSON_PATH2} ? (@ like_regex "^.*\.html$")')
)
SELECT (jsonb_path_query_array(rov.description, '#{JSON_PATH1} ? (@ like_regex "^.*\.html$")') ||
jsonb_path_query_array(rov.description, '#{JSON_PATH1} ? (@ like_regex "^(?!https?://).*$")') ||
jsonb_path_query_array(rov.description, '#{JSON_PATH2} ? (@ like_regex "^.*\.html$")') ||
jsonb_path_query_array(rov.description, '#{JSON_PATH2} ? (@ like_regex "^(?!https?://).*$")')) ->> 0 as contrib,
ro.external_identifier,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND ((jsonb_path_exists(rov.description, '#{JSON_PATH1} ? (@ like_regex "^(?!https?://).*$")') OR
jsonb_path_exists(rov.description, '#{JSON_PATH1} ? (@ like_regex "^.*\.html$")') OR
jsonb_path_exists(rov.description, '#{JSON_PATH2} ? (@ like_regex "^(?!https?://).*$")') OR
jsonb_path_exists(rov.description, '#{JSON_PATH2} ? (@ like_regex "^.*\.html$")')
))
SQL

def self.report
Expand Down
15 changes: 9 additions & 6 deletions app/reports/invalid_location_uris.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@ class InvalidLocationUris
].freeze
REGEX = "\"^https?://(?!#{URL_PATTERNS_IGNORED.join('|')}).*$\"".freeze
# Query over the head version of every repository object of type 'dro' whose
# description has a JSON_PATH entry with a uri matching REGEX.
# Each row carries the object's external identifier, the value/uri/code of the
# first matching entry (index 0 of event_location), the folio catalogRecordId
# and the first isMemberOf collection id.
SQL = <<~SQL.squish.freeze
  SELECT ro.external_identifier,
         event_location #>> '{0,value}' as value,
         event_location #>> '{0,uri}' as uri,
         event_location #>> '{0,code}' as code,
         jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
         jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
         FROM repository_objects AS ro,
         repository_object_versions AS rov,
         jsonb_path_query_array(rov.description, '#{JSON_PATH} ? (@.uri like_regex #{REGEX})') AS event_location
         WHERE
         ro.head_version_id = rov.id
         AND ro.object_type = 'dro'
         AND jsonb_path_exists(rov.description, '#{JSON_PATH}.uri ? (@ like_regex #{REGEX})')
SQL

def self.report
Expand Down
14 changes: 8 additions & 6 deletions app/reports/invalid_name_uris.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ class InvalidNameUris
REGEX = "\"^https?://(?!#{URL_PATTERNS_IGNORED.join('|')}).*$\"".freeze

SQL = <<~SQL.squish.freeze
SELECT jsonb_path_query_array(description, '#{JSON_PATH} ? (@ like_regex #{REGEX})') as contrib,
external_identifier,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
jsonb_path_exists(description, '#{JSON_PATH} ? (@ like_regex #{REGEX})')
SELECT jsonb_path_query_array(rov.description, '#{JSON_PATH} ? (@ like_regex #{REGEX})') as contrib,
ro.external_identifier,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND jsonb_path_exists(rov.description, '#{JSON_PATH} ? (@ like_regex #{REGEX})')
SQL

def self.report
Expand Down
14 changes: 8 additions & 6 deletions app/reports/invalid_role_uris.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ class InvalidRoleUris
REGEX = "\"^https?://(?!#{URL_PATTERNS.join('|')}).*$\"".freeze

SQL = <<~SQL.squish.freeze
SELECT jsonb_path_query_array(description, '#{JSON_PATH} ? (@ like_regex #{REGEX})') as value,
external_identifier,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
jsonb_path_exists(description, '#{JSON_PATH} ? (@ like_regex #{REGEX})')
SELECT jsonb_path_query_array(rov.description, '#{JSON_PATH} ? (@ like_regex #{REGEX})') as value,
ro.external_identifier,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND jsonb_path_exists(rov.description, '#{JSON_PATH} ? (@ like_regex #{REGEX})')
SQL

def self.report
Expand Down
16 changes: 9 additions & 7 deletions app/reports/invalid_subject_non_source_uris.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ class InvalidSubjectNonSourceUris
# HT: https://stackoverflow.com/a/3809435
REGEX = 'https?://(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,255}\.[a-zA-Z0-9()]{1,6}([-a-zA-Z0-9()@:%_\+.~#?&//=]*)'
SQL = <<~SQL.squish.freeze
SELECT jsonb_path_query(description, '#{JSON_PATH} ? (!(@.**.uri like_regex "#{REGEX}")).**.uri') ->> 0 as invalid_values,
external_identifier,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
jsonb_path_exists(description, '#{JSON_PATH} ? (!(@.**.uri like_regex "#{REGEX}")).**.uri')
SELECT jsonb_path_query(rov.description, '#{JSON_PATH} ? (!(@.**.uri like_regex "#{REGEX}")).**.uri') ->> 0 as invalid_values,
ro.external_identifier,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND jsonb_path_exists(rov.description, '#{JSON_PATH} ? (!(@.**.uri like_regex "#{REGEX}")).**.uri')
SQL

def self.report
Expand All @@ -38,7 +40,7 @@ def self.rows(sql)
.group_by { |row| row['external_identifier'] }
.map do |id, rows|
collection_druid = rows.first['collection_id']
collection_name = Collection.find_by(external_identifier: collection_druid)&.label
collection_name = RepositoryObject.collections.find_by(external_identifier: collection_druid)&.label

[
id,
Expand Down
14 changes: 8 additions & 6 deletions app/reports/property_existence_dros.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ class PropertyExistenceDros
JSON_PATH = "strict $.**.#{PROPERTY} ? (@.size() > 0)".freeze # when property is array

SQL = <<~SQL.squish.freeze
SELECT external_identifier as item_druid,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id
FROM "dros" WHERE
jsonb_path_exists(dros.description, '#{JSON_PATH}')
SELECT ro.external_identifier as item_druid,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND jsonb_path_exists(rov.description, '#{JSON_PATH}')
SQL

def self.report
Expand All @@ -35,7 +37,7 @@ def self.rows(sql_query)

sql_result_rows.map do |row|
collection_druid = row['collection_id']
collection_name = Collection.find_by(external_identifier: collection_druid)&.label
collection_name = RepositoryObject.collections.find_by(external_identifier: collection_druid)&.head_version&.label

[
row['item_druid'],
Expand Down
20 changes: 11 additions & 9 deletions app/reports/property_multiple_events.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,16 @@ class PropertyMultipleEvents
PURL_JSONB_PATH = 'strict $.purl'

SQL = <<~SQL.squish.freeze
SELECT jsonb_path_query(description, '#{PURL_JSONB_PATH}') ->> 0 as purl,
external_identifier,
label,
jsonb_path_query(structural, '$.isMemberOf') ->> 0 as collection_id,
jsonb_path_query(administrative, '$.hasAdminPolicy') ->> 0 as apo
FROM "dros" WHERE
jsonb_array_length(jsonb_path_query_array(description, '#{EVENT_JSONB_PATH}')) > 1
AND NOT jsonb_path_exists(dros.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId')
SELECT jsonb_path_query(rov.description, '#{PURL_JSONB_PATH}') ->> 0 as purl,
ro.external_identifier,
rov.label,
jsonb_path_query(rov.structural, '$.isMemberOf') ->> 0 as collection_id,
jsonb_path_query(rov.administrative, '$.hasAdminPolicy') ->> 0 as apo
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'dro'
AND jsonb_array_length(jsonb_path_query_array(rov.description, '#{EVENT_JSONB_PATH}')) > 1
AND NOT jsonb_path_exists(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId')
SQL

def self.report
Expand All @@ -34,7 +36,7 @@ def self.result_rows(sql)
.group_by { |row| row['external_identifier'] }
.map do |_id, rows|
collection_druid = rows.first['collection_id']
collection_name = Collection.find_by(external_identifier: collection_druid)&.label
collection_name = RepositoryObject.collections.find_by(external_identifier: collection_druid)&.head_version&.label

[
rows.first['purl'],
Expand Down
12 changes: 7 additions & 5 deletions app/reports/property_nesting_level_collections.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ class PropertyNestingLevelCollections
# https://www.postgresql.org/docs/14/functions-json.html#STRICT-AND-LAX-MODES
JSON_PATH = "strict $.**{#{MIN_NESTING_LEVEL} TO LAST}.#{PROPERTY}".freeze
SQL = <<~SQL.squish.freeze
SELECT external_identifier as collection_druid,
jsonb_path_query(identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId
FROM "collections" WHERE
jsonb_path_exists(collections.description, '#{JSON_PATH}')
SELECT ro.external_identifier as collection_druid,
jsonb_path_query(rov.identification, '$.catalogLinks[*] ? (@.catalog == "folio").catalogRecordId') ->> 0 as catalogRecordId
FROM repository_objects AS ro, repository_object_versions AS rov WHERE
ro.head_version_id = rov.id
AND ro.object_type = 'collection'
AND jsonb_path_exists(rov.description, '#{JSON_PATH}')
SQL

def self.report
Expand All @@ -28,7 +30,7 @@ def self.rows(sql_query)

sql_result_rows.map do |row|
collection_druid = row['collection_druid']
collection_name = Collection.find_by(external_identifier: collection_druid)&.label
collection_name = RepositoryObject.collections.find_by(external_identifier: collection_druid)&.label

[
collection_druid,
Expand Down
Loading

0 comments on commit 5313b70

Please sign in to comment.