Skip to content

Commit

Permalink
Merge pull request #733 from pulibrary/i678-keywords
Browse files Browse the repository at this point in the history
Add keyword configuration option to OCLC newly added
  • Loading branch information
christinach authored Mar 28, 2024
2 parents 10a9921 + 170261a commit dd6db49
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 1 deletion.
44 changes: 44 additions & 0 deletions app/models/oclc/lc_call_slips/keyword_field.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# frozen_string_literal: true
module Oclc
  module LcCallSlips
    # Decides whether a single MARC field mentions any of the keywords
    # that a selector is interested in.
    #
    # Only MARC::DataField instances whose tag starts with 1, 2, 5, 7 or 8
    # are searched. Each subfield value is split on whitespace and every
    # resulting word is compared, case-insensitively and as a whole word,
    # against each keyword. A `*` in a keyword is a wildcard that stands
    # for any run of characters (including none); every other character
    # in a keyword is matched literally.
    class KeywordField
      # Tags of the fields that are searched for keywords.
      SEARCHABLE_TAG = /\A[12578]/

      # @param field [Object] the MARC field to inspect; anything that is
      #   not a MARC::DataField never matches
      # @param keywords [Array<String>] the selector's keywords, each of
      #   which may contain `*` wildcards
      def initialize(field:, keywords:)
        @field = field
        @keywords = keywords
      end

      # @return [Boolean] true when the field is searchable and at least
      #   one word in one of its subfields matches a keyword
      def match?
        keyword_field? && field_contains_keywords?
      end

      private

      attr_reader :field, :keywords

      def keyword_field?
        field.is_a?(MARC::DataField) && field.tag.match?(SEARCHABLE_TAG)
      end

      def field_contains_keywords?
        field.any? { |subfield| subfield_contains_keywords?(subfield) }
      end

      def subfield_contains_keywords?(subfield)
        # to_s guards against a subfield that was built without a value.
        subfield.value.to_s.split(' ').any? { |found_word| word_is_keyword?(found_word) }
      end

      def word_is_keyword?(word)
        keyword_patterns.any? { |pattern| pattern.match?(word) }
      end

      # One compiled, case-insensitive pattern per keyword, built once per
      # instance rather than once per word that is examined.
      def keyword_patterns
        @keyword_patterns ||= Array(keywords).map do |desired_keyword|
          # Escape the keyword first so characters such as + . ( are taken
          # literally, then turn each (now escaped) * wildcard into .*
          # The \A and \z anchors make sure that we match the whole word.
          wildcarded = Regexp.escape(desired_keyword).gsub('\*', '.*')
          Regexp.new("\\A#{wildcarded}\\z", Regexp::IGNORECASE)
        end
      end
    end
  end
end
8 changes: 7 additions & 1 deletion app/models/oclc/lc_call_slips/record.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def generally_relevant?
end

# Decides whether this record should be shown to the given selector: the
# location check must pass, and at least one of the topical checks must pass.
# NOTE(review): this text comes from a rendered diff without +/- markers, so
# the two expressions below appear to be the removed line followed by its
# replacement. Only the last expression is returned.
def relevant_to_selector?(selector:)
# Appears to be the pre-change condition: call number range or subject only.
location_relevant_to_selector?(selector:) && (call_number_in_range_for_selector?(selector:) || subject_relevant_to_selector?(selector:))
# Condition that is actually returned: also accepts a keyword match.
location_relevant_to_selector?(selector:) && (call_number_in_range_for_selector?(selector:) || subject_relevant_to_selector?(selector:) || keywords_relevant_to_selector?(selector:))
end

def location_relevant_to_selector?(selector:)
Expand All @@ -41,6 +41,12 @@ def class_relevant_to_selector?(selector:)
false
end

# Whether any field of this record contains one of the selector's keywords.
# A selector with no keywords configured is never matched on keywords.
def keywords_relevant_to_selector?(selector:)
  wanted = selector.keywords
  if wanted.blank?
    false
  else
    record.fields.any? do |marc_field|
      KeywordField.new(field: marc_field, keywords: wanted).match?
    end
  end
end

def subject_relevant_to_selector?(selector:)
return false unless selector.subjects

Expand Down
4 changes: 4 additions & 0 deletions app/models/oclc/lc_call_slips/selector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ def classes
call_number_ranges.pluck(:class).uniq
end

# Keywords configured for this selector, remembered once a truthy value
# has been read from the selector configuration.
def keywords
  return @keywords if @keywords

  @keywords = selector_config[selector_key][:keywords]
end

# Subjects configured for this selector, read from the selector
# configuration on every call.
def subjects
  settings = selector_config[selector_key]
  settings[:subjects]
end
Expand Down
64 changes: 64 additions & 0 deletions spec/models/oclc/lc_call_slips/keyword_field_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# frozen_string_literal: true
require 'rails_helper'

# Specs for KeywordField#match?: which MARC fields are searched for
# keywords and how keywords containing the * wildcard are compared.
RSpec.describe Oclc::LcCallSlips::KeywordField do
# Both shared example groups expect the including context to define
# `field` and `keywords` with let.
shared_examples 'a match' do
it 'match? returns true' do
expect(described_class.new(field:, keywords:).match?).to eq(true)
end
end
shared_examples 'not a match' do
it 'match? returns false' do
expect(described_class.new(field:, keywords:).match?).to eq(false)
end
end
# A MARC::ControlField is not a MARC::DataField, so it is never searched.
context 'when field is a control field' do
let(:field) { MARC::ControlField.new('001', 'SCSB-10482146') }
let(:keywords) { ['cantaloup*'] }
it_behaves_like 'not a match'
end

# The trailing wildcard covers the final "s" and case is ignored.
context 'when field is a 245' do
let(:field) do
MARC::DataField.new('245', '0', '0',
MARC::Subfield.new('a', 'Cantaloups'))
end
let(:keywords) { ['cantaloup*'] }
it_behaves_like 'a match'
end

# Only one word of a multi-word subfield needs to match.
context 'when field is a 246' do
let(:field) do
MARC::DataField.new('246', '0', '0',
MARC::Subfield.new('a', 'Cantaloupe culture'))
end
let(:keywords) { ['cantaloup*'] }
it_behaves_like 'a match'
end

# The matching word sits in the middle of the subfield value.
context 'when field is a 260' do
let(:field) do
MARC::DataField.new('260', '0', '0',
MARC::Subfield.new('b', 'The Rocky Ford Cantaloupe Seed Breeders\' Association'))
end
let(:keywords) { ['cantaloup*'] }
it_behaves_like 'a match'
# Wildcards may appear at the start, middle and end of a keyword.
context 'when keyword includes multiple wildcards' do
let(:keywords) { ['*nt*loup*'] }
it_behaves_like 'a match'
end
# Here 'rock*' (fifth in the list) is the keyword that matches "Rocky".
context 'when the first matching keyword comes late in the array' do
let(:keywords) { ['sediment', 'metamorphosis', 'geolog*', 'igneous', 'rock*', 'sandstone'] }
it_behaves_like 'a match'
end
end

# Tag 300 does not start with 1, 2, 5, 7 or 8, so the field is ignored
# even though the word "online" appears in it.
context 'when field is a 300' do
let(:field) do
MARC::DataField.new('300', '0', '0',
MARC::Subfield.new('a', '1 online resource (2 pages), digital, PDF file'))
end
let(:keywords) { ['online'] }
it_behaves_like 'not a match'
end
end
65 changes: 65 additions & 0 deletions spec/models/oclc/lc_call_slips/record_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -407,4 +407,69 @@
expect(oclc_record.generally_relevant?).to eq(true)
end
end

# Specs for keyword-based relevance: a record is relevant to a selector when
# a word in one of its searched fields matches one of the selector's keywords.
describe 'keywords' do
  let(:marc_record) { MARC::Record.new_from_hash('fields' => fields) }
  # A record whose 245 title begins with the word "Chinese".
  let(:fields) do
    [
      { '008' => '120627s2024 gerabg ob 001 0 gre d' },
      { '245' => { "ind1" => "1",
                   "ind2" => "0",
                   'subfields' => [
                     { 'a' => 'Chinese homestyle : ',
                       'b' => 'everyday plant-based recipes for takeout, dim sum, noodles, and more / ',
                       'c' => 'Maggie Zhu.' }
                   ] } }
    ]
  end

  context 'with a selector with a keyword' do
    let(:selector_config) do
      { heijdra: {
        keywords: ['chinese']
      } }
    end

    it 'recognizes that it is relevant' do
      expect(oclc_record.keywords_relevant_to_selector?(selector:)).to eq(true)
    end
  end

  context 'with a selector without any keywords' do
    let(:selector_config) do
      { hatfield: {
        classes: [{ class: 'PN', low_num: 6755, high_num: 6758 }]
      } }
    end

    # Description corrected: this example is about keywords, not subjects.
    it 'does not mark a record as relevant based on keywords' do
      expect(oclc_record.keywords_relevant_to_selector?(selector:)).to eq(false)
      expect(oclc_record.relevant_to_selector?(selector:)).to eq(false)
    end
  end

  context 'with wildcard in the config file keywords' do
    let(:selector_config) do
      { heijdra: {
        keywords: ['chin*']
      } }
    end

    it 'matches using the wildcard' do
      expect(oclc_record.keywords_relevant_to_selector?(selector:)).to eq(true)
    end

    # "Machine" contains "chin" but does not start with it, so 'chin*'
    # must not match.
    context 'keyword in record contains configured keyword, but config does not have wildcard at beginning and end' do
      let(:fields) do
        [
          { '008' => '120627s2024 gerabg ob 001 0 gre d' },
          { '245' => { "ind1" => "1",
                       "ind2" => "0",
                       'subfields' => [
                         { 'a' => 'Machine learning : ' }
                       ] } }
        ]
      end

      # Description corrected: this example is about keywords, not subjects.
      it 'does not mark a record as relevant based on its keywords' do
        expect(oclc_record.keywords_relevant_to_selector?(selector:)).to eq(false)
      end
    end
  end
end
end

0 comments on commit dd6db49

Please sign in to comment.