From 170261aaa9257a30b12708e6339d79bc1c061c43 Mon Sep 17 00:00:00 2001 From: Jane Sandberg Date: Tue, 26 Mar 2024 15:53:53 -0700 Subject: [PATCH] Add keyword configuration option to OCLC newly added --- .../oclc/lc_call_slips/keyword_field.rb | 44 +++++++++++++ app/models/oclc/lc_call_slips/record.rb | 8 ++- app/models/oclc/lc_call_slips/selector.rb | 4 ++ .../oclc/lc_call_slips/keyword_field_spec.rb | 64 ++++++++++++++++++ spec/models/oclc/lc_call_slips/record_spec.rb | 65 +++++++++++++++++++ 5 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 app/models/oclc/lc_call_slips/keyword_field.rb create mode 100644 spec/models/oclc/lc_call_slips/keyword_field_spec.rb diff --git a/app/models/oclc/lc_call_slips/keyword_field.rb b/app/models/oclc/lc_call_slips/keyword_field.rb new file mode 100644 index 00000000..008af0ad --- /dev/null +++ b/app/models/oclc/lc_call_slips/keyword_field.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true +module Oclc + module LcCallSlips + # This class is responsible for searching a given + # MARC::DataField for the keywords that a selector + # is interested in. + class KeywordField + def initialize(field:, keywords:) + @field = field + @keywords = keywords + end + + def match? + keyword_field? && field_contains_keywords? + end + + private + + attr_reader :field, :keywords + + def keyword_field? + field.is_a?(MARC::DataField) && field.tag.match?(/^[12578]/) + end + + def field_contains_keywords? + field.any? { |subfield| subfield_contains_keywords?(subfield) } + end + + def subfield_contains_keywords?(subfield) + words_in_subfield = subfield.value.split(' ') + words_in_subfield.any? { |found_word| word_is_keyword? found_word } + end + + def word_is_keyword?(word) + keywords.any? do |desired_keyword| + # Add ^ and $ to make sure that we match the whole word, + # then turn the * wildcard into .* + desired_keyword_as_regexp = Regexp.new('^' + desired_keyword.gsub('*', '.*') + '$', 'i') + word.match? 
desired_keyword_as_regexp + end + end + end + end +end diff --git a/app/models/oclc/lc_call_slips/record.rb b/app/models/oclc/lc_call_slips/record.rb index c4eb9b76..4fda24d3 100644 --- a/app/models/oclc/lc_call_slips/record.rb +++ b/app/models/oclc/lc_call_slips/record.rb @@ -14,7 +14,7 @@ def generally_relevant? end def relevant_to_selector?(selector:) - location_relevant_to_selector?(selector:) && (call_number_in_range_for_selector?(selector:) || subject_relevant_to_selector?(selector:)) + location_relevant_to_selector?(selector:) && (call_number_in_range_for_selector?(selector:) || subject_relevant_to_selector?(selector:) || keywords_relevant_to_selector?(selector:)) end def location_relevant_to_selector?(selector:) @@ -41,6 +41,12 @@ def class_relevant_to_selector?(selector:) false end + def keywords_relevant_to_selector?(selector:) + keywords = selector.keywords + return false if keywords.blank? + record.fields.any? { |field| KeywordField.new(field:, keywords:).match? } + end + def subject_relevant_to_selector?(selector:) return false unless selector.subjects diff --git a/app/models/oclc/lc_call_slips/selector.rb b/app/models/oclc/lc_call_slips/selector.rb index 694177e8..d058691b 100644 --- a/app/models/oclc/lc_call_slips/selector.rb +++ b/app/models/oclc/lc_call_slips/selector.rb @@ -24,6 +24,10 @@ def classes call_number_ranges.pluck(:class).uniq end + def keywords + @keywords ||= selector_config[selector_key][:keywords] + end + def subjects selector_config[selector_key][:subjects] end diff --git a/spec/models/oclc/lc_call_slips/keyword_field_spec.rb b/spec/models/oclc/lc_call_slips/keyword_field_spec.rb new file mode 100644 index 00000000..cd16d7c8 --- /dev/null +++ b/spec/models/oclc/lc_call_slips/keyword_field_spec.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true +require 'rails_helper' + +RSpec.describe Oclc::LcCallSlips::KeywordField do + shared_examples 'a match' do + it 'match? 
returns true' do + expect(described_class.new(field:, keywords:).match?).to eq(true) + end + end + shared_examples 'not a match' do + it 'match? returns false' do + expect(described_class.new(field:, keywords:).match?).to eq(false) + end + end + context 'when field is a control field' do + let(:field) { MARC::ControlField.new('001', 'SCSB-10482146') } + let(:keywords) { ['cantaloup*'] } + it_behaves_like 'not a match' + end + + context 'when field is a 245' do + let(:field) do + MARC::DataField.new('245', '0', '0', + MARC::Subfield.new('a', 'Cantaloups')) + end + let(:keywords) { ['cantaloup*'] } + it_behaves_like 'a match' + end + + context 'when field is a 246' do + let(:field) do + MARC::DataField.new('246', '0', '0', + MARC::Subfield.new('a', 'Cantaloupe culture')) + end + let(:keywords) { ['cantaloup*'] } + it_behaves_like 'a match' + end + + context 'when field is a 260' do + let(:field) do + MARC::DataField.new('260', '0', '0', + MARC::Subfield.new('b', 'The Rocky Ford Cantaloupe Seed Breeders\' Association')) + end + let(:keywords) { ['cantaloup*'] } + it_behaves_like 'a match' + context 'when keyword includes multiple wildcards' do + let(:keywords) { ['*nt*loup*'] } + it_behaves_like 'a match' + end + context 'when the first matching keyword comes late in the array' do + let(:keywords) { ['sediment', 'metamorphosis', 'geolog*', 'igneous', 'rock*', 'sandstone'] } + it_behaves_like 'a match' + end + end + + context 'when field is a 300' do + let(:field) do + MARC::DataField.new('300', '0', '0', + MARC::Subfield.new('a', '1 online resource (2 pages), digital, PDF file')) + end + let(:keywords) { ['online'] } + it_behaves_like 'not a match' + end +end diff --git a/spec/models/oclc/lc_call_slips/record_spec.rb b/spec/models/oclc/lc_call_slips/record_spec.rb index aa4b8c17..afa67427 100644 --- a/spec/models/oclc/lc_call_slips/record_spec.rb +++ b/spec/models/oclc/lc_call_slips/record_spec.rb @@ -407,4 +407,69 @@ expect(oclc_record.generally_relevant?).to 
eq(true) end end + + describe 'keywords' do + let(:marc_record) { MARC::Record.new_from_hash('fields' => fields) } + let(:fields) do + [ + { '008' => '120627s2024 gerabg ob 001 0 gre d' }, + { '245' => { "ind1" => "1", + "ind2" => "0", + 'subfields' => [ + { 'a' => 'Chinese homestyle : ', + 'b' => 'everyday plant-based recipes for takeout, dim sum, noodles, and more / ', + 'c' => 'Maggie Zhu.' } + ] } } + ] + end + context 'with a selector with a keyword' do + let(:selector_config) do + { heijdra: { + keywords: ['chinese'] + } } + end + + it 'recognizes that it is relevant' do + expect(oclc_record.keywords_relevant_to_selector?(selector:)).to eq(true) + end + end + context 'with a selector without any keywords' do + let(:selector_config) do + { hatfield: { + classes: [{ class: 'PN', low_num: 6755, high_num: 6758 }] + } } + end + it 'does not mark a record as relevant based on its subjects' do + expect(oclc_record.keywords_relevant_to_selector?(selector:)).to eq(false) + expect(oclc_record.relevant_to_selector?(selector:)).to eq(false) + end + end + context 'with wildcard in the config file keywords' do + let(:selector_config) do + { heijdra: { + keywords: ['chin*'] + } } + end + + it 'matches using the wildcard' do + expect(oclc_record.keywords_relevant_to_selector?(selector:)).to eq(true) + end + + context 'keyword in record contains configured keyword, but config does not have wildcard at beginning and end' do + let(:fields) do + [ + { '008' => '120627s2024 gerabg ob 001 0 gre d' }, + { '245' => { "ind1" => "1", + "ind2" => "0", + 'subfields' => [ + { 'a' => 'Machine learning : ' } + ] } } + ] + end + it 'does not mark a record as relevant based on its subjects' do + expect(oclc_record.keywords_relevant_to_selector?(selector:)).to eq(false) + end + end + end + end end