From b39cdda157cd4a2698afe74d4df07ada42808f52 Mon Sep 17 00:00:00 2001 From: Charlie Morris <523381+cdmo@users.noreply.github.com> Date: Mon, 28 Sep 2020 13:11:54 -0400 Subject: [PATCH] Adds scan_sim and sorts the psulib_config_spec.rb tests (#247) --- lib/traject/psulib_config.rb | 3 + ...pecial_collections_accessioning_number.mrc | 1 + spec/lib/traject/psulib_config_spec.rb | 70 +++++++++++-------- 3 files changed, 43 insertions(+), 31 deletions(-) create mode 100644 spec/fixtures/special_collections_accessioning_number.mrc diff --git a/lib/traject/psulib_config.rb b/lib/traject/psulib_config.rb index 8c8c7f27..cbabfe20 100644 --- a/lib/traject/psulib_config.rb +++ b/lib/traject/psulib_config.rb @@ -109,6 +109,9 @@ # Library of Congress number to_field 'lccn_ssim', extract_marc('010a'), trim_punctuation +# Special Collections Accessioning Number ("scan") +to_field 'scan_sim', extract_marc('099a') + # Title fields # ## Title Search Fields diff --git a/spec/fixtures/special_collections_accessioning_number.mrc b/spec/fixtures/special_collections_accessioning_number.mrc new file mode 100644 index 00000000..cee7fc7b --- /dev/null +++ b/spec/fixtures/special_collections_accessioning_number.mrc @@ -0,0 +1 @@ +05133cpc a2200421Ii 450000100090000000300060000900500170001500800410003204000290007303500220010210000410012424500300016526400150019530000400021033600210025033700250027133800220029633800230031850601410034152010540048252401190153654005800165554101130223554517230234860000420407165000390411365500360415265500340418865500220422271000430424485600640428709900100435154100190436154101010438099300520448199300760453394901020460926782112SIRSI20200910131154.0200910i19601995pau eng d aUPMbengedacserdacUPM a(OCoLC)11946325711 aOates, Joyce Carol,d1938-ecreator.10aJoyce Carol Oates papers. 0c1960-1995. a0.5flinear feet (1 Hollinger Box ) atext2rdacontent aunmediated2rdamedia asheet2rdacontent avolume2rdacarrier aFor information on accessing this collection please contact the Special Collections Research Services Unit at ul-spcolref@lists.psu.edu.3 aJoyce Carol Oates (born June 16, 1938) is an American writer born in Lockport, New York. Oates published her first book in 1962 and has since published over 58 novels, as well as a number of plays and novellas, and many volumes of short stories, poetry, and nonfiction. She has won many awards for her writing, including the National Book Award, for her novel them (1969), two O. Henry Awards, the National Humanities Medal and the Jerusalem Prize (2019). In addition to writing and publishing her work in prose, poetry, and translation, Oates operated a small press and published the literary magazine The Ontario Review alongside her husband and author Raymond J. Smith (1930-2008) from 1974-2008.Oates currently serves as the Roger S. Berlind '52 Professor Emerita in the Humanities with the Program in Creative Writing at Princeton University. While officially retired, Oates continues to teach creative writing, write, and publish her work. This collection contains correspondence, typescripts, and publications dated between 1968 and 1995. aJoyce Carol Oates papers, #09981, Special Collections Library, Pennsylvania State University, University Park, PA. aPhotocopies of original materials may be made available for research purposes at the discretion of the Eberly Family Special Collections Library. Photocopies or reproductions of original materials may be subject to fees as outlined by the Pennsylvania State University Libraries reproduction policies. Copyright is retained by the creators of items in these papers, or their descendants, as stipulated by United States copyright law. Patrons seeking advice on the availability of unpublished materials for publication should consult relevant copyright law and laws of libel.1 aThe Pennsylvania State University Libraries purchased this material from Lord Durham Rare Books in May 2019. aJoyce Carol Oates (born June 16, 1938) is an American writer born in Lockport, New York. Oates published her first book in 1962 and has since published over 58 novels, as well as a number of plays and novellas, and many volumes of short stories, poetry, and nonfiction. She has won many awards for her writing, including the National Book Award, for her novel them (1969), two O. Henry Awards, the National Humanities Medal and the Jerusalem Prize (2019). Her novels Black Water (1992), What I Lived For (1994), Blonde (2000), and short story collections, The Wheel of Love (1970) and Lovely, Dark, Deep: Stories (2014) were each finalists for the Pulitzer Prize. Oates is known to be one of the most prolific contemporary American writers, and her work has appeared under the pseudonyms Rosamind Smith and Lauren Kelley. At the time of writing, Oates' most recent publication is The Surviving Child , forthcoming in 2019. In addition to writing and publishing her work in prose, poetry, and translation, Oates operated a small press and published the literary magazine The Ontario Review alongside her husband and author Raymond J. Smith (1930-2008) from 1974-2008. Additionally, Oates worked as a professor of Creative Writing at both the University of Windsor (1968-1978) and later Princeton University (1978-).Oates currently serves as the Roger S. Berlind '52 Professor Emerita in the Humanities with the Program in Creative Writing at Princeton University. While officially retired, Oates continues to teach creative writing, write, and publish her work. Oates also continues to be active on Twitter with an account established by her publisher Harper Collins under the account @JoyceCarolOates.10aOates, Joyce Carol,d1938-vArchives. 0aWomen authors, AmericanvArchives. 7aPersonal correspondence.2lcgft 7aPublications (documents)2aat 7aTypescripts.2aat2 aLord Durham Rare Books,eformer owner.423Finding aid:uhttp://libraries.psu.edu/findingaids/9981.htm a09981 aAcquired 2019. cPurchased with funds from the aHelen F. Faust Women Writers Acquisition and Travel Fund ;d2018 a700000010124 nThe Hastings Libraries Endowment a770000000051 nHelen F. Faust Women Writers Acquisition and Travel Fund a9981 box01 GST/AN/03/08wASISc1i000076915529lSPC-PATRNOmUP-SPECCOLrYsYtARCHIVESu5/16/2019 \ No newline at end of file diff --git a/spec/lib/traject/psulib_config_spec.rb b/spec/lib/traject/psulib_config_spec.rb index 1b56ce5d..a46e45cc 100644 --- a/spec/lib/traject/psulib_config_spec.rb +++ b/spec/lib/traject/psulib_config_spec.rb @@ -4,6 +4,7 @@ RSpec.describe 'Psulib_config spec:' do let(:leader) { '1234567890' } + let(:fixture_path) { './spec/fixtures' } before(:all) do c = './lib/traject/psulib_config.rb' @@ -11,19 +12,29 @@ @indexer.load_config_file(c) end - describe 'Record with music numeric should have semicolons for all but subfield e' do - let(:field) { 'music_numerical_ssm' } - let(:music_383) do - { '383' => { 'ind1' => '1', 'ind2' => '0', 'subfields' => [{ 'b' => 'op. 36' }, - { 'b' => 'op. 86' }, - { 'b' => 'op. 35' }, - { 'e' => 'Bach' }, - { 'e' => 'Motzart' }] } } + describe 'id' do + context 'one record with trailing whitespace' do + let(:id) do + { '001' => '2 ' } + end + let(:result) { @indexer.map_record(MARC::Record.new_from_hash('fields' => [id], 'leader' => leader)) } + + it 'should strip off white space at the end' do + expect(result['id']).to eq ['2'] + end end - let(:result) { @indexer.map_record(MARC::Record.new_from_hash('fields' => [music_383], 'leader' => leader)) } + context 'one record with two 001 values' do + let(:id1) do + { '001' => '2' } + end + let(:id2) do + { '001' => '3' } + end + let(:result) { @indexer.map_record(MARC::Record.new_from_hash('fields' => [id1, id2], 'leader' => leader)) } - it 'has some semi colons' do - expect(result[field]).to eq ['op. 36; op. 86; op. 35', 'Bach', 'Motzart'] + it 'should only take the first match' do + expect(result['id']).to eq ['2'] + end end end @@ -44,29 +55,26 @@ end end - describe 'id' do - context 'one record with trailing whitespace' do - let(:id) do - { '001' => '2 ' } - end - let(:result) { @indexer.map_record(MARC::Record.new_from_hash('fields' => [id], 'leader' => leader)) } + describe 'Record with music numeric should have semicolons for all but subfield e' do + let(:field) { 'music_numerical_ssm' } + let(:music_383) do + { '383' => { 'ind1' => '1', 'ind2' => '0', 'subfields' => [{ 'b' => 'op. 36' }, + { 'b' => 'op. 86' }, + { 'b' => 'op. 35' }, + { 'e' => 'Bach' }, + { 'e' => 'Motzart' }] } } + end + let(:result) { @indexer.map_record(MARC::Record.new_from_hash('fields' => [music_383], 'leader' => leader)) } - it 'should strip off white space at the end' do - expect(result['id']).to eq ['2'] - end + it 'has some semi colons' do + expect(result[field]).to eq ['op. 36; op. 86; op. 35', 'Bach', 'Motzart'] end - context 'one record with two 001 values' do - let(:id1) do - { '001' => '2' } - end - let(:id2) do - { '001' => '3' } - end - let(:result) { @indexer.map_record(MARC::Record.new_from_hash('fields' => [id1, id2], 'leader' => leader)) } + end - it 'should only take the first match' do - expect(result['id']).to eq ['2'] - end + describe 'Special collections accessioning numbers' do + it 'are processed from the 099a' do + result = @indexer.map_record(MARC::Reader.new(File.join(fixture_path, 'special_collections_accessioning_number.mrc')).to_a.first) + expect(result['scan_sim']).to contain_exactly '09981' end end end