diff --git a/vufind/searchspecs_20240216.yaml b/vufind/searchspecs_20240216.yaml new file mode 100644 index 0000000..373d418 --- /dev/null +++ b/vufind/searchspecs_20240216.yaml @@ -0,0 +1,684 @@ +--- +# Listing of search types and their component parts and weights. +# +# Format is: +# searchType: +# # CustomMunge is an optional section to define custom pre-processing of +# # user input when Dismax does not apply. See below for details of +# # munge actions. +# CustomMunge: +# MungeName1: +# - [action1, actionParams] +# - [action2, actionParams] +# - [action3, actionParams] +# MungeName2: +# - [action1, actionParams] +# # While CustomMunge only applies to Lucene search, DismaxMunge is a +# # counterpart that applies to Dismax and eDismax queries. DismaxMunge +# # contains a chain of custom munge operations as described below. Note +# # that using this function is NOT RECOMMENDED if you can accomplish your +# # goals more elegantly through Solr schema adjustments, but it may be +# # useful in some situations, such as when you are connecting to a Solr +# # instance that is beyond your direct control. Here is an example stanza: +# DismaxMunge: +# - [preg_replace, '/^.*$/', '"$1"'] +# # DismaxFields is optional and defines the fields sent to the Dismax handler +# # when we are able to use it. QueryFields will be used for advanced +# # searches that Dismax cannot support. QueryFields is always used if no +# # DismaxFields section is defined. +# DismaxFields: +# - field1^boost +# - field2^boost +# - field3^boost +# # DismaxParams is optional and allows you to override default Dismax settings +# # (i.e. mm / bf) on a search-by-search basis. Enclose the parameter values +# # in quotes for proper behavior. If you want global default values for these +# # settings, you can edit the appropriate search handler in +# # solr/biblio/conf/solrconfig.xml. +# DismaxParams: +# - [param1_name, param1_value] +# - [param2_name, param2_value] +# - [param3_name, param3_value] +# # This optional setting may be used to specify which Dismax handler to use. By +# # default, VuFind provides two options: dismax (for the old, standard +# # Dismax) and edismax (for Extended Dismax). You can also configure your own +# # in solrconfig.xml, but VuFind relies on the name "edismax" to identify an +# # Extended Dismax handler. If you omit this setting, the default value from +# # the default_dismax_handler setting in the [Index] section of config.ini +# # will be used. +# DismaxHandler: dismax|edismax +# # QueryFields define the fields we are searching when not using Dismax; VuFind +# # detects queries that will not work with Dismax and switches to QueryFields +# # as needed. +# QueryFields: +# SolrField: +# - [howToMungeSearchstring, weight] +# - [differentMunge, weight] +# DifferentSolrField: +# - [howToMunge, weight] +# # The optional FilterQuery section allows you to AND a static query to the +# # dynamic query generated using the QueryFields; see JournalTitle below +# # for an example. This is applied whether we use DismaxFields or +# # QueryFields. +# FilterQuery: (optional Lucene filter query) +# ExactSettings: +# DismaxFields: ... +# QueryFields: ... +# # All the same settings as above, but for exact searches, i.e. search terms +# # enclosed in quotes. Allows different fields or weights for exact +# # searches. See below for commented-out examples. +# +# ...etc. +# +# GlobalExtraParams allows extra parameters to be (conditionally) added to the Solr +# search request. +# +# Example showcasing all the available conditions: +# +# GlobalExtraParams: +# - param: bf +# value: "recip(rord(publishDateSort),1,1000,1000)" +# conditions: +# - NoDismaxParams +# - bf +# - bq +# - SearchTypeIn: +# - AllFields +# - Title +# - Author +# - SearchTypeNotIn: +# - Series +# +# Value can be a single string or a list of strings to repeat the parameter. +# +# Available conditions: +# +# - NoDismaxParams: None of the search types specifies any of the listed parameters +# in DismaxParams +# - SearchTypeIn At least one of the search types must be in the given list +# - AllSearchTypesIn All of the search types must be in the given list +# - SearchTypeNotIn None of the search types must be in the given list +# +# Example: Boost more recent material if any of the search fields is of type +# "AllFields" or "Title" unless there's a search type specific boost: +# +# GlobalExtraParams: +# - param: bf +# value: "recip(ms(NOW/DAY,publishDateSort),1,1000,1000)" +# conditions: +# - SearchTypeIn: +# - AllFields +# - Title +# - NoDismaxParams: +# - bf +# - bq +# +# +# Example: Boost material in English if any of the search fields is of type +# "AllFields" unless any of the search fields is of type "year": +# +# GlobalExtraParams: +# - param: bq +# value: "language:English^100" +# conditions: +# - SearchTypeNotIn: year +# - SearchTypeIn: AllFields +# +# +#----------------------------------------------------------------------------------- +# +# Within the QueryFields area, fields are OR'd together, unless they're in an +# anonymous array with a numeric instead of alphanumeric key, in which case the +# first element is a two-value array that tells us what the type (AND or OR) and +# weight of the whole group should be. +# +# So, given: +# +# test: +# QueryFields: +# A: +# - [onephrase, 500] +# - [and, 200] +# B: +# - [and, 100] +# - [or, 50] +# # Start an anonymous array to group; first element indicates AND grouping +# # and a weight of 50 +# 0: +# 0: +# - AND +# - 50 +# C: +# - [onephrase, 200] +# D: +# - [onephrase, 300] +# # Note the "not" attached to the field name as a minus, and the use of ~ +# # to mean null ("no special weight") +# -E: +# - [or, ~] +# D: +# - [or, 100] +# +# ...and the search string +# +# test "one two" +# +# ...we'd get +# +# (A:"test one two"^500 OR +# A:(test AND "one two")^ 200 OR +# B:(test AND "one two")^100 OR +# B:(test OR "one two")^50 +# ( +# C:("test one two")^200 AND +# D:"test one two"^300 AND +# -E:(test OR "one two") +# )^50 OR +# D:(test OR "one two")^100 +# ) +# +#----------------------------------------------------------------------------------- +# +# Munge types are string manipulation rules, and consist of: +# +# onephrase: eliminate all quotes and do it as a single phrase. +# testing "one two" +# ...becomes ("testing one two") +# +# and: AND the terms together +# testing "one two" +# ...becomes (testing AND "one two") +# +# or: OR the terms together +# testing "one two" +# ...becomes (testing OR "one two") +# +# identity: Use the search as-is +# testing "one two" +# ...becomes (testing "one two") +# +# Additional Munge types can be defined in the CustomMunge section. Each array +# entry under CustomMunge defines a new named munge type. Each array entry under +# the name of the munge type specifies a string manipulation operation. Operations +# will be applied in the order listed, and different operations take different +# numbers of parameters. +# +# Munge operations: +# +# [append, text] - Append text to the end of the user's search string +# [lowercase] - Convert string to lowercase +# [preg_replace, pattern, replacement] - Perform a regular expression replace +# using the preg_replace() PHP function. If you use backreferences in your +# replacement phrase, be sure to escape dollar signs (i.e. \$1, not $1). +# [ucfirst] - Uppercase the first letter of each word in the string +# [uppercase] - Convert string to uppercase +# +# See the CallNumber search below for an example of custom munging in action. +# +#----------------------------------------------------------------------------------- +# +# Note that you may create a "@parent_yaml" entry at the very top of the file to +# inherit sections from another file. For example: +# +# "@parent_yaml": "/path/to/my/file.yaml" +# +# Only sections not found in this file will be loaded in from the parent file. +# In some complex scenarios, this can be a useful way of sharing settings +# between multiple configured VuFind instances. You can create a chain of parent +# files if necessary. +# +# If @parent_yaml cannot be accessed as an absolute path, it will also be tried +# relative to the path of the file defining it. +# +#----------------------------------------------------------------------------------- + +# These searches use Dismax when possible: +Author: + DismaxFields: + - author^350 + - author2^100 + - author_additional + - author_corporate + - author_variant + - author2_variant + - GND_txt_mv^200 + - author_os_txtP_mv + DismaxHandler: edismax + DismaxParams: + # - [bq, format:eJournal^237] + # - [bq, format:Journal^284] + # - [bq, format:eBook^255] + # - [bq, format:Book^500] + # - [bq, format:Article^620] + # - [bq, format:"electronic+Article"^620] + # - [bq, format_facet:"Serienband"^0.1] + + - [bq, publishDateSort:2023^90] + - [bq, publishDateSort:2022^80] + - [bq, publishDateSort:2021^70] + - [bq, publishDateSort:2020^60] + - [bq, publishDateSort:2019^50] + - [bq, publishDateSort:2018^45] + - [bq, publishDateSort:2017^40] + - [bq, publishDateSort:2016^35] + - [bq, publishDateSort:2015^30] + - [bq, publishDateSort:2014^25] + - [bq, publishDateSort:2013^20] + - [bq, publishDateSort:2024^100] + + #Verbesserte Phrasensuche, eWW +QueryFields: + author: + - [onephrase, 350] + - [author_initial, 100] + author2: + - [onephrase, 100] + - [author_initial, ~] + author_additional: + - [onephrase, 100] + - [author_initial, ~] + GND_txt_mv: + - [onephrase, 200] + - [author_initial, ~] +ExactSettings: + QueryFields: + author: + - [onephrase, 350] + author2: + - [onephrase, 100] + author_additional: + - [onephrase, 100] + GND_txt_mv: + - [onephrase, 200] + + + + + + + +ISN: + DismaxFields: + - isbn + - issn + DismaxHandler: edismax + +Subject: + DismaxFields: + - topic_unstemmed^150 + - topic^100 + - geographic^50 + - genre^50 + - era + - title_short^150 + - title_full_unstemmed^100 + - title_full^100 + - title^50 + - title_alt^50 + - title_new^50 + - title_old + - series^100 + - series2 + - title_os_txtP_mv + DismaxParams: + - [bq, format:eJournal^237] + - [bq, format:Journal^284] + - [bq, format:eBook^255] + - [bq, format:Book^500] + - [bq, format:Article^620] + - [bq, format:"electronic+Article"^620] + - [bq, format_facet:"Serienband"^0.1] + + - [bq, publishDateSort:2023^1280] + - [bq, publishDateSort:2022^1260] + - [bq, publishDateSort:2021^1240] + - [bq, publishDateSort:2020^1220] + - [bq, publishDateSort:2019^1200] + - [bq, publishDateSort:2018^1180] + - [bq, publishDateSort:2017^1160] + - [bq, publishDateSort:2016^1150] + - [bq, publishDateSort:2015^1140] + - [bq, publishDateSort:2014^1130] + - [bq, publishDateSort:2013^1120] + - [bq, publishDateSort:202*^1500] + DismaxHandler: edismax + +Keyword: + DismaxFields: + - topic_unstemmed^50 + - topic^50 + - GND_txt_mv + DismaxHandler: edismax + +Coordinate: + DismaxFields: + - long_lat_display + DismaxHandler: edismax + +# This field definition is a compromise that supports both journal-level and +# article-level data. The disadvantage is that hits in article titles will +# be mixed in. If you are building a purely article-oriented index, you should +# customize this to remove all of the title_* fields and focus entirely on the +# container_title field. +JournalTitle: + DismaxFields: + - title_short^500 + - title_full_unstemmed^450 + - title_full^400 + - title^300 + - container_title^250 + - title_alt^200 + - title_new^100 + - title_old + - series^100 + - series2 + - title_os_txtP_mv + DismaxParams: + - [bq,format:Journal^200] + - [bq,format:eJournal^100] + DismaxHandler: edismax + FilterQuery: "format:Journal OR format:eJournal" +# ExactSettings: +# DismaxFields: +# - title_full_unstemmed^450 +# FilterQuery: "format:Journal OR format:Article" + +Title: + DismaxFields: + - title_short^500 + - title_full_unstemmed^650 + - title_full^400 + - title_fullGer^400 + - title^300 + - title_alt^200 + - title_new^100 + - title_old + - series^100 + - series2 + - title_os_txtP_mv + DismaxHandler: edismax +# ExactSettings: +# DismaxFields: +# - title_full_unstemmed^450 + +BKL: + DismaxMunge: + - [preg_replace, '/^./', '"$0'] + - [append, '"'] + DismaxFields: + - bklnumber + - bklname + DismaxHandler: edismax + +Series: + DismaxFields: + - series^100 + - series2 + DismaxHandler: edismax + +AllFields: + DismaxFields: + - title_short + - title_full_unstemmed + - title_full + - title + - title_alt + - title_new + - series^50 + - series^100 +# - series2^30 + - author^300 +# - authorswithroles_txt_mv^300 + - contents^10 + - topic_unstemmed^550 + - topic^500 + - geographic^300 + - class^240 + - bklname^250 + - genre^300 + - allfields_unstemmed^10 + - fulltext_unstemmed^10 + - allfields + - fulltext + - description + - isbn + - issn + - long_lat_display + - allfieldsGer + - id^50 + - hierarchy_top_id + - journal^500 + - is_hierarchy_title^500 + - hierarchy_top_title^500 + # DismaxHandler: edismax + DismaxParams: + - [bq, format:eJournal^237] + - [bq, format:Journal^284] + - [bq, format:eBook^255] + - [bq, format:Book^500] + - [bq, format:Article^620] + - [bq, format:"electronic+Article"^620] + - [bq, format_facet:"Serienband"^0.1] + - [bq, format_facet:"Zeitschriften"^500] + - [bq, genre_facet:"Rezension"^0.1] + - [bq, genre_facet:"book-review"^0.1] +# Alternative zum abgestufen Boosting des Erscheinungsjahres, eWW +# see #35 +# see #59 + - [bf, 'ord(publishDateSort)^500'] +# Hebis inspired + # - [boost, sum(product(max(0,sum(product(abs(ms(NOW/YEAR,publishDateSort)),-5.285e-13),1)),6.5),500)] +# Docs inspired + # - [bf=recip(ms(NOW/Year,ord(publishDateSort)),1,1000,1000)] +# Old Approach + # - [bq, publishDateSort:2023^280] + # - [bq, publishDateSort:2022^260] + # - [bq, publishDateSort:2021^240] + # - [bq, publishDateSort:2020^220] + # - [bq, publishDateSort:2019^200] + # - [bq, publishDateSort:2018^180] + # - [bq, publishDateSort:2017^160] + # - [bq, publishDateSort:2016^150] + # - [bq, publishDateSort:2015^140] + # - [bq, publishDateSort:2014^130] + # - [bq, publishDateSort:2013^120] + # - [bq, publishDateSort:202*^220] + + +# - [bq, collection_details:"GBV_ILN_11"^300] +# - [tie, 0.1] + # DismaxHandler: edismax + + + +JournalSearch: + DismaxFields: + - title_short^500 + - title_full_unstemmed^650 + - title_full^400 + - title^300 + - container_title^250 + - title_alt^200 + - title_new^100 + - title_old + - series^100 + - series2 + - title_os_txtP_mv + DismaxParams: + - [bq,format:Journal^600] + - [bq,format:eJournal^300] + DismaxHandler: edismax + FilterQuery: "format:Journal OR format:eJournal OR format: \"Serial Volume\"" +# ExactSettings: +# DismaxFields: +# - title_full_unstemmed^450 +# FilterQuery: "format:Journal OR format:Article" + +ShelfMark: + DismaxMunge: + # Strip quotes: + # - [preg_replace, '/["]/', ''] + # Escape whitespaces: + - [preg_replace, '/[ ]/', '\ '] + # Escape colons (unescape first to avoid double-escapes): + - [preg_replace, '/(\\:)/', ':'] + - [preg_replace, '/:/', '\:'] + # Strip pre-existing trailing asterisks: + # - [preg_replace, '/\*+$/', ''] + # Add iln 11 + - [preg_replace, '/^./', '11\:$0'] + DismaxFields: + - signature_iln_str_mv + - signature_iln_scis_mv + DismaxHandler: edismax + +Provenience: + DismaxFields: + - provenience_txtP_mv + DismaxHandler: edismax + +# These are advanced searches that never use Dismax: +id: + QueryFields: + id: + - [onephrase, ~] + +ParentID: + QueryFields: + hierarchy_parent_id: + - [onephrase, ~] + +# Fields for exact matches originating from alphabetic browse +ids: + QueryFields: + id: + - [or, ~] + +TopicBrowse: + QueryFields: + topic_browse: + - [onephrase, ~] + + + +AuthorBrowse: + QueryFields: + author_browse: + - [onephrase, ~] + +TitleBrowse: + QueryFields: + title_full: + - [onephrase, ~] + +DeweyBrowse: + QueryFields: + dewey-raw: + - [onephrase, ~] + +LccBrowse: + QueryFields: + callnumber-raw: + - [onephrase, ~] + +CallNumber: + # We use two similar munges here -- one for exact matches, which will get + # a very high boost factor, and one for left-anchored wildcard searches, + # which will return a larger number of hits at a lower boost. + CustomMunge: + callnumber_exact: + # Strip whitespace and quotes: + - [preg_replace, '/[ "]/', ''] + # Escape colons (unescape first to avoid double-escapes): + - [preg_replace, '/(\\:)/', ':'] + - [preg_replace, '/:/', '\:'] + # Strip pre-existing trailing asterisks: + - [preg_replace, '/\*+$/', ''] + callnumber_fuzzy: + # Strip whitespace and quotes: + - [preg_replace, '/[ "]/', ''] + # Escape colons (unescape first to avoid double-escapes): + - [preg_replace, '/(\\:)/', ':'] + - [preg_replace, '/:/', '\:'] + # Strip pre-existing trailing asterisks, then add a new one: + - [preg_replace, '/\*+$/', ''] + - [append, "*"] + QueryFields: + callnumber-search: + - [callnumber_exact, 1000] + - [callnumber_fuzzy, ~] + dewey-search: + - [callnumber_exact, 1000] + - [callnumber_fuzzy, ~] + +publisher: + DismaxFields: + - publisher^100 + - pulishPlace^100 + QueryFields: + publisher: + - [and, 100] + - [or, ~] + +year: + DismaxFields: + - publishDate^100 + QueryFields: + publishDate: + - [and, 100] + - [or, ~] + +language: + QueryFields: + language: + - [and, ~] + +toc: + DismaxFields: + - contents^100 + QueryFields: + contents: + - [and, 100] + - [or, ~] + +topic: + QueryFields: + topic: + - [and, 50] + topic_facet: + - [and, ~] + +geographic: + QueryFields: + geographic: + - [and, 50] + geographic_facet: + - [and, ~] + +genre: + QueryFields: + genre: + - [and, 50] + genre_facet: + - [and, ~] + +era: + QueryFields: + era: + - [and, ~] + +oclc_num: + CustomMunge: + oclc_num: + - [preg_replace, "/[^0-9]/", ""] + # trim leading zeroes: + - [preg_replace, "/^0*/", ""] + QueryFields: + oclc_num: + - [oclc_num, ~] + +WorkKeys: + QueryFields: + work_keys_str_mv: + - [and, 100] + - [or, ~]