Skip to content

Commit

Permalink
Add another variant of et al
Browse files Browse the repository at this point in the history
  • Loading branch information
dshorthouse committed Jul 1, 2024
1 parent dfa8215 commit 40d4b1c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
7 changes: 4 additions & 3 deletions lib/dwc_agent/constants.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module DwcAgent
\b\d+\(?(?i:[[:alpha:]])\)?\b|
\b[,;]?\s*(?:et\.?\s+al|&\s+al)\.?|
\b[,;]?\s*(?i:etal)\.?|
\b[,;]?\s*(?i:et.al)\.?|
\b\s+(bis|ter)(\b|\z)|
\bu\.\s*a\.|
\b[,;]?\s*(?i:and|&)?\s*(?i:others|party)\s*\b|
Expand Down Expand Up @@ -108,7 +109,7 @@ module DwcAgent
\b[,;]\s+\d+\.?\z|
[!@?]|
[,]?\d+|
\s+\d+?(\/|\.)?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x)(\/|\.)\d+|
\s+\d+?[\/.]?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x|xi|xii)[\/.]\d+|
[,;]\z|
^\w{0,2}\z|
^[A-Z]{2,}\z|
Expand Down Expand Up @@ -168,8 +169,8 @@ module DwcAgent
# Extended-mode pattern matching residual punctuation on an otherwise
# cleaned-up name string. Four alternatives:
#   1. optional whitespace, one of & , ; . and optional whitespace at the
#      start of a line
#   2. any square bracket, anywhere
#   3. a run of backticks, quotes, periods, commas, ! or ? at the start of
#      a line
#   4. a run of backticks, quotes or commas at the end of a line
#
# Alternatives 3 and 4 match whole runs, so doubled quotes or a quote
# followed by a comma are consumed in a single match.
# Note that ^ and $ are per-line anchors in Ruby, not whole-string anchors.
# NOTE(review): presumably callers remove every match (e.g. via gsub) --
# confirm against the parser code that consumes this constant.
POST_STRIP_TIDY = %r{
  ^\s*[&,;.]\s*|
  [\[\]]|
  ^[`'".,!?]+|
  [`'",]+$
}x

CHAR_SUBS = {
Expand Down
13 changes: 13 additions & 0 deletions spec/dwc_agent/parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2163,5 +2163,18 @@ module DwcAgent
expect(parsed[0].values_at(:given, :family)).to eq(["Luke", nil])
end

# A name wrapped in double quotes, with a comma just inside the closing
# quote, must parse to its bare given/family parts.
it "should strip out preceding and trailing quotes or commas" do
  names = parser.parse("\"H.Pittier,\"")
  given_and_family = names[0].values_at(:given, :family)
  expect(given_and_family).to eq(["H.", "Pittier"])
end

# The "et.al" variant (a period where the space should be) must be
# dropped, leaving the two real names intact and in order.
it "should strip out malformed et al" do
  names = parser.parse("A. Ward; J. Dyer et.al")
  expected_pairs = [["A.", "Ward"], ["J.", "Dyer"]]
  expected_pairs.each_with_index do |pair, position|
    expect(names[position].values_at(:given, :family)).to eq(pair)
  end
end

end
end

0 comments on commit 40d4b1c

Please sign in to comment.