Skip to content

Commit

Permalink
feat: add support for AU, A2, A3 and A4 tags.
Browse files Browse the repository at this point in the history
  • Loading branch information
customcommander committed Jun 27, 2020
1 parent 17222db commit 0ad5b53
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 13 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ ER -
| AB | abstract | Abstract |
| AD | author_address | Author address |
| AN | acc_number | Accession number |
| AU | author | Primary author(s) |
| A2 | author_sec | Secondary author(s) |
| A3 | author_ter | Tertiary author(s) |
| A4 | author_sub | Subsidiary author(s) |
| AV | arch_loc | Location in archives |
| C1 | custom | Custom field 1; entry 0 in the `custom` array |
| C2 | custom | Custom field 2; entry 1 in the `custom` array |
Expand Down
8 changes: 8 additions & 0 deletions grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ function id(x) { return x[0]; }
{"name": "OTHER_TAG$subexpression$1", "symbols": ["DOI"]},
{"name": "OTHER_TAG$subexpression$1", "symbols": ["EDITION"]},
{"name": "OTHER_TAG$subexpression$1", "symbols": ["TITLE_ALT"]},
{"name": "OTHER_TAG$subexpression$1", "symbols": ["AUTHOR1"]},
{"name": "OTHER_TAG$subexpression$1", "symbols": ["AUTHOR2"]},
{"name": "OTHER_TAG$subexpression$1", "symbols": ["AUTHOR3"]},
{"name": "OTHER_TAG$subexpression$1", "symbols": ["AUTHOR4"]},
{"name": "OTHER_TAG", "symbols": ["OTHER_TAG$subexpression$1"], "postprocess": ([[d]]) => d},
{"name": "ABSTRACT", "symbols": [(lexer.has("AB") ? {type: "AB"} : AB), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'abstract' , value})},
{"name": "AUTHOR_ADDR", "symbols": [(lexer.has("AD") ? {type: "AD"} : AD), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author_address', value})},
Expand All @@ -51,6 +55,10 @@ function id(x) { return x[0]; }
{"name": "DOI", "symbols": [(lexer.has("DO") ? {type: "DO"} : DO), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'doi' , value})},
{"name": "EDITION", "symbols": [(lexer.has("ET") ? {type: "ET"} : ET), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'edition' , value})},
{"name": "TITLE_ALT", "symbols": [(lexer.has("J2") ? {type: "J2"} : J2), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'title_alt' , value})},
{"name": "AUTHOR1", "symbols": [(lexer.has("AU") ? {type: "AU"} : AU), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("NAME_CONTENT") ? {type: "NAME_CONTENT"} : NAME_CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author' , value})},
{"name": "AUTHOR2", "symbols": [(lexer.has("A2") ? {type: "A2"} : A2), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("NAME_CONTENT") ? {type: "NAME_CONTENT"} : NAME_CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author_sec' , value})},
{"name": "AUTHOR3", "symbols": [(lexer.has("A3") ? {type: "A3"} : A3), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("NAME_CONTENT") ? {type: "NAME_CONTENT"} : NAME_CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author_ter' , value})},
{"name": "AUTHOR4", "symbols": [(lexer.has("A4") ? {type: "A4"} : A4), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("NAME_CONTENT") ? {type: "NAME_CONTENT"} : NAME_CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author_sub' , value})},
{"name": "KEYWORD$ebnf$1", "symbols": ["LINE"]},
{"name": "KEYWORD$ebnf$1", "symbols": ["KEYWORD$ebnf$1", "LINE"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "KEYWORD", "symbols": [(lexer.has("KW") ? {type: "KW"} : KW), (lexer.has("SEP") ? {type: "SEP"} : SEP), "KEYWORD$ebnf$1"], "postprocess": ([,,lines]) =>
Expand Down
34 changes: 21 additions & 13 deletions grammar.ne
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,30 @@ OTHER_TAG -> ( KEYWORD
| DOI
| EDITION
| TITLE_ALT
| AUTHOR1
| AUTHOR2
| AUTHOR3
| AUTHOR4
)
{% ([[d]]) => d %}

ABSTRACT -> %AB %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'abstract' , value}) %}
AUTHOR_ADDR -> %AD %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'author_address', value}) %}
ACC_NUMBER -> %AN %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'acc_number' , value}) %}
ARCH_LOC -> %AV %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'arch_loc' , value}) %}
RP_STATUS -> %RP %SEP %RP_CONTENT __ {% ([,,{value}]) => ({key: 'reprint' , value}) %}
CAPTION -> %CA %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'caption' , value}) %}
CALL_NUMBER -> %CN %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'call_number' , value}) %}
PUB_LOC -> %CY %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'pub_loc' , value}) %}
DB_NAME -> %DB %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'db_name' , value}) %}
DB_PROV -> %DP %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'db_provider' , value}) %}
DOI -> %DO %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'doi' , value}) %}
EDITION -> %ET %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'edition' , value}) %}
TITLE_ALT -> %J2 %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'title_alt' , value}) %}
ABSTRACT -> %AB %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'abstract' , value}) %}
AUTHOR_ADDR -> %AD %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'author_address', value}) %}
ACC_NUMBER -> %AN %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'acc_number' , value}) %}
ARCH_LOC -> %AV %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'arch_loc' , value}) %}
RP_STATUS -> %RP %SEP %RP_CONTENT __ {% ([,,{value}]) => ({key: 'reprint' , value}) %}
CAPTION -> %CA %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'caption' , value}) %}
CALL_NUMBER -> %CN %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'call_number' , value}) %}
PUB_LOC -> %CY %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'pub_loc' , value}) %}
DB_NAME -> %DB %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'db_name' , value}) %}
DB_PROV -> %DP %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'db_provider' , value}) %}
DOI -> %DO %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'doi' , value}) %}
EDITION -> %ET %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'edition' , value}) %}
TITLE_ALT -> %J2 %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'title_alt' , value}) %}
AUTHOR1 -> %AU %SEP %NAME_CONTENT __ {% ([,,{value}]) => ({key: 'author' , value}) %}
AUTHOR2 -> %A2 %SEP %NAME_CONTENT __ {% ([,,{value}]) => ({key: 'author_sec' , value}) %}
AUTHOR3 -> %A3 %SEP %NAME_CONTENT __ {% ([,,{value}]) => ({key: 'author_ter' , value}) %}
AUTHOR4 -> %A4 %SEP %NAME_CONTENT __ {% ([,,{value}]) => ({key: 'author_sub' , value}) %}

KEYWORD -> %KW %SEP LINE:+
{% ([,,lines]) =>
Expand Down
10 changes: 10 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ const custom_add =
, acc.custom[k[1] - 1] = v
, acc );

/**
 * Reducer step for author-like tags: appends one parsed name to the list
 * stored under `key` on the accumulator, creating the list on first use.
 *
 * @param {Object} acc - Record being built; mutated in place.
 * @param {{key: string, value: string[]}} entry - Field name plus the name
 *   parts in the order last name, first name, optional suffix.
 * @returns {Object} The same `acc` object that was passed in.
 */
const name_add = (acc, entry) => {
  const {key, value} = entry;
  // A missing suffix defaults to the empty string; parts past the third are ignored.
  const [last_name, first_name, suffix = ''] = value;
  if (!acc[key]) {
    acc[key] = [];
  }
  acc[key].push({last_name, first_name, suffix});
  return acc;
};

const defaults =
acc =>
Object.assign
Expand All @@ -46,6 +52,10 @@ const to_record =
, 'abstract' , add
, 'acc_number' , add
, 'author_address', add
, 'author' , name_add
, 'author_sec' , name_add
, 'author_ter' , name_add
, 'author_sub' , name_add
, 'arch_loc' , add
, 'call_number' , add
, 'caption' , add
Expand Down
7 changes: 7 additions & 0 deletions lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ const lexer =
, AB: /AB(?= - )/
, AD: /AD(?= - )/
, AN: /AN(?= - )/
, AU: /AU(?= - )/
, A2: /A2(?= - )/
, A3: /A3(?= - )/
, A4: /A4(?= - )/
, AV: /AV(?= - )/
, C1: /C1(?= - )/
, C2: /C2(?= - )/
Expand Down Expand Up @@ -80,6 +84,9 @@ const lexer =
]
, RP_CONTENT: /(?:IN FILE|NOT IN FILE|ON REQUEST \(\d{2}\/\d{2}\/\d{4}\))/
, DATE_CONTENT: /(?:\d{4})?\/(?:(?:\d\d)?\/){2}(?:[A-Za-z \-]+)?/
// NAME_CONTENT: the value of a name tag, written as comma-separated parts.
//   part 1: one or more ASCII letters, spaces or hyphens
//   part 2: one or more ASCII letters, spaces, hyphens or dots
//   part 3+: optional, each one or more ASCII letters or dots (no spaces or
//            hyphens, so a space after the second comma is not matched)
// The `value` function turns the matched text into an array of trimmed parts,
// e.g. 'Phillips,A.J.,Sr.' -> ['Phillips', 'A.J.', 'Sr.'].
// NOTE(review): presumably consumed as [last_name, first_name, suffix] — confirm against the caller.
, NAME_CONTENT: { match: /[a-zA-Z \-]+,[a-zA-Z \-\.]+(?:,[a-zA-Z\.]+)*/
, value: name => name.split(',').map(part => part.trim())
}
, PUBYEAR_CONTENT : /\d{4}/
, CONTENT: /[a-zA-Z0-9 \-\.':/;]+/
}
Expand Down
29 changes: 29 additions & 0 deletions ris-parser.feature
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,35 @@ Scenario Outline: DA - Date
| /06// | date | {"year": "" , "month": "06", "day": "" , "info": ""} |
| 2020//25/Conf | date | {"year": "2020", "month": "" , "day": "25", "info": "Conf"} |

Scenario Outline: Authors
Given I have this RIS file
"""
TY - JOUR
AU - Doe, John
A2 - Doe, John
A3 - Doe, John
A4 - Doe, John
<tag> - <content>
ER -
"""
When I parse the file
Then I will find a reference where '<field>' is set to '<value>'

Examples:
| tag | content | field | value |
| AU | Phillips, A.J | author | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J","suffix":""}] |
| AU | Phillips, Albert John | author | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"Albert John","suffix":""}] |
| AU | Phillips,A.J.,Sr. | author | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J.","suffix":"Sr."}] |
| A2 | Phillips, A.J | author_sec | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J","suffix":""}] |
| A2 | Phillips, Albert John | author_sec | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"Albert John","suffix":""}] |
| A2 | Phillips,A.J.,Sr. | author_sec | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J.","suffix":"Sr."}] |
| A3 | Phillips, A.J | author_ter | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J","suffix":""}] |
| A3 | Phillips, Albert John | author_ter | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"Albert John","suffix":""}] |
| A3 | Phillips,A.J.,Sr. | author_ter | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J.","suffix":"Sr."}] |
| A4 | Phillips, A.J | author_sub | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J","suffix":""}] |
| A4 | Phillips, Albert John | author_sub | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"Albert John","suffix":""}] |
| A4 | Phillips,A.J.,Sr. | author_sub | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J.","suffix":"Sr."}] |

Scenario Outline: Other tags
Given I have this RIS file
"""
Expand Down
4 changes: 4 additions & 0 deletions sample.ris
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ DA - 2020/06/25/SuperConf
PY - 1981
RP - IN FILE
DO - 10.1177/0003122411414817
AU - Phillips, A.J
AU - Phillips, Albert John
AU - Phillips, Albert
AU - Phillips,A.J.,Sr.
ER -

TY - BOOK
Expand Down

0 comments on commit 0ad5b53

Please sign in to comment.