diff --git a/README.md b/README.md index a7a30dc..7b68b29 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,10 @@ ER - | AB | abstract | Abstract | | AD | author_address | Author address | | AN | acc_number | Accession number | +| AU | author | Primary author(s) | +| A2 | author_sec | Secondary author(s) | +| A3 | author_ter | Tertiary author(s) | +| A4 | author_sub | Subsidiary author(s) | | AV | arch_loc | Location in archives | | C1 | custom | Custom field 1; entry 0 in the `custom` array | | C2 | custom | Custom field 2; entry 1 in the `custom` array | diff --git a/grammar.js b/grammar.js index e895657..7be46f6 100644 --- a/grammar.js +++ b/grammar.js @@ -37,6 +37,10 @@ function id(x) { return x[0]; } {"name": "OTHER_TAG$subexpression$1", "symbols": ["DOI"]}, {"name": "OTHER_TAG$subexpression$1", "symbols": ["EDITION"]}, {"name": "OTHER_TAG$subexpression$1", "symbols": ["TITLE_ALT"]}, + {"name": "OTHER_TAG$subexpression$1", "symbols": ["AUTHOR1"]}, + {"name": "OTHER_TAG$subexpression$1", "symbols": ["AUTHOR2"]}, + {"name": "OTHER_TAG$subexpression$1", "symbols": ["AUTHOR3"]}, + {"name": "OTHER_TAG$subexpression$1", "symbols": ["AUTHOR4"]}, {"name": "OTHER_TAG", "symbols": ["OTHER_TAG$subexpression$1"], "postprocess": ([[d]]) => d}, {"name": "ABSTRACT", "symbols": [(lexer.has("AB") ? {type: "AB"} : AB), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'abstract' , value})}, {"name": "AUTHOR_ADDR", "symbols": [(lexer.has("AD") ? {type: "AD"} : AD), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author_address', value})}, @@ -51,6 +55,10 @@ function id(x) { return x[0]; } {"name": "DOI", "symbols": [(lexer.has("DO") ? {type: "DO"} : DO), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? 
{type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'doi' , value})}, {"name": "EDITION", "symbols": [(lexer.has("ET") ? {type: "ET"} : ET), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'edition' , value})}, {"name": "TITLE_ALT", "symbols": [(lexer.has("J2") ? {type: "J2"} : J2), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("CONTENT") ? {type: "CONTENT"} : CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'title_alt' , value})}, + {"name": "AUTHOR1", "symbols": [(lexer.has("AU") ? {type: "AU"} : AU), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("NAME_CONTENT") ? {type: "NAME_CONTENT"} : NAME_CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author' , value})}, + {"name": "AUTHOR2", "symbols": [(lexer.has("A2") ? {type: "A2"} : A2), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("NAME_CONTENT") ? {type: "NAME_CONTENT"} : NAME_CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author_sec' , value})}, + {"name": "AUTHOR3", "symbols": [(lexer.has("A3") ? {type: "A3"} : A3), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("NAME_CONTENT") ? {type: "NAME_CONTENT"} : NAME_CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author_ter' , value})}, + {"name": "AUTHOR4", "symbols": [(lexer.has("A4") ? {type: "A4"} : A4), (lexer.has("SEP") ? {type: "SEP"} : SEP), (lexer.has("NAME_CONTENT") ? {type: "NAME_CONTENT"} : NAME_CONTENT), "__"], "postprocess": ([,,{value}]) => ({key: 'author_sub' , value})}, {"name": "KEYWORD$ebnf$1", "symbols": ["LINE"]}, {"name": "KEYWORD$ebnf$1", "symbols": ["KEYWORD$ebnf$1", "LINE"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}}, {"name": "KEYWORD", "symbols": [(lexer.has("KW") ? {type: "KW"} : KW), (lexer.has("SEP") ? 
{type: "SEP"} : SEP), "KEYWORD$ebnf$1"], "postprocess": ([,,lines]) => diff --git a/grammar.ne b/grammar.ne index 99afd13..18a5994 100644 --- a/grammar.ne +++ b/grammar.ne @@ -31,22 +31,30 @@ OTHER_TAG -> ( KEYWORD | DOI | EDITION | TITLE_ALT + | AUTHOR1 + | AUTHOR2 + | AUTHOR3 + | AUTHOR4 ) {% ([[d]]) => d %} -ABSTRACT -> %AB %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'abstract' , value}) %} -AUTHOR_ADDR -> %AD %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'author_address', value}) %} -ACC_NUMBER -> %AN %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'acc_number' , value}) %} -ARCH_LOC -> %AV %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'arch_loc' , value}) %} -RP_STATUS -> %RP %SEP %RP_CONTENT __ {% ([,,{value}]) => ({key: 'reprint' , value}) %} -CAPTION -> %CA %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'caption' , value}) %} -CALL_NUMBER -> %CN %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'call_number' , value}) %} -PUB_LOC -> %CY %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'pub_loc' , value}) %} -DB_NAME -> %DB %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'db_name' , value}) %} -DB_PROV -> %DP %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'db_provider' , value}) %} -DOI -> %DO %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'doi' , value}) %} -EDITION -> %ET %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'edition' , value}) %} -TITLE_ALT -> %J2 %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'title_alt' , value}) %} +ABSTRACT -> %AB %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'abstract' , value}) %} +AUTHOR_ADDR -> %AD %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'author_address', value}) %} +ACC_NUMBER -> %AN %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'acc_number' , value}) %} +ARCH_LOC -> %AV %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'arch_loc' , value}) %} +RP_STATUS -> %RP %SEP %RP_CONTENT __ {% ([,,{value}]) => ({key: 'reprint' , value}) %} +CAPTION -> %CA %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'caption' , value}) %} +CALL_NUMBER -> %CN %SEP %CONTENT __ {% 
([,,{value}]) => ({key: 'call_number' , value}) %} +PUB_LOC -> %CY %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'pub_loc' , value}) %} +DB_NAME -> %DB %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'db_name' , value}) %} +DB_PROV -> %DP %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'db_provider' , value}) %} +DOI -> %DO %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'doi' , value}) %} +EDITION -> %ET %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'edition' , value}) %} +TITLE_ALT -> %J2 %SEP %CONTENT __ {% ([,,{value}]) => ({key: 'title_alt' , value}) %} +AUTHOR1 -> %AU %SEP %NAME_CONTENT __ {% ([,,{value}]) => ({key: 'author' , value}) %} +AUTHOR2 -> %A2 %SEP %NAME_CONTENT __ {% ([,,{value}]) => ({key: 'author_sec' , value}) %} +AUTHOR3 -> %A3 %SEP %NAME_CONTENT __ {% ([,,{value}]) => ({key: 'author_ter' , value}) %} +AUTHOR4 -> %A4 %SEP %NAME_CONTENT __ {% ([,,{value}]) => ({key: 'author_sub' , value}) %} KEYWORD -> %KW %SEP LINE:+ {% ([,,lines]) => diff --git a/index.js b/index.js index 194b336..ceab559 100644 --- a/index.js +++ b/index.js @@ -31,6 +31,12 @@ const custom_add = , acc.custom[k[1] - 1] = v , acc ); +const name_add = + (acc, {key, value: [last_name, first_name, suffix = '']}) => + ( acc[key] = acc[key] || [] + , acc[key].push({last_name, first_name, suffix}) + , acc ); + const defaults = acc => Object.assign @@ -46,6 +52,10 @@ const to_record = , 'abstract' , add , 'acc_number' , add , 'author_address', add + , 'author' , name_add + , 'author_sec' , name_add + , 'author_ter' , name_add + , 'author_sub' , name_add , 'arch_loc' , add , 'call_number' , add , 'caption' , add diff --git a/lexer.js b/lexer.js index d84c418..d551f78 100644 --- a/lexer.js +++ b/lexer.js @@ -34,6 +34,10 @@ const lexer = , AB: /AB(?= - )/ , AD: /AD(?= - )/ , AN: /AN(?= - )/ + , AU: /AU(?= - )/ + , A2: /A2(?= - )/ + , A3: /A3(?= - )/ + , A4: /A4(?= - )/ , AV: /AV(?= - )/ , C1: /C1(?= - )/ , C2: /C2(?= - )/ @@ -80,6 +84,9 @@ const lexer = ] , RP_CONTENT: /(?:IN FILE|NOT IN FILE|ON REQUEST 
\(\d{2}\/\d{2}\/\d{4}\))/ , DATE_CONTENT: /(?:\d{4})?\/(?:(?:\d\d)?\/){2}(?:[A-Za-z \-]+)?/ + , NAME_CONTENT: { match: /[a-zA-Z \-]+,[a-zA-Z \-\.]+(?:,[a-zA-Z\.]+)*/ + , value: name => name.split(',').map(part => part.trim()) + } , PUBYEAR_CONTENT : /\d{4}/ , CONTENT: /[a-zA-Z0-9 \-\.':/;]+/ } diff --git a/ris-parser.feature b/ris-parser.feature index ea425d0..de78908 100644 --- a/ris-parser.feature +++ b/ris-parser.feature @@ -57,6 +57,35 @@ Scenario Outline: DA - Date | /06// | date | {"year": "" , "month": "06", "day": "" , "info": ""} | | 2020//25/Conf | date | {"year": "2020", "month": "" , "day": "25", "info": "Conf"} | +Scenario Outline: Authors + Given I have this RIS file + """ + TY - JOUR + AU - Doe, John + A2 - Doe, John + A3 - Doe, John + A4 - Doe, John + <tag> - <content> + ER - + """ + When I parse the file + Then I will find a reference where '<field>' is set to '<value>' + + Examples: + | tag | content | field | value | + | AU | Phillips, A.J | author | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J","suffix":""}] | + | AU | Phillips, Albert John | author | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"Albert John","suffix":""}] | + | AU | Phillips,A.J.,Sr. | author | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J.","suffix":"Sr."}] | + | A2 | Phillips, A.J | author_sec | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J","suffix":""}] | + | A2 | Phillips, Albert John | author_sec | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"Albert John","suffix":""}] | + | A2 | Phillips,A.J.,Sr. 
| author_sec | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J.","suffix":"Sr."}] | + | A3 | Phillips, A.J | author_ter | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J","suffix":""}] | + | A3 | Phillips, Albert John | author_ter | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"Albert John","suffix":""}] | + | A3 | Phillips,A.J.,Sr. | author_ter | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J.","suffix":"Sr."}] | + | A4 | Phillips, A.J | author_sub | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J","suffix":""}] | + | A4 | Phillips, Albert John | author_sub | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"Albert John","suffix":""}] | + | A4 | Phillips,A.J.,Sr. | author_sub | [{"last_name":"Doe","first_name":"John","suffix":""},{"last_name":"Phillips","first_name":"A.J.","suffix":"Sr."}] | + Scenario Outline: Other tags Given I have this RIS file """ diff --git a/sample.ris b/sample.ris index d7fa8d5..ed127c3 100644 --- a/sample.ris +++ b/sample.ris @@ -8,6 +8,10 @@ DA - 2020/06/25/SuperConf PY - 1981 RP - IN FILE DO - 10.1177/0003122411414817 +AU - Phillips, A.J +AU - Phillips, Albert John +AU - Phillips, Albert +AU - Phillips,A.J.,Sr. ER - TY - BOOK