# Patch summary (free text ahead of the first "diff --git" header; not part of any hunk).
#
#  - bmj: escape the dots of ".full.pdf" in the article PDF pattern.
#  - dallozbib: recognize /ouvrage/<collection>/<slug>_<digits> as BOOK/HTML and
#    /recherche as SEARCH/HTML; add a test file.
#  - doctrinal-plus: recognize /revue-<...>/<id> as RECORD/HTML and /recherche-<...>
#    as SEARCH/HTML; add a test file.
#  - hospimedia: new platform (manifest, parser, test file).
#  - numerique-premium: recognize /doi/epdf/<doi> (BOOK/PDF), /doi/book/<doi> (TOC/HTML)
#    and /action/doSearch (SEARCH/HTML); add a test file.
#  - thesesfr: also skip ECs whose User-Agent is one of the two ezPAARSE agent strings;
#    add the missing newline at end of file.
#
# Review changes relative to the submitted patch:
#  - numerique-premium: the DOI patterns used "10." with an unescaped dot, which matches
#    any character; it is now "10\." (same class of fix as the bmj hunk).
#  - hospimedia: comment typo "debuging" -> "debugging".
#
# NOTE(review): indentation and whitespace-only context lines were re-inferred from the
# hunk line counts; verify them against the working tree before applying. The post-image
# blob hashes on the "index" lines of hospimedia/parser.js and numerique-premium/parser.js
# predate the two review changes above.
diff --git a/bmj/parser.js b/bmj/parser.js
index 9d768f73b..2d5a44c81 100755
--- a/bmj/parser.js
+++ b/bmj/parser.js
@@ -18,7 +18,7 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {
 
   const doiPrefix = '10.1136/';
 
-  if ((match = /^\/content\/([a-z0-9]+)\/([0-9]+)\/([a-z0-9-]+).full.pdf$/i.exec(path)) !== null) {
+  if ((match = /^\/content\/([a-z0-9]+)\/([0-9]+)\/([a-z0-9-]+)\.full\.pdf$/i.exec(path)) !== null) {
     // /content/bmj/379/bmj-2022-071517.full.pdf
     result.rtype = 'ARTICLE';
     result.mime = 'PDF';
diff --git a/dallozbib/parser.js b/dallozbib/parser.js
index 2c08dd0e3..6bf8fd31d 100755
--- a/dallozbib/parser.js
+++ b/dallozbib/parser.js
@@ -22,6 +22,19 @@ module.exports = new Parser(function analyseEC(parsedUrl) {
     result.rtype = 'BOOK';
     result.mime = 'PDF';
     result.unitid = result.title_id = 'puc:' + param['puc'] + '-nu:' + param['nu'];
+
+  } else if ((match = /^\/ouvrage\/[a-z]+\/[a-z-]+_([0-9]+)$/i.exec(path)) !== null) {
+    // /ouvrage/mementos/droit-administratif-biens_9782247156900
+    result.rtype = 'BOOK';
+    result.mime = 'HTML';
+    result.unitid = match[1];
+    result.online_identifier = match[1];
+
+  } else if ((match = /^\/recherche$/i.exec(path)) !== null) {
+    // /recherche?query=droit+administratif+des+biens
+    result.rtype = 'SEARCH';
+    result.mime = 'HTML';
+
   }
 
   return result;
diff --git a/dallozbib/test/dallozbib.2024-11-20.csv b/dallozbib/test/dallozbib.2024-11-20.csv
new file mode 100644
index 000000000..e2379b8ce
--- /dev/null
+++ b/dallozbib/test/dallozbib.2024-11-20.csv
@@ -0,0 +1,3 @@
+out-online_identifier;out-unitid;out-rtype;out-mime;in-url
+9782247156900;9782247156900;BOOK;HTML;https://bibliotheque-lefebvre-dalloz-fr.ezpaarse.univ-paris1.fr/ouvrage/mementos/droit-administratif-biens_9782247156900
+;;SEARCH;HTML;https://bibliotheque-lefebvre-dalloz-fr.ezpaarse.univ-paris1.fr/recherche?query=droit+administratif+des+biens
diff --git a/doctrinal-plus/parser.js b/doctrinal-plus/parser.js
index a105baaac..b444867bd 100755
--- a/doctrinal-plus/parser.js
+++ b/doctrinal-plus/parser.js
@@ -72,6 +72,18 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {
     // /doctrinal/results
     result.rtype = 'SEARCH';
     result.mime = 'HTML';
+
+  } else if ((match = /^\/revue-[a-z-]+\/([0-9a-z-]+)$/i.exec(path)) !== null) {
+    // /revue-juridique/GAZPAL-la-gazette-du-palais
+    result.rtype = 'RECORD';
+    result.mime = 'HTML';
+    result.unitid = match[1];
+
+  } else if ((match = /^\/recherche-[a-z-]+$/i.exec(path)) !== null) {
+    // /recherche-notices-juridiques
+    result.rtype = 'SEARCH';
+    result.mime = 'HTML';
+
   }
 
   return result;
diff --git a/doctrinal-plus/test/test.2024-11-20.csv b/doctrinal-plus/test/test.2024-11-20.csv
new file mode 100644
index 000000000..9853a562d
--- /dev/null
+++ b/doctrinal-plus/test/test.2024-11-20.csv
@@ -0,0 +1,3 @@
+out-publication_date;out-title_id;out-unitid;out-rtype;out-mime;in-url
+;;GAZPAL-la-gazette-du-palais;RECORD;HTML;https://www.doctrinal.fr/revue-juridique/GAZPAL-la-gazette-du-palais
+;;;SEARCH;HTML;https://www-doctrinal-fr.ezpaarse.univ-paris1.fr/recherche-notices-juridiques
diff --git a/hospimedia/manifest.json b/hospimedia/manifest.json
new file mode 100644
index 000000000..f99008c67
--- /dev/null
+++ b/hospimedia/manifest.json
@@ -0,0 +1,13 @@
+{
+  "longname": "Hospimedia",
+  "name": "hospimedia",
+  "describe": "Recognizes the accesses to the platform Hospimedia",
+  "contact": "Frederic Truong inist, ezPAARSE",
+  "pkb": false,
+  "docurl": "https://analyses.ezpaarse.org/platforms/6723986a8e33b361a62282a8",
+  "domains": [
+    "www.hospimedia.fr"
+  ],
+  "version": "2024-11-20",
+  "status": "beta"
+}
\ No newline at end of file
diff --git a/hospimedia/parser.js b/hospimedia/parser.js
new file mode 100755
index 000000000..4eb528b60
--- /dev/null
+++ b/hospimedia/parser.js
@@ -0,0 +1,43 @@
+#!/usr/bin/env node
+
+'use strict';
+const Parser = require('../.lib/parser.js');
+
+/**
+ * Recognizes the accesses to the platform Hospimedia
+ * @param {Object} parsedUrl an object representing the URL to analyze
+ *                           main attributes: pathname, query, hostname
+ * @param {Object} ec an object representing the EC whose URL is being analyzed
+ * @return {Object} the result
+ */
+module.exports = new Parser(function analyseEC(parsedUrl, ec) {
+  let result = {};
+  let path = parsedUrl.pathname;
+  // uncomment this line if you need parameters
+  // let param = parsedUrl.query || {};
+
+  // use console.error for debugging
+  // console.error(parsedUrl);
+
+  let match;
+
+  if ((match = /^\/fiches-pratiques\/([0-9a-z-]+)$/i.exec(path)) !== null) {
+    // http://www.hospimedia.fr/fiches-pratiques/20241029-droit-quelles-sont-les-obligations-du-professionnel-de
+    result.rtype = 'ARTICLE';
+    result.mime = 'HTML';
+    result.unitid = match[1];
+
+  } else if ((match = /^\/studio\/medias\/([0-9a-z-]+)$/i.exec(path)) !== null) {
+    // http://www.hospimedia.fr/studio/medias/20240911-les-rendez-vous-de-semaine-de-quatre-jours
+    result.rtype = 'VIDEO';
+    result.mime = 'MISC';
+    result.unitid = match[1];
+
+  } else if (/^\/recherche$/i.test(path)) {
+    // http://www.hospimedia.fr/recherche?q=ehpad
+    result.rtype = 'SEARCH';
+    result.mime = 'HTML';
+  }
+
+  return result;
+});
diff --git a/hospimedia/test/hospimedia.2024-11-20.csv b/hospimedia/test/hospimedia.2024-11-20.csv
new file mode 100644
index 000000000..42b18ee42
--- /dev/null
+++ b/hospimedia/test/hospimedia.2024-11-20.csv
@@ -0,0 +1,4 @@
+out-unitid;out-rtype;out-mime;in-url
+;SEARCH;HTML;https://abonnes-hospimedia-fr.proxybib-pp.cnam.fr/recherche?q=ehpad
+20241029-droit-quelles-sont-les-obligations-du-professionnel-de;ARTICLE;HTML;https://abonnes-hospimedia-fr.proxybib-pp.cnam.fr/fiches-pratiques/20241029-droit-quelles-sont-les-obligations-du-professionnel-de
+20240911-les-rendez-vous-de-semaine-de-quatre-jours;VIDEO;MISC;https://abonnes-hospimedia-fr.proxybib-pp.cnam.fr/studio/medias/20240911-les-rendez-vous-de-semaine-de-quatre-jours
\ No newline at end of file
diff --git a/numerique-premium/parser.js b/numerique-premium/parser.js
index 9e5f0cf6d..78aa247df 100755
--- a/numerique-premium/parser.js
+++ b/numerique-premium/parser.js
@@ -5,15 +5,31 @@ const Parser = require('../.lib/parser.js');
 
 module.exports = new Parser(function analyseEC(parsedUrl, ec) {
   let result = {};
-  let path   = parsedUrl.pathname;
+  let path = parsedUrl.pathname;
   let match;
 
   if ((match = /^\/content\/([a-z]+)\/([0-9]+)$/i.exec(path)) !== null) {
     // http://www.numeriquepremium.com/content/books/9782728801749
-    result.rtype    = 'ABS';
-    result.mime     = 'HTML';
+    result.rtype = 'ABS';
+    result.mime = 'HTML';
     result.title_id = match[1] + '/' + match[2];
-    result.unitid   = match[2];
+    result.unitid = match[2];
+  } else if ((match = /^\/doi\/epdf\/(10\.[0-9]+\/([a-z0-9-.]+))$/i.exec(path)) !== null) {
+    // https://www.numeriquepremium.com/doi/epdf/10.14375/NP.9782072798238
+    result.rtype = 'BOOK';
+    result.mime = 'PDF';
+    result.unitid = match[2];
+    result.doi = match[1];
+  } else if ((match = /^\/doi\/book\/(10\.[0-9]+\/([a-z0-9-.]+))$/i.exec(path)) !== null) {
+    // https://www.numeriquepremium.com/doi/book/10.14375/NP.9782072798238#toc-containter
+    result.rtype = 'TOC';
+    result.mime = 'HTML';
+    result.unitid = match[2];
+    result.doi = match[1];
+  } else if (/^\/action\/doSearch$/i.test(path)) {
+    // https://www-numeriquepremium-com.ezpaarse.univ-paris1.fr/action/doSearch
+    result.rtype = 'SEARCH';
+    result.mime = 'HTML';
   }
 
   return result;
diff --git a/numerique-premium/test/numerique-premium.2024-11-20.csv b/numerique-premium/test/numerique-premium.2024-11-20.csv
new file mode 100644
index 000000000..cfb945696
--- /dev/null
+++ b/numerique-premium/test/numerique-premium.2024-11-20.csv
@@ -0,0 +1,6 @@
+out-title_id;out-doi;out-unitid;out-rtype;out-mime;in-url
+;10.14375/NP.9782072798238;NP.9782072798238;BOOK;PDF;https://www-numeriquepremium-com.ezpaarse.univ-paris1.fr/doi/epdf/10.14375/NP.9782072798238
+;10.14375/NP.9782072798238;NP.9782072798238;TOC;HTML;https://www-numeriquepremium-com.ezpaarse.univ-paris1.fr/doi/book/10.14375/NP.9782072798238#toc-containter
+;;;SEARCH;HTML;https://www-numeriquepremium-com.ezpaarse.univ-paris1.fr/action/doSearch?AllField=14-18%2C+penser+le+patriotisme
+books/9782728801749;;9782728801749;ABS;HTML;http://www.numeriquepremium.com/content/books/9782728801749
+;;;;;http://www.numeriquepremium.com.ezproxy.univ-paris3.fr/deliver/fulltext/books/editions-rue-dulm/9782728801749/9782728801749_fulltext_PAGENUMBERPLACEHOLDER.gif?itemId=/content/books/9782728801749&fmt=ahah&logEvent=false
\ No newline at end of file
diff --git a/thesesfr/parser.js b/thesesfr/parser.js
index 15e121094..bb7bb84fb 100755
--- a/thesesfr/parser.js
+++ b/thesesfr/parser.js
@@ -44,7 +44,10 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {
 
   let match;
 
-  if (ec['User-Agent'] === 'node') {
+  const userAgentezPAARSE = 'ezPAARSE (https://readmetrics.org; mailto:ezteam@couperin.org)';
+  const userAgentezPAARSEAnon = 'ezPAARSE (https://readmetrics.org; mailto:)';
+
+  if ((ec['User-Agent'] === 'node') || (ec['User-Agent'] === userAgentezPAARSE) || (ec['User-Agent'] === userAgentezPAARSEAnon)) {
     //NOP
   } else if (
 
@@ -133,4 +136,4 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {
   }
 
   return result;
-});
\ No newline at end of file
+});