From 3f1f154cafa50cb577c2c1c04bdcbfb45bd73bd9 Mon Sep 17 00:00:00 2001
From: Wayne
Date: Mon, 12 Aug 2024 11:17:37 +0800
Subject: [PATCH] develop parser sbj

---
 sbj/manifest.json           | 13 +++++++
 sbj/parser.js               | 68 +++++++++++++++++++++++++++++++++++++
 sbj/test/sbj.2024-08-09.csv | 13 +++++++
 3 files changed, 94 insertions(+)
 create mode 100644 sbj/manifest.json
 create mode 100755 sbj/parser.js
 create mode 100644 sbj/test/sbj.2024-08-09.csv

diff --git a/sbj/manifest.json b/sbj/manifest.json
new file mode 100644
index 00000000..735ec423
--- /dev/null
+++ b/sbj/manifest.json
@@ -0,0 +1,13 @@
+{
+  "longname": "Sports Business Journal",
+  "name": "sbj",
+  "describe": "Recognizes the accesses to the platform Sports Business Journal",
+  "contact": "Violita Kovchegov",
+  "pkb": false,
+  "docurl": "https://analyses.ezpaarse.org/platforms/6643cceb54ebd848d73993d6",
+  "domains": [
+    "www.sportsbusinessjournal.com"
+  ],
+  "version": "2024-08-09",
+  "status": "beta"
+}
\ No newline at end of file
diff --git a/sbj/parser.js b/sbj/parser.js
new file mode 100755
index 00000000..bcdb4989
--- /dev/null
+++ b/sbj/parser.js
@@ -0,0 +1,68 @@
+#!/usr/bin/env node
+
+'use strict';
+const Parser = require('../.lib/parser.js');
+
+/**
+ * Recognizes the accesses to the platform Sports Business Journal
+ * @param  {Object} parsedUrl an object representing the URL to analyze
+ *                            main attributes: pathname, query, hostname
+ * @param  {Object} ec        an object representing the EC whose URL is being analyzed
+ * @return {Object} recognized fields (rtype, mime, unitid, ...); empty if no pattern matches
+ */
+module.exports = new Parser(function analyseEC(parsedUrl, ec) {
+  let result = {};
+  let path = parsedUrl.pathname;
+  // uncomment this line if you need parameters
+  // let param = parsedUrl.query || {};
+
+  // use console.error for debugging
+  // console.error(parsedUrl);
+
+  let match;
+
+  if ((match = /^\/([a-zA-Z0-9-]+)\/([a-zA-Z0-9-]+)\/([0-9]{4})\/([0-9]{2})\/([0-9]{2})\.aspx$/i.exec(path)) !== null) {
+    // https://www.sportsbusinessjournal.com/Daily/Issues/2024/08/07.aspx
+    // https://www.sportsbusinessjournal.com/Daily/Issues/2024/08/05.aspx
+    // https://www.sportsbusinessjournal.com/Daily/Closing-Bell/2024/08/02.aspx
+    // https://www.sportsbusinessjournal.com/Daily/Closing-Bell/2024/07/31.aspx
+    // https://www.sportsbusinessjournal.com/SB-Blogs/Newsletter-Football/2024/07/25.aspx
+    // https://www.sportsbusinessjournal.com/SB-Blogs/Newsletter-Media/2024/08/05.aspx
+    result.rtype = 'ISSUE';
+    result.mime = 'HTML';
+    result.publication_date = `${match[3]}/${match[4]}/${match[5]}`;
+    result.unitid = `${match[2]}/${match[3]}/${match[4]}/${match[5]}`;
+
+  } else if (/^\/([a-zA-Z0-9-]+)\.aspx$/i.test(path)) {
+    // https://www.sportsbusinessjournal.com/Podcasts.aspx
+    result.rtype = 'TOC';
+    result.mime = 'HTML';
+
+  } else if ((match = /^\/Articles\/([0-9]{4})\/([0-9]{2})\/([0-9]{2})\/([a-zA-Z0-9-]+)$/i.exec(path)) !== null) {
+    // https://www.sportsbusinessjournal.com/Articles/2024/04/26/david-tepper-local-restaurant-critical-sign
+    // https://www.sportsbusinessjournal.com/Articles/2024/05/14/jimmy-dunne-resigns-pga-tour-policy-board
+    result.rtype = 'ARTICLE';
+    result.mime = 'HTML';
+    result.title_id = match[4];
+    result.publication_date = `${match[1]}/${match[2]}/${match[3]}`;
+    result.unitid = `${match[1]}/${match[2]}/${match[3]}/${match[4]}`;
+
+  } else if ((match = /^\/Journal\/Issues\/([0-9]{4})\/([0-9]{2})\/([0-9]{2})\/([a-zA-Z0-9-]+)\/([a-zA-Z0-9-]+)\.aspx$/i.exec(path)) !== null) {
+    // https://www.sportsbusinessjournal.com/Journal/Issues/2015/08/24/People-and-Pop-Culture/Friday-Night-Lights.aspx?hl=Caitlyn+Clark&sc=0&publicationSource=search
+    result.rtype = 'ISSUE';
+    result.mime = 'HTML';
+    result.db_id = match[4];
+    result.title_id = match[5];
+    result.publication_date = `${match[1]}/${match[2]}/${match[3]}`;
+    result.unitid = `${match[1]}/${match[2]}/${match[3]}/${match[4]}/${match[5]}`;
+
+  } else if (/^\/Search\/([a-zA-Z0-9-]+)\.aspx$/i.test(path)) {
+    // https://www.sportsbusinessjournal.com/Search/Site.aspx?searchPhrase=wnba
+    // https://www.sportsbusinessjournal.com/Search/Site.aspx?searchPhrase=Caitlyn+Clark&searchType=0&startDate=&endDate=
+    result.rtype = 'SEARCH';
+    result.mime = 'HTML';
+
+  }
+
+  return result;
+});
diff --git a/sbj/test/sbj.2024-08-09.csv b/sbj/test/sbj.2024-08-09.csv
new file mode 100644
index 00000000..7f172065
--- /dev/null
+++ b/sbj/test/sbj.2024-08-09.csv
@@ -0,0 +1,13 @@
+out-db_id;out-title_id;out-publication_date;out-unitid;out-rtype;out-mime;in-url
+;;;;SEARCH;HTML;https://www.sportsbusinessjournal.com/Search/Site.aspx?searchPhrase=wnba
+;;;;SEARCH;HTML;https://www.sportsbusinessjournal.com/Search/Site.aspx?searchPhrase=Caitlyn+Clark&searchType=0&startDate=&endDate=
+People-and-Pop-Culture;Friday-Night-Lights;2015/08/24;2015/08/24/People-and-Pop-Culture/Friday-Night-Lights;ISSUE;HTML;https://www.sportsbusinessjournal.com/Journal/Issues/2015/08/24/People-and-Pop-Culture/Friday-Night-Lights.aspx?hl=Caitlyn+Clark&sc=0&publicationSource=search
+;jimmy-dunne-resigns-pga-tour-policy-board;2024/05/14;2024/05/14/jimmy-dunne-resigns-pga-tour-policy-board;ARTICLE;HTML;https://www.sportsbusinessjournal.com/Articles/2024/05/14/jimmy-dunne-resigns-pga-tour-policy-board
+;david-tepper-local-restaurant-critical-sign;2024/04/26;2024/04/26/david-tepper-local-restaurant-critical-sign;ARTICLE;HTML;https://www.sportsbusinessjournal.com/Articles/2024/04/26/david-tepper-local-restaurant-critical-sign
+;;;;TOC;HTML;https://www.sportsbusinessjournal.com/Podcasts.aspx
+;;2024/08/05;Newsletter-Media/2024/08/05;ISSUE;HTML;https://www.sportsbusinessjournal.com/SB-Blogs/Newsletter-Media/2024/08/05.aspx
+;;2024/07/25;Newsletter-Football/2024/07/25;ISSUE;HTML;https://www.sportsbusinessjournal.com/SB-Blogs/Newsletter-Football/2024/07/25.aspx
+;;2024/08/07;Issues/2024/08/07;ISSUE;HTML;https://www.sportsbusinessjournal.com/Daily/Issues/2024/08/07.aspx
+;;2024/08/05;Issues/2024/08/05;ISSUE;HTML;https://www.sportsbusinessjournal.com/Daily/Issues/2024/08/05.aspx
+;;2024/08/02;Closing-Bell/2024/08/02;ISSUE;HTML;https://www.sportsbusinessjournal.com/Daily/Closing-Bell/2024/08/02.aspx
+;;2024/07/31;Closing-Bell/2024/07/31;ISSUE;HTML;https://www.sportsbusinessjournal.com/Daily/Closing-Bell/2024/07/31.aspx
\ No newline at end of file