From e3cd2b26e1187aa95673848926c19ac8b212844d Mon Sep 17 00:00:00 2001 From: Wayne Date: Tue, 26 Mar 2024 15:36:01 +0800 Subject: [PATCH 1/2] update parser morning --- morning/manifest.json | 3 ++- morning/parser.js | 31 ++++++++++++++++++++++++----- morning/test/Morning.2024-03-26.csv | 13 ++++++++++++ morning/test/morning.2021-01-15.csv | 6 ------ 4 files changed, 41 insertions(+), 12 deletions(-) create mode 100644 morning/test/Morning.2024-03-26.csv delete mode 100644 morning/test/morning.2021-01-15.csv diff --git a/morning/manifest.json b/morning/manifest.json index 42db14973..7628f62d3 100644 --- a/morning/manifest.json +++ b/morning/manifest.json @@ -8,7 +8,8 @@ "domains": [ "library.morningstar.com", "datanalysis.morningstar.com.au", - "ar.morningstar.com" + "ar.morningstar.com", + "research.morningstar.com" ], "version": "2021-01-15", "status": "beta" diff --git a/morning/parser.js b/morning/parser.js index 0b8e9300b..7446a885b 100755 --- a/morning/parser.js +++ b/morning/parser.js @@ -14,7 +14,7 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) { let result = {}; let path = parsedUrl.pathname; // uncomment this line if you need parameters - let param = parsedUrl.query || {}; + // let param = parsedUrl.query || {}; // use console.error for debuging // console.error(parsedUrl); @@ -25,18 +25,14 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) { // http://library.morningstar.com/ArticleSuppot/article?id=1016715 result.rtype = 'ARTICLE'; result.mime = 'HTML'; - result.unitid = param.id; - } else if ((match = /^\/videos\/$/i.exec(path)) !== null) { // http://library.morningstar.com/videos/?id=1017223 result.rtype = 'VIDEO'; result.mime = 'MISC'; - result.unitid = param.id; } else if ((match = /^\/v2\/quote$/i.exec(path)) !== null) { // http://library.morningstar.com/v2/quote?id=0P0001KOSA&typeid=ST result.rtype = 'REPORT'; result.mime = 'HTML'; - result.unitid = param.id; } else if ((match = /^\/Returns\/([0-9a-z]+.html)$/i.exec(path)) !== null) { // http://library.morningstar.com/Returns/CategoryReturns.html result.rtype = 'REPORT'; @@ -45,6 +41,31 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) { // http://library.morningstar.com/Returns/Consumer_Cyclical_Q4_2020.pdf result.rtype = 'REPORT'; result.mime = 'PDF'; + } else if (/^\/home$/i.test(path) && parsedUrl.hostname === 'research.morningstar.com') { + // https://research.morningstar.com/home + result.rtype = 'SESSION'; + result.mime = 'HTML'; + } else if (/^\/chart$/i.test(path) && parsedUrl.hostname === 'research.morningstar.com') { + // https://research.morningstar.com/chart + result.rtype = 'DATASET'; + result.mime = 'HTML'; + } else if (/^\/calendar$/i.test(path) && parsedUrl.hostname === 'research.morningstar.com') { + // https://research.morningstar.com/calendar + result.rtype = 'TOOL'; + result.mime = 'HTML'; + } else if ((match = /^\/articles\/([0-9a-zA-Z]+)\/([a-zA-Z0-9-]+)$/i.exec(path)) !== null) { + // https://research.morningstar.com/articles/1190828/charging-the-future-unleashing-the-power-of-battery-technology + // https://research.morningstar.com/articles/1189996/inflation-still-expected-to-plummet + result.rtype = 'ARTICLE'; + result.mime = 'HTML'; + result.title_id = match[2]; + result.db_id = match[1]; + result.unitid = `${match[1]}/${match[2]}`; + } else if ((match = /^\/api\/v1\/articles\/([0-9]+)\/file$/i.exec(path)) !== null) { + // https://research.morningstar.com/api/v1/articles/1189179/file?type=AnalystClientResearch + // https://research.morningstar.com/api/v1/articles/20151/file?type=CommonArticle + result.rtype = 'ARTICLE'; + result.mime = 'PDF'; result.unitid = match[1]; } diff --git a/morning/test/Morning.2024-03-26.csv b/morning/test/Morning.2024-03-26.csv new file mode 100644 index 000000000..76c347ff2 --- /dev/null +++ b/morning/test/Morning.2024-03-26.csv @@ -0,0 +1,13 @@ +out-title_id;out-db_id;out-unitid;out-rtype;out-mime;in-url +;;20151;ARTICLE;PDF;https://research.morningstar.com/api/v1/articles/20151/file?type=CommonArticle +;;1189179;ARTICLE;PDF;https://research.morningstar.com/api/v1/articles/1189179/file?type=AnalystClientResearch +inflation-still-expected-to-plummet;1189996;1189996/inflation-still-expected-to-plummet;ARTICLE;HTML;https://research.morningstar.com/articles/1189996/inflation-still-expected-to-plummet +charging-the-future-unleashing-the-power-of-battery-technology;1190828;1190828/charging-the-future-unleashing-the-power-of-battery-technology;ARTICLE;HTML;https://research.morningstar.com/articles/1190828/charging-the-future-unleashing-the-power-of-battery-technology +;;;TOOL;HTML;https://research.morningstar.com/calendar +;;;DATASET;HTML;https://research.morningstar.com/chart +;;;SESSION;HTML;https://research.morningstar.com/home +;;;REPORT;PDF;http://library.morningstar.com/Returns/Consumer_Cyclical_Q4_2020.pdf +;;;REPORT;HTML;http://library.morningstar.com/Returns/CategoryReturns.html +;;;REPORT;HTML;http://library.morningstar.com/v2/quote?id=0P0001KOSA&typeid=ST +;;;VIDEO;MISC;http://library.morningstar.com/videos/?id=1017223 +;;;ARTICLE;HTML;http://library.morningstar.com/ArticleSuppot/article?id=1016715 \ No newline at end of file diff --git a/morning/test/morning.2021-01-15.csv b/morning/test/morning.2021-01-15.csv deleted file mode 100644 index 525887e71..000000000 --- a/morning/test/morning.2021-01-15.csv +++ /dev/null @@ -1,6 +0,0 @@ -out-unitid;out-rtype;out-mime;in-url -1016715;ARTICLE;HTML;http://library.morningstar.com/ArticleSuppot/article?id=1016715 -1017223;VIDEO;MISC;http://library.morningstar.com/videos/?id=1017223 -0P0001KOSA;REPORT;HTML;http://library.morningstar.com/v2/quote?id=0P0001KOSA&typeid=ST -;REPORT;HTML;http://library.morningstar.com/Returns/CategoryReturns.html -Consumer_Cyclical_Q4_2020;REPORT;PDF;http://library.morningstar.com/Returns/Consumer_Cyclical_Q4_2020.pdf \ No newline at end of file From 821d88fa703da22bf40a324fcfba91feec207c82 Mon Sep 17 00:00:00 2001 From: Wayne Date: Thu, 28 Mar 2024 10:47:51 +0800 Subject: [PATCH 2/2] update parser morning modify --- morning/parser.js | 6 +++++- morning/test/Morning.2024-03-26.csv | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/morning/parser.js b/morning/parser.js index 7446a885b..ebef929e1 100755 --- a/morning/parser.js +++ b/morning/parser.js @@ -14,7 +14,7 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) { let result = {}; let path = parsedUrl.pathname; // uncomment this line if you need parameters - // let param = parsedUrl.query || {}; + let param = parsedUrl.query || {}; // use console.error for debuging // console.error(parsedUrl); @@ -25,14 +25,17 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) { // http://library.morningstar.com/ArticleSuppot/article?id=1016715 result.rtype = 'ARTICLE'; result.mime = 'HTML'; + result.unitid = param.id; } else if ((match = /^\/videos\/$/i.exec(path)) !== null) { // http://library.morningstar.com/videos/?id=1017223 result.rtype = 'VIDEO'; result.mime = 'MISC'; + result.unitid = param.id; } else if ((match = /^\/v2\/quote$/i.exec(path)) !== null) { // http://library.morningstar.com/v2/quote?id=0P0001KOSA&typeid=ST result.rtype = 'REPORT'; result.mime = 'HTML'; + result.unitid = param.id; } else if ((match = /^\/Returns\/([0-9a-z]+.html)$/i.exec(path)) !== null) { // http://library.morningstar.com/Returns/CategoryReturns.html result.rtype = 'REPORT'; @@ -41,6 +44,7 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) { // http://library.morningstar.com/Returns/Consumer_Cyclical_Q4_2020.pdf result.rtype = 'REPORT'; result.mime = 'PDF'; + result.unitid = match[1]; } else if (/^\/home$/i.test(path) && parsedUrl.hostname === 'research.morningstar.com') { // https://research.morningstar.com/home result.rtype = 'SESSION'; diff --git a/morning/test/Morning.2024-03-26.csv b/morning/test/Morning.2024-03-26.csv index 76c347ff2..7760b2c4c 100644 --- a/morning/test/Morning.2024-03-26.csv +++ b/morning/test/Morning.2024-03-26.csv @@ -6,8 +6,8 @@ charging-the-future-unleashing-the-power-of-battery-technology;1190828;1190828/c ;;;TOOL;HTML;https://research.morningstar.com/calendar ;;;DATASET;HTML;https://research.morningstar.com/chart ;;;SESSION;HTML;https://research.morningstar.com/home -;;;REPORT;PDF;http://library.morningstar.com/Returns/Consumer_Cyclical_Q4_2020.pdf +;;Consumer_Cyclical_Q4_2020;REPORT;PDF;http://library.morningstar.com/Returns/Consumer_Cyclical_Q4_2020.pdf ;;;REPORT;HTML;http://library.morningstar.com/Returns/CategoryReturns.html -;;;REPORT;HTML;http://library.morningstar.com/v2/quote?id=0P0001KOSA&typeid=ST -;;;VIDEO;MISC;http://library.morningstar.com/videos/?id=1017223 -;;;ARTICLE;HTML;http://library.morningstar.com/ArticleSuppot/article?id=1016715 \ No newline at end of file +;;0P0001KOSA;REPORT;HTML;http://library.morningstar.com/v2/quote?id=0P0001KOSA&typeid=ST +;;1017223;VIDEO;MISC;http://library.morningstar.com/videos/?id=1017223 +;;1016715;ARTICLE;HTML;http://library.morningstar.com/ArticleSuppot/article?id=1016715 \ No newline at end of file