From 989331871e9fe04667269e2dc59bb4659209c960 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=86=E4=B8=BA=E5=90=9B=E6=95=85?=
Date: Mon, 13 Jan 2025 23:04:00 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20add=20router=20for=20CPTA(=E4=B8=AD?=
 =?UTF-8?q?=E5=9B=BD=E4=BA=BA=E4=BA=8B=E8=80=83=E8=AF=95=E7=BD=91)=20(#180?=
 =?UTF-8?q?97)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: add router for CPTA

* fix: prolong the caching time.

* fix: fix anti-crawler restriction.

* fix: deduce the account of request

* fix: remove expiration time.

* fix: fix param description and request actions
---
Reviewer note: the handler.ts hunk below was revised after export (typed
NEWS_TYPES, category validation, href guard, plain-text content.text, doc
comments). The hunk header and diffstat reflect the revised line count; the
blob id on the "index" line is the one recorded at export time and was not
regenerated.

 lib/routes/cpta/handler.ts   | 159 +++++++++++++++++++++++++++++++++++
 lib/routes/cpta/namespace.ts |   6 ++
 2 files changed, 165 insertions(+)
 create mode 100644 lib/routes/cpta/handler.ts
 create mode 100644 lib/routes/cpta/namespace.ts

diff --git a/lib/routes/cpta/handler.ts b/lib/routes/cpta/handler.ts
new file mode 100644
index 00000000000000..d8d40510a2c6e1
--- /dev/null
+++ b/lib/routes/cpta/handler.ts
@@ -0,0 +1,159 @@
+import { DataItem, Route } from '@/types';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { load } from 'cheerio';
+import asyncPool from 'tiny-async-pool';
+
+/** Static metadata for one news column of the CPTA website. */
+type NewsCategory = {
+    title: string;
+    baseUrl: string;
+    description: string;
+};
+
+const WEBSITE_URL = 'http://www.cpta.com.cn';
+
+/** Supported values of the `category` route parameter, keyed by that value. */
+const NEWS_TYPES: Record<string, NewsCategory> = {
+    notice: {
+        title: '通知公告',
+        baseUrl: 'http://www.cpta.com.cn/notice.html',
+        description: '中国人事考试网 考试通知公告汇总',
+    },
+    performance: {
+        title: '成绩公布',
+        baseUrl: 'http://www.cpta.com.cn/performance.html',
+        description: '中国人事考试网 考试成绩公布汇总',
+    },
+};
+
+/**
+ * Builds the feed for one CPTA news column.
+ *
+ * Reads the `category` route parameter, scrapes the matching index page, keeps the
+ * ten newest entries and loads each article body through the cache.
+ *
+ * @throws Error when `category` is not a key of `NEWS_TYPES`.
+ */
+const handler: Route['handler'] = async (context) => {
+    const category = context.req.param('category');
+    // Reject anything that is not an own key of NEWS_TYPES before dereferencing it.
+    if (!Object.keys(NEWS_TYPES).includes(category)) {
+        throw new Error(`Unknown category "${category}". Supported categories: ${Object.keys(NEWS_TYPES).join(', ')}`);
+    }
+    const newsType = NEWS_TYPES[category];
+    const BASE_URL = newsType.baseUrl;
+    // Fetch the index page
+    const { data: listResponse } = await got(BASE_URL);
+    const $ = load(listResponse);
+
+    // Select all list items containing news information
+    const ITEM_SELECTOR = 'ul[class*="list_14"] > li:has(*)';
+    const listItems = $(ITEM_SELECTOR);
+
+    // Map through each list item to extract details
+    const contentLinkList = listItems
+        .toArray()
+        .flatMap((element) => {
+            const anchor = $(element).find('a').first();
+            const relativeLink = anchor.attr('href');
+            // No href means no article to fetch; `new URL(undefined, base)` would yield `/undefined`.
+            if (!relativeLink) {
+                return [];
+            }
+            return [
+                {
+                    title: anchor.attr('title') ?? anchor.text().trim(),
+                    // Remove square brackets from the date text.
+                    date: $(element).find('i').text().replaceAll(/[[\]]/g, ''),
+                    link: new URL(relativeLink, WEBSITE_URL).href,
+                },
+            ];
+        })
+        .sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime())
+        .slice(0, 10);
+
+    const fetchDataItem = (item: { title: string; date: string; link: string }) =>
+        cache.tryGet(item.link, async () => {
+            const CONTENT_SELECTOR = '#p_content';
+            const { data: contentResponse } = await got(item.link);
+            const contentPage = load(contentResponse);
+            const contentNode = contentPage(CONTENT_SELECTOR);
+            const content = contentNode.html() || '';
+            return {
+                title: item.title,
+                pubDate: item.date,
+                link: item.link,
+                description: content,
+                category: ['study'],
+                guid: item.link,
+                id: item.link,
+                image: 'https://www.gov.cn/images/gtrs_logo_lt.png',
+                content: {
+                    html: content,
+                    // Plain-text variant: use the node's text so markup does not leak into it.
+                    text: contentNode.text(),
+                },
+                updated: item.date,
+                language: 'zh-CN',
+            } as DataItem;
+        });
+
+    const dataItems: DataItem[] = [];
+
+    // Pool size 1: article pages are requested one at a time.
+    for await (const item of await asyncPool(1, contentLinkList, fetchDataItem)) {
+        dataItems.push(item as DataItem);
+    }
+
+    return {
+        title: `中国人事考试网-${newsType.title}`,
+        description: newsType.description,
+        link: BASE_URL,
+        image: 'https://www.gov.cn/images/gtrs_logo_lt.png',
+        item: dataItems,
+        allowEmpty: true,
+        language: 'zh-CN',
+        feedLink: `https://rsshub.app/cpta/${category}`,
+        id: `https://rsshub.app/cpta/${category}`,
+    };
+};
+
+export const route: Route = {
+    path: '/:category',
+    name: '中国人事考试网发布',
+    maintainers: ['PrinOrange'],
+    parameters: {
+        category: '栏目参数,可见下表描述。',
+    },
+    description: `
+| Category | Title | Description |
+|-------------|-----------|-------------------------------------|
+| notice | 通知公告 | 中国人事考试网 考试通知公告汇总 |
+| performance | 成绩公布 | 中国人事考试网 考试成绩公布汇总 |
+`,
+    handler,
+    categories: ['study'],
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+        supportRadar: true,
+        antiCrawler: true,
+    },
+    radar: [
+        {
+            title: '中国人事考试网通知公告',
+            source: ['www.cpta.com.cn/notice.html', 'www.cpta.com.cn'],
+            target: `/notice`,
+        },
+        {
+            title: '中国人事考试网成绩发布',
+            source: ['www.cpta.com.cn/performance.html', 'www.cpta.com.cn'],
+            target: `/performance`,
+        },
+    ],
+    example: '/cpta/notice',
+};
diff --git a/lib/routes/cpta/namespace.ts b/lib/routes/cpta/namespace.ts
new file mode 100644
index 00000000000000..a0b0f00132143f
--- /dev/null
+++ b/lib/routes/cpta/namespace.ts
@@ -0,0 +1,6 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: '中国人事考试网',
+    url: 'www.cpta.com.cn',
+};