Skip to content

Commit

Permalink
refactor: 🫧 separate logic into own functions
Browse files Browse the repository at this point in the history
Signed-off-by: Manuel Ruck <[email protected]>
  • Loading branch information
Manuel Ruck committed Nov 30, 2024
1 parent 91b4be1 commit 3fc9be6
Show file tree
Hide file tree
Showing 13 changed files with 460 additions and 203 deletions.
30 changes: 30 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Attach to Process",
"type": "node",
"request": "attach",
"port": 9229,
"restart": true,
"timeout": 10000,
"skipFiles": ["<node_internals>/**"]
},
{
"name": "Run crawler dev",
"type": "node",
"request": "launch",
"runtimeExecutable": "pnpm",
"runtimeArgs": ["dev"],
"cwd": "${workspaceFolder}/services/cron-jobs/crawler",
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen"
}
],
"compounds": [
{
"name": "Debug crawler dev",
"configurations": ["Run crawler dev", "Attach to Process"]
}
]
}
8 changes: 4 additions & 4 deletions services/cron-jobs/crawler/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
"name": "crawler",
"version": "1.1.4",
"description": "Kubernetes cron-job to collect data from various sources for bundestag.io",
"main": "index.ts",
"main": "build/main.js",
"repository": "https://github.com/demokratie-live/democracy-development/",
"author": "Manuel Ruck, Ulf Gebhardt, Robert Schäfer",
"license": "MIT",
"scripts": {
"dev": "tsx --env-file .env --env-file .env.local --watch src/import-procedures/index.ts",
"garden:dev": "tsx --watch src/import-procedures/index.ts",
"dev": "tsx --env-file .env --env-file .env.local --watch src/main.ts",
"garden:dev": "tsx --watch src/main.ts",
"build": "tsup-node",
"lint": "eslint .",
"start": "node ./build/index.js"
"start": "node ./build/main.js"
},
"dependencies": {
"@democracy-deutschland/bt-dip-sdk": "1.3.0-alpha.0",
Expand Down
64 changes: 35 additions & 29 deletions services/cron-jobs/crawler/src/axios.ts
Original file line number Diff line number Diff line change
@@ -1,37 +1,43 @@
import axios from 'axios';
import axios, { AxiosInstance, AxiosResponse, InternalAxiosRequestConfig } from 'axios';

const MAX_REQUESTS_COUNT = 1;
const INTERVAL_MS = 1000;
let PENDING_REQUESTS = 0;
// create new axios instance
const api = axios.create({});

/**
* Axios Request Interceptor
*/
api.interceptors.request.use(function (config) {
return new Promise((resolve) => {
const interval = setInterval(() => {
if (PENDING_REQUESTS < MAX_REQUESTS_COUNT) {
PENDING_REQUESTS++;
clearInterval(interval);
resolve(config);
}
}, INTERVAL_MS);
function setupRequestInterceptor(apiInstance: AxiosInstance): void {
apiInstance.interceptors.request.use(function (config: InternalAxiosRequestConfig) {
return new Promise((resolve) => {
const interval = setInterval(() => {
if (PENDING_REQUESTS < MAX_REQUESTS_COUNT) {
PENDING_REQUESTS++;
clearInterval(interval);
resolve(config);
}
}, INTERVAL_MS);
});
});
});
/**
* Axios Response Interceptor
*/
api.interceptors.response.use(
function (response) {
PENDING_REQUESTS = Math.max(0, PENDING_REQUESTS - 1);
return Promise.resolve(response);
},
function (error) {
PENDING_REQUESTS = Math.max(0, PENDING_REQUESTS - 1);
return Promise.reject(error);
},
);
}

/**
 * Attaches a response interceptor that frees one pending-request slot each
 * time a response arrives or a request fails.
 *
 * @param apiInstance - The axios instance the interceptor is registered on.
 */
function setupResponseInterceptor(apiInstance: AxiosInstance): void {
  // Lower the module-level counter by one, clamped so it never goes negative.
  const releaseSlot = (): void => {
    PENDING_REQUESTS = Math.max(0, PENDING_REQUESTS - 1);
  };

  apiInstance.interceptors.response.use(
    (response: AxiosResponse) => {
      releaseSlot();
      return Promise.resolve(response);
    },
    (error: unknown) => {
      releaseSlot();
      // Pass the failure on unchanged so callers still see the rejection.
      return Promise.reject(error);
    },
  );
}

/**
 * Creates an axios instance and registers the request and response
 * interceptors on it.
 *
 * @returns The axios instance with both interceptors attached.
 */
function createAxiosInstance(): AxiosInstance {
  const instance = axios.create({});
  setupRequestInterceptor(instance);
  setupResponseInterceptor(instance);
  return instance;
}

// Single instance created at module load; this is what the default export exposes.
const api = createAxiosInstance();

export default api;
60 changes: 36 additions & 24 deletions services/cron-jobs/crawler/src/config.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,39 @@
const {
DB_URL = 'mongodb://localhost:27017/bundestagio',
IMPORT_PROCEDURES_START_CURSOR = '*',
IMPORT_PROCEDURES_FILTER_BEFORE = new Date().toISOString().slice(0, 10),
IMPORT_PROCEDURES_FILTER_AFTER = new Date(Number(new Date()) - 1000 * 60 * 60 * 24 * 7 * 4)
.toISOString()
.slice(0, 10),
} = process.env;
const parseEnvVariables = () => {
const {
DB_URL = 'mongodb://localhost:27017/bundestagio',
IMPORT_PROCEDURES_START_CURSOR = '*',
IMPORT_PROCEDURES_FILTER_BEFORE = new Date().toISOString().slice(0, 10),
IMPORT_PROCEDURES_FILTER_AFTER = new Date(Number(new Date()) - 1000 * 60 * 60 * 24 * 7 * 4)
.toISOString()
.slice(0, 10),
} = process.env;

let { IMPORT_PROCEDURES_CHUNK_SIZE = 100, IMPORT_PROCEDURES_CHUNK_ROUNDS = 5 } = process.env;
let { IMPORT_PROCEDURES_CHUNK_SIZE = 100, IMPORT_PROCEDURES_CHUNK_ROUNDS = 5 } = process.env;

IMPORT_PROCEDURES_CHUNK_SIZE = Number(IMPORT_PROCEDURES_CHUNK_SIZE);
IMPORT_PROCEDURES_CHUNK_ROUNDS = Number(IMPORT_PROCEDURES_CHUNK_ROUNDS);
const IMPORT_PROCEDURES_FILTER_TYPES = process.env.IMPORT_PROCEDURES_FILTER_TYPES
? process.env.IMPORT_PROCEDURES_FILTER_TYPES.split(',')
: undefined;
IMPORT_PROCEDURES_CHUNK_SIZE = Number(IMPORT_PROCEDURES_CHUNK_SIZE);
IMPORT_PROCEDURES_CHUNK_ROUNDS = Number(IMPORT_PROCEDURES_CHUNK_ROUNDS);
const IMPORT_PROCEDURES_FILTER_TYPES = process.env.IMPORT_PROCEDURES_FILTER_TYPES
? process.env.IMPORT_PROCEDURES_FILTER_TYPES.split(',')
: undefined;

export const CONFIG = {
DIP_API_KEY: process.env.DIP_API_KEY || '',
DB_URL,
IMPORT_PROCEDURES_CHUNK_SIZE,
IMPORT_PROCEDURES_CHUNK_ROUNDS,
IMPORT_PROCEDURES_FILTER_BEFORE,
IMPORT_PROCEDURES_FILTER_AFTER,
IMPORT_PROCEDURES_FILTER_TYPES,
IMPORT_PROCEDURES_START_CURSOR,
} as const;
return {
DB_URL,
IMPORT_PROCEDURES_START_CURSOR,
IMPORT_PROCEDURES_FILTER_BEFORE,
IMPORT_PROCEDURES_FILTER_AFTER,
IMPORT_PROCEDURES_CHUNK_SIZE,
IMPORT_PROCEDURES_CHUNK_ROUNDS,
IMPORT_PROCEDURES_FILTER_TYPES,
};
};

/**
 * Assembles the configuration object from the environment.
 *
 * @returns The values produced by `parseEnvVariables()` together with
 * `DIP_API_KEY`, which falls back to an empty string when it is unset or empty.
 */
function getConfig() {
  const parsed = parseEnvVariables();
  const dipApiKey = process.env.DIP_API_KEY || '';

  return { DIP_API_KEY: dipApiKey, ...parsed } as const;
}

// Computed once, when this module is evaluated.
export const CONFIG = getConfig();
25 changes: 25 additions & 0 deletions services/cron-jobs/crawler/src/cronJob.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { getCron, setCronStart, setCronSuccess, ICronJob } from '@democracy-deutschland/bundestagio-common';
import { Logger } from './logger';
import { executeImportProcedures } from './importProceduresExecutor';
import { CONFIG } from './config';

const CRON_JOB_NAME = 'import-procedures';

/**
 * Runs one pass of the `import-procedures` cron job.
 *
 * Fetches the cron record, marks the job as started, executes the procedure
 * import and then marks the job as successful. If any of these steps throws,
 * the error is logged and the process exits with code 1.
 *
 * @param config - Configuration passed through to `executeImportProcedures`.
 * @param logger - Receives the progress and error messages.
 */
export const handleCronJob = async (config: typeof CONFIG, logger: Logger): Promise<void> => {
  try {
    logger.log('Handling cron job...');

    const job: ICronJob = await getCron({ name: CRON_JOB_NAME });
    await setCronStart({ name: CRON_JOB_NAME });
    await executeImportProcedures(job, config, logger);

    // `lastStartDate` comes from the record fetched before `setCronStart` ran;
    // the current time is used when that field is empty.
    const successStartDate = job.lastStartDate || new Date();
    await setCronSuccess({ name: CRON_JOB_NAME, successStartDate });

    logger.log('Cron job handled successfully.');
  } catch (error) {
    // Only Error instances contribute their message; anything else is appended as-is.
    const reason = error instanceof Error ? error.message : error;
    logger.error('Failed to handle cron job: ' + reason);
    process.exit(1);
  }
};
13 changes: 13 additions & 0 deletions services/cron-jobs/crawler/src/database.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { mongoConnect } from '@democracy-deutschland/bundestagio-common';
import { Logger } from './logger';

/**
 * Opens the database connection via `mongoConnect`, logging before and after.
 *
 * If connecting throws, the error is logged and the process exits with code 1.
 *
 * @param dbUrl - Connection string passed to `mongoConnect`.
 * @param logger - Receives the progress and error messages.
 */
export const connectToDatabase = async (dbUrl: string, logger: Logger): Promise<void> => {
  try {
    logger.log('Connecting to the database...');
    await mongoConnect(dbUrl);
    logger.log('Database connection successful.');
  } catch (error) {
    // Only Error instances contribute their message; anything else is appended as-is.
    const reason = error instanceof Error ? error.message : error;
    logger.error('Failed to connect to the database: ' + reason);
    process.exit(1);
  }
};
Loading

0 comments on commit 3fc9be6

Please sign in to comment.