Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge main #58

Merged
merged 41 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
c0329ed
Update pinger.ts
mohammeds1992 Mar 7, 2024
8e760b4
Update Dockerfile
mohammeds1992 Mar 7, 2024
0d6e4d1
Update pinger.ts
mohammeds1992 Mar 7, 2024
f6cc4b4
Update pinger.ts
mohammeds1992 Mar 7, 2024
cdb002f
fixes
mohammeds1992 Mar 7, 2024
4c8d52a
Update app.ts
mohammeds1992 Mar 7, 2024
d09d117
Update app.ts
mohammeds1992 Mar 7, 2024
4021ab8
Update app.ts
mohammeds1992 Mar 7, 2024
7e07938
fixes
mohammeds1992 Mar 7, 2024
eed78c4
Update app.ts
mohammeds1992 Mar 7, 2024
2912b91
Update app.ts
mohammeds1992 Mar 7, 2024
bde2ea7
observatory changes
mohammeds1992 Mar 17, 2024
cef7dff
axios
mohammeds1992 Mar 17, 2024
1b5978a
Update app.ts
mohammeds1992 Mar 17, 2024
31f7095
singular paid nodes
mohammeds1992 Mar 18, 2024
6bfcd10
Update app.ts
mohammeds1992 Mar 18, 2024
639f752
Update singularPaidNodes.json
mohammeds1992 Mar 18, 2024
37dd65c
Update singularPaidNodes.json
mohammeds1992 Mar 18, 2024
156f716
Update app.ts
mohammeds1992 Mar 21, 2024
7112141
add provider index
mohammeds1992 Mar 21, 2024
287d5b2
fix: Merge branch 'prod' into staging
mohammeds1992 Mar 21, 2024
7462e9b
log improvements
mohammeds1992 Mar 21, 2024
7888e99
Update app.ts
mohammeds1992 Mar 21, 2024
d905f66
slack alerts
mohammeds1992 Mar 22, 2024
d3917d4
slack url from env
mohammeds1992 Mar 22, 2024
c538c84
fixes , slack from env
mohammeds1992 Mar 22, 2024
6a129ea
fixes
mohammeds1992 Mar 23, 2024
3234320
fixes
mohammeds1992 Mar 23, 2024
8886ef8
fixes
mohammeds1992 Mar 23, 2024
13c533d
fixes
mohammeds1992 Mar 23, 2024
7f2c02a
fix: merge conflicts
mohammeds1992 Mar 23, 2024
bb31fc1
query fixes
mohammeds1992 Mar 23, 2024
8664b79
fixes
mohammeds1992 Mar 23, 2024
0403eb5
fix: Merge branch 'prod' into staging
mohammeds1992 Mar 23, 2024
41c8e4d
fixes
mohammeds1992 Mar 23, 2024
29a2137
fixes
mohammeds1992 Mar 23, 2024
871f711
fixes
mohammeds1992 Mar 23, 2024
43db2a9
Update pinger.ts
mohammeds1992 Apr 4, 2024
d420e2a
fix: remove chain name
mohammeds1992 Apr 8, 2024
76bb07f
Update pinger.ts
mohammeds1992 Apr 15, 2024
56a2fbd
fix: NMS API alert
mohammeds1992 Apr 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions k8s/production/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ spec:
secretKeyRef:
name: observatory-service-prod-secrets
key: SLACK_CHANNEL_URL
- name: SLACK_CHANNEL_URL_P0
valueFrom:
secretKeyRef:
name: observatory-service-prod-secrets
key: SLACK_CHANNEL_URL_P0
- name: GRAFANA_API_KEY
valueFrom:
secretKeyRef:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
Warnings:

- You are about to drop the column `chain_name` on the `response_codes` table. All the data in the column will be lost.

*/
-- DropIndex
DROP INDEX "response_codes_chain_name_type_idx";

-- AlterTable
ALTER TABLE "response_codes" DROP COLUMN "chain_name";
18 changes: 8 additions & 10 deletions prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,23 @@ datasource db {
}

model ResponseCode {
id Int @id @default(autoincrement())
createdAt DateTime @default(now()) @db.Timestamptz(3)
updatedAt DateTime @updatedAt @db.Timestamptz(3)
chainName String? @map("chain_name")
id Int @id @default(autoincrement())
createdAt DateTime @default(now()) @db.Timestamptz(3)
updatedAt DateTime @updatedAt @db.Timestamptz(3)
type Types
httpResponseCode Int
url String
responseTime Int?
chainId String? @map("chain_id")
priority Int? @default(0)
endpointType EndpointType @default(latest_block)
chainId String? @map("chain_id")
priority Int? @default(0)
endpointType EndpointType @default(latest_block)
provider String?
errorMessage String?
chainPriority Int @default(1)
chainPriority Int @default(1)

@@index([chainName, type])
@@index([type])
@@index([createdAt])
@@index([chainId])
@@index([type])
@@index([httpResponseCode])
@@index([responseTime])
@@index([url])
Expand Down
28 changes: 9 additions & 19 deletions src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import './cron/data-accuracy-check';
import './cron/nms-p1-last-hour-downtime-check';
import './cron/numia-last-hour-downtime-check';
import './cron/allnodes-last-hour-downtime-check';
import './cron/nms-api-check';
import { Pinger } from './pinger';
import sleep from './sleep';
import prometheus from 'prom-client';
Expand Down Expand Up @@ -61,8 +62,8 @@ async function startIndividualNodePinger(): Promise<void> {
logger.informational('Starting a new iteration of Individual Node Pinger.');
const pinger = Container.get(Pinger.token);

const handlePing = async (url: string, chainId: string, chainName: string | undefined) => {
return pinger.ping(url, chainName, Types.SINGULAR, chainId).catch((error) => {
const handlePing = async (url: string, chainId: string) => {
return pinger.ping(url, Types.SINGULAR, chainId).catch((error) => {
logger.error(`Error pinging ${url} for chainId ${chainId}: ${error.message}`);
return null; // Return a value to keep the array's structure consistent
});
Expand All @@ -72,11 +73,10 @@ async function startIndividualNodePinger(): Promise<void> {

for (const nodeData of jsonData) {
const chainId = nodeData.chainId;
const chainName = nodeData.chainName;
const nodes = nodeData.nodeList;

for (const url of nodes) {
promisesArr.push(handlePing(url, chainId, chainName));
promisesArr.push(handlePing(url, chainId));
}
}

Expand Down Expand Up @@ -126,21 +126,17 @@ async function startSingularPaidNodePinger(): Promise<void> {
return;
}

const chainIdToNameMap = new Map(
jsonData.map((node: { chainId: string; chainName: string }) => [node.chainId, node.chainName]),
);

while (true) {
try {
const startTime = Date.now();
logger.informational('Starting a new iteration of Singular Paid Node Pinger.');
const pinger = Container.get(Pinger.token); // Assuming Pinger is set up in Container

const handlePing = async (url: string, chainId: string, chainName: string, provider: string | null) => {
const handlePing = async (url: string, chainId: string, provider: string | null) => {
return pinger
.ping(
url,
chainName,
Types.SINGULAR_PAID,
chainId,
'/cosmos/base/tendermint/v1beta1/blocks/latest',
Expand All @@ -163,14 +159,13 @@ async function startSingularPaidNodePinger(): Promise<void> {
// Here's the critical check
if (!Array.isArray(nodes)) {
logger.error(
`Node list for provider "${provider}" in chain "${nodeData.chainName}" (${chainId}) is not an array.`,
`Node list for provider "${provider}" in chain (${chainId}) is not an array.`,
);
continue; // Skip this iteration
}

const chainName = chainIdToNameMap.get(chainId);
for (const url of nodes) {
promisesArr.push(handlePing(url, chainId, chainName!, provider));
promisesArr.push(handlePing(url, chainId, provider));
}
}
}
Expand Down Expand Up @@ -239,7 +234,7 @@ async function startEcostakePinger(): Promise<void> {
const details = chainDetails as any;
const url = `https://rest.cosmos.directory/${details.chainRegistryPath}`;
arr.push(
pinger.ping(url, details.chainName, Types.ECOSTAKE, details.chainId).catch((error) => {
pinger.ping(url, Types.ECOSTAKE, details.chainId).catch((error) => {
logger.error(`Error pinging ${url} for chainId ${details.chainId}: ${error.message}`);
return null;
}),
Expand Down Expand Up @@ -364,10 +359,6 @@ async function startNMSPinger(nmsRunType: Types): Promise<void> {
return;
}

const chainIdToNameMap = new Map(
jsonData.map((node: { chainId: string; chainName: string }) => [node.chainId, node.chainName]),
);

if (EnvVars.getNodeEnv() === 'test') return;
let promisesArr: Promise<Prisma.ResponseCodeCreateInput>[] = [];
while (true) {
Expand All @@ -381,11 +372,10 @@ async function startNMSPinger(nmsRunType: Types): Promise<void> {
for (let i = 0; i < chainIds.length; i++) {
const chainId = chainIds[i] || '';
const nodes = jsonData[chainId!];
const chainName = chainIdToNameMap.get(chainId);
const urls = await nmsGetNodeURL(nodes, nmsRunType);
urls.forEach(({ url, priority }) => {
promisesArr.push(
pinger.ping(url, chainName, nmsRunType, chainId, '/cosmos/base/tendermint/v1beta1/blocks/latest', priority),
pinger.ping(url, nmsRunType, chainId, '/cosmos/base/tendermint/v1beta1/blocks/latest', priority),
);
});

Expand Down
8 changes: 2 additions & 6 deletions src/cron/allnodes-last-hour-downtime-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async function lastHourDowntimeP0ChainsCheck() {
failed_requests,
(failed_requests::FLOAT / total_requests) * 100 AS failure_rate_percent,
CASE
WHEN (failed_requests::FLOAT / total_requests) * 100 > 5 THEN 'Yes'
WHEN (failed_requests::FLOAT / total_requests) * 100 > 10 THEN 'Yes'
ELSE 'No'
END AS is_downtime_hour
FROM
Expand Down Expand Up @@ -148,7 +148,7 @@ async function lastHourDowntimeP1ChainsCheck() {
failed_requests,
(failed_requests::FLOAT / total_requests) * 100 AS failure_rate_percent,
CASE
WHEN (failed_requests::FLOAT / total_requests) * 100 > 10 THEN 'Yes'
WHEN (failed_requests::FLOAT / total_requests) * 100 > 20 THEN 'Yes'
ELSE 'No'
END AS is_downtime_hour
FROM
Expand Down Expand Up @@ -249,7 +249,3 @@ cron.schedule('0 * * * *', () => {
console.log('Running scheduled task for All nodes Last One Hour Downtime P1 Chains Check...');
lastHourDowntimeP1ChainsCheck();
});

// Initial call
lastHourDowntimeP0ChainsCheck();
lastHourDowntimeP1ChainsCheck();
7 changes: 2 additions & 5 deletions src/cron/data-accuracy-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,14 @@ async function dataAccuracyCheck() {
type: 'section',
text: {
type: 'mrkdwn',
text: '*NMS P1 Last One Hour Downtime Alert*',
text: '*Data Accuracy Check Alert*',
},
},
{
type: 'section',
text: {
type: 'mrkdwn',
text: 'At least one chain has experienced downtime in the last hour. Immediate attention required.',
text: 'Data collection less than 80 % threshold for more than 100 chains. Immediate attention required.',
},
},
{
Expand Down Expand Up @@ -148,6 +148,3 @@ cron.schedule('0 0 * * *', () => {
console.log('Running scheduled task to verify data accuracy...');
dataAccuracyCheck();
});

// Initial call or you can remove it if you want to strictly follow the schedule
dataAccuracyCheck();
121 changes: 121 additions & 0 deletions src/cron/nms-api-check.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import { PrismaClient } from '@prisma/client';
import cron from 'node-cron';
import axios from 'axios';
import { EnvVars } from '../env-vars';

// Prisma client instance created for this module; used below for the raw SQL lookup.
const prisma = new PrismaClient();
// URL that alert payloads are POSTed to. Read once at module load via
// EnvVars.getSlackChannelUrlP0(); when it is falsy, checkApiFailuresAndAlert
// logs a notice and sends nothing.
const slackChannelUrl = EnvVars.getSlackChannelUrlP0(); // Ensure this is configured in your environment variables

/** One row produced by the "latest calls per chain and priority" query. */
interface NmsApiCallRow {
  chain_id: string | null;
  createdAt: Date;
  type_priority: string;
  priority: number;
  url: string;
  httpResponseCode: number;
  errorMessage: string | null;
  rn: bigint;
}

/** The most recent calls recorded for a single (chain ID, priority) pair. */
interface NmsApiCallGroup {
  chainId: string | null;
  priority: number;
  calls: NmsApiCallRow[];
}

/** Number of recent calls inspected per group when NUM_LATEST_CALLS is unset or invalid. */
const DEFAULT_NUM_LATEST_CALLS = 5;

/**
 * Resolves how many recent calls to inspect per (chain ID, priority) group.
 *
 * @returns The positive integer parsed from `process.env.NUM_LATEST_CALLS`,
 *   or {@link DEFAULT_NUM_LATEST_CALLS} when the variable is missing, empty,
 *   non-numeric, or not greater than zero.
 */
function resolveNumLatestCalls(): number {
  const parsed = Number.parseInt(process.env.NUM_LATEST_CALLS ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_NUM_LATEST_CALLS;
}

/**
 * Builds the Slack Block Kit payload describing one failing group.
 *
 * @param group - The chain ID, priority and failing calls to report.
 * @returns An object with a `blocks` array: a header, a chain/priority
 *   section, a summary section, and one section per failing call.
 */
function buildNmsFailureAlert(group: NmsApiCallGroup) {
  return {
    blocks: [
      {
        type: 'header',
        text: {
          type: 'plain_text',
          text: `🚨 Alert: NMS API Failures Detected`,
          emoji: true,
        },
      },
      {
        type: 'section',
        text: {
          type: 'mrkdwn',
          text: `*Chain ID:* ${group.chainId}\n*Priority:* ${group.priority}`,
        },
      },
      {
        type: 'section',
        text: {
          type: 'mrkdwn',
          text: `All last *${group.calls.length} API calls* failed. Details are listed below:`,
        },
      },
      ...group.calls.map((call) => ({
        type: 'section',
        fields: [
          { type: 'mrkdwn', text: `*Timestamp:* ${call.createdAt}` },
          { type: 'mrkdwn', text: `*Type + Priority:* ${call.type_priority}` },
          { type: 'mrkdwn', text: `*URL:* <${call.url}|Link>` },
          { type: 'mrkdwn', text: `*Response Code:* ${call.httpResponseCode}` },
          { type: 'mrkdwn', text: `*Error Message:* ${call.errorMessage || 'N/A'}` },
        ],
      })),
    ],
  };
}

/**
 * Looks at the most recent `nms` calls (priority 0 or 1) for every chain ID
 * and posts a Slack alert for each (chain ID, priority) group in which every
 * inspected call returned an HTTP status other than 200.
 *
 * The number of calls inspected per group comes from NUM_LATEST_CALLS
 * (default 5) and is passed to the query as a bound parameter, so the value
 * that is logged is the value the query actually uses.
 *
 * Nothing is queried or sent when no Slack URL is configured. A failing
 * query or a failing Slack request is logged and does not reject the
 * returned promise.
 */
async function checkApiFailuresAndAlert(): Promise<void> {
  const numLatestCalls = resolveNumLatestCalls();
  console.log(`Checking the last ${numLatestCalls} API calls for each chain ID and priority...`);

  // Without a destination there is nothing to do, so skip the database round trip.
  if (!slackChannelUrl) {
    console.log('Slack channel URL not configured. Skipping Slack notification.');
    return;
  }

  let rows: NmsApiCallRow[];
  try {
    // Interpolated values in a $queryRaw tagged template are sent as bound parameters.
    rows = await prisma.$queryRaw<NmsApiCallRow[]>`
      SELECT * FROM (
        SELECT
          r.chain_id,
          r."createdAt", -- Correctly referencing with case sensitivity
          r.type || '+' || CAST(r.priority AS TEXT) AS type_priority, -- Using || for string concatenation
          r.priority, -- Ensure priority is selected if it's needed for sorting
          r.url,
          r."httpResponseCode",
          r."errorMessage",
          ROW_NUMBER() OVER (PARTITION BY r.chain_id, r.priority ORDER BY r."createdAt" DESC) as rn
        FROM response_codes r
        WHERE r.type = 'nms' AND (r.priority = 0 OR r.priority = 1)
      ) sub
      WHERE sub.rn <= ${numLatestCalls}
      ORDER BY sub.chain_id, sub.priority, sub."createdAt" DESC;
    `;
  } catch (error) {
    // This runs from a cron callback with no rejection handler; log instead of rejecting.
    console.error('Failed to query latest NMS API calls:', error);
    return;
  }

  // Bucket rows per (chain ID, priority); a Map keeps first-seen order.
  const groups = new Map<string, NmsApiCallGroup>();
  for (const row of rows) {
    const key = `${row.chain_id}|${row.priority}`;
    let group = groups.get(key);
    if (!group) {
      group = { chainId: row.chain_id, priority: row.priority, calls: [] };
      groups.set(key, group);
    }
    group.calls.push(row);
  }

  for (const group of groups.values()) {
    // Alert only when every inspected call in the group was unsuccessful.
    if (!group.calls.every((call) => call.httpResponseCode !== 200)) {
      continue;
    }

    try {
      const response = await axios.post(slackChannelUrl, buildNmsFailureAlert(group), {
        headers: { 'Content-Type': 'application/json' },
      });
      if (response.status === 200) {
        console.log(`Alert sent to Slack for ${group.chainId} at priority ${group.priority}`);
      } else {
        console.error('Failed to send message to Slack:', response.data);
      }
    } catch (error) {
      console.error('Failed to send message to Slack:', error);
    }
  }
}

// Cron expression with all five fields wildcarded, i.e. once every minute.
const NMS_API_CHECK_SCHEDULE = '* * * * *';

// Register the NMS API failure check with node-cron on that schedule.
cron.schedule(NMS_API_CHECK_SCHEDULE, checkApiFailuresAndAlert);
8 changes: 2 additions & 6 deletions src/cron/nms-p1-last-hour-downtime-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ async function lastHourDowntimeP0ChainsCheck() {
failed_requests,
(failed_requests::FLOAT / total_requests) * 100 AS failure_rate_percent,
CASE
WHEN (failed_requests::FLOAT / total_requests) * 100 > 5 THEN 'Yes'
WHEN (failed_requests::FLOAT / total_requests) * 100 > 10 THEN 'Yes'
ELSE 'No'
END AS is_downtime_hour
FROM
Expand Down Expand Up @@ -152,7 +152,7 @@ async function lastHourDowntimeP1ChainsCheck() {
failed_requests,
(failed_requests::FLOAT / total_requests) * 100 AS failure_rate_percent,
CASE
WHEN (failed_requests::FLOAT / total_requests) * 100 > 10 THEN 'Yes'
WHEN (failed_requests::FLOAT / total_requests) * 100 > 20 THEN 'Yes'
ELSE 'No'
END AS is_downtime_hour
FROM
Expand Down Expand Up @@ -254,7 +254,3 @@ cron.schedule('0 * * * *', () => {
console.log('Running scheduled task for NMS Last One Hour Downtime P1 Chains Check...');
lastHourDowntimeP1ChainsCheck();
});

// Initial call
lastHourDowntimeP0ChainsCheck();
lastHourDowntimeP1ChainsCheck();
8 changes: 2 additions & 6 deletions src/cron/numia-last-hour-downtime-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async function lastHourDowntimeP0ChainsCheck() {
failed_requests,
(failed_requests::FLOAT / total_requests) * 100 AS failure_rate_percent,
CASE
WHEN (failed_requests::FLOAT / total_requests) * 100 > 5 THEN 'Yes'
WHEN (failed_requests::FLOAT / total_requests) * 100 > 10 THEN 'Yes'
ELSE 'No'
END AS is_downtime_hour
FROM
Expand Down Expand Up @@ -148,7 +148,7 @@ async function lastHourDowntimeP1ChainsCheck() {
failed_requests,
(failed_requests::FLOAT / total_requests) * 100 AS failure_rate_percent,
CASE
WHEN (failed_requests::FLOAT / total_requests) * 100 > 10 THEN 'Yes'
WHEN (failed_requests::FLOAT / total_requests) * 100 > 20 THEN 'Yes'
ELSE 'No'
END AS is_downtime_hour
FROM
Expand Down Expand Up @@ -249,7 +249,3 @@ cron.schedule('0 * * * *', () => {
console.log('Running scheduled task for Numia Last One Hour Downtime P1 Chains Check...');
lastHourDowntimeP1ChainsCheck();
});

// Initial call
lastHourDowntimeP0ChainsCheck();
lastHourDowntimeP1ChainsCheck();
5 changes: 5 additions & 0 deletions src/env-vars.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,9 @@ export namespace EnvVars {
const value = getEnvVar('SLACK_CHANNEL_URL');
return isUnset(value) ? null : value;
}

/**
 * Reads the `SLACK_CHANNEL_URL_P0` environment variable.
 *
 * @returns The value obtained from `getEnvVar`, or `null` when `isUnset`
 *   reports that value as unset.
 */
export function getSlackChannelUrlP0(): string | null {
  const slackUrlP0 = getEnvVar('SLACK_CHANNEL_URL_P0');
  if (isUnset(slackUrlP0)) {
    return null;
  }
  return slackUrlP0;
}
}
Loading
Loading