Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add problem timestamps and duration #447

Merged
merged 3 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion lib/nsutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,19 @@ const char *mkstr(const char *fmt, ...)
return ret;
}


/**
 * Format a duration given in seconds as a human readable string.
 *
 * The result has the form "<days>d <hours>h <minutes>m <seconds>s",
 * e.g. 93784 seconds yields "1d 2h 3m 4s".
 *
 * All arithmetic is done in unsigned long and printed with %lu, so very
 * large inputs (for instance a negative time delta that the caller cast
 * to unsigned long) can neither be truncated to int nor trigger signed
 * overflow while being split into its parts.
 *
 * @param duration The duration in seconds
 * @return The string produced by mkstr(). Per the note on this function's
 *         declaration, the result must not be free()'d.
 */
const char *duration_string(unsigned long duration)
{
	/* 86400 is a multiple of 3600 and 3600 a multiple of 60, so each
	 * remainder can be taken directly from the original value. */
	const unsigned long days = duration / 86400UL;
	const unsigned long hours = (duration % 86400UL) / 3600UL;
	const unsigned long minutes = (duration % 3600UL) / 60UL;
	const unsigned long seconds = duration % 60UL;

	return (const char *)mkstr("%lud %luh %lum %lus", days, hours, minutes, seconds);
}

/* close and reopen stdin, stdout and stderr to /dev/null */
void close_standard_fds(void)
Expand Down
8 changes: 8 additions & 0 deletions lib/nsutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ extern int online_cpus(void);
extern const char *mkstr(const char *fmt, ...)
__attribute__((__format__(__printf__, 1, 2)));

/**
 * Format a duration given in seconds as a human readable string of the
 * form "<days>d <hours>h <minutes>m <seconds>s" (e.g. "1d 2h 3m 4s").
 * @note The returned string must *not* be free()'d!
 * @note NOTE(review): the string is produced by mkstr(); presumably it
 *       shares mkstr()'s result lifetime, so copy it if it has to outlive
 *       later mkstr() calls - confirm against mkstr()'s documentation.
 * @param[in] duration The duration in seconds
 * @return A pointer to the formatted string on success. Undefined on errors
 */
extern const char *duration_string(unsigned long);

/**
* Calculate the millisecond delta between two timeval structs
* @param[in] start The start time
Expand Down
10 changes: 7 additions & 3 deletions src/naemon/checks_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -1008,14 +1008,18 @@ static int handle_host_state(host *hst, int *alert_recorded)
/* update the problem id when transitioning to a problem state */
if (hst->last_state == STATE_UP) {
/* don't reset last problem id, or it will be zero the next time a problem is encountered */
hst->current_problem_id = next_problem_id;
next_problem_id++;
nm_free(hst->current_problem_id);
hst->current_problem_id = (char*)g_uuid_string_random();
hst->problem_start = current_time;
hst->problem_end = 0L;
}

/* clear the problem id when transitioning from a problem state to an UP state */
if (hst->current_state == STATE_UP) {
hst->last_problem_id = hst->current_problem_id;
hst->current_problem_id = 0L;
hst->current_problem_id = NULL;
if(hst->problem_start > 0)
hst->problem_end = current_time;
}

/* write the host state change to the main log file */
Expand Down
10 changes: 7 additions & 3 deletions src/naemon/checks_service.c
Original file line number Diff line number Diff line change
Expand Up @@ -686,14 +686,18 @@ int handle_async_service_check_result(service *temp_service, check_result *queue
/* update the problem id when transitioning to a problem state */
if (temp_service->last_state == STATE_OK) {
/* don't reset last problem id, or it will be zero the next time a problem is encountered */
temp_service->current_problem_id = next_problem_id;
next_problem_id++;
nm_free(temp_service->current_problem_id);
temp_service->current_problem_id = (char*)g_uuid_string_random();
temp_service->problem_start = current_time;
temp_service->problem_end = 0L;
}

/* clear the problem id when transitioning from a problem state to an OK state */
if (temp_service->current_state == STATE_OK) {
temp_service->last_problem_id = temp_service->current_problem_id;
temp_service->current_problem_id = 0L;
temp_service->current_problem_id = NULL;
if(temp_service->problem_start > 0)
temp_service->problem_end = current_time;
}
}

Expand Down
2 changes: 0 additions & 2 deletions src/naemon/globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,7 @@ extern int currently_running_service_checks;
extern int currently_running_host_checks;

extern unsigned long next_event_id;
extern unsigned long next_problem_id;
extern unsigned long next_comment_id;
extern unsigned long next_notification_id;

extern unsigned long modified_process_attributes;
extern unsigned long modified_host_process_attributes;
Expand Down
104 changes: 67 additions & 37 deletions src/naemon/macros.c
Original file line number Diff line number Diff line change
Expand Up @@ -627,10 +627,6 @@ static int grab_standard_host_macro_r(nagios_macros *mac, int macro_type, host *
objectlist *temp_objectlist = NULL;
time_t current_time = 0L;
unsigned long duration = 0L;
int days = 0;
int hours = 0;
int minutes = 0;
int seconds = 0;
char *buf1 = NULL;
char *buf2 = NULL;
int total_host_services = 0;
Expand Down Expand Up @@ -708,19 +704,10 @@ static int grab_standard_host_macro_r(nagios_macros *mac, int macro_type, host *
case MACRO_HOSTDURATION:
time(&current_time);
duration = (unsigned long)(current_time - temp_host->last_state_change);

if (macro_type == MACRO_HOSTDURATIONSEC)
*output = (char *)mkstr("%lu", duration);
else {

days = duration / 86400;
duration -= (days * 86400);
hours = duration / 3600;
duration -= (hours * 3600);
minutes = duration / 60;
duration -= (minutes * 60);
seconds = duration;
*output = (char *)mkstr("%dd %dh %dm %ds", days, hours, minutes, seconds);
*output = (char *)mkstr("%s", duration_string(duration));
}
break;
case MACRO_HOSTEXECUTIONTIME:
Expand Down Expand Up @@ -748,7 +735,7 @@ static int grab_standard_host_macro_r(nagios_macros *mac, int macro_type, host *
*output = (char *)mkstr("%d", temp_host->current_notification_number);
break;
case MACRO_HOSTNOTIFICATIONID:
*output = (char *)mkstr("%lu", temp_host->current_notification_id);
*output = temp_host->current_notification_id;
break;
case MACRO_HOSTEVENTID:
*output = (char *)mkstr("%lu", temp_host->current_event_id);
Expand All @@ -757,10 +744,32 @@ static int grab_standard_host_macro_r(nagios_macros *mac, int macro_type, host *
*output = (char *)mkstr("%lu", temp_host->last_event_id);
break;
case MACRO_HOSTPROBLEMID:
*output = (char *)mkstr("%lu", temp_host->current_problem_id);
if(temp_host->current_problem_id != NULL)
*output = temp_host->current_problem_id;
break;
case MACRO_LASTHOSTPROBLEMID:
*output = (char *)mkstr("%lu", temp_host->last_problem_id);
if(temp_host->last_problem_id != NULL)
*output = temp_host->last_problem_id;
break;
case MACRO_HOSTPROBLEMSTART:
*output = (char *)mkstr("%lu", (unsigned long)temp_host->problem_start);
break;
case MACRO_HOSTPROBLEMEND:
*output = (char *)mkstr("%lu", (unsigned long)temp_host->problem_end);
break;
case MACRO_HOSTPROBLEMDURATIONSEC:
case MACRO_HOSTPROBLEMDURATION:
if(temp_host->problem_end > 0) {
duration = (unsigned long)(temp_host->problem_end - temp_host->problem_start);
} else if(temp_host->problem_start > 0) {
time(&current_time);
duration = (unsigned long)(current_time - temp_host->problem_start);
}
if (macro_type == MACRO_HOSTPROBLEMDURATIONSEC)
*output = (char *)mkstr("%lu", duration);
else {
*output = (char *)mkstr("%s", duration_string(duration));
}
break;
case MACRO_HOSTACTIONURL:
if (temp_host->action_url)
Expand Down Expand Up @@ -950,10 +959,6 @@ static int grab_standard_service_macro_r(nagios_macros *mac, int macro_type, ser
objectlist *temp_objectlist = NULL;
time_t current_time = 0L;
unsigned long duration = 0L;
int days = 0;
int hours = 0;
int minutes = 0;
int seconds = 0;
char *buf1 = NULL;
char *buf2 = NULL;

Expand Down Expand Up @@ -1044,31 +1049,19 @@ static int grab_standard_service_macro_r(nagios_macros *mac, int macro_type, ser
break;
case MACRO_SERVICEDURATIONSEC:
case MACRO_SERVICEDURATION:

time(&current_time);
duration = (unsigned long)(current_time - temp_service->last_state_change);

/* get the state duration in seconds */
if (macro_type == MACRO_SERVICEDURATIONSEC)
*output = (char *)mkstr("%lu", duration);

/* get the state duration */
else {
days = duration / 86400;
duration -= (days * 86400);
hours = duration / 3600;
duration -= (hours * 3600);
minutes = duration / 60;
duration -= (minutes * 60);
seconds = duration;
*output = (char *)mkstr("%dd %dh %dm %ds", days, hours, minutes, seconds);
*output = (char *)mkstr("%s", duration_string(duration));
}
break;
case MACRO_SERVICENOTIFICATIONNUMBER:
*output = (char *)mkstr("%d", temp_service->current_notification_number);
break;
case MACRO_SERVICENOTIFICATIONID:
*output = (char *)mkstr("%lu", temp_service->current_notification_id);
*output = temp_service->current_notification_id;
break;
case MACRO_SERVICEEVENTID:
*output = (char *)mkstr("%lu", temp_service->current_event_id);
Expand All @@ -1077,10 +1070,30 @@ static int grab_standard_service_macro_r(nagios_macros *mac, int macro_type, ser
*output = (char *)mkstr("%lu", temp_service->last_event_id);
break;
case MACRO_SERVICEPROBLEMID:
*output = (char *)mkstr("%lu", temp_service->current_problem_id);
*output = temp_service->current_problem_id;
break;
case MACRO_LASTSERVICEPROBLEMID:
*output = (char *)mkstr("%lu", temp_service->last_problem_id);
*output = temp_service->last_problem_id;
break;
case MACRO_SERVICEPROBLEMSTART:
*output = (char *)mkstr("%lu", (unsigned long)temp_service->problem_start);
break;
case MACRO_SERVICEPROBLEMEND:
*output = (char *)mkstr("%lu", (unsigned long)temp_service->problem_end);
break;
case MACRO_SERVICEPROBLEMDURATIONSEC:
case MACRO_SERVICEPROBLEMDURATION:
if(temp_service->problem_end > 0) {
duration = (unsigned long)(temp_service->problem_end - temp_service->problem_start);
} else if(temp_service->problem_start > 0) {
time(&current_time);
duration = (unsigned long)(current_time - temp_service->problem_start);
}
if (macro_type == MACRO_SERVICEPROBLEMDURATIONSEC)
*output = (char *)mkstr("%lu", duration);
else {
*output = (char *)mkstr("%s", duration_string(duration));
}
break;
case MACRO_SERVICEACTIONURL:
if (temp_service->action_url)
Expand Down Expand Up @@ -1569,6 +1582,11 @@ static int grab_macrox_value_r(nagios_macros *mac, int macro_type, char *arg1, c
case MACRO_LASTHOSTPROBLEMID:
case MACRO_LASTHOSTSTATE:
case MACRO_LASTHOSTSTATEID:
case MACRO_HOSTPROBLEMSTART:
case MACRO_HOSTPROBLEMEND:
case MACRO_HOSTPROBLEMDURATIONSEC:
case MACRO_HOSTPROBLEMDURATION:


/* a standard host macro */
if (arg2 == NULL) {
Expand Down Expand Up @@ -1688,6 +1706,10 @@ static int grab_macrox_value_r(nagios_macros *mac, int macro_type, char *arg1, c
case MACRO_LASTSERVICEPROBLEMID:
case MACRO_LASTSERVICESTATE:
case MACRO_LASTSERVICESTATEID:
case MACRO_SERVICEPROBLEMSTART:
case MACRO_SERVICEPROBLEMEND:
case MACRO_SERVICEPROBLEMDURATIONSEC:
case MACRO_SERVICEPROBLEMDURATION:

/* use saved service pointer */
if (arg1 == NULL && arg2 == NULL) {
Expand Down Expand Up @@ -2691,6 +2713,14 @@ int init_macrox_names(void)
add_macrox_name(HOSTVALUE);
add_macrox_name(SERVICEVALUE);
add_macrox_name(PROBLEMVALUE);
add_macrox_name(HOSTPROBLEMSTART);
add_macrox_name(HOSTPROBLEMEND);
add_macrox_name(HOSTPROBLEMDURATIONSEC);
add_macrox_name(HOSTPROBLEMDURATION);
add_macrox_name(SERVICEPROBLEMSTART);
add_macrox_name(SERVICEPROBLEMEND);
add_macrox_name(SERVICEPROBLEMDURATIONSEC);
add_macrox_name(SERVICEPROBLEMDURATION);

return OK;
}
Expand Down
12 changes: 10 additions & 2 deletions src/naemon/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
/****************** MACRO DEFINITIONS *****************/
#define MACRO_ENV_VAR_PREFIX "NAGIOS_"
#define MAX_USER_MACROS 256 /* max $USERx$ macros */
#define MACRO_X_COUNT 156 /* size of macro_x[] array */
#define MACRO_X_COUNT 164 /* size of macro_x[] array */

NAGIOS_BEGIN_DECL

Expand Down Expand Up @@ -201,7 +201,15 @@ typedef struct nagios_macros nagios_macros;
#define MACRO_HOSTVALUE 153
#define MACRO_SERVICEVALUE 154
#define MACRO_PROBLEMVALUE 155

#define MACRO_HOSTPROBLEMSTART 156
#define MACRO_HOSTPROBLEMEND 157
#define MACRO_HOSTPROBLEMDURATIONSEC 158
#define MACRO_HOSTPROBLEMDURATION 159
#define MACRO_SERVICEPROBLEMSTART 160
#define MACRO_SERVICEPROBLEMEND 161
#define MACRO_SERVICEPROBLEMDURATIONSEC 162
#define MACRO_SERVICEPROBLEMDURATION 163
/* NOTE: update MACRO_X_COUNT above to highest macro + 1 */

/************* MACRO CLEANING OPTIONS *****************/
#define STRIP_ILLEGAL_MACRO_CHARS 1
Expand Down
2 changes: 1 addition & 1 deletion src/naemon/nebmodules.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ NAGIOS_BEGIN_DECL

/***** MODULE VERSION INFORMATION *****/
#define NEB_API_VERSION(x) int __neb_api_version = x;
#define CURRENT_NEB_API_VERSION 6
#define CURRENT_NEB_API_VERSION 7


/***** MODULE INFORMATION *****/
Expand Down
12 changes: 6 additions & 6 deletions src/naemon/notifications.c
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,8 @@ int service_notification(service *svc, int type, char *not_author, char *not_dat
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "Current notification number: %d (%s)\n", svc->current_notification_number, (increment_notification_number == TRUE) ? "incremented" : "changed");

/* save and increase the current notification id */
svc->current_notification_id = next_notification_id;
next_notification_id++;
nm_free(svc->current_notification_id);
svc->current_notification_id = g_uuid_string_random();

log_debug_info(DEBUGL_NOTIFICATIONS, 2, "Creating list of contacts to be notified.\n");

Expand Down Expand Up @@ -478,7 +478,7 @@ int service_notification(service *svc, int type, char *not_author, char *not_dat
mac.x[MACRO_NOTIFICATIONNUMBER] = nm_strdup(mac.x[MACRO_SERVICENOTIFICATIONNUMBER]);

/* set the notification id macro */
nm_asprintf(&mac.x[MACRO_SERVICENOTIFICATIONID], "%lu", svc->current_notification_id);
nm_asprintf(&mac.x[MACRO_SERVICENOTIFICATIONID], "%s", svc->current_notification_id);

/* notify each contact (duplicates have been removed) */
for (temp_notification = notification_list; temp_notification != NULL; temp_notification = temp_notification->next) {
Expand Down Expand Up @@ -1283,8 +1283,8 @@ int host_notification(host *hst, int type, char *not_author, char *not_data, int
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "Current notification number: %d (%s)\n", hst->current_notification_number, (increment_notification_number == TRUE) ? "incremented" : "unchanged");

/* save and increase the current notification id */
hst->current_notification_id = next_notification_id;
next_notification_id++;
nm_free(hst->current_notification_id);
hst->current_notification_id = g_uuid_string_random();

log_debug_info(DEBUGL_NOTIFICATIONS, 2, "Creating list of contacts to be notified.\n");

Expand Down Expand Up @@ -1360,7 +1360,7 @@ int host_notification(host *hst, int type, char *not_author, char *not_data, int
mac.x[MACRO_NOTIFICATIONNUMBER] = nm_strdup(mac.x[MACRO_HOSTNOTIFICATIONNUMBER]);

/* set the notification id macro */
nm_asprintf(&mac.x[MACRO_HOSTNOTIFICATIONID], "%lu", hst->current_notification_id);
nm_asprintf(&mac.x[MACRO_HOSTNOTIFICATIONID], "%s", hst->current_notification_id);

/* notify each contact (duplicates have been removed) */
for (temp_notification = notification_list; temp_notification != NULL; temp_notification = temp_notification->next) {
Expand Down
3 changes: 3 additions & 0 deletions src/naemon/objects_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ void destroy_host(host *this_host)
nm_free(this_host->icon_image_alt);
nm_free(this_host->vrml_image);
nm_free(this_host->statusmap_image);
nm_free(this_host->current_notification_id);
nm_free(this_host->last_problem_id);
nm_free(this_host->current_problem_id);
nm_free(this_host);
}

Expand Down
8 changes: 5 additions & 3 deletions src/naemon/objects_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,10 @@ struct host {
int current_attempt;
unsigned long current_event_id;
unsigned long last_event_id;
unsigned long current_problem_id;
unsigned long last_problem_id;
char *current_problem_id;
char *last_problem_id;
time_t problem_start;
time_t problem_end;
double latency;
double execution_time;
int is_executing;
Expand All @@ -110,7 +112,7 @@ struct host {
int notified_on;
int current_notification_number;
int no_more_notifications;
unsigned long current_notification_id;
char *current_notification_id;
int check_flapping_recovery_notification;
int scheduled_downtime_depth;
int pending_flex_downtime; /* UNUSED */
Expand Down
3 changes: 3 additions & 0 deletions src/naemon/objects_service.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,9 @@ void destroy_service(service *this_service, int truncate_lists)
nm_free(this_service->action_url);
nm_free(this_service->icon_image);
nm_free(this_service->icon_image_alt);
nm_free(this_service->current_notification_id);
nm_free(this_service->last_problem_id);
nm_free(this_service->current_problem_id);
nm_free(this_service);
}

Expand Down
Loading
Loading