[PATCH 2/2] wproc callbacks: Reimplement logging

Max Sikstrom max.sikstrom at op5.com
Tue Aug 27 13:39:40 CEST 2013


From: Max Sikström <msikstrom at op5.com>

Some per-object logging was removed with previous commit, which can be useful
in many cases. This commit reimplements that logging in the related callbacks.

Using logging in the callbacks instead of centrally means that more information
can be outputted in the log, because each callback has much bigger domain
knowledge than what the worker process itself has. Using this structure also
keeps the worker API as clean as possible from domain-specific information.

Signed-off-by: Max Sikström <msikstrom at op5.com>
---
 base/checks.c        |    7 +++++
 base/notifications.c |   18 ++++--------
 base/sehandlers.c    |   73 +++++++++++++++++++++++--------------------------
 base/workers.c       |   64 +++++++++++++++++++++++++++++--------------
 include/workers.h    |   10 +++++++
 xdata/xpddefault.c   |   16 +++++++---
 6 files changed, 111 insertions(+), 77 deletions(-)

diff --git a/base/checks.c b/base/checks.c
index c5c1ff2..e81ec35 100644
--- a/base/checks.c
+++ b/base/checks.c
@@ -68,6 +68,13 @@ int reap_check_results(void) {
 static void check_complete_callback(struct wproc_result *wpres, void *data, int flags) {
 	check_result *cr = (check_result*)data;
 	if(wpres) {
+		if (cr->object_check_type == SERVICE_CHECK) {
+			wproc_handle_err(wpres, WPROC_ERROR_ALL & ~WPROC_ERROR_EXITCODE, "Service check", "Host: %s Service: %s", cr->host_name, cr->service_description);
+			}
+		else if(cr->object_check_type == HOST_CHECK) {
+			wproc_handle_err(wpres, WPROC_ERROR_ALL & ~WPROC_ERROR_EXITCODE, "Host check", "Host: %s", cr->host_name);
+			}
+
 		/* Got a result. If wpres == NULL, only clean up */
 
 		memcpy(&cr->rusage, &wpres->rusage, sizeof(wpres->rusage));
diff --git a/base/notifications.c b/base/notifications.c
index e9b6dee..7f85c83 100644
--- a/base/notifications.c
+++ b/base/notifications.c
@@ -73,21 +73,15 @@ const char *notification_reason_name(unsigned int reason_type)
 static void notification_callback(struct wproc_result *wpres, void *data, int flags) {
 	struct notification_job *oj = (struct notification_job *)data;
 	if(wpres) {
-		double runtime = ((double)wpres->runtime.tv_sec) + ((double)wpres->runtime.tv_usec) / 1000000.0;
-		if (wpres->early_timeout) {
-			if (oj->service_description) {
-				logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Notifying contact '%s' of service '%s' on host '%s' by command '%s' timed out after %.2f seconds\n",
-					  oj->contact_name, oj->service_description,
-					  oj->host_name, wpres->command, runtime);
-			} else {
-				logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Notifying contact '%s' of host '%s' by command '%s' timed out after %.2f seconds\n",
-					  oj->contact_name, oj->host_name,
-					  wpres->command, runtime);
+		if (oj->service_description) {
+			wproc_handle_err(wpres, WPROC_ERROR_ALL, "Service notification", "Host: %s Service: %s Contact: %s\n", oj->host_name, oj->service_description, oj->contact_name );
+			}
+		else {
+			wproc_handle_err(wpres, WPROC_ERROR_ALL, "Host notification", "Host: %s Contact: %s\n", oj->host_name, oj->contact_name );
 			}
 		}
-	}
 	free(oj);
-}
+	}
 
 /* notify contacts about a service problem or recovery */
 int service_notification(service *svc, int type, char *not_author, char *not_data, int options) {
diff --git a/base/sehandlers.c b/base/sehandlers.c
index 8df5f25..558eeb8 100644
--- a/base/sehandlers.c
+++ b/base/sehandlers.c
@@ -36,22 +36,42 @@
 #endif
 
 /******************************************************************/
-/************* OBSESSIVE COMPULSIVE HANDLER FUNCTIONS *************/
+/********************* JOB COMPLETE CALLBACKS *********************/
 /******************************************************************/
 
-static void ocsp_complete_callback(struct wproc_result *wpres, void *data, int flags) {
-	service *svc = (service *)data;
+struct eventhandler_job {
+	service *svc;
+	host *hst;
+	const char *msg;
+};
+
+static void eventhandler_job_complete(struct wproc_result *wpres, void *data, int flags) {
+	struct eventhandler_job *job = (struct eventhandler_job*)data;
 	if(wpres) {
-		if (wpres->early_timeout) {
-			double runtime = ((double)wpres->runtime.tv_sec) + ((double)wpres->runtime.tv_usec) / 1000000.0;
-			logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: OCSP command '%s' for service '%s' on host '%s' timed out after %.2f seconds\n",
-				  wpres->command, svc->description, svc->host_name,
-				  runtime);
+		if( job->svc ) {
+			wproc_handle_err(wpres, WPROC_ERROR_ALL, job->msg, "Host: %s Service: %s\n", job->svc->host_name, job->svc->description );
+			}
+		else if( job->hst ) {
+			wproc_handle_err(wpres, WPROC_ERROR_ALL, job->msg, "Host: %s\n", job->hst->name );
+			}
 		}
+	free(job);
 	}
+
+static int eventhandler_run_job(char *command_line, int timeout, host *hst, service *svc, char *msg, nagios_macros *mac) {
+	struct eventhandler_job *job = calloc(1, sizeof(struct eventhandler_job));
+	job->hst = hst;
+	job->svc = svc;
+	job->msg = msg;
+	/* run the command through a worker */
+	return wproc_run_callback(command_line, timeout, eventhandler_job_complete, (void*)job, mac);
 }
 
 
+/******************************************************************/
+/************* OBSESSIVE COMPULSIVE HANDLER FUNCTIONS *************/
+/******************************************************************/
+
 /* handles service check results in an obsessive compulsive manner... */
 int obsessive_compulsive_service_check_processor(service *svc) {
 	char *raw_command = NULL;
@@ -102,7 +122,7 @@ int obsessive_compulsive_service_check_processor(service *svc) {
 	log_debug_info(DEBUGL_CHECKS, 2, "Processed obsessive compulsive service processor command line: %s\n", processed_command);
 
 	/* run the command through a worker */
-	wproc_run_callback(processed_command, ocsp_timeout, ocsp_complete_callback, (void*)svc, &mac);
+	eventhandler_run_job(processed_command, ocsp_timeout, svc->host_ptr, svc, "OCSP", &mac);
 
 	/* free memory */
 	clear_volatile_macros_r(&mac);
@@ -112,16 +132,6 @@ int obsessive_compulsive_service_check_processor(service *svc) {
 	}
 
 
-static void ochp_complete_callback(struct wproc_result *wpres, void *data, int flags) {
-	host *hst = (host *)data;
-	if(wpres) {
-		if (wpres->early_timeout) {
-			double runtime = ((double)wpres->runtime.tv_sec) + ((double)wpres->runtime.tv_usec) / 1000000.0;
-			logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: OCHP command '%s' for host '%s' timed out after %.2f seconds\n",
-				  wpres->command, hst->name, runtime);
-		}
-	}
-}
 
 /* handles host check results in an obsessive compulsive manner... */
 int obsessive_compulsive_host_check_processor(host *hst) {
@@ -167,7 +177,7 @@ int obsessive_compulsive_host_check_processor(host *hst) {
 	log_debug_info(DEBUGL_CHECKS, 2, "Processed obsessive compulsive host processor command line: %s\n", processed_command);
 
 	/* run the command through a worker */
-	wproc_run_callback(processed_command, ochp_timeout, ochp_complete_callback, hst, &mac);
+	eventhandler_run_job(processed_command, ochp_timeout, hst, NULL, "OCHP", &mac);
 
 	/* free memory */
 	clear_volatile_macros_r(&mac);
@@ -183,17 +193,6 @@ int obsessive_compulsive_host_check_processor(host *hst) {
 /**************** SERVICE EVENT HANDLER FUNCTIONS *****************/
 /******************************************************************/
 
-static void evthandler_complete_with_msg(struct wproc_result *wpres, void *data, int flags) {
-	char *msg = (char *)data;
-	if(wpres) {
-		if (wpres->early_timeout) {
-			double runtime = ((double)wpres->runtime.tv_sec) + ((double)wpres->runtime.tv_usec) / 1000000.0;
-			logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE, msg,
-					wpres->command, runtime);
-		}
-	}
-}
-
 
 /* handles changes in the state of a service */
 int handle_service_event(service *svc) {
@@ -313,8 +312,7 @@ int run_global_service_event_handler(nagios_macros *mac, service *svc) {
 #endif
 
 	/* run the command through a worker */
-	result = wproc_run_callback(processed_command, event_handler_timeout, evthandler_complete_with_msg,
-			"Warning: Global service event handler command '%s' timed out after %.2f seconds\n" , mac);
+	result = eventhandler_run_job(processed_command, event_handler_timeout, svc->host_ptr, svc, "global service event handler", mac);
 
 #ifdef USE_EVENT_BROKER
 	/* get end time */
@@ -406,8 +404,7 @@ int run_service_event_handler(nagios_macros *mac, service *svc) {
 #endif
 
 	/* run the command through a worker */
-	result = wproc_run_callback(processed_command, event_handler_timeout, evthandler_complete_with_msg,
-			"Warning: Service event handler command '%s' timed out after %.2f seconds\n" , mac);
+	result = eventhandler_run_job(processed_command, event_handler_timeout, svc->host_ptr, svc, "service event handler", mac);
 
 #ifdef USE_EVENT_BROKER
 	/* get end time */
@@ -543,8 +540,7 @@ int run_global_host_event_handler(nagios_macros *mac, host *hst) {
 #endif
 
 	/* run the command through a worker */
-	result = wproc_run_callback(processed_command, event_handler_timeout, evthandler_complete_with_msg,
-			"Warning: Global host event handler command '%s' timed out after %.2f seconds\n" , mac);
+	result = eventhandler_run_job(processed_command, event_handler_timeout, hst, NULL, "global host event handler", mac);
 
 #ifdef USE_EVENT_BROKER
 	/* get end time */
@@ -634,8 +630,7 @@ int run_host_event_handler(nagios_macros *mac, host *hst) {
 #endif
 
 	/* run the command through a worker */
-	result = wproc_run_callback(processed_command, event_handler_timeout, evthandler_complete_with_msg,
-			"Warning: Host event handler command '%s' timed out after %.2f seconds\n" , mac);
+	result = eventhandler_run_job(processed_command, event_handler_timeout, hst, NULL, "host event handler", mac);
 
 #ifdef USE_EVENT_BROKER
 	/* get end time */
diff --git a/base/workers.c b/base/workers.c
index a6db4f5..2c773b3 100644
--- a/base/workers.c
+++ b/base/workers.c
@@ -492,7 +492,7 @@ static void fo_reassign_wproc_job(void *job_)
 
 static int handle_worker_result(int sd, int events, void *arg)
 {
-	char *buf, *error_reason = NULL;
+	char *buf;
 	unsigned long size;
 	int ret;
 	static struct kvvec kvv = KVVEC_INITIALIZER;
@@ -563,26 +563,6 @@ static int handle_worker_result(int sd, int events, void *arg)
 		if (wpres.error_code == ETIME) {
 			wpres.early_timeout = TRUE;
 		}
-		if (wpres.early_timeout) {
-			asprintf(&error_reason, "timed out after %.2fs", tv_delta_f(&wpres.start, &wpres.stop));
-		}
-		else if (WIFSIGNALED(wpres.wait_status)) {
-			asprintf(&error_reason, "died by signal %d%s after %.2f seconds",
-			         WTERMSIG(wpres.wait_status),
-			         WCOREDUMP(wpres.wait_status) ? " (core dumped)" : "",
-			         tv_delta_f(&wpres.start, &wpres.stop));
-		}
-
-		if (error_reason) {
-			logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: job %d from worker %s %s",
-			      job->id, wp->name, error_reason);
-			logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   command: %s\n", job->command);
-			logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   early_timeout=%d; exited_ok=%d; wait_status=%d; error_code=%d;\n",
-			      wpres.early_timeout, wpres.exited_ok, wpres.wait_status, wpres.error_code);
-			wproc_logdump_buffer(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   stderr", wpres.outerr);
-			wproc_logdump_buffer(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   stdout", wpres.outstd);
-		}
-		my_free(error_reason);
 
 		/* Run the callback with the wpres struct as argument */
 		run_job_callback(job, &wpres, 0);
@@ -781,6 +761,48 @@ int init_workers(int desired_workers)
 	return 0;
 }
 
+int wproc_handle_err(struct wproc_result *wpres, int errflags,  const char *job_type, const char *fmt, ...) {
+	char *error_reason = NULL;
+	char *buffer = NULL;
+	va_list ap;
+
+	if (wpres->early_timeout && (errflags & WPROC_ERROR_EARLYTIMEOUT)) {
+		asprintf(&error_reason, "timed out after %.2fs", tv_delta_f(&wpres->start, &wpres->stop));
+		}
+	else if (WIFSIGNALED(wpres->wait_status) && (errflags & WPROC_ERROR_SIGNAL)) {
+		asprintf(&error_reason, "died by signal %d%s after %.2f seconds",
+			WTERMSIG(wpres->wait_status),
+			WCOREDUMP(wpres->wait_status) ? " (core dumped)" : "",
+			tv_delta_f(&wpres->start, &wpres->stop));
+		}
+    else if (WEXITSTATUS(wpres->wait_status) != 0 && (errflags & WPROC_ERROR_EXITCODE)) {
+		asprintf(&error_reason, "Exited with return code %d",
+				WEXITSTATUS(wpres->wait_status));
+		}
+
+	if (error_reason) {
+		logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: %s job %d: %s", job_type, wpres->job_id, error_reason);
+		logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   command: %s\n", wpres->command);
+		logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   early_timeout=%d; exited_ok=%d; wait_status=%d; error_code=%d;\n",
+		wpres->early_timeout, wpres->exited_ok, wpres->wait_status, wpres->error_code);
+
+		if(fmt) {
+			va_start(ap, fmt);
+			if(vasprintf(&buffer, fmt, ap) > 0) {
+				logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   %s\n", buffer);
+				free(buffer);
+				}
+			va_end(ap);
+			}
+
+		wproc_logdump_buffer(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   stderr", wpres->outerr);
+		wproc_logdump_buffer(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   stdout", wpres->outstd);
+		my_free(error_reason);
+		return 1;
+		}
+	return 0;
+	}
+
 /*
  * Handles adding the command and macros to the kvvec,
  * as well as shipping the command off to a designated
diff --git a/include/workers.h b/include/workers.h
index cf28287..172e7b4 100644
--- a/include/workers.h
+++ b/include/workers.h
@@ -5,6 +5,12 @@
 
 #define WPROC_FORCE  (1 << 0)
 
+#define WPROC_ERROR_EXITCODE (1 << 0)
+#define WPROC_ERROR_EARLYTIMEOUT (1 << 1)
+#define WPROC_ERROR_SIGNAL (1 << 3)
+
+#define WPROC_ERROR_ALL ((1 << 4)-1)
+
 NAGIOS_BEGIN_DECL;
 
 /* FIXME: Needs to be somewhere better... */
@@ -36,6 +42,10 @@ extern int wproc_can_spawn(struct load_control *lc);
 extern void free_worker_memory(int flags);
 extern int workers_alive(void);
 extern int init_workers(int desired_workers);
+
+extern int wproc_handle_err(struct wproc_result *wpres, int errflags, const char *job_type, const char *fmt, ...)
+	__attribute__((__format__(__printf__, 4, 5)));
+
 extern int wproc_run_callback(char *command_line, int timeout, void (*cb)(struct wproc_result *, void *, int), void *data, nagios_macros *mac);
 
 NAGIOS_END_DECL;
diff --git a/xdata/xpddefault.c b/xdata/xpddefault.c
index 94bddbf..b8913ef 100644
--- a/xdata/xpddefault.c
+++ b/xdata/xpddefault.c
@@ -42,11 +42,17 @@ static int     host_perfdata_fd = -1;
 static int     service_perfdata_fd = -1;
 
 /******************************************************************/
-/********************** CHECK COMPLETE CALLBACK *******************/
+/********* PERFORMANCE DATA PROCESSING COMPLETE CALLBACK **********/
 /******************************************************************/
 
-static void perfdata_command_callback(struct wproc_result *wpres, void *data, int flags) {
-	/* Dummy method as callback. we don't care about the result for now */
+static void perfdata_service_callback(struct wproc_result *wpres, void *data, int flags) {
+	service *svc = (service*)data;
+	wproc_handle_err(wpres, WPROC_ERROR_ALL, "Service perfdata processing", "Host: %s Service: %s\n", svc->host_name, svc->description );
+}
+
+static void perfdata_host_callback(struct wproc_result *wpres, void *data, int flags) {
+	host *hst = (host*)data;
+	wproc_handle_err(wpres, WPROC_ERROR_ALL, "Host perfdata processing", "Host: %s\n", hst->name );
 }
 
 /******************************************************************/
@@ -345,7 +351,7 @@ int xpddefault_run_service_performance_data_command(nagios_macros *mac, service
 	log_debug_info(DEBUGL_PERFDATA, 2, "Processed service performance data command line: %s\n", processed_command_line);
 
 	/* run the command */
-	wproc_run_callback(processed_command_line, perfdata_timeout, perfdata_command_callback, NULL, NULL);
+	wproc_run_callback(processed_command_line, perfdata_timeout, perfdata_service_callback, (void*)svc, NULL);
 
 	/* free memory */
 	my_free(processed_command_line);
@@ -386,7 +392,7 @@ int xpddefault_run_host_performance_data_command(nagios_macros *mac, host *hst)
 	log_debug_info(DEBUGL_PERFDATA, 2, "Processed host performance data command line: %s\n", processed_command_line);
 
 	/* run the command */
-	wproc_run_callback(processed_command_line, perfdata_timeout, perfdata_command_callback, NULL, NULL);
+	wproc_run_callback(processed_command_line, perfdata_timeout, perfdata_host_callback, (void*)hst, NULL);
 
 	/* free memory */
 	my_free(processed_command_line);
-- 
1.7.1


------------------------------------------------------------------------------
Introducing Performance Central, a new site from SourceForge and 
AppDynamics. Performance Central is your source for news, insights, 
analysis and resources for efficient Application Performance Management. 
Visit us today!
http://pubads.g.doubleclick.net/gampad/clk?id=48897511&iu=/4140/ostg.clktrk
_______________________________________________
Nagios-devel mailing list
Nagios-devel at lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nagios-devel


More information about the Developers mailing list