host checks not happening after first notification

Terry td3201 at gmail.com
Sun Apr 10 02:34:54 CEST 2011


On Sat, Apr 9, 2011 at 6:06 PM, Terry <td3201 at gmail.com> wrote:
> Hello,
>
> I am seeing a weird condition where host checks stop after the first
> notification.  Here's the config:
>
> execute_host_checks=1
>
> define host{
>        name                            generic-host
>        check_command                   check-host-alive
>        check_period                    24x7
>        notification_interval           30
>        notification_options            d,r
>        notifications_enabled           1
>        event_handler_enabled           1
>        flap_detection_enabled          1
>        failure_prediction_enabled      1
>        process_perf_data               1
>        retain_status_information       1
>        retain_nonstatus_information    1
>        register                        0
>        }
> define host{
>        name                            generic-host-10
>        use                             generic-host
>        notification_period             24x7
>        check_interval                  5
>        retry_interval                  1
>        max_check_attempts              3
>        register                        0
> }
> define host{
>        name                            foo-10
>        use                             generic-host-10
>        contact_groups                  +foo_primary
>        register                        0
>        }
> define host{
>        use                     foo-10
>        host_name               testpage
>        hostgroups              windows,vmguest_windows
>        notification_interval   5
>        parents                 firewall
>        address                 10.235.235.235
>        }
>
> define hostescalation{
>        hostgroup_name          z-allhosts
>        contacts                support at foo.com,support-email-critical
>        first_notification      1
>        last_notification       1
>        notification_interval   0
>        escalation_options      d
>        }
> define hostescalation{
>        hostgroup_name          z-allhosts
>        contact_groups          +foo_secondary
>        first_notification      3
>        last_notification       4
>        notification_interval   30
>        escalation_options      d,r
>        }
> define hostescalation{
>        hostgroup_name          z-allhosts
>        contact_groups          +foo_tertiary,foo_secondary
>        first_notification      5
>        last_notification       0
>        notification_interval   30
>        escalation_options      d,r
>        }
>
>
> Here's a log of the activity.  You see the first host notification, then
> nothing further for the host (only service alerts):
>
> [1302388222] HOST ALERT: testpage;DOWN;SOFT;1;CRITICAL - Plugin timed
> out after 10 seconds
> [1302388296] HOST ALERT: testpage;DOWN;SOFT;2;PING CRITICAL - Packet loss = 100%
> [1302388346] SERVICE ALERT: testpage;cpu -
> nrpe;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after 50 seconds.
> [1302388376] HOST ALERT: testpage;DOWN;HARD;3;PING CRITICAL - Packet loss = 100%
> [1302388376] HOST NOTIFICATION:
> joe-epager;testpage;DOWN;host-notify-by-epager;PING CRITICAL - Packet
> loss = 100%
> [1302388376] HOST NOTIFICATION:
> joe at DOM.COM;testpage;DOWN;host-notify-by-email;PING CRITICAL - Packet
> loss = 100%
> [1302388377] HOST NOTIFICATION:
> support-email-critical;testpage;DOWN;host-notify-by-email;PING
> CRITICAL - Packet loss = 100%
> [1302388446] SERVICE ALERT: testpage;disk drives -
> nrpe;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after 50 seconds.
> [1302388547] SERVICE ALERT: testpage;memory - page -
> nrpe;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after 50 seconds.
> [1302388657] SERVICE ALERT: testpage;memory - physical -
> nrpe;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after 50 seconds.
> [1302388757] SERVICE ALERT:
> testpage;nrpeclient;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after
> 50 seconds.
> [1302389057] SERVICE ALERT:
> testpage;nrpeclient;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after
> 50 seconds.
> [1302389357] SERVICE ALERT:
> testpage;nrpeclient;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after
> 50 seconds.
> [1302389658] SERVICE ALERT:
> testpage;nrpeclient;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after
> 50 seconds.
>
>
> I appreciate the help.
>

More info:

[04-09-2011 19:29:17] SERVICE ALERT:
testpage;nrpeclient;CRITICAL;HARD;1;CHECK_NRPE: Socket timeout after
50 seconds.

I get this event every 5 minutes.  It's just a service on this box.  I
thought that when a host is down, its service checks are suppressed.  Is
that not the case?

------------------------------------------------------------------------------
Xperia(TM) PLAY
It's a major breakthrough. An authentic gaming
smartphone on the nation's most reliable network.
And it wants your games.
http://p.sf.net/sfu/verizon-sfdev
_______________________________________________
Nagios-users mailing list
Nagios-users at lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nagios-users
::: Please include Nagios version, plugin version (-v) and OS when reporting any issue. 
::: Messages without supporting info will risk being sent to /dev/null





More information about the Users mailing list