Management service down (Virtual)-checkpoint-gaia,secureplatform

error
health-checks
checkpoint
gaiasecureplatform
Management service down (Virtual)-checkpoint-gaia,secureplatform
0

#1

Management service down (Virtual)-checkpoint-gaia,secureplatform

Vendor: checkpoint

OS: gaia,secureplatform

Description:
Alert if a management component is down on a device.

Remediation Steps:
This may be due to someone stopping the management component itself, a licensing issue, or a performance issue.
The management service is handled by the “fwm” process (for each domain/CMA). Run “mdsstat” for more details. Review the licenses installed on the device, as well as whether or not anyone has run cpstop recently.

How does this work?
By using the Check Point built-in “cpstat mg” command, the status of the management server is retrieved.

Why is this important?
Unless the management services are running correctly, it might not be possible to manage other gateways.

Without Indeni how would you find this?
An administrator could log in and manually run the command.

cpstat-mg

#! META
name: cpstat-mg
description: Shows status of management server
type: monitoring
monitoring_interval: 5 minutes
requires:
    vendor: checkpoint
    or:
        -
            os.name: gaia
        -
            os.name: secureplatform
    role-management: true
    mds:
        neq: true

#! COMMENTS
mgmt-status:
    why: |
        Unless the management services are running correctly, it might not be possible to manage other gateways.
    how: |
        By using the Check Point built-in "cpstat mg" command, the status of the management server is retrieved.
    without-indeni: |
        An administrator could log in and manually run the command.
    can-with-snmp: false
    can-with-syslog: false
    vendor-provided-management: |
        Retrieving the management status is only available from the command line.

mgmt-status-description:
    skip-documentation: true

#! REMOTE::SSH
${nice-path} -n 15 cpstat mg

#! PARSER::AWK

############
# ToDo: Add the status message to a descriptive tag in the future
###########

# Parser for the "cpstat mg" output. The lines it reacts to look like:
#   Is started:    1
#   Active status: active        (or "standby")
#   Status:        OK
# It always writes the "mgmt-status" metric (1 when all three checks pass, 0 otherwise)
# and, only in the 0 case, a "mgmt-status-description" string with the raw parsed values.

BEGIN {
	# The lines of interest are laid out as "label: value", so split fields on ":" instead of whitespace
	FS = ":"
}


# Is started:    1
/Is started/ {
	startedFlag = trim($2)
}


# Active status: active
# Active status: standby
/Active status/ {
	activeText = trim($2)
	# Either of the two values is accepted; anything else leaves the flag unset
	if (activeText == "active" || activeText == "standby") {
		activeOk = 1
	}
}


# Status:        OK
# NOTE(review): the pattern is unanchored and case-sensitive, so any other line containing
# "Status" would also be picked up here - confirm against real command output.
/Status/ {
	statusText = trim($2)
	if (statusText == "OK") {
		statusOk = 1
	}
}


END {
	# All three checks must have passed for the service to be reported as up
	mgmtStatus = (startedFlag == 1 && activeOk == 1 && statusOk == 1) ? 1 : 0

	if (mgmtStatus == 0) {
		# Publish what was actually parsed so the failure can be troubleshot
		details = "is-started: " startedFlag " - active-status-message: " activeText " - status-message: " statusText
		writeComplexMetricString("mgmt-status-description", null, details)
	}

	# The status metric is written on every run, whether healthy or not
	writeDoubleMetricWithLiveConfig("mgmt-status", null, "gauge", "300", mgmtStatus, "Management Services Status", "state", "")
}

cross_vendor_mgmt_component_down_vsx

package com.indeni.server.rules.library.templatebased.crossvendor

import com.indeni.ruleengine.expressions.conditions.Contains
import com.indeni.ruleengine.utility.LastNNonEmptyValues
import com.indeni.server.rules.RuleContext
import com.indeni.server.rules.library.{ConditionalRemediationSteps, StateDownTemplateRule}

/**
  * Rule definition for the "Management Devices: Management service down (Virtual)" alert.
  *
  * Everything here is configuration passed to [[StateDownTemplateRule]]: the rule reads the
  * `mgmt-status` metric, sets `vs.name` as the applicable metric tag, and uses a history
  * length of 3.
  *
  * The down-condition is overridden with `Contains(LastNNonEmptyValues(...), ...)`. Judging
  * by the helper names, this fires when the looked-for state appears anywhere in the last
  * `historyLength` non-empty samples - TODO confirm against the rule-engine helpers.
  *
  * @param context rule context forwarded unchanged to [[StateDownTemplateRule]]
  */
case class cross_vendor_mgmt_component_down_vsx(context: RuleContext) extends StateDownTemplateRule(
  context,
  ruleName = "cross_vendor_mgmt_component_down_vsx",
  ruleFriendlyName = "Management Devices: Management service down (Virtual)",
  ruleDescription = "Alert if a management component is down on a device.",
  metricName = "mgmt-status",
  historyLength = 3,
  // Custom condition built from the window size, the time series and the state to detect
  generateStateDownCondition = (window, series, downState) => Contains(LastNNonEmptyValues(series, window), downState),
  applicableMetricTag = "vs.name",
  alertItemsHeader = "Management Systems Affected",
  alertDescription = "One or more management components on this device are down.",
  baseRemediationText = "This may be due to someone stopping the management component itself, a licensing or a performance issue."
)(
  // Vendor-specific remediation text for Check Point devices
  ConditionalRemediationSteps.VENDOR_CP ->
    """The management service is handled by the "fwm" process (for each domain/CMA). Run "mdsstat" for more details. Review the licenses installed on the device, as well as whether or not anyone has run cpstop recently."""
)