High load average-checkpoint-secureplatform
Vendor: checkpoint
OS: secureplatform
Description:
indeni will trigger an issue when the load average on a given device seems high.
Remediation Steps:
Review the current activity on the device to determine if there is a specific cause for this.
chkp-secureplatform-load_averages
name: chkp-secureplatform-load_averages
description: displays the load average of the system
type: monitoring
monitoring_interval: 10 minute
requires:
vendor: checkpoint
os.name: secureplatform
comments:
load-average-one-minute:
why: |
To check the normalized load average time for the 1st minute
how: |
By checking the first value displayed in the Linux command "uptime"
can-with-snmp: false
can-with-syslog: false
load-average-five-minutes:
why: |
To check the load average time for the 5th minute
how: |
By checking the second value displayed in the Linux command "uptime"
can-with-snmp: false
can-with-syslog: false
load-average-fifteen-minutes:
why: |
To check the normalized load average time for the 15th minute
how: |
By checking the third value displayed in the Linux command "uptime"
can-with-snmp: false
can-with-syslog: false
load-average-fifteen-minutes-live-config:
why: |
To check the normalized load average time for the 15th minute
how: |
By checking the third value displayed in the Linux command "uptime"
can-with-snmp: false
can-with-syslog: false
load-average-one-minute-live-config:
why: |
To check the load average time for the 1st minute
how: |
By checking the first value displayed in the Linux command "uptime"
can-with-snmp: false
can-with-syslog: false
load-average-five-minutes-live-config:
why: |
To check the load average time for the 5th minute
how: |
By checking the second value displayed in the Linux command "uptime"
can-with-snmp: false
can-with-syslog: false
steps:
- run:
type: SSH
command: ${nice-path} -n 15 uptime ; ${nice-path} -n 15 egrep -e 'processor'
/proc/cpuinfo | wc -l
parse:
type: AWK
file: load-average.parser.1.awk
linux_high_load_average
package com.indeni.server.rules.library.core
import com.indeni.apidata.time.TimeSpan
import com.indeni.ruleengine.expressions.OptionalExpression
import com.indeni.ruleengine.expressions.conditions.GreaterThanOrEqual
import com.indeni.ruleengine.expressions.core._
import com.indeni.ruleengine.expressions.data.{SelectTagsExpression, SelectTimeSeriesExpression, TimeSeriesExpression}
import com.indeni.server.common.data.conditions.True
import com.indeni.server.params.ParameterDefinition
import com.indeni.server.params.ParameterDefinition.UIType
import com.indeni.server.rules._
import com.indeni.server.rules.library.{ConditionalRemediationSteps, PerDeviceRule, RuleHelper}
import com.indeni.server.sensor.models.managementprocess.alerts.dto.AlertSeverity
import com.indeni.server.rules.library.core.HighLoadAverageRule._
/**
  * Per-device rule that raises an issue when the most recent value of the
  * `load-average-five-minutes` time-series is greater than or equal to a
  * user-configurable threshold (default `1.5`).
  *
  * The rule is registered under the name held in `HighLoadAverageRule.NAME`, targets the
  * `LinuxbasedDevices` device category and is configured with a 10 minute interval.
  */
case class HighLoadAverageRule() extends PerDeviceRule with RuleHelper {

  /** Name of the configuration parameter holding the threshold. */
  private[library] val highThresholdParameterName = "High_Threshold_of_Load_Average"

  // Threshold parameter definition: a DOUBLE UI field with a default of 1.5.
  // NOTE(review): the meaning of the empty second argument is not visible here - confirm
  // against ParameterDefinition.
  private val highThresholdParameter = new ParameterDefinition(
    highThresholdParameterName,
    "",
    "High Threshold of Five Minute Load Average",
    "What is the threshold for the five-minute load average for which once it is crossed an issue will be triggered.",
    UIType.DOUBLE,
    1.5)

  override val metadata: RuleMetadata = RuleMetadata
    .builder(
      NAME,
      "High load average",
      "indeni will trigger an issue when the load average on a given device seems high.",
      AlertSeverity.ERROR,
      categories = Set(RuleCategory.HealthChecks),
      deviceCategory = DeviceCategory.LinuxbasedDevices)
    .interval(TimeSpan.fromMinutes(10))
    .configParameter(highThresholdParameter)
    .build()

  /**
    * Builds the status tree evaluated for this rule.
    *
    * @param context rule context supplying the metadata and time-series DAOs
    * @return a tree that selects objects by device key, loads the five-minute load average
    *         series for each, and flags an issue when its last value is >= the threshold
    */
  override def expressionTree(context: RuleContext): StatusTreeExpression = {
    // Single definition of the metric name so the selection and the condition cannot drift apart.
    val metricName = "load-average-five-minutes"

    val actualValue = TimeSeriesExpression[Double](metricName).last
    val threshold: OptionalExpression[Double] = getParameterDouble(highThresholdParameter)

    StatusTreeExpression(
      // Which objects to pull (normally, devices)
      SelectTagsExpression(context.metaDao, Set(DeviceKey), True),
      StatusTreeExpression(
        // The time-series we check the test condition against:
        SelectTimeSeriesExpression[Double](context.tsDao, Set(metricName), denseOnly = false),
        // The condition which, if true, we have an issue. Checked against the time-series we've collected
        GreaterThanOrEqual(actualValue, threshold)
        // The Alert Item to add for this specific item
      ).withRootInfo(
        getHeadline(),
        // Two decimals: load averages and the default threshold (1.5) are fractional, so
        // "%.0f" would round them to whole numbers and produce a misleading message.
        scopableStringFormatExpression(
          "The five-minute load average is %.2f, above the threshold of %.2f.",
          actualValue,
          threshold),
        ConditionalRemediationSteps(
          "Review the current activity on the device to determine if there is a specific cause for this.")
      ).asCondition()
    ).withoutInfo()
  }
}
/** Companion object holding the constants used by the [[HighLoadAverageRule]] class. */
object HighLoadAverageRule {

  /** Rule name handed to `RuleMetadata.builder` when the rule's metadata is built. */
  private[library] val NAME: String = "linux_high_load_average"
}