Device uptime too high-f5-False

error
false
ongoing-maintenance
f5
Device uptime too high-f5-False
0

#1

Device uptime too high-f5-False

Vendor: f5

OS: False

Description:
Indeni will alert when a device’s uptime is too high.

Remediation Steps:
Upgrade the device. You may also change the alert’s threshold, or disable the alert completely, if not needed.

How does this work?
This script uses the F5 iControl REST API to retrieve the current uptime of vCMP guests.

Why is this important?
Keeping track of vCMP guest uptimes can help trigger alerts if they are suddenly restarted due to a system failure.

Without Indeni how would you find this?
This metric is available by logging into the device with SSH, entering TMSH and executing “show vcmp guest”.

f5-rest-mgmt-tm-vcmp-guest-stats

#! META
# Identity, type and schedule of this script.
name: f5-rest-mgmt-tm-vcmp-guest-stats
description: Extract status of deployed vCMP guests
type: monitoring
# Interval between two runs of this script.
monitoring_interval: 10 minutes
# Device tags listed as requirements for this script.
# NOTE(review): "vsx" presumably marks devices that host virtual systems
# (vCMP guests in this case) - confirm against the tagging scripts.
requires:
    vendor: "f5"
    product: "load-balancer"
    rest-api: "true"
    vsx: "true"

#! COMMENTS
# Per-metric documentation for the two metric names emitted by this script's
# parser sections: "vs-state" and "uptime-milliseconds".
vs-state:
    why: |
        Keeping track of vCMP guest states will enable alerting for when a VM has failed.
    how: |
        This script uses the F5 iControl REST API to retrieve the current status of vCMP guests.
    without-indeni: |
        This metric is available by logging into the device with SSH, entering TMSH and executing "show vcmp guest".
    can-with-snmp: false
    can-with-syslog: false
uptime-milliseconds:
    why: |
        Keeping track of vCMP guest uptimes can help trigger alerts if they are suddenly restarted due to a system failure.
    how: |
        This script uses the F5 iControl REST API to retrieve the current uptime of vCMP guests.
    without-indeni: |
        This metric is available by logging into the device with SSH, entering TMSH and executing "show vcmp guest".
    can-with-snmp: false
    can-with-syslog: false

#! REMOTE::HTTP
url: /mgmt/tm/sys/clock
protocol: HTTPS

#! PARSER::JSON

# Reads the device clock from /mgmt/tm/sys/clock and publishes it as the
# dynamic variable "currentTime". The guest uptime metric later in this script
# subtracts a guest timestamp from it, so both sides of that subtraction come
# from the same datetime() conversion.
_dynamic_vars:
    _temp:
        "fullDate":
            _value: "$.entries.*.nestedStats.entries.fullDate.description"
    _transform:
        _dynamic:
            "currentTime": |
                {
                    # Expected shape: 2016-12-01T07:55:34Z
                    clockText = temp("fullDate")

                    # Drop anything in front of the first digit so the year
                    # always lands in the first field.
                    sub(/^[^0-9]+/, "", clockText)

                    # Split on every run of non-digits; the six numeric fields
                    # end up in stamp[1..6]: year, month, day, hour, minute,
                    # second. A plain bracket expression is used on purpose:
                    # "\d" is not part of AWK's regex syntax, and inside
                    # brackets "\d-:" can be read as a character range.
                    split(clockText, stamp, /[^0-9]+/)

                    # NOTE(review): datetime() is provided by the Indeni
                    # runtime; presumably it returns seconds since the epoch -
                    # confirm.
                    secondsSinceEpoch = datetime(stamp[1], stamp[2], stamp[3], stamp[4], stamp[5], stamp[6])

                    print secondsSinceEpoch
                }

#! REMOTE::HTTP
url: /mgmt/tm/vcmp/guest/stats
protocol: HTTPS

#! PARSER::JSON

# Metrics built from the vCMP guest statistics response. Each JSONPath under
# _groups selects guest entries; one metric is emitted per selected entry.
_metrics:
    # "vs-state" = 1 for guests whose requested state is 'deployed', whose
    # request has completed, and whose VM status is 'running'.
    - #vs-state OK
        _groups:
            "$.entries.*.nestedStats.entries.[?(@.requestedState.description == 'deployed' && @.requestComplete.description == 'true' && @.vmStatus.description == 'running')]":
                _tags:
                    "im.name":
                        _constant: "vs-state"
                    # The guest name is read from the entry's tmName field.
                    "name":
                        _value: "tmName.description"
                _value.double:
                    _constant: "1"
    # "vs-state" = 0 for guests whose VM status is 'failed'. Guests matching
    # neither this filter nor the one above get no "vs-state" metric.
    - #vs-state not OK
        _groups:
            "$.entries.*.nestedStats.entries.[?(@.vmStatus.description == 'failed')]":
                _tags:
                    "im.name":
                        _constant: "vs-state"
                    "name":
                        _value: "tmName.description"
                _value.double:
                    _constant: "0"
    - #Guest uptime
        # One "uptime-milliseconds" sample per guest whose requested state is
        # 'deployed' and whose request has completed. The guest name is
        # reported in the "vs.name" tag.
        # NOTE(review): the filter does not require vmStatus == 'running'. If
        # a deployed guest reports an empty uptime field, the computed value
        # would presumably be very large - confirm how datetime() treats
        # empty fields.
        _groups:
            "$.entries.*.nestedStats.entries.[?(@.requestedState.description == 'deployed' && @.requestComplete.description == 'true')]":
                _tags:
                    "im.name":
                        _constant: "uptime-milliseconds"
                    "vs.name":
                        _value: "tmName.description"
                _temp:
                    "uptime":
                        _value: "uptime.description"
        _transform:
            _value.double: |
                {
                    # The "uptime" field is handled as a timestamp shaped like
                    # 2016-12-01T07:55:34Z.
                    startText = temp("uptime")

                    # Drop anything in front of the first digit so the year
                    # always lands in the first field.
                    sub(/^[^0-9]+/, "", startText)

                    # Split on every run of non-digits; the six numeric fields
                    # end up in stamp[1..6]: year, month, day, hour, minute,
                    # second. A plain bracket expression is used on purpose:
                    # "\d" is not part of AWK's regex syntax, and inside
                    # brackets "\d-:" can be read as a character range.
                    split(startText, stamp, /[^0-9]+/)

                    secondsSinceEpoch = datetime(stamp[1], stamp[2], stamp[3], stamp[4], stamp[5], stamp[6])

                    # "currentTime" is the device clock published by the
                    # /mgmt/tm/sys/clock section of this script. The
                    # difference is scaled by 1000 to match the
                    # "-milliseconds" metric name.
                    uptime = (dynamic("currentTime") - secondsSinceEpoch) * 1000

                    print uptime
                }

cross_vendor_uptime_high

package com.indeni.server.rules.library.templatebased.crossvendor

import com.indeni.apidata.time.TimeSpan
import com.indeni.apidata.time.TimeSpan.TimePeriod
import com.indeni.server.rules.RuleContext
import com.indeni.server.rules.library.{ConditionalRemediationSteps, ThresholdDirection, TimeIntervalThresholdOnDoubleMetricTemplateRule}
import com.indeni.server.sensor.models.managementprocess.alerts.dto.AlertSeverity

/**
  * Cross-vendor rule that raises an ERROR-severity alert when the
  * "uptime-milliseconds" metric is above the configured threshold. The
  * threshold is set here to `TimeSpan.fromDays(365 * 10)`, i.e. 3650 days.
  *
  * The metric is declared in milliseconds (`metricUnits`), while the value
  * substituted into the alert description is declared in seconds
  * (`alertDescriptionValueUnits`).
  *
  * Besides the base remediation text, a dedicated list of remediation steps
  * is supplied for NX-OS devices.
  *
  * @param context rule context passed through to the template rule
  */
case class cross_vendor_uptime_high(context: RuleContext) extends TimeIntervalThresholdOnDoubleMetricTemplateRule(context,
  ruleName = "cross_vendor_uptime_high",
  ruleFriendlyName = "All Devices: Device uptime too high",
  ruleDescription = "Indeni will alert when a device's uptime is too high",
  severity = AlertSeverity.ERROR,
  metricName = "uptime-milliseconds",
  metricUnits = TimePeriod.MILLISECOND,
  threshold = TimeSpan.fromDays(365 * 10), // 3650 days
  thresholdDirection = ThresholdDirection.ABOVE, // alert when the metric is above the threshold
  alertDescriptionFormat = "The current uptime is %.0f seconds. This alert identifies when a device has been up for a very long time and may need an upgrade.",
  alertDescriptionValueUnits = TimePeriod.SECOND,
  baseRemediationText = "Upgrade the device. You may also change the alert's threshold, or disable the alert completely, if not needed.")(
  // Extra remediation steps used for NX-OS devices.
  ConditionalRemediationSteps.OS_NXOS ->
    """|
       |1. Use the "show version" NX-OS command to display the current system uptime.
       |2. Run the "show system reset-reason" to check the reason for the last reboot of the device.
       |3. Check if the installed NX-OS version is supported and review it for software bugs.""".stripMargin
)