Device restarted (uptime low)-f5-False

health-checks
false
critical
f5
Device restarted (uptime low)-f5-False
0

#1

Device restarted (uptime low)-f5-False

Vendor: f5

OS: False

Description:
Indeni will alert when a device has restarted.

Remediation Steps:
Determine why the device was restarted.

How does this work?
This script uses the F5 iControl REST API to retrieve the current uptime of vCMP guests.

Why is this important?
Keeping track of vCMP guest uptimes can help trigger alerts if they are suddenly restarted due to a system failure.

Without Indeni how would you find this?
This metric is available by logging into the device with SSH, entering TMSH and executing “show vcmp guest”.

f5-rest-mgmt-tm-vcmp-guest-stats

#! META
# Identity, type and polling interval of this collection script.
name: f5-rest-mgmt-tm-vcmp-guest-stats
description: Extract status of deployed vCMP guests
type: monitoring
monitoring_interval: 10 minutes
# Device tags this script depends on (presumably all must match for the script to run — confirm against the Indeni script reference).
# NOTE(review): this script requires vsx = "true", while the cross_vendor_uptime_low rule shown alongside it
# uses metaCondition !Equals("vsx", "true"); confirm which rule is meant to consume uptime-milliseconds from here.
requires:
    vendor: "f5"
    product: "load-balancer"
    rest-api: "true"
    vsx: "true"

#! COMMENTS
# One entry per metric emitted by the parsers below (vs-state, uptime-milliseconds).
# Each entry carries the why / how / without-indeni texts plus the SNMP and syslog availability flags.
vs-state:
    why: |
        Keeping track of vCMP guest states will enable alerting for when a VM has failed.
    how: |
        This script uses the F5 iControl REST API to retrieve the current status of vCMP guests.
    without-indeni: |
        This metric is available by logging into the device with SSH, entering TMSH and executing "show vcmp guest".
    can-with-snmp: false
    can-with-syslog: false
uptime-milliseconds:
    why: |
        Keeping track of vCMP guest uptimes can help trigger alerts if they are suddenly restarted due to a system failure.
    how: |
        This script uses the F5 iControl REST API to retrieve the current uptime of vCMP guests.
    without-indeni: |
        This metric is available by logging into the device with SSH, entering TMSH and executing "show vcmp guest".
    can-with-snmp: false
    can-with-syslog: false

#! REMOTE::HTTP
url: /mgmt/tm/sys/clock
protocol: HTTPS

#! PARSER::JSON

# Reads the device clock and stores it as the dynamic variable "currentTime",
# which the guest-uptime transform in the next parser section subtracts from.
_dynamic_vars:
    _temp:
        "fullDate":
            _value: "$.entries.*.nestedStats.entries.fullDate.description"
    _transform:
        _dynamic:
            "currentTime": |
                {
                    # Raw clock string, e.g. 2016-12-01T07:55:34Z
                    clockStamp = temp("fullDate")

                    # Replace every character that is not a digit, "-" or ":" with a space,
                    # e.g. "2016-12-01 07:55:34 "
                    gsub(/[^\d-:]/, " ", clockStamp)

                    # Field 1 is the date, field 2 the time of day
                    split(clockStamp, stampFields, /\s/)
                    datePart = stampFields[1]
                    timePart = stampFields[2]

                    # Break both halves into their numeric components
                    split(datePart, ymd, /-/)
                    split(timePart, hms, /:/)

                    yyyy = ymd[1]
                    mon = ymd[2]
                    dd = ymd[3]

                    hh = hms[1]
                    mi = hms[2]
                    ss = hms[3]

                    # datetime() is provided by the Indeni runtime; its result is presumably
                    # seconds since the epoch (the consumer multiplies the difference by 1000)
                    clockEpoch = datetime(yyyy, mon, dd, hh, mi, ss)

                    print clockEpoch
                }

#! REMOTE::HTTP
url: /mgmt/tm/vcmp/guest/stats
protocol: HTTPS

#! PARSER::JSON

# Emits vs-state (1 or 0) and uptime-milliseconds for the vCMP guests returned by the stats endpoint.
_metrics:
    - # vs-state = 1: requested state is deployed, the request is complete and the VM is running
        _groups:
            "$.entries.*.nestedStats.entries.[?(@.requestedState.description == 'deployed' && @.requestComplete.description == 'true' && @.vmStatus.description == 'running')]":
                _tags:
                    "im.name":
                        _constant: "vs-state"
                    "name":
                        _value: "tmName.description"
                _value.double:
                    _constant: "1"
    - # vs-state = 0: VM status is reported as failed
        _groups:
            "$.entries.*.nestedStats.entries.[?(@.vmStatus.description == 'failed')]":
                _tags:
                    "im.name":
                        _constant: "vs-state"
                    "name":
                        _value: "tmName.description"
                _value.double:
                    _constant: "0"
    - # uptime-milliseconds: one value per deployed guest whose request is complete, tagged by vs.name
        _groups:
            "$.entries.*.nestedStats.entries.[?(@.requestedState.description == 'deployed' && @.requestComplete.description == 'true')]":
                _tags:
                    "im.name":
                        _constant: "uptime-milliseconds"
                    "vs.name":
                        _value: "tmName.description"
                _temp:
                    "uptime":
                        _value: "uptime.description"
        _transform:
            _value.double: |
                {
                    # The guest's "uptime" field is parsed as a timestamp, e.g. 2016-12-01T07:55:34Z
                    guestStamp = temp("uptime")

                    # Replace every character that is not a digit, "-" or ":" with a space,
                    # e.g. "2016-12-01 07:55:34 "
                    gsub(/[^\d-:]/, " ", guestStamp)

                    # Field 1 is the date, field 2 the time of day
                    split(guestStamp, stampFields, /\s/)
                    datePart = stampFields[1]
                    timePart = stampFields[2]

                    # Break both halves into their numeric components
                    split(datePart, ymd, /-/)
                    split(timePart, hms, /:/)

                    yyyy = ymd[1]
                    mon = ymd[2]
                    dd = ymd[3]

                    hh = hms[1]
                    mi = hms[2]
                    ss = hms[3]

                    # Convert with the same datetime() call used for the "currentTime" dynamic variable
                    guestEpoch = datetime(yyyy, mon, dd, hh, mi, ss)

                    # Difference between the device clock and the guest timestamp, scaled by 1000
                    # to match the uptime-milliseconds metric name
                    elapsedMs = (dynamic("currentTime") - guestEpoch) * 1000

                    print elapsedMs
                }

cross_vendor_uptime_low

package com.indeni.server.rules.library.templatebased.crossvendor

import com.indeni.apidata.time.TimeSpan
import com.indeni.apidata.time.TimeSpan.TimePeriod
import com.indeni.server.common.data.conditions.Equals
import com.indeni.server.rules.RuleContext
import com.indeni.server.rules.library._
import com.indeni.server.sensor.models.managementprocess.alerts.dto.AlertSeverity

/**
  * Rule definition for "Device restarted (uptime low)".
  *
  * Configures [[TimeIntervalThresholdOnDoubleMetricTemplateRule]] to watch the
  * `uptime-milliseconds` metric with a 60-minute threshold and direction BELOW,
  * at CRITICAL severity. The meta condition restricts the rule to devices whose
  * `vsx` tag is not "true".
  *
  * The second parameter list supplies extra remediation text for NX-OS and
  * Fortinet devices; how it is combined with `baseRemediationText` is decided by
  * the template rule (not visible here).
  */
case class cross_vendor_uptime_low() extends TimeIntervalThresholdOnDoubleMetricTemplateRule(
  ruleName = "cross_vendor_uptime_low",
  ruleFriendlyName = "All Devices (Non-VSX): Device restarted (uptime low)",
  ruleDescription = "Indeni will alert when a device has restarted.",
  severity = AlertSeverity.CRITICAL,
  metricName = "uptime-milliseconds",
  // Threshold is given as a TimeSpan while the metric itself is declared to be in milliseconds.
  threshold = TimeSpan.fromMinutes(60),
  metricUnits = TimePeriod.MILLISECOND,
  thresholdDirection = ThresholdDirection.BELOW,
  // The alert text reports seconds; presumably the template converts the metric value
  // to alertDescriptionValueUnits before formatting — confirm in the template rule.
  alertDescriptionFormat = "The current uptime is %.0f seconds which seems to indicate the device has restarted.",
  alertDescriptionValueUnits = TimePeriod.SECOND,
  baseRemediationText = "Determine why the device was restarted.",
  // Excludes devices tagged vsx = "true".
  metaCondition = !Equals("vsx", "true")
)(
  // NX-OS specific remediation steps.
  ConditionalRemediationSteps.OS_NXOS ->
    """|
       |1. Use the "show version" or "show system reset-reason" NX-OS commands to display the reason for the reload.
       |2. Use the "show cores" command to determine if a core file was recorded during the unexpected reboot.
       |3. Run the "show process log" command to display the processes and if a core was created.
       |4. With the show logging command, review the events that happened close to the time of reboot.""".stripMargin,
  // Fortinet specific remediation steps.
  // NOTE(review): step 3 reads "to making  sure" (grammar slip and double space) and step 4 uses
  // typographic quotes around the command; left untouched here because this is user-facing text.
  ConditionalRemediationSteps.VENDOR_FORTINET ->
    """
      |1. Watch the system reboot time.
      |2. Review the log messages and focus on error messages that were generated at least 5 minutes prior to system reboot, especially before unexpected system reboot.
      |3. Verify the status of the scheduled restart command to making  sure it's an irregular restart
      |   - config sys global
      |   - get | grep restart
      |   - end
      |4. Login via ssh to the Fortinet firewall and review the crash log in a readable format by using the FortiOS command “diag debug crashlog read”.
      |5. Contact Fortinet Technical support at https://support.fortinet.com/ for further assistance.""".stripMargin
)