High disk space utilization-linux-False

error
health-checks
false
linux
High disk space utilization-linux-False
0

#1

High disk space utilization-linux-False

Vendor: linux

OS: False

Description:
Many systems store vital configuration and operational data on their storage devices at different mount points and on different disk devices. It is important to monitor the usage of the different storage devices to ensure they do not fill up and cause issues in the on-going operation of the system.

Remediation Steps:
Determine the cause for the high disk usage of the listed file systems.

How does this work?
Using the built-in “df” command, the size and usage of all partitions are retrieved.

Why is this important?
It is very important to monitor the disk space usage of a system. If the disk space is full it will prevent writing more data to the disk. Compressing and moving data from a disk that is 100% full is time consuming, which is why it is important to take care of any such issue early.

Without Indeni how would you find this?
An administrator could login and manually list the disk space usage. Vendors generally provide tools which provide access to this information.

unix-df_k

#! META
name: unix-df_k
description: run "df -k"
type: monitoring
monitoring_interval: 10 minutes
requires:
    or:
        -
            linux-based: true
        -
            linux-busybox: true
        -
            freebsd-based: true
    asg:
        neq: true

#! COMMENTS
disk-usage-percentage:
    why: |
        It is very important to monitor the disk space usage of a system. If the disk space is full it will prevent writing more data to the disk. Compressing and moving data from a disk that is 100% full is time consuming, which is why it is important to take care of any such issue early.
    how: |
        Using the built-in "df" command, the size and usage of all partitions are retrieved.
    without-indeni: |
        An administrator could login and manually list the disk space usage. Vendors generally provide tools which provide access to this information.
    can-with-snmp: true
    can-with-syslog: false
    vendor-provided-management: |
        This is accessible from the command line interface or vendor-provided tools, as well as SNMP.

disk-used-kbytes:
    why: |
        Shows how much of the partition, in kilobytes, is in use. If the file system fills up, data that should be written to disk can be lost.
    how: |
        Using the built in command "df"
    without-indeni: |
        An administrator could login and manually list the disk space usage. Vendors generally provide tools which provide access to this information.
    can-with-snmp: true
    can-with-syslog: false
    vendor-provided-management: |
        This is accessible from the command line interface or vendor-provided tools, as well as SNMP.

disk-total-kbytes:
    why: |
        Used to display the total partition size, in kilobytes.
    how: |
        Using the built in command "df"
    without-indeni: |
        An administrator could login and manually list the disk space usage. Vendors generally provide tools which provide access to this information.
    can-with-snmp: true
    can-with-syslog: false
    vendor-provided-management: |
        This is accessible from the command line interface or vendor-provided tools, as well as SNMP.

#! REMOTE::SSH
${nice-path} -n 15 df -k

#! PARSER::AWK

############
# Script explanation: 
# The information for a mount point may appear on a single line, or be split across two lines
# when the filesystem's name is long. The filesystem name is not needed, so the parser reads
# only the trailing fields of each line, counting from the end.
############


# 10157368   6384800   3248280  67% /
# /dev/sda1  295561     24017    256284   9% /boot
/(\d+)%/ {
    mount = $NF
    usage = $(NF-1)
    sub(/%/, "", usage)
    available = $(NF-2)
    used = $(NF-3)
	total = $(NF-4)

    mounttags["file-system"] = mount

    writeDoubleMetricWithLiveConfig("disk-usage-percentage", mounttags, "gauge", "60", usage, "Mount Points - Usage", "percentage", "file-system")
	writeDoubleMetricWithLiveConfig("disk-used-kbytes", mounttags, "gauge", "60", used, "Mount Points - Used", "kbytes", "file-system")
	writeDoubleMetricWithLiveConfig("disk-total-kbytes", mounttags, "gauge", "60", total, "Mount Points - Total", "kbytes", "file-system")
}

high_disk_space_usage

package com.indeni.server.rules.library

import com.indeni.ruleengine.expressions.conditions.{And, ConditionHelper, GreaterThanOrEqual}
import com.indeni.ruleengine.expressions.core.{StatusTreeExpression, _}
import com.indeni.ruleengine.expressions.data._
import com.indeni.ruleengine.expressions.math.AverageExpression
import com.indeni.ruleengine.expressions.scope.ScopeValueExpression
import com.indeni.server.common.ParameterValue
import com.indeni.server.common.data.conditions.True
import com.indeni.server.params.ParameterDefinition
import com.indeni.server.rules._
import com.indeni.server.rules.library.core.PerDeviceRule
import com.indeni.server.sensor.models.managementprocess.alerts.dto.AlertSeverity


/**
  * Per-device rule that reports file systems whose disk usage has reached a configurable
  * threshold.
  *
  * For every device and every "file-system" tag, the rule evaluates the
  * `AverageExpression` of the "disk-usage-percentage" time series and compares it, using
  * greater-than-or-equal, with the "High_Threshold_of_Space_Usage" parameter (default 80.0).
  * File systems listed in `excludeDisks` are never reported. Entries of the "file-size" time
  * series whose last value is at least 10000 are attached under the "Large Files" title.
  */
case class HighDiskSpaceUsageRule() extends PerDeviceRule with RuleHelper {

  /** Mount points that are skipped by this rule. */
  private val excludeDisks = Set("/dev", "/mnt/cdrom", "/proc", "/dev/shm", "/junos", "/junos/dev")

  /** User-configurable usage threshold (a percentage) at or above which an issue is raised. */
  val highThresholdParameter: ParameterDefinition = new ParameterDefinition(
    "High_Threshold_of_Space_Usage",
    "",
    "High Threshold of Space Usage",
    "What is the threshold for the mount point's disk usage for which once it is crossed an issue will be triggered.",
    ParameterDefinition.UIType.DOUBLE,
    new ParameterValue((80.0).asInstanceOf[Object])
  )

  /** Rule identity, display name, description and severity, plus the threshold parameter. */
  override def metadata: RuleMetadata =
    RuleMetadata.builder("high_disk_space_usage",
      "All Devices: High disk space utilization",
      "Many systems store vital configuration and operational data on their " +
        "storage devices at different mount points and on different " +
        "disk devices. It is important to monitor the usage of the " +
        "different storage devices to ensure they do not fill up and " +
        "cause issues in the on-going operation of the system.",
      AlertSeverity.ERROR).configParameter(highThresholdParameter).build()

  /**
    * Builds the status tree evaluated for this rule.
    *
    * @param context rule context supplying the metadata DAO and the time-series DAO
    * @return a tree that iterates devices, then file systems, and flags each non-excluded
    *         file system whose usage is at or above the configured threshold
    */
  override def expressionTree(context: RuleContext): StatusTreeExpression = {

    val diskUsage = AverageExpression(TimeSeriesExpression[Double]("disk-usage-percentage"))
    val fileSizeValue = TimeSeriesExpression[Double]("file-size").last

    // True when the current "file-system" scope value is not one of the excluded mount points.
    val diskNotExcluded = ScopeValueExpression("file-system").visible().isIn(excludeDisks).not

    val diskUsageGreaterThanThreshold =
      GreaterThanOrEqual(diskUsage, getParameterDouble(highThresholdParameter))

    // One entry per "file-size" series whose last value is at least 10000.
    val filesSubInformer = MultiIssueInformer(
      headline = scopableStringFormatExpression("${scope(\"path\")}"),
      description = scopableStringFormatExpression("File size: %.0f bytes", fileSizeValue),
      title = "Large Files")
      .iterateOver(
        collection = SelectTimeSeriesExpression[Double](context.tsDao, Set("file-size"), denseOnly = false),
        condition = GreaterThanOrEqual(fileSizeValue, ConstantExpression[Option[Double]](Some(10000)))
      )

    // Runs the sub-informer above once per "path" tag.
    val filesInformer = MultiIssueInformer(
      collection = SelectTagsExpression(context.tsDao, Set("path"), True),
      subInformer = filesSubInformer)

    StatusTreeExpression(
      // Outer level: one evaluation per device.
      SelectTagsExpression(context.metaDao, Set(DeviceKey), True),
      StatusTreeExpression(
        // Middle level: one evaluation per file system of that device.
        SelectTagsExpression(context.tsDao, Set("file-system"), True),
        And(
          diskNotExcluded,
          StatusTreeExpression(
            // Inner level: the usage series of that file system, compared with the threshold.
            SelectTimeSeriesExpression[Double](context.tsDao, Set("disk-usage-percentage"), denseOnly = false),
            diskUsageGreaterThanThreshold
          ).withSecondaryInfo(
            scopableStringFormatExpression("${scope(\"file-system\")}"),
            scopableStringFormatExpression("Current disk space utilization is: %.0f%%", diskUsage),
            title = "Affected Disks / File Systems"
          ).asCondition()
        ),
        multiInformers = Set(filesInformer)
      ).withoutInfo().asCondition()
    ).withRootInfo(
      getHeadline(),
      ConstantExpression("Some disks or file systems are under high usage."),
      ConditionalRemediationSteps("Determine the cause for the high disk usage of the listed file systems.",
        ConditionalRemediationSteps.VENDOR_PANOS -> "Review https://live.paloaltonetworks.com/t5/Management-Articles/How-and-When-to-Clear-Disk-Space-on-the-Palo-Alto-Networks/ta-p/55736 and https://live.paloaltonetworks.com/t5/tkb/articleprintpage/tkb-id/FeaturedArticles/article-id/89",
        // The Junos command that rotates log files and proposes files for deletion is
        // "request system storage cleanup".
        ConditionalRemediationSteps.VENDOR_JUNIPER ->
          """|1. On the devices command line interface execute "show system storage detail"  command to review storage utilization.
             |2. Run "request system storage cleanup" command to free storage space on the SRX by rotating log files and proposing a list of files for deletion.
             |3. Remove the debug files after debug is done.
             |4. Consider configuring logs to be sent to remote log servers.
             |5. Review the following article on Juniper tech support site: <a target="_blank" href="https://www.juniper.net/documentation/en_US/junos/topics/reference/command-summary/request-system-storage-cleanup.html">Operational Commands: request system storage cleanup</a>.""".stripMargin
      )
    )
  }
}