High disk space utilization

error
health-checks
High disk space utilization
0

#1

High disk space utilization

Vendor:

OS:

Description:
Many systems store vital configuration and operational data on their " +
"storage devices at different mount points and on different " +
"disk devices. It is important to monitor the usage of the " +
"different storage devices to ensure they do not fill up and " +
"cause issues in the on-going operation of the system.

Remediation Steps:
Determine the cause for the high disk usage of the listed file systems.

high_disk_space_usage

package com.indeni.server.rules.library

import com.indeni.ruleengine.expressions.conditions.{And, ConditionHelper, GreaterThanOrEqual}
import com.indeni.ruleengine.expressions.core.{StatusTreeExpression, _}
import com.indeni.ruleengine.expressions.data._
import com.indeni.ruleengine.expressions.math.AverageExpression
import com.indeni.ruleengine.expressions.scope.ScopeValueExpression
import com.indeni.server.common.ParameterValue
import com.indeni.server.common.data.conditions.True
import com.indeni.server.params.ParameterDefinition
import com.indeni.server.rules._
import com.indeni.server.rules.library.core.PerDeviceRule
import com.indeni.server.sensor.models.managementprocess.alerts.dto.AlertSeverity


case class HighDiskSpaceUsageRule() extends PerDeviceRule with RuleHelper {

  private val excludeDisks = Set("/dev", "/mnt/cdrom", "/proc", "/dev/shm", "/dev/shm", "/junos", "/junos/dev")

  val highThresholdParameter: ParameterDefinition = new ParameterDefinition(
    "High_Threshold_of_Space_Usage",
    "",
    "High Threshold of Space Usage",
    "What is the threshold for the mount point's disk usage for which once it is crossed " + "an issue will be triggered.",
    ParameterDefinition.UIType.DOUBLE,
    new ParameterValue((80.0).asInstanceOf[Object])
  )

  override def metadata: RuleMetadata =
    RuleMetadata.builder("high_disk_space_usage",
      "All Devices: High disk space utilization",
      "Many systems store vital configuration and operational data on their " +
        "storage devices at different mount points and on different " +
        "disk devices. It is important to monitor the usage of the " +
        "different storage devices to ensure they do not fill up and " +
        "cause issues in the on-going operation of the system.",
      AlertSeverity.ERROR).configParameter(highThresholdParameter).build()

  override def expressionTree(context: RuleContext): StatusTreeExpression = {

    val diskUsage = AverageExpression(TimeSeriesExpression[Double]("disk-usage-percentage"))
    val fileSizeValue = TimeSeriesExpression[Double]("file-size").last

    val diskNotExcluded = ScopeValueExpression("file-system").visible().isIn(excludeDisks).not

    val diskUsageGreaterThanThreshold =
      GreaterThanOrEqual(diskUsage, getParameterDouble(highThresholdParameter))

    val filesSubInformer = MultiIssueInformer(
      headline = scopableStringFormatExpression("${scope(\"path\")}"),
      description = scopableStringFormatExpression("File size: %.0f bytes", fileSizeValue),
      title = "Large Files")
      .iterateOver(
        collection = SelectTimeSeriesExpression[Double](context.tsDao, Set("file-size"), denseOnly = false),
        condition = GreaterThanOrEqual(fileSizeValue, ConstantExpression[Option[Double]](Some(10000)))
      )

    val filesInformer = MultiIssueInformer(
      collection = SelectTagsExpression(context.tsDao, Set("path"), True),
      subInformer = filesSubInformer)

    StatusTreeExpression(
      SelectTagsExpression(context.metaDao, Set(DeviceKey), True),
      StatusTreeExpression(
        SelectTagsExpression(context.tsDao, Set("file-system"), True),
        And(
          diskNotExcluded,
          StatusTreeExpression(
            SelectTimeSeriesExpression[Double](context.tsDao, Set("disk-usage-percentage"), denseOnly = false),
            diskUsageGreaterThanThreshold
          ).withSecondaryInfo(
            scopableStringFormatExpression("${scope(\"file-system\")}"),
            scopableStringFormatExpression("Current disk space utilization is: %.0f%%", diskUsage),
            title = "Affected Disks / File Systems"
          ).asCondition()
        ),
        multiInformers = Set(filesInformer)
      ).withoutInfo().asCondition()
    ).withRootInfo(
      getHeadline(),
      ConstantExpression("Some disks or file systems are under high usage."),
      ConditionalRemediationSteps("Determine the cause for the high disk usage of the listed file systems.",
        ConditionalRemediationSteps.VENDOR_PANOS -> "Review https://live.paloaltonetworks.com/t5/Management-Articles/How-and-When-to-Clear-Disk-Space-on-the-Palo-Alto-Networks/ta-p/55736 and https://live.paloaltonetworks.com/t5/tkb/articleprintpage/tkb-id/FeaturedArticles/article-id/89",
        ConditionalRemediationSteps.VENDOR_JUNIPER ->
          """|1. On the devices command line interface execute "show system storage detail"  command to review storage utilization.
             |2. Run "request system storage disconnect" command to free storage space on the SRX by rotating log files and proposing a list of files for deletion.
             |3. Remove the debug files after debug is done.
             |4. Consider configuring logs to be sent to remote log servers.
             |5. Review the following article on Juniper tech support site: <a target="_blank" href="https://www.juniper.net/documentation/en_US/junos/topics/reference/command-summary/request-system-storage-disconnect.html">Operational Commands: request system storage disconnect</a>.""".stripMargin
      )
    )
  }
}


#2