DNS server response time is slow compared to other DNS servers-bluecoat-sgos

DNS server response time is slow compared to other DNS servers-bluecoat-sgos

Vendor: bluecoat

OS: sgos

Description:
indeni will trigger an issue when a DNS server takes longer to respond than other DNS servers.

Remediation Steps:
Identify any network and server issues which may be causing this.

How does this work?
Indeni logs on to the device and executes the command "show health-checks statistics".

Why is this important?
Slow DNS lookups could impact production traffic negatively by causing delays for client requests.

Without Indeni how would you find this?
An administrator could log in and manually run the command.

bluecoat-show-health-checks-statistics

name: bluecoat-show-health-checks-statistics
description: Get DNS servers states and response times
type: monitoring
monitoring_interval: 5 minutes
requires:
    vendor: bluecoat
    os.name: sgos
comments:
    dns-server-state:
        why: |
            Even though DNS servers are configured, that does not guarantee that they work. Many products require a fully functional DNS server to be configured.
        how: |
            Indeni logs on to the device and executes the command "show health-checks statistics". The output includes the current state of each DNS server.
        can-with-snmp: false
        can-with-syslog: false
    dns-response-time:
        why: |
            Slow DNS lookups could impact production traffic negatively by causing delays for client requests.
        how: |
            Indeni logs on to the device and executes the command "show health-checks statistics".
        can-with-snmp: false
        can-with-syslog: false
    dns-average-response-time:
        why: |
            Slow DNS lookups could impact production traffic negatively by causing delays for client requests.
        how: |
            Indeni logs on to the device and executes the command "show health-checks statistics".
        can-with-snmp: false
        can-with-syslog: false
    bluecoat-icap-state:
        why: |
            The ProxySG device integrates with a variety of services such as ICAP. It is important to monitor the current state of the service.
        how: |
            Indeni logs in over SSH and executes "show health-checks statistics". The output includes the current icap state.
        can-with-snmp: false
        can-with-syslog: true
    bluecoat-process-state:
        why: |
            The ProxySG device integrates with a variety of services such as ICAP and DRTR. It is important to monitor the current state of the services; otherwise the organization might suffer from security risks and unavailability of external resources.
        how: |
            Indeni logs in over SSH and executes "show health-checks statistics".  The output includes the current state of each service.
        can-with-snmp: false
        can-with-syslog: true
    bc-identity-integration-connection-state:
        why: |
            It is important to make sure that the connectivity between the ProxySG and the authentication servers is up and running.
        how: |
            Indeni logs in over SSH and executes "show health-checks statistics".  The output includes the current state of each service.
        can-with-snmp: false
        can-with-syslog: true
    auth-response-time:
        why: |
            Slow authentication connectivity could impact production traffic negatively by causing delays for authenticated client requests.
        how: |
            Indeni logs on to the device and executes the command "show health-checks statistics".
        can-with-snmp: false
        can-with-syslog: false
    bluecoat-external-rating-service-state:
        why: |
            It is important to make sure that the connectivity between the ProxySG and the Dynamic Real-time Rating Service (DRTR) servers is up and running.
        how: |
            Indeni logs in over SSH and executes "show health-checks statistics".  The output includes the current state of DRTR service.
        can-with-snmp: false
        can-with-syslog: true
    auth-average-response-time:
        why: |
            Slow authentication connectivity could impact production traffic negatively by causing delays for authenticated client requests.
        how: |
            Indeni logs on to the device and executes the command "show health-checks statistics".
        can-with-snmp: false
        can-with-syslog: false
steps:
-   run:
        type: SSH
        command: show health-checks statistics
    parse:
        type: AWK
        file: show-health-checks-statistics.parser.1.awk

cross_vendor_odd_dns_server_response_time

package com.indeni.server.rules.library.core
import com.indeni.ruleengine.expressions.OptionalExpression
import com.indeni.ruleengine.expressions.core.{ConstantExpression, StatusTreeExpression}
import com.indeni.ruleengine.expressions.data.{SelectTagsExpression, SelectTimeSeriesExpression}
import com.indeni.ruleengine.expressions.tools.OddOneOutExpression
import com.indeni.server.common.data.conditions.True
import com.indeni.server.params.ParameterDefinition
import com.indeni.server.params.ParameterDefinition.UIType
import com.indeni.server.rules._
import com.indeni.server.rules.library.{ConditionalRemediationSteps, PerDeviceRule, RuleHelper}
import com.indeni.server.sensor.models.managementprocess.alerts.dto.AlertSeverity


/**
 * Cross-vendor rule: "DNS server response time is slow compared to other DNS servers".
 *
 * For the selected devices, the "dns-response-time" time series are handed to
 * [[OddOneOutExpression]] together with two user-configurable thresholds (an absolute
 * distance in ms and a relative distance). Every series for which
 * `OddOneOutExpression.isTimeSeriesOdd` holds is listed under "Affected Servers" in the
 * resulting WARN-severity alert.
 */
case class OddDnsResponseTimeRule() extends PerDeviceRule with RuleHelper {

  // ---- Relative-distance threshold ------------------------------------------------------

  private[library] val percentageDistanceParameterName = "Relative_Distance_Threshold"

  // NOTE(review): the label says "(%)" while the default is 0.9 -- confirm which unit
  // OddOneOutExpression expects. The description also ends in ".." (looks like a typo);
  // the text is kept as-is here because it is user-facing.
  private val percentageDistanceParameter = new ParameterDefinition(
    percentageDistanceParameterName,
    "",
    "Relative Distance (%)",
    "If the DNS server response time Relative Distance to the baseline in percentage is bigger than " +
      "the Relative Distance, and the DNS server response time absolute distance to baseline is bigger " +
      "than the value set in Absolute Distance (ms), an issue will be triggered..",
    UIType.DOUBLE,
    0.9)

  // ---- Absolute-distance threshold ------------------------------------------------------

  private[library] val absoluteDistanceParameterName = "Absolute_Distance_Threshold"

  // Must stay below percentageDistanceParameter: its description quotes that parameter's
  // friendly name, so the other val has to be initialised first.
  // NOTE(review): the default is the Int literal 100 although the UI type is DOUBLE -- confirm
  // ParameterDefinition/getParameterDouble handle that as intended.
  private val absoluteDistanceParameter = {
    val relativeLabel = percentageDistanceParameter.getFriendlyName
    new ParameterDefinition(
      absoluteDistanceParameterName,
      "",
      "Absolute Distance (ms)",
      "If the DNS server response time Absolute Distance from the baseline and the DNS server " +
        "response time Relative Distance in percentage is bigger than the value set in \"" +
        relativeLabel +
        "\", an issue will be triggered..",
      UIType.DOUBLE,
      100)
  }

  // ---- Rule metadata --------------------------------------------------------------------

  override val metadata: RuleMetadata = RuleMetadata
    .builder(
      "cross_vendor_odd_dns_server_response_time",
      "DNS server response time is slow compared to other DNS servers",
      "indeni will trigger an issue when a DNS server takes longer to respond than other DNS servers.",
      AlertSeverity.WARN,
      categories = Set(RuleCategory.VendorBestPractices),
      deviceCategory = DeviceCategory.AllDevices)
    .configParameters(absoluteDistanceParameter, percentageDistanceParameter)
    .build()

  /**
   * Builds the status tree for this rule.
   *
   * @param context supplies the metadata DAO (tag selection) and the time-series DAO
   * @return the device-level status tree carrying the alert headline, description and
   *         remediation steps
   */
  override def expressionTree(context: RuleContext): StatusTreeExpression = {
    val metricName = "dns-response-time"

    // User-configured thresholds.
    val absoluteDistance: OptionalExpression[Double] = getParameterDouble(absoluteDistanceParameter)
    val percentageDistance: OptionalExpression[Double] = getParameterDouble(percentageDistanceParameter)

    // Outer scope: which objects to pull (normally, devices).
    val devices = SelectTagsExpression(context.metaDao, Set(DeviceKey), True)

    // Inner scope: the time series the test condition is checked against.
    val responseTimes =
      SelectTimeSeriesExpression[Double](context.tsDao, Set(metricName), denseOnly = true)
    val oddOneOutScope = OddOneOutExpression(
      absoluteDistance,
      percentageDistance,
      alertExceededLowValues = false,
      responseTimes)

    // Condition that, when true, means there is an issue for that series; each hit
    // contributes one alert item under "Affected Servers".
    val perServerCondition =
      StatusTreeExpression(oddOneOutScope, OddOneOutExpression.isTimeSeriesOdd(metricName))
        .withSecondaryInfo(
          scopableStringFormatExpression("${scope(\"TS:dns-response-time>dns-server\")}"),
          scopableStringFormatExpression("This server's response time is above normal"),
          title = "Affected Servers"
        )
        .asCondition()

    // Details of the alert itself.
    StatusTreeExpression(devices, perServerCondition).withRootInfo(
      getHeadline(),
      ConstantExpression(
        "One or more DNS servers have a high response time compared to other DNS servers. This may slow down critical system processes."),
      ConditionalRemediationSteps("Identify any network and server issues which may be causing this.")
    )
  }
}

1 Like