Skip to content

Commit

Permalink
set mdsd limits (#1027)
Browse files Browse the repository at this point in the history
* set mdsd limit using container memory limit

---------

Co-authored-by: Amol Agrawal <[email protected]>
  • Loading branch information
pfrcks and Amol Agrawal committed Aug 8, 2023
1 parent 9a96a82 commit f476b3a
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 13 deletions.
29 changes: 16 additions & 13 deletions build/common/installer/scripts/tomlparser-agent-config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
# Checking to see if container is not prometheus sidecar.
# CONTAINER_TYPE is populated only for prometheus sidecar container.
@containerType = ENV["CONTAINER_TYPE"]
@containerMemoryLimitInBytes = ENV["CONTAINER_MEMORY_LIMIT_IN_BYTES"]

@promFbitChunkSize = 0
@promFbitBufferSize = 0
Expand Down Expand Up @@ -268,11 +269,12 @@ def populateSettingValuesFromConfigMap(parsedConfig)
puts "Using config map value: require_ack_response = #{@requireAckResponse}"
end
end
# ama-logs daemonset only settings
if !@controllerType.nil? && !@controllerType.empty? && @controllerType.strip.casecmp(@daemonset) == 0 && @containerType.nil?
# mdsd settings
mdsd_config = parsedConfig[:agent_settings][:mdsd_config]
if !mdsd_config.nil?

# mdsd settings
mdsd_config = parsedConfig[:agent_settings][:mdsd_config]
if !mdsd_config.nil?
# ama-logs daemonset only settings
if !@controllerType.nil? && !@controllerType.empty? && @controllerType.strip.casecmp(@daemonset) == 0 && @containerType.nil?
mdsdMonitoringMaxEventRate = mdsd_config[:monitoring_max_event_rate]
if is_valid_number?(mdsdMonitoringMaxEventRate)
@mdsdMonitoringMaxEventRate = mdsdMonitoringMaxEventRate.to_i
Expand All @@ -288,13 +290,6 @@ def populateSettingValuesFromConfigMap(parsedConfig)
@mdsdUploadFrequencyInSeconds = mdsdUploadFrequencyInSeconds.to_i
puts "Using config map value: upload_frequency_seconds = #{@mdsdUploadFrequencyInSeconds}"
end
mdsdBackPressureThresholdInMB = mdsd_config[:backpressure_memory_threshold_in_mb]
if is_valid_number?(mdsdBackPressureThresholdInMB) && mdsdBackPressureThresholdInMB.to_i > 100
@mdsdBackPressureThresholdInMB = mdsdBackPressureThresholdInMB.to_i
puts "Using config map value: backpressure_memory_threshold_in_mb = #{@mdsdBackPressureThresholdInMB}"
else
puts "Ignoring mdsd backpressure limit. Check input values for correctness."
end
mdsdCompressionLevel = mdsd_config[:compression_level]
if is_number?(mdsdCompressionLevel) && mdsdCompressionLevel.to_i >= 0 && mdsdCompressionLevel.to_i < 10 # supported levels from 0 to 9
@mdsdCompressionLevel = mdsdCompressionLevel.to_i
Expand All @@ -303,6 +298,14 @@ def populateSettingValuesFromConfigMap(parsedConfig)
puts "Ignoring mdsd compression_level level since its not supported level. Check input values for correctness."
end
end

mdsdBackPressureThresholdInMB = mdsd_config[:backpressure_memory_threshold_in_mb]
if is_valid_number?(mdsdBackPressureThresholdInMB) && is_valid_number?(@containerMemoryLimitInBytes) && mdsdBackPressureThresholdInMB.to_i < (@containerMemoryLimitInBytes.to_i / 1048576) && mdsdBackPressureThresholdInMB.to_i > 100
@mdsdBackPressureThresholdInMB = mdsdBackPressureThresholdInMB.to_i
puts "Using config map value: backpressure_memory_threshold_in_mb = #{@mdsdBackPressureThresholdInMB}"
else
puts "Ignoring mdsd backpressure limit. Check input values for correctness. Configmap value in mb: #{mdsdBackPressureThresholdInMB}, container limit in bytes: #{@containerMemoryLimitInBytes}"
end
end

prom_fbit_config = nil
Expand Down Expand Up @@ -443,7 +446,7 @@ def populateSettingValuesFromConfigMap(parsedConfig)
end

if @mdsdBackPressureThresholdInMB > 0
file.write("export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=#{@mdsdBackPressureThresholdInMB}\n")
file.write("export BACKPRESSURE_THRESHOLD_IN_MB=#{@mdsdBackPressureThresholdInMB}\n")
end

if @mdsdCompressionLevel >= 0
Expand Down
24 changes: 24 additions & 0 deletions build/common/installer/scripts/tomlparser-prom-agent-config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,18 @@
@waittime_port_25226 = 45
@waittime_port_25228 = 120
@waittime_port_25229 = 45
@containerMemoryLimitInBytes = ENV["CONTAINER_MEMORY_LIMIT_IN_BYTES"]
@mdsdBackPressureThresholdInMB = 0

# Reports whether +value+ can be converted by Kernel#Integer.
# Returns true when the conversion succeeds, and false when it raises
# (e.g. nil, an empty string, or non-numeric text).
def is_number?(value)
  Integer(value)
  true
rescue StandardError
  # Same scope as a modifier `rescue`: only StandardError and its subclasses.
  false
end

# Reports whether +value+ is a strictly positive number.
# Returns false for nil and for anything is_number? rejects; otherwise
# returns whether value.to_i is greater than 0.
def is_valid_number?(value)
  return false if value.nil?
  return false unless is_number?(value)

  value.to_i > 0
end

# check if it is a valid waittime
def is_valid_waittime?(value, default)
return !value.nil? && is_number?(value) && value.to_i >= default/2 && value.to_i <= 3*default
Expand Down Expand Up @@ -94,6 +101,18 @@ def populateSettingValuesFromConfigMap(parsedConfig)
end
end

# mdsd settings
mdsd_config = parsedConfig[:agent_settings][:mdsd_config]
if !mdsd_config.nil?
mdsdBackPressureThresholdInMB = mdsd_config[:backpressure_memory_threshold_in_mb]
if is_valid_number?(mdsdBackPressureThresholdInMB) && is_valid_number?(@containerMemoryLimitInBytes) && mdsdBackPressureThresholdInMB.to_i < (@containerMemoryLimitInBytes.to_i / 1048576) && mdsdBackPressureThresholdInMB.to_i > 100
@mdsdBackPressureThresholdInMB = mdsdBackPressureThresholdInMB.to_i
puts "Using config map value: backpressure_memory_threshold_in_mb = #{@mdsdBackPressureThresholdInMB}"
else
puts "Ignoring mdsd backpressure limit. Check input values for correctness. Configmap value in mb: #{mdsdBackPressureThresholdInMB}, container limit in bytes: #{@containerMemoryLimitInBytes}"
end
end

end
rescue => errorStr
puts "config::error:Exception while reading config settings for sidecar agent configuration setting - #{errorStr}, using defaults"
Expand Down Expand Up @@ -124,6 +143,11 @@ def populateSettingValuesFromConfigMap(parsedConfig)
file.write("export WAITTIME_PORT_25226=#{@waittime_port_25226}\n")
file.write("export WAITTIME_PORT_25228=#{@waittime_port_25228}\n")
file.write("export WAITTIME_PORT_25229=#{@waittime_port_25229}\n")

if @mdsdBackPressureThresholdInMB > 0
file.write("export BACKPRESSURE_THRESHOLD_IN_MB=#{@mdsdBackPressureThresholdInMB}\n")
end

# Close file after writing all environment variables
file.close
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: CONTAINER_MEMORY_LIMIT_IN_BYTES
valueFrom:
resourceFieldRef:
containerName: ama-logs-windows
resource: limits.memory
- name: NODE_IP
valueFrom:
fieldRef:
Expand Down
10 changes: 10 additions & 0 deletions charts/azuremonitor-containers/templates/ama-logs-daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.hostIP
- name: CONTAINER_MEMORY_LIMIT_IN_BYTES
valueFrom:
resourceFieldRef:
containerName: ama-logs
resource: limits.memory
{{- if not (empty .Values.Azure.Extension.Name) }}
- name: ARC_K8S_EXTENSION_NAME
value: {{ .Values.Azure.Extension.Name | quote }}
Expand Down Expand Up @@ -210,6 +215,11 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.hostIP
- name: CONTAINER_MEMORY_LIMIT_IN_BYTES
valueFrom:
resourceFieldRef:
containerName: ama-logs-prometheus
resource: limits.memory
- name: ISTEST
value: {{ .Values.amalogs.ISTEST | quote }}
- name: HOSTNAME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ spec:
resourceFieldRef:
containerName: ama-logs
resource: limits.cpu
- name: CONTAINER_MEMORY_LIMIT_IN_BYTES
valueFrom:
resourceFieldRef:
containerName: ama-logs
resource: limits.memory
{{- if ne .Values.amalogs.env.clusterId "<your_cluster_id>" }}
- name: AKS_RESOURCE_ID
value: {{ .Values.amalogs.env.clusterId | quote }}
Expand Down
22 changes: 22 additions & 0 deletions kubernetes/linux/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -855,6 +855,28 @@ else
fi
source ~/.bashrc

# Choose MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB, in priority order:
#   1. If BACKPRESSURE_THRESHOLD_IN_MB is set, use it as-is (logged below as the configmap value).
#   2. Otherwise, if FBIT_TAIL_MEM_BUF_LIMIT is unset and CONTAINER_MEMORY_LIMIT_IN_BYTES is set,
#      use 50 percent of the container memory limit.
#   3. Otherwise leave the threshold unset and only log why.
# Whenever a value is chosen it is exported and also appended to ~/.bashrc.
if [ -n "${BACKPRESSURE_THRESHOLD_IN_MB}" ]; then
export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=${BACKPRESSURE_THRESHOLD_IN_MB}
echo "export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=$MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB" >> ~/.bashrc
echo "Setting MDSD backpressure threshold from configmap: ${MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB} MB"
source ~/.bashrc
elif [ -z "${FBIT_TAIL_MEM_BUF_LIMIT}" ]; then
if [ -n "${CONTAINER_MEMORY_LIMIT_IN_BYTES}" ]; then
echo "Container limit in bytes: ${CONTAINER_MEMORY_LIMIT_IN_BYTES}"
# Integer division converts bytes to MiB (1048576 = 1024 * 1024); any remainder is dropped.
limit_in_mebibytes=$((CONTAINER_MEMORY_LIMIT_IN_BYTES / 1048576))

# Half of the limit, again using integer arithmetic.
export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=$((limit_in_mebibytes * 50 / 100))
echo "export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=$MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB" >> ~/.bashrc
echo "Setting MDSD backpressure threshold as 50 percent of container limit: ${MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB} MB"
source ~/.bashrc
else
echo "Container limit not found. Not setting mdsd backpressure threshold"
fi
else
# FBIT_TAIL_MEM_BUF_LIMIT is set: no threshold is derived from the container limit in this case.
echo "MDSD backpressure threshold not set since tail_mem_buf_limit_megabytes is used in configmap. Use backpressure_memory_threshold_in_mb in configmap to set it."
fi

if [ "${CONTAINER_TYPE}" == "PrometheusSidecar" ]; then
if [ "${MUTE_PROM_SIDECAR}" != "true" ]; then
echo "starting mdsd with mdsd-port=26130, fluentport=26230 and influxport=26330 in sidecar container..."
Expand Down
5 changes: 5 additions & 0 deletions source/plugins/go/src/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) {
telemetryDimensions["PromFbitBufferSize"] = os.Getenv("AZMON_SIDECAR_FBIT_BUFFER_SIZE")
telemetryDimensions["PromFbitMemBufLimit"] = os.Getenv("AZMON_SIDECAR_FBIT_MEM_BUF_LIMIT")

mdsdBackPressureThresholdInMB := os.Getenv("MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB")
if mdsdBackPressureThresholdInMB != "" {
telemetryDimensions["mdsdBackPressureThresholdInMB"] = mdsdBackPressureThresholdInMB
}

SendEvent(eventNameCustomPrometheusSidecarHeartbeat, telemetryDimensions)

} else {
Expand Down
7 changes: 7 additions & 0 deletions source/plugins/ruby/in_kube_nodes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def initialize(is_unit_test_mode = nil, kubernetesApiClient = nil,
@@rsPromMonitorPodsFieldSelectorLength = @env["TELEMETRY_RS_PROM_FIELD_SELECTOR_LENGTH"]
@@collectAllKubeEvents = @env["AZMON_CLUSTER_COLLECT_ALL_KUBE_EVENTS"]
@@osmNamespaceCount = @env["TELEMETRY_OSM_CONFIGURATION_NAMESPACES_COUNT"]
@@mdsdBackPressureThresholdInMB = @env["MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB"]

@ContainerNodeInventoryTag = "oneagent.containerInsights.CONTAINER_NODE_INVENTORY_BLOB"
@insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
Expand Down Expand Up @@ -393,6 +394,12 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
if (File.file?(@@osmConfigMountPath))
properties["osmNamespaceCount"] = @@osmNamespaceCount
end

# telemetry about mdsd backpressure limits for replicaset
if (!@@mdsdBackPressureThresholdInMB.nil?) && (!@@mdsdBackPressureThresholdInMB.empty?)
properties["mdsdBackPressureThresholdInMB"] = @@mdsdBackPressureThresholdInMB
end

@applicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties)
telemetrySent = true
rescue => errorStr
Expand Down

0 comments on commit f476b3a

Please sign in to comment.