Skip to content

Commit

Permalink
Merge pull request #1 from nimbux911/refactor_monitoring
Browse files Browse the repository at this point in the history
Refactor monitoring
  • Loading branch information
qemanuel committed May 11, 2022
2 parents 1d18f5d + e74f63b commit 381844f
Show file tree
Hide file tree
Showing 32 changed files with 4,459 additions and 303 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [3.0.0] - 2022-05-06

### Added
- aws-iam-authenticator is not needed anymore
- loki-stack has been replaced by the loki-distributed, kube-stack-prometheus and fluent-bit helm charts
- tempo-distributed helm chart
- cert-manager helm chart
- opentelemetry manifests for auto-instrumentation
- ingress-nginx, metrics-server and cluster-autoscaler helm charts updated to latest version

## [2.1.0] - 2021-11-30

### Added
Expand Down
293 changes: 198 additions & 95 deletions README.md

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions asg.tf
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
locals {
eks_worker_userdata = <<USERDATA
eks_worker_userdata_max_pods_enabled = <<USERDATA
#!/bin/bash
set -o xtrace
/etc/eks/bootstrap.sh --apiserver-endpoint '${aws_eks_cluster.main.endpoint}' --b64-cluster-ca '${aws_eks_cluster.main.certificate_authority.0.data}' '${aws_eks_cluster.main.name}' --use-max-pods false --kubelet-extra-args '--max-pods=${var.max_pods_per_node}'
USERDATA

eks_worker_userdata = <<USERDATA
#!/bin/bash
set -o xtrace
/etc/eks/bootstrap.sh --apiserver-endpoint '${aws_eks_cluster.main.endpoint}' --b64-cluster-ca '${aws_eks_cluster.main.certificate_authority.0.data}' '${aws_eks_cluster.main.name}'
USERDATA
}


resource "aws_key_pair" "eks" {
key_name = aws_eks_cluster.main.name
public_key = base64decode(aws_ssm_parameter.eks_public_key.value)
Expand All @@ -18,7 +25,7 @@ resource "aws_launch_configuration" "eks" {
instance_type = var.instance_type
name_prefix = aws_eks_cluster.main.name
security_groups = [aws_security_group.eks_worker.id]
user_data_base64 = base64encode(local.eks_worker_userdata)
user_data_base64 = var.eks_worker_max_pods_enabled ? base64encode(local.eks_worker_userdata_max_pods_enabled) : base64encode(local.eks_worker_userdata)
key_name = aws_key_pair.eks.key_name

lifecycle {
Expand Down
2 changes: 2 additions & 0 deletions eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ resource "aws_eks_cluster" "main" {
version = var.cluster_version
role_arn = aws_iam_role.eks_master.arn

enabled_cluster_log_types = var.enabled_cluster_log_types

vpc_config {
subnet_ids = var.subnets_ids
security_group_ids = [aws_security_group.eks_master.id]
Expand Down
82 changes: 82 additions & 0 deletions helm-values/fluent-bit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@

# Helm values for the fluent-bit chart: tail container logs on every node,
# enrich them with Kubernetes metadata and push them to the Loki distributor.
image:
  # NOTE(review): the image name suggests the Grafana build that bundles the
  # `grafana-loki` output plugin referenced in config.outputs - confirm.
  repository: grafana/fluent-bit-plugin-loki
  tag: 2.1.0-amd64
  pullPolicy: IfNotPresent

## https://docs.fluentbit.io/manual/administration/configuring-fluent-bit/configuration-file
config:
  # The {{ .Values.* }} placeholders are left untouched for the chart to render.
  # Only logLevel is set in this file; flush and metricsPort presumably fall
  # back to the chart defaults - verify against the chart's values.yaml.
  service: |
    [SERVICE]
        Daemon Off
        Flush {{ .Values.flush }}
        Log_Level {{ .Values.logLevel }}
        Parsers_File parsers.conf
        Parsers_File custom_parsers.conf
        HTTP_Server On
        HTTP_Listen 0.0.0.0
        HTTP_Port {{ .Values.metricsPort }}
        Health_Check On

  ## https://docs.fluentbit.io/manual/pipeline/inputs
  # Tails every container log file and tags the records kube.*.
  inputs: |
    [INPUT]
        Name tail
        Tag kube.*
        Path /var/log/containers/*.log
        Parser docker
        DB /run/fluent-bit/flb_kube.db
        Mem_Buf_Limit 5MB

  ## https://docs.fluentbit.io/manual/pipeline/filters
  # Adds Kubernetes metadata to the kube.* records.
  filters: |
    [FILTER]
        Name kubernetes
        Match kube.*
        Kube_URL https://kubernetes.default.svc:443
        Merge_Log On
        K8S-Logging.Exclude Off
        K8S-Logging.Parser Off

  ## https://docs.fluentbit.io/manual/pipeline/outputs
  # Ships every record to the Loki distributor. The push URL uses the current
  # /loki/api/v1/push endpoint; the legacy /api/prom/push path is deprecated.
  # Labels are derived from the record through labelmap.json (see extraFiles).
  outputs: |
    [OUTPUT]
        Name grafana-loki
        Match *
        Url http://loki-distributed-distributor:3100/loki/api/v1/push
        TenantID ""
        BatchWait 1
        BatchSize 1048576
        Labels {job="fluent-bit"}
        RemoveKeys kubernetes,stream
        AutoKubernetesLabels false
        LabelMapPath /fluent-bit/etc/labelmap.json
        LineFormat json
        LogLevel warn

  ## https://docs.fluentbit.io/manual/pipeline/parsers
  # JSON parser named `docker`, referenced by the tail input above.
  customParsers: |
    [PARSER]
        Name docker
        Format json
        Time_Key time
        Time_Format %Y-%m-%dT%H:%M:%S.%L

  # This allows adding more files with arbitrary filenames to /fluent-bit/etc by providing key/value pairs.
  # The key becomes the filename, the value becomes the file content.
  # NOTE(review): nesting extraFiles under `config` follows the upstream chart
  # layout - confirm against the chart version in use.
  extraFiles:
    # Maps record fields to Loki label names; consumed through LabelMapPath.
    labelmap.json: |
      {
        "kubernetes": {
          "container_name": "container",
          "host": "node",
          "labels": {
            "app": "app",
            "release": "release",
            "pod-template-hash": "template_hash"
          },
          "namespace_name": "namespace",
          "pod_name": "pod"
        },
        "stream": "stream"
      }

# Rendered into Log_Level of the [SERVICE] section above.
logLevel: warn
12 changes: 0 additions & 12 deletions helm-values/ingress-nginx.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,5 @@
controller:
name: controller
image:
registry: k8s.gcr.io
image: ingress-nginx/controller
# for backwards compatibility consider setting the full image url via the repository value below
# use *either* current default registry/image or repository format or installing chart by providing the values.yaml will fail
# repository:
tag: "v0.47.0"
digest: sha256:a1e4efc107be0bb78f32eaec37bef17d7a0c81bec8066cdf2572508d21351d0b

service:
enabled: true

type: NodePort
nodePorts:
http: 32080
Expand Down
129 changes: 129 additions & 0 deletions helm-values/loki-distributed.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Helm values for the loki-distributed chart.
loki:
  # Templated Loki configuration. Everything inside this block scalar is part
  # of the rendered string, so no explanatory comments are added within it.
  # The schema_config / storage_config sections are filled in from
  # loki.schemaConfig and loki.storageConfig further down.
  config: |
    auth_enabled: false
    server:
      http_listen_port: 3100
    distributor:
      ring:
        kvstore:
          store: memberlist
    memberlist:
      join_members:
        - {{ include "loki.fullname" . }}-memberlist
    ingester:
      lifecycler:
        ring:
          kvstore:
            store: memberlist
          replication_factor: 1
      chunk_idle_period: 30m
      chunk_block_size: 262144
      chunk_retain_period: 1m
      max_transfer_retries: 0
      wal:
        dir: /data/loki/wal
    {{- if .Values.loki.schemaConfig}}
    schema_config:
    {{- toYaml .Values.loki.schemaConfig | nindent 2}}
    {{- end}}
    {{- if .Values.loki.storageConfig}}
    storage_config:
    {{- if .Values.indexGateway.enabled}}
    {{- $indexGatewayClient := dict "server_address" (printf "dns:///%s:9095" (include "loki.indexGatewayFullname" .)) }}
    {{- $_ := set .Values.loki.storageConfig.boltdb_shipper "index_gateway_client" $indexGatewayClient }}
    {{- end}}
    {{- toYaml .Values.loki.storageConfig | nindent 2}}
    {{- end}}
    chunk_store_config:
      max_look_back_period: 0s
    query_range:
      align_queries_with_step: true
      max_retries: 5
      cache_results: true
      results_cache:
        cache:
          enable_fifocache: true
          fifocache:
            max_size_items: 1024
            validity: 24h
    frontend_worker:
      frontend_address: {{ include "loki.queryFrontendFullname" . }}:9095
    frontend:
      max_outstanding_per_tenant: 2048
      log_queries_longer_than: 5s
      compress_responses: true
      tail_proxy_url: http://{{ include "loki.querierFullname" . }}:3100
    table_manager:
      retention_deletes_enabled: false
      retention_period: 0s

  # Additional settings supplied as structured YAML rather than as text.
  # NOTE(review): presumably merged by the chart on top of `config` - confirm.
  structuredConfig:
    compactor:
      working_directory: /data/loki/boltdb-shipper-compactor
      shared_store: aws
      compaction_interval: 10m
      retention_delete_delay: 2h
      retention_delete_worker_count: 150

    limits_config:
      enforce_metric_name: false
      reject_old_samples: true
      reject_old_samples_max_age: 168h
      max_cache_freshness_per_query: 10m
      split_queries_by_interval: 15m

  # Index/chunk schema, rendered into schema_config by the template above.
  # Reference: https://grafana.com/docs/loki/latest/configuration/#schema_config
  schemaConfig:
    configs:
      - from: "2021-08-01"
        store: boltdb-shipper
        object_store: aws
        schema: v11
        index:
          prefix: index_
          period: 24h

  # Storage backends, rendered into storage_config by the template above.
  # Reference: https://grafana.com/docs/loki/latest/configuration/#storage_config
  # NOTE(review): object_store is `aws` but no `aws:` section is defined here;
  # presumably the bucket settings are injected elsewhere - confirm.
  storageConfig:
    boltdb_shipper:
      shared_store: s3
      active_index_directory: /data/loki/boltdb-shipper-active
      cache_location: /data/loki/boltdb-shipper-cache
      cache_ttl: 24h
    filesystem:
      directory: /data/loki/chunks

# Ingester pods: emptyDir volumes back /data/loki and the WAL directory
# (/data/loki/wal, matching ingester.wal.dir in the config above).
ingester:
  extraVolumes:
    - name: wal
      emptyDir: {}
    - name: data
      emptyDir: {}
  extraVolumeMounts:
    - name: data
      mountPath: /data/loki
    - name: wal
      mountPath: /data/loki/wal

# Querier pods: emptyDir volume mounted at /data/loki.
querier:
  extraVolumes:
    - name: data
      emptyDir: {}
  extraVolumeMounts:
    - name: data
      mountPath: /data/loki

# Index-gateway pods: emptyDir volume mounted at /data/loki.
indexGateway:
  extraVolumes:
    - name: data
      emptyDir: {}
  extraVolumeMounts:
    - name: data
      mountPath: /data/loki

# Gateway ingress: explicit empty TLS list.
gateway:
  ingress:
    tls: []
25 changes: 25 additions & 0 deletions helm-values/tempo-distributed.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Helm values for the tempo-distributed chart.

# Gateway ingress: explicit empty TLS list.
gateway:
  ingress:
    tls: []

# The `query` component of the query-frontend is switched off.
queryFrontend:
  query:
    enabled: false

# No memcached deployment.
memcached:
  enabled: false

# Search is switched on.
search:
  enabled: true

server:
  logLevel: info

# Trace ingestion: OTLP over HTTP is on, OTLP over gRPC is off.
traces:
  otlp:
    http: true
    grpc: false

# Traces are stored in the s3 backend.
# NOTE(review): no bucket settings are defined in this file; presumably they
# are supplied elsewhere - confirm.
storage:
  trace:
    backend: s3
Loading

0 comments on commit 381844f

Please sign in to comment.