Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Added metrics to liteprotocoltester #3002

Merged
merged 3 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions apps/liteprotocoltester/.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
START_PUBLISHING_AFTER=10
START_PUBLISHING_AFTER=45
# can add some seconds delay before SENDER starts publishing

NUM_MESSAGES=0
Expand All @@ -12,16 +12,16 @@ MIN_MESSAGE_SIZE=15Kb
MAX_MESSAGE_SIZE=145Kb

## for wakusim
PUBSUB=/waku/2/rs/66/0
CONTENT_TOPIC=/tester/2/light-pubsub-test/wakusim
CLUSTER_ID=66
#PUBSUB=/waku/2/rs/66/0
#CONTENT_TOPIC=/tester/2/light-pubsub-test/wakusim
#CLUSTER_ID=66

## for status.prod
#PUBSUB=/waku/2/rs/16/32
#CONTENT_TOPIC=/tester/2/light-pubsub-test/fleet
#CLUSTER_ID=16

## for TWN
#PUBSUB=/waku/2/rs/1/4
#CONTENT_TOPIC=/tester/2/light-pubsub-test/twn
#CLUSTER_ID=1
PUBSUB=/waku/2/rs/1/4
CONTENT_TOPIC=/tester/2/light-pubsub-test/twn
CLUSTER_ID=1
2 changes: 2 additions & 0 deletions apps/liteprotocoltester/Dockerfile.liteprotocoltester
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

COPY build/liteprotocoltester /usr/bin/
COPY apps/liteprotocoltester/run_tester_node.sh /usr/bin/
COPY ./libnegentropy.so /usr/lib/


ENTRYPOINT ["/usr/bin/run_tester_node.sh", "/usr/bin/liteprotocoltester"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

# Copy migration scripts for DB upgrades
COPY --from=nim-build /app/migrations/ /app/migrations/
COPY --from=nim-build /app/libnegentropy.so /usr/lib/

ENTRYPOINT ["/usr/bin/liteprotocoltester"]

Expand Down
35 changes: 0 additions & 35 deletions apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy

This file was deleted.

4 changes: 4 additions & 0 deletions apps/liteprotocoltester/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ docker compose -f docker-compose-on-simularor.yml logs -f receivernode
- Notice there is a configurable wait before start publishing messages as it is noticed time is needed for the service nodes to get connected to full nodes from simulator
- light clients will print report on their and the connected service node's connectivity to the network in every 20 secs.

#### Test monitoring

Navigate to http://localhost:3033 to see the lite-protocol-tester dashboard.

### Phase 3

> Run independently on a chosen waku fleet
Expand Down
74 changes: 41 additions & 33 deletions apps/liteprotocoltester/docker-compose-on-simularor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
deploy:
replicas: ${NUM_PUBLISHER_NODES:-3}
# ports:
Expand All @@ -84,13 +84,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- SENDER
- waku-sim
Expand Down Expand Up @@ -139,7 +138,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
deploy:
replicas: ${NUM_RECEIVER_NODES:-1}
# ports:
Expand All @@ -161,13 +160,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- RECEIVER
- waku-sim
Expand All @@ -180,34 +178,44 @@ services:
networks:
- waku-simulator_simulation

## We have prometheus and grafana defined in waku-simulator already
# prometheus:
# image: docker.io/prom/prometheus:latest
# volumes:
# - ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z
# command:
# - --config.file=/etc/prometheus/prometheus.yml
# ports:
# - 127.0.0.1:9090:9090
# depends_on:
# - servicenode
# We have prometheus and grafana defined in waku-simulator already
prometheus:
image: docker.io/prom/prometheus:latest
volumes:
- ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z
command:
- --config.file=/etc/prometheus/prometheus.yml
- --web.listen-address=:9099
# ports:
# - 127.0.0.1:9090:9090
restart: on-failure:5
depends_on:
- filter-service
- lightpush-service
- publishernode
- receivernode
networks:
- waku-simulator_simulation

# grafana:
# image: docker.io/grafana/grafana:latest
# env_file:
# - ./monitoring/configuration/grafana-plugins.env
# volumes:
# - ./monitoring/configuration/grafana.ini:/etc/grafana/grafana.ini:Z
# - ./monitoring/configuration/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z
# - ./monitoring/configuration/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z
# - ./monitoring/configuration/dashboards:/var/lib/grafana/dashboards/:Z
# - ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_icon.svg:Z
# - ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_typelogo.svg:Z
# - ./monitoring/configuration/customizations/custom-logo.png:/usr/share/grafana/public/img/fav32.png:Z
# ports:
# - 0.0.0.0:3000:3000
# depends_on:
# - prometheus
grafana:
image: docker.io/grafana/grafana:latest
env_file:
- ./monitoring/configuration/grafana-plugins.env
volumes:
- ./monitoring/configuration/grafana.ini:/etc/grafana/grafana.ini:Z
- ./monitoring/configuration/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z
- ./monitoring/configuration/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z
- ./monitoring/configuration/dashboards:/var/lib/grafana/dashboards/:Z
- ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_icon.svg:Z
- ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_typelogo.svg:Z
- ./monitoring/configuration/customizations/custom-logo.png:/usr/share/grafana/public/img/fav32.png:Z
ports:
- 0.0.0.0:3033:3033
restart: on-failure:5
depends_on:
- prometheus
networks:
- waku-simulator_simulation

configs:
cfg_tester_node.toml:
Expand Down
9 changes: 4 additions & 5 deletions apps/liteprotocoltester/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
ports:
# - 30304:30304/tcp
# - 30304:30304/udp
Expand All @@ -79,13 +79,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- SENDER
- servicenode
Expand All @@ -99,7 +98,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
ports:
# - 30304:30304/tcp
# - 30304:30304/udp
Expand All @@ -125,7 +124,7 @@ services:
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- RECEIVER
- servicenode
Expand Down
6 changes: 5 additions & 1 deletion apps/liteprotocoltester/lightpush_publisher.nim
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ import
common/utils/parse_size_units,
],
./tester_config,
./tester_message
./tester_message,
./lpt_metrics

randomize()

Expand Down Expand Up @@ -141,12 +142,15 @@ proc publishMessages(
pubsubTopic = lightpushPubsubTopic,
hash = msgHash
inc(messagesSent)
lpt_publisher_sent_messages_count.inc()
lpt_publisher_sent_bytes.inc(amount = msgSize.int64)
else:
sentMessages[messagesSent] = (hash: msgHash, relayed: false)
failedToSendCause.mgetOrPut(wlpRes.error, 1).inc()
error "failed to publish message using lightpush",
err = wlpRes.error, hash = msgHash
inc(failedToSendCount)
lpt_publisher_failed_messages_count.inc(labelValues = [wlpRes.error])

await sleepAsync(delayMessages)

Expand Down
7 changes: 4 additions & 3 deletions apps/liteprotocoltester/liteprotocoltester.nim
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ when isMainModule:
wakuConf.clusterId = conf.clusterId
## TODO: Depending on the tester needs we might extend here with shards, clusterId, etc...

wakuConf.metricsServer = true
wakuConf.metricsServerAddress = parseIpAddress("0.0.0.0")
wakuConf.metricsServerPort = 8003

if conf.testFunc == TesterFunctionality.SENDER:
wakuConf.lightpushnode = conf.serviceNode
else:
Expand All @@ -108,9 +112,6 @@ when isMainModule:

wakuConf.rest = false

wakuConf.metricsServer = true
wakuConf.metricsServerAddress = parseIpAddress("0.0.0.0")

# NOTE: {.threadvar.} is used to make the global variable GC safe for the closure uses it
# It will always be called from main thread anyway.
# Ref: https://nim-lang.org/docs/manual.html#threads-gc-safety
Expand Down
30 changes: 30 additions & 0 deletions apps/liteprotocoltester/lpt_metrics.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
## Example showing how a resource restricted client may
## subscribe to messages without relay

import metrics

export metrics

declarePublicGauge lpt_receiver_sender_peer_count, "count of sender peers"

declarePublicCounter lpt_receiver_received_messages_count,
"number of messages received per peer", ["peer"]

declarePublicCounter lpt_receiver_received_bytes,
"number of received bytes per peer", ["peer"]

declarePublicGauge lpt_receiver_missing_messages_count,
"number of missing messages per peer", ["peer"]

declarePublicCounter lpt_receiver_duplicate_messages_count,
"number of duplicate messages per peer", ["peer"]

declarePublicGauge lpt_receiver_distinct_duplicate_messages_count,
"number of distinct duplicate messages per peer", ["peer"]

declarePublicCounter lpt_publisher_sent_messages_count, "number of messages published"

declarePublicCounter lpt_publisher_failed_messages_count,
"number of messages failed to publish per failure cause", ["cause"]

declarePublicCounter lpt_publisher_sent_bytes, "number of total bytes sent"
Loading
Loading