Skip to content

Commit

Permalink
Added metrics to tests
Browse files Browse the repository at this point in the history
Fix liteprotocoltester docker files with libnegentropy COPY
docker compose with waku-sim simulation now has a test performance dashboard at localhost:3033

Mention dashboard in Readme
  • Loading branch information
NagyZoltanPeter committed Aug 28, 2024
1 parent 87b340f commit 28c7a80
Show file tree
Hide file tree
Showing 20 changed files with 1,230 additions and 6,159 deletions.
2 changes: 1 addition & 1 deletion apps/liteprotocoltester/.env
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ START_PUBLISHING_AFTER=10
NUM_MESSAGES=0
# 0 for infinite number of messages

DELAY_MESSAGES=8000
DELAY_MESSAGES=188
# ms delay between messages


Expand Down
2 changes: 2 additions & 0 deletions apps/liteprotocoltester/Dockerfile.liteprotocoltester
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

COPY build/liteprotocoltester /usr/bin/
COPY apps/liteprotocoltester/run_tester_node.sh /usr/bin/
COPY ./libnegentropy.so /usr/lib/


ENTRYPOINT ["/usr/bin/run_tester_node.sh", "/usr/bin/liteprotocoltester"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

# Copy migration scripts for DB upgrades
COPY --from=nim-build /app/migrations/ /app/migrations/
COPY --from=nim-build /app/libnegentropy.so /usr/lib/

ENTRYPOINT ["/usr/bin/liteprotocoltester"]

Expand Down
35 changes: 0 additions & 35 deletions apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy

This file was deleted.

4 changes: 4 additions & 0 deletions apps/liteprotocoltester/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ docker compose -f docker-compose-on-simularor.yml logs -f receivernode
- Note that there is a configurable wait before publishing starts, because the service nodes need time to connect to the simulator's full nodes.
Light clients print a report on their own and the connected service node's connectivity to the network every 20 seconds.

#### Test monitoring

Navigate to http://localhost:3033 to see the lite-protocol-tester dashboard.

### Phase 3

> Run independently on a chosen waku fleet
Expand Down
74 changes: 41 additions & 33 deletions apps/liteprotocoltester/docker-compose-on-simularor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
deploy:
replicas: ${NUM_PUBLISHER_NODES:-3}
# ports:
Expand All @@ -84,13 +84,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- SENDER
- waku-sim
Expand Down Expand Up @@ -139,7 +138,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
deploy:
replicas: ${NUM_RECEIVER_NODES:-1}
# ports:
Expand All @@ -161,13 +160,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- RECEIVER
- waku-sim
Expand All @@ -180,34 +178,44 @@ services:
networks:
- waku-simulator_simulation

## We have prometheus and grafana defined in waku-simulator already
# prometheus:
# image: docker.io/prom/prometheus:latest
# volumes:
# - ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z
# command:
# - --config.file=/etc/prometheus/prometheus.yml
# ports:
# - 127.0.0.1:9090:9090
# depends_on:
# - servicenode
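# NOTE: waku-simulator already defines its own prometheus and grafana.
# The instances below are separate ones for the lite-protocol-tester dashboard
# (this prometheus listens on :9099; presumably to avoid clashing with the simulator's - confirm).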
prometheus:
image: docker.io/prom/prometheus:latest
volumes:
- ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z
command:
- --config.file=/etc/prometheus/prometheus.yml
- --web.listen-address=:9099
# ports:
# - 127.0.0.1:9090:9090
restart: on-failure:5
depends_on:
- filter-service
- lightpush-service
- publishernode
- receivernode
networks:
- waku-simulator_simulation

# grafana:
# image: docker.io/grafana/grafana:latest
# env_file:
# - ./monitoring/configuration/grafana-plugins.env
# volumes:
# - ./monitoring/configuration/grafana.ini:/etc/grafana/grafana.ini:Z
# - ./monitoring/configuration/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z
# - ./monitoring/configuration/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z
# - ./monitoring/configuration/dashboards:/var/lib/grafana/dashboards/:Z
# - ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_icon.svg:Z
# - ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_typelogo.svg:Z
# - ./monitoring/configuration/customizations/custom-logo.png:/usr/share/grafana/public/img/fav32.png:Z
# ports:
# - 0.0.0.0:3000:3000
# depends_on:
# - prometheus
grafana:
image: docker.io/grafana/grafana:latest
env_file:
- ./monitoring/configuration/grafana-plugins.env
volumes:
- ./monitoring/configuration/grafana.ini:/etc/grafana/grafana.ini:Z
- ./monitoring/configuration/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z
- ./monitoring/configuration/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z
- ./monitoring/configuration/dashboards:/var/lib/grafana/dashboards/:Z
- ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_icon.svg:Z
- ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_typelogo.svg:Z
- ./monitoring/configuration/customizations/custom-logo.png:/usr/share/grafana/public/img/fav32.png:Z
ports:
- 0.0.0.0:3033:3033
restart: on-failure:5
depends_on:
- prometheus
networks:
- waku-simulator_simulation

configs:
cfg_tester_node.toml:
Expand Down
9 changes: 4 additions & 5 deletions apps/liteprotocoltester/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
ports:
# - 30304:30304/tcp
# - 30304:30304/udp
Expand All @@ -79,13 +79,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- SENDER
- servicenode
Expand All @@ -99,7 +98,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
ports:
# - 30304:30304/tcp
# - 30304:30304/udp
Expand All @@ -125,7 +124,7 @@ services:
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- RECEIVER
- servicenode
Expand Down
6 changes: 5 additions & 1 deletion apps/liteprotocoltester/lightpush_publisher.nim
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ import
common/utils/parse_size_units,
],
./tester_config,
./tester_message
./tester_message,
./lpt_metrics

randomize()

Expand Down Expand Up @@ -141,12 +142,15 @@ proc publishMessages(
pubsubTopic = lightpushPubsubTopic,
hash = msgHash
inc(messagesSent)
lpt_publisher_sent_messages_count.inc()
lpt_publisher_sent_bytes.inc(amount = msgSize.int64)
else:
sentMessages[messagesSent] = (hash: msgHash, relayed: false)
failedToSendCause.mgetOrPut(wlpRes.error, 1).inc()
error "failed to publish message using lightpush",
err = wlpRes.error, hash = msgHash
inc(failedToSendCount)
lpt_publisher_failed_messages_count.inc(labelValues = [wlpRes.error])

await sleepAsync(delayMessages)

Expand Down
7 changes: 4 additions & 3 deletions apps/liteprotocoltester/liteprotocoltester.nim
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ when isMainModule:
wakuConf.clusterId = conf.clusterId
## TODO: Depending on the tester needs we might extend here with shards, clusterId, etc...

wakuConf.metricsServer = true
wakuConf.metricsServerAddress = parseIpAddress("0.0.0.0")
wakuConf.metricsServerPort = 8003

if conf.testFunc == TesterFunctionality.SENDER:
wakuConf.lightpushnode = conf.serviceNode
else:
Expand All @@ -108,9 +112,6 @@ when isMainModule:

wakuConf.rest = false

wakuConf.metricsServer = true
wakuConf.metricsServerAddress = parseIpAddress("0.0.0.0")

# NOTE: {.threadvar.} is used to make the global variable GC safe for the closure uses it
# It will always be called from main thread anyway.
# Ref: https://nim-lang.org/docs/manual.html#threads-gc-safety
Expand Down
30 changes: 30 additions & 0 deletions apps/liteprotocoltester/lpt_metrics.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
## Metric declarations for the lite protocol tester (lpt).
##
## Declares the gauges and counters used by the tester's receiver and
## publisher sides and re-exports the `metrics` module, so importing this
## module is enough to update them (e.g. `lpt_publisher_sent_bytes.inc(...)`).

import metrics

# Re-export so that modules importing lpt_metrics get the metrics API as well.
export metrics

# --- Receiver side metrics ---

# Unlabelled gauge: count of sender peers.
declarePublicGauge lpt_receiver_sender_peer_count, "count of sender peers"

# Counter labelled by "peer": messages received.
declarePublicCounter lpt_receiver_received_messages_count,
"number of messages received per peer", ["peer"]

# Counter labelled by "peer": bytes received.
declarePublicCounter lpt_receiver_received_bytes,
"number of received bytes per peer", ["peer"]

# Gauge labelled by "peer": messages currently considered missing.
declarePublicGauge lpt_receiver_missing_messages_count,
"number of missing messages per peer", ["peer"]

# Counter labelled by "peer": duplicate messages.
declarePublicCounter lpt_receiver_duplicate_messages_count,
"number of duplicate messages per peer", ["peer"]

# Gauge labelled by "peer": distinct messages that were seen duplicated.
declarePublicGauge lpt_receiver_distinct_duplicate_messages_count,
"number of distinct duplicate messages per peer", ["peer"]

# --- Publisher side metrics ---

# Unlabelled counter: messages published.
declarePublicCounter lpt_publisher_sent_messages_count, "number of messages published"

# Counter labelled by "cause": publish failures, one series per failure cause.
declarePublicCounter lpt_publisher_failed_messages_count,
"number of messages failed to publish per failure cause", ["cause"]

# Unlabelled counter: total bytes sent.
declarePublicCounter lpt_publisher_sent_bytes, "number of total bytes sent"
Loading

0 comments on commit 28c7a80

Please sign in to comment.