From 5862a9f5d90f0036fc4786eb0d4361bfa2c35b72 Mon Sep 17 00:00:00 2001 From: Sylvain Chapeland Date: Thu, 7 Jun 2018 09:27:53 +0200 Subject: [PATCH 1/8] revert derived metric raw data send - was fixed in monitoring v1.7.2 --- src/ConsumerStats.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ConsumerStats.cxx b/src/ConsumerStats.cxx index 8203fc0b..c3acb3df 100644 --- a/src/ConsumerStats.cxx +++ b/src/ConsumerStats.cxx @@ -89,7 +89,7 @@ class ConsumerStats: public Consumer { // https://alice.its.cern.ch/jira/browse/FLPPROT-69 monitoringCollector->send({counterBlocks, "readout.Blocks"}); - monitoringCollector->send({counterBytesTotal, "readout.BytesTotal"}); +// monitoringCollector->send({counterBytesTotal, "readout.BytesTotal"}); monitoringCollector->send({counterBytesTotal, "readout.BytesTotal"}, DerivedMetricMode::RATE); monitoringCollector->send({counterBytesDiff, "readout.BytesInterval"}); // monitoringCollector->send({(counterBytesTotal/(1024*1024)), "readout.MegaBytesTotal"}); From 95545f3d5c548de79b8293658eeb0c4ad201e69a Mon Sep 17 00:00:00 2001 From: Sylvain Chapeland Date: Thu, 7 Jun 2018 15:14:32 +0200 Subject: [PATCH 2/8] added getHbOrbit --- src/RdhUtils.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/RdhUtils.h b/src/RdhUtils.h index 447ac285..960cddf9 100644 --- a/src/RdhUtils.h +++ b/src/RdhUtils.h @@ -39,6 +39,9 @@ class RdhHandle { inline uint8_t getHeaderSize() { return rdhPtr->headerSize; } + inline uint32_t getHbOrbit() { + return (uint32_t) rdhPtr->heartbeatOrbit; + } private: o2::Header::RAWDataHeader *rdhPtr; // pointer to RDH in memory From abf8dd4a99ecb93f4e0da42b2e38d9255ba56c3f Mon Sep 17 00:00:00 2001 From: Sylvain Chapeland Date: Thu, 7 Jun 2018 15:16:02 +0200 Subject: [PATCH 3/8] update to latest rdh spec --- src/RAWDataHeader.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/RAWDataHeader.h b/src/RAWDataHeader.h index bbd55f4d..606a40f1 100644 --- 
a/src/RAWDataHeader.h +++ b/src/RAWDataHeader.h @@ -114,23 +114,24 @@ typedef struct _RAWDataHeaderV3 struct { uint32_t version:8; /// bit 0 to 7: header version uint32_t headerSize:8; /// bit 8 to 15: header size - uint32_t blockLength:16; /// bit 16 to 32: block length + uint32_t blockLength:16; /// bit 16 to 31: block length }; }; union { uint32_t word2 = 0x00ffffff; struct { - uint32_t feeId:8; /// bit 0 to 15: FEE id + uint32_t feeId:16; /// bit 0 to 15: FEE id uint32_t priorityBit:8; /// bit 16 to 23: priority bit - uint32_t zero2:16; /// bit 16 to 32: reserved + uint32_t zero2:8; /// bit 24 to 31: reserved }; }; union { uint32_t word1 = 0x0; struct { - uint32_t zero1:32; /// bit 0 to 31: reserved + uint32_t offsetNextPacket:16; /// bit 0 to 15: offset of next block + uint32_t memorySize:16; /// bit 16 to 31: size of block (in bytes) in memory }; }; @@ -152,7 +153,7 @@ typedef struct _RAWDataHeaderV3 union { uint32_t word6 = 0xffffffff; struct { - uint32_t heartbeatOrbit; /// bit 0 to 31: TRG orbit + uint32_t heartbeatOrbit; /// bit 0 to 31: HB orbit }; }; @@ -173,9 +174,9 @@ typedef struct _RAWDataHeaderV3 union { uint32_t word11 = 0x0; struct { - uint32_t triggerBC:12; /// bit 0 to 11: trigger BC ID + uint32_t triggerBC:12; /// bit 0 to 11: TRG BC ID uint32_t zero11_0:4; /// bit 12 to 15: reserved - uint32_t heartbeatBC:12; /// bit 16 to 27: heartbeat BC ID + uint32_t heartbeatBC:12; /// bit 16 to 27: HB BC ID uint32_t zero11_1:4; /// bit 28 to 31: reserved }; }; @@ -212,9 +213,9 @@ typedef struct _RAWDataHeaderV3 union { uint32_t word14 = 0x0; struct { - uint32_t stopBit:8; /// bit 0 to 7: stop bit + uint32_t stopBit:8; /// bit 0 to 7: stop bit uint32_t pagesCoutner:16; /// bit 8 to 23: pages counter - uint32_t zero14:8; /// bit 24 to 31: reserved + uint32_t zero14:8; /// bit 24 to 31: reserved }; }; From 50190dbd80a30503cff308626af4b6d7af01982d Mon Sep 17 00:00:00 2001 From: Sylvain Chapeland Date: Fri, 8 Jun 2018 09:36:07 +0200 Subject: [PATCH 4/8] 
timeframe id from RDH --- src/ReadoutEquipmentRORC.cxx | 75 ++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/src/ReadoutEquipmentRORC.cxx b/src/ReadoutEquipmentRORC.cxx index e130e435..d8d49c3f 100644 --- a/src/ReadoutEquipmentRORC.cxx +++ b/src/ReadoutEquipmentRORC.cxx @@ -47,10 +47,21 @@ class ReadoutEquipmentRORC : public ReadoutEquipment { unsigned long long statsRdhCheckOk=0; // number of RDH structs which have passed check ok unsigned long long statsRdhCheckErr=0; // number of RDH structs which have not passed check + unsigned long long statsNumberOfPages=0; // number of pages read out + unsigned long long statsNumberOfTimeframes=0; // number of timeframes read out + AliceO2::Common::Timer timeframeClock; // timeframe id should be increased at each clock cycle - int currentTimeframe=0; // id of current timeframe + int currentTimeframe=0; // id of current timeframe + bool usingSoftwareClock=false; // if set, using internal software clock to generate timeframe id + + const unsigned int LHCBunches=3564; // number of bunches in LHC + const unsigned int LHCOrbitRate=11246; // LHC orbit rate, in Hz. 
299792458 / 26659 + const uint32_t timeframePeriodOrbits=256; // timeframe interval duration in number of LHC orbits + uint32_t currentTimeframeHbOrbitBegin=0; // HbOrbit of beginning of timeframe + uint32_t firstTimeframeHbOrbitBegin=0; // HbOrbit of beginning of first timeframe + size_t superPageSize=0; // usable size of a superpage }; @@ -202,9 +213,19 @@ ReadoutEquipmentRORC::ReadoutEquipmentRORC(ConfigFile &cfg, std::string name) : theLog.log("ROC input queue size = %d pages",RocFifoSize); if (RocFifoSize==0) {RocFifoSize=1;} - // reset timeframe clock - timeframeClock.reset(1000000/50.0); // 50Hz rate + // reset timeframe id currentTimeframe=0; + if (!cfgRdhCheckEnabled) { + usingSoftwareClock=true; // if RDH disabled, use internal clock for TF id + } + if (usingSoftwareClock) { + // reset timeframe clock + double timeframeRate=LHCOrbitRate*1.0/timeframePeriodOrbits; // timeframe rate, in Hz + theLog.log("Timeframe IDs generated by software, %.2lf Hz",timeframeRate); + timeframeClock.reset(1000000/timeframeRate); + } else { + theLog.log("Timeframe IDs generated from RDH trigger counters"); + } } catch (const std::exception& e) { @@ -222,7 +243,7 @@ ReadoutEquipmentRORC::~ReadoutEquipmentRORC() { } if (cfgRdhCheckEnabled) { - theLog.log("Equipment %s : RDH checks %llu ok, %llu errors",name.c_str(),statsRdhCheckOk,statsRdhCheckErr); + theLog.log("Equipment %s : %llu timeframes, %llu pages, RDH checks %llu ok, %llu errors",name.c_str(),statsNumberOfTimeframes,statsNumberOfPages,statsRdhCheckOk,statsRdhCheckErr); } } @@ -323,20 +344,22 @@ DataBlockContainerReference ReadoutEquipmentRORC::getNextBlock() { theLog.log("make_shared failed"); } if (d!=nullptr) { + statsNumberOfPages++; + d->getData()->header.dataSize=superpage.getReceived(); d->getData()->header.linkId=0; // TODO channel->popSuperpage(); nextBlock=d; - - if (timeframeClock.isTimeout()) { - currentTimeframe++; - timeframeClock.increment(); - } - d->getData()->header.id=currentTimeframe; // validate RDH 
structure, if configured to do so int linkId=-1; + int hbOrbit=-1; + + // checks to do: + // - HB clock consistent in all RDHs + // - increasing counters + if (cfgRdhCheckEnabled) { std::string errorDescription; size_t blockSize=d->getData()->header.dataSize; @@ -352,6 +375,26 @@ DataBlockContainerReference ReadoutEquipmentRORC::getNextBlock() { } } + if (hbOrbit==-1) { + hbOrbit=h.getHbOrbit(); + if ((statsNumberOfPages==1) || ((uint32_t)hbOrbit>=currentTimeframeHbOrbitBegin+timeframePeriodOrbits)) { + if (statsNumberOfPages==1) { + firstTimeframeHbOrbitBegin=hbOrbit; + } + statsNumberOfTimeframes++; + currentTimeframeHbOrbitBegin=hbOrbit-((hbOrbit-firstTimeframeHbOrbitBegin)%timeframePeriodOrbits); // keep it periodic and aligned to 1st timeframe + int newTimeframe=1+(currentTimeframeHbOrbitBegin-firstTimeframeHbOrbitBegin)/timeframePeriodOrbits; + if (newTimeframe!=currentTimeframe+1) { + printf("Non-contiguous timeframe IDs %d ... %d\n",currentTimeframe,newTimeframe); + } + currentTimeframe=newTimeframe; + //printf("Starting timeframe %d @ orbit %d (actual: %d)\n",currentTimeframe,(int)currentTimeframeHbOrbitBegin,(int)hbOrbit); + } else { + //printf("HB orbit %d\n",hbOrbit); + } + + } + //data format: // RDH v3 = https://docs.google.com/document/d/1otkSDYasqpVBDnxplBI7dWNxaZohctA-bvhyrzvtLoQ/edit?usp=sharing if (h.validateRdh(errorDescription)) { @@ -383,7 +426,17 @@ DataBlockContainerReference ReadoutEquipmentRORC::getNextBlock() { if (linkId>=0) { d->getData()->header.linkId=linkId; } - + + if (usingSoftwareClock) { + if (timeframeClock.isTimeout()) { + currentTimeframe++; + statsNumberOfTimeframes++; + timeframeClock.increment(); + } + } + + // set timeframe id + d->getData()->header.id=currentTimeframe; } else { // no data block container... what to do??? 
From e403bf8792c35d5b5042a810f47e95f1e54b9883 Mon Sep 17 00:00:00 2001 From: Sylvain Chapeland Date: Fri, 8 Jun 2018 09:37:02 +0200 Subject: [PATCH 5/8] added debug info compile option --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7724f7fc..e4181f9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,7 +53,7 @@ ELSE () ENDIF () # Add compiler flags for warnings and (more importantly) fPIC and debug symbols -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pedantic -Wextra -fPIC") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pedantic -Wextra -fPIC -g") #################################### # Module, library and executable definition From c3b09df82c13c23f17aff263b32899bb324e0c35 Mon Sep 17 00:00:00 2001 From: Sylvain Chapeland Date: Tue, 12 Jun 2018 16:29:12 +0200 Subject: [PATCH 6/8] log pid --- src/mainReadout.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mainReadout.cxx b/src/mainReadout.cxx index 12bb8b75..8623c26a 100644 --- a/src/mainReadout.cxx +++ b/src/mainReadout.cxx @@ -94,7 +94,7 @@ int main(int argc, char* argv[]) sigaction(SIGINT,&signalSettings,NULL); // log startup and options - theLog.log("Readout process starting"); + theLog.log("Readout process starting, pid %d",getpid()); theLog.log("Optional built features enabled:"); #ifdef WITH_FAIRMQ theLog.log("FAIRMQ : yes"); From 43582c81745301a8a9ad7c4ee7aa65455f4f82a4 Mon Sep 17 00:00:00 2001 From: Sylvain Chapeland Date: Thu, 14 Jun 2018 10:45:53 +0200 Subject: [PATCH 7/8] added memory banks and monitoring to local infologger --- readout.cfg | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/readout.cfg b/readout.cfg index 7f22907b..e301d1fb 100644 --- a/readout.cfg +++ b/readout.cfg @@ -34,6 +34,21 @@ enabled=0 class=MockInjector +################################### +# memory banks +################################### +# All section names should start 
with 'bank-' to be taken into account. +# They define memory to be allocated to readout +# If bank name not specified in each equipment, the first available bank (created first) will be used. +# Types of memory banks include: malloc, MemoryMappedFile +# NB: the FairMQChannel consumers may also create some banks, which will not be +# listed here, and created before them. + +[bank-default] +type=malloc +size=128M + + ################################### # equipments @@ -53,6 +68,8 @@ equipmentType=dummy enabled=1 eventMaxSize=20000 eventMinSize=10000 +memoryPoolNumberOfPages=100 +memoryPoolPageSize=128k [equipment-dummy-2] name=dummy-2 @@ -60,6 +77,9 @@ equipmentType=dummy enabled=1 eventMaxSize=30000 eventMinSize=20000 +memoryPoolNumberOfPages=100 +memoryPoolPageSize=128k + @@ -103,10 +123,9 @@ cardId=86:00.0 consumerType=stats enabled=1 # this publishes stats, if enabled, to O2 monitoring system -monitoringEnabled=0 +monitoringEnabled=1 monitoringUpdatePeriod=5 -monitoringConfig=file:/etc/monitoring.cfg - +monitoringURI=infologger:// # recording to file From 85292e0610c5c38a251cbbbe5283f77f40cdfbde Mon Sep 17 00:00:00 2001 From: Sylvain Chapeland Date: Thu, 14 Jun 2018 10:47:41 +0200 Subject: [PATCH 8/8] release of FMQ objects in specific order to avoid blocking on destroy --- src/ConsumerFMQchannel.cxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ConsumerFMQchannel.cxx b/src/ConsumerFMQchannel.cxx index a6b3e05a..cbb370c3 100644 --- a/src/ConsumerFMQchannel.cxx +++ b/src/ConsumerFMQchannel.cxx @@ -137,6 +137,11 @@ class ConsumerFMQchannel: public Consumer { } ~ConsumerFMQchannel() { + // release in reverse order + mp=nullptr; + memoryBuffer=nullptr; // warning: data range may still be referenced in memory bank manager + sendingChannel=nullptr; + transportFactory=nullptr; } int pushData(DataBlockContainerReference &) {