diff --git a/daemon/BlockCounterFrameBuilder.cpp b/daemon/BlockCounterFrameBuilder.cpp index 67e8ac63..f342615e 100644 --- a/daemon/BlockCounterFrameBuilder.cpp +++ b/daemon/BlockCounterFrameBuilder.cpp @@ -8,9 +8,7 @@ BlockCounterFrameBuilder::~BlockCounterFrameBuilder() { - if (isFrameStarted) { - rawBuilder.endFrame(); - } + endFrame(); } bool BlockCounterFrameBuilder::eventHeader(uint64_t time) @@ -83,17 +81,18 @@ bool BlockCounterFrameBuilder::event64(int key, int64_t value) bool BlockCounterFrameBuilder::check(const uint64_t time) { if ((*flushIsNeeded)(time, rawBuilder.needsFlush())) { - bool shouldEndFrame = isFrameStarted; - if (shouldEndFrame) { - rawBuilder.endFrame(); - isFrameStarted = false; - } - rawBuilder.flush(); - return shouldEndFrame; + return flush(); } return false; } +bool BlockCounterFrameBuilder::flush() +{ + const bool shouldEndFrame = endFrame(); + rawBuilder.flush(); + return shouldEndFrame; +} + bool BlockCounterFrameBuilder::checkSpace(const int bytes) { return rawBuilder.bytesAvailable() >= bytes; @@ -114,3 +113,13 @@ bool BlockCounterFrameBuilder::ensureFrameStarted() isFrameStarted = true; return true; } + +bool BlockCounterFrameBuilder::endFrame() +{ + const bool shouldEndFrame = isFrameStarted; + if (shouldEndFrame) { + rawBuilder.endFrame(); + isFrameStarted = false; + } + return shouldEndFrame; +} diff --git a/daemon/BlockCounterFrameBuilder.h b/daemon/BlockCounterFrameBuilder.h index f792e010..19404152 100644 --- a/daemon/BlockCounterFrameBuilder.h +++ b/daemon/BlockCounterFrameBuilder.h @@ -38,11 +38,14 @@ class BlockCounterFrameBuilder : public IBlockCounterFrameBuilder { virtual bool check(const uint64_t time) override; + virtual bool flush() override; + private: IRawFrameBuilder & rawBuilder; std::shared_ptr flushIsNeeded; bool isFrameStarted = false; bool ensureFrameStarted(); + bool endFrame(); bool checkSpace(const int bytes); }; diff --git a/daemon/Buffer.cpp b/daemon/Buffer.cpp index 1aa55085..22542168 100644 --- 
a/daemon/Buffer.cpp +++ b/daemon/Buffer.cpp @@ -100,7 +100,7 @@ int Buffer::bytesAvailable() const // this is full. remaining -= 200; - return remaining; + return std::max(remaining, 0); } void Buffer::waitForSpace(int bytes) @@ -226,3 +226,20 @@ void Buffer::setDone() // as sender waits for new data *and* EOF sem_post(&mReaderSem); } + +int Buffer::getWriteIndex() const +{ + return mWritePos; +} + +void Buffer::advanceWrite(int bytes) +{ + mWritePos = (mWritePos + bytes) & /*mask*/ (mSize - 1); +} + +void Buffer::writeDirect(int index, const void * data, std::size_t count) +{ + for (std::size_t i = 0; i < count; ++i) { + mBuf[(index + i) & mask] = static_cast(data)[i]; + } +} diff --git a/daemon/Buffer.h b/daemon/Buffer.h index 3d1650a2..033e103e 100644 --- a/daemon/Buffer.h +++ b/daemon/Buffer.h @@ -13,7 +13,7 @@ #include #include -class Buffer : public IBufferControl, public IRawFrameBuilder { +class Buffer : public IBufferControl, public IRawFrameBuilderWithDirectAccess { public: Buffer(int size, sem_t & readerSem, bool includeResponseType); #ifdef BUFFER_USE_SESSION_DATA @@ -32,7 +32,10 @@ class Buffer : public IBufferControl, public IRawFrameBuilder { // Prefer a new member to using these functions if possible char * getWritePos() { return mBuf + mWritePos; } - void advanceWrite(int bytes) { mWritePos = (mWritePos + bytes) & /*mask*/ (mSize - 1); } + + int getWriteIndex() const override; + void advanceWrite(int bytes) override; + void writeDirect(int index, const void * data, std::size_t count) override; int packInt(int32_t x) override; int packInt64(int64_t x) override; diff --git a/daemon/CCNDriver.cpp b/daemon/CCNDriver.cpp index cd28f6b9..f29522b9 100644 --- a/daemon/CCNDriver.cpp +++ b/daemon/CCNDriver.cpp @@ -393,8 +393,11 @@ std::string CCNDriver::validateCounters() continue; } - if (strncmp(counter.getType(), ARM_CCN_5XX_CNT, sizeof(ARM_CCN_5XX_CNT) - 1) == 0) { - const int node = counter.getEvent() & mask; + const auto isCnnCounter = 
(strncmp(counter.getType(), ARM_CCN_5XX_CNT, sizeof(ARM_CCN_5XX_CNT) - 1) == 0); + const auto & eventCode = counter.getEventCode(); + + if (isCnnCounter && eventCode.isValid()) { + const int node = eventCode.asI32() & mask; for (auto & count : counts) { if (count[0] == 0) { diff --git a/daemon/CapturedXML.cpp b/daemon/CapturedXML.cpp index b157d2a5..52f4d154 100644 --- a/daemon/CapturedXML.cpp +++ b/daemon/CapturedXML.cpp @@ -3,6 +3,7 @@ #include "CapturedXML.h" #include "CapturedSpe.h" +#include "Constant.h" #include "ICpuInfo.h" #include "Logging.h" #include "OlyUtility.h" @@ -68,6 +69,19 @@ static const char * detectOs() } #endif +static std::string modeAsString(const ConstantMode mode) +{ + switch (mode) { + case ConstantMode::SystemWide: + return "system-wide"; + case ConstantMode::PerCore: + return "per-core"; + default: + logg.logError("Unexpected ConstantMode %d", static_cast(mode)); + handleException(); + } +} + /** Generate the xml tree for capture.xml */ static mxml_node_t * getTree(bool includeTime, lib::Span spes, @@ -152,8 +166,8 @@ static mxml_node_t * getTree(bool includeTime, mxml_node_t * const node = mxmlNewElement(counters, "counter"); mxmlElementSetAttrf(node, "key", "0x%x", counter.getKey()); mxmlElementSetAttr(node, "type", counter.getType()); - if (counter.getEvent() != -1) { - mxmlElementSetAttrf(node, "event", "0x%x", counter.getEvent()); + if (counter.getEventCode().isValid()) { + mxmlElementSetAttrf(node, "event", "0x%" PRIxEventCode, counter.getEventCode().asU64()); } if (counter.getCount() > 0) { mxmlElementSetAttrf(node, "count", "%d", counter.getCount()); @@ -164,6 +178,23 @@ static mxml_node_t * getTree(bool includeTime, } } + for (const auto & constant : gSessionData.mConstants) { + + const std::string mode = modeAsString(constant.getMode()); + + if (counters == nullptr) { + counters = mxmlNewElement(captured, "counters"); + } + mxml_node_t * const node = mxmlNewElement(counters, "counter"); + + mxmlElementSetAttrf(node, "key", 
"0x%x", constant.getKey()); + mxmlElementSetAttr(node, "counter", constant.getCounterString().c_str()); + mxmlElementSetAttr(node, "title", constant.getTitle().c_str()); + mxmlElementSetAttr(node, "name", constant.getName().c_str()); + mxmlElementSetAttr(node, "class", "constant"); + mxmlElementSetAttr(node, "mode", mode.c_str()); + } + for (const auto & spe : spes) { if (counters == nullptr) { counters = mxmlNewElement(captured, "counters"); diff --git a/daemon/Child.cpp b/daemon/Child.cpp index e9d23b93..ed69c4c0 100644 --- a/daemon/Child.cpp +++ b/daemon/Child.cpp @@ -8,6 +8,7 @@ #include "CounterXML.h" #include "Driver.h" #include "Drivers.h" +#include "ExitStatus.h" #include "ExternalSource.h" #include "ICpuInfo.h" #include "LocalCapture.h" @@ -43,13 +44,6 @@ std::atomic Child::gSingleton = ATOMIC_VAR_INIT(nullptr); extern void cleanUp(); -constexpr int exceptionExitCode = 1; -constexpr int secondExceptionExitCode = 2; -// constexpr int secondSignalExitCode = 3; no longer used -// constexpr int alarmExitCode = 4; no longer used -constexpr int noSingletonExitCode = 5; -constexpr int signalFailedExitCode = 6; - void handleException() { Child * const singleton = Child::getSingleton(); @@ -69,7 +63,7 @@ void handleException() // don't call exit handlers / global destructors // because other threads may be still running - _exit(exceptionExitCode); + _exit(EXCEPTION_EXIT_CODE); } std::unique_ptr Child::createLocal(Drivers & drivers, const Child::Config & config) @@ -93,7 +87,7 @@ void Child::signalHandler(int signum) if (singleton == nullptr) { // this should not be possible because we set the singleton before // installing the handlers - exit(noSingletonExitCode); + exit(NO_SINGLETON_EXIT_CODE); } singleton->endSession(signum); @@ -188,9 +182,11 @@ void Child::run() checkError(configuration_xml::setCounters(counterConfigs, !countersAreDefaults, drivers)); - // Initialize all drivers + // Initialize all drivers and register their constants with the global constant 
list for (Driver * driver : drivers.getAll()) { driver->resetCounters(); + + driver->insertConstants(gSessionData.mConstants); } // Set up counters using the associated driver's setup function @@ -435,7 +431,7 @@ void Child::endSession(int signum) if (signum != 0) { // we're in a signal handler so it's not safe to log // and if this has failed something has gone really wrong - _exit(signalFailedExitCode); + _exit(SIGNAL_FAILED_EXIT_CODE); } logg.logError("write failed (%d) %s", errno, strerror(errno)); handleException(); @@ -465,7 +461,7 @@ void Child::cleanupException() logg.logMessage("Received multiple exceptions, terminating the child"); // Something is really wrong, exit immediately - _exit(secondExceptionExitCode); + _exit(SECOND_EXCEPTION_EXIT_CODE); } if (command) { @@ -544,6 +540,16 @@ namespace { sender.writeData(nullptr, 0, ResponseType::ACK); return State::PROCESS_COMMANDS; } + State handleExit() override + { + logg.logMessage("INVESTIGATE: Received unknown command type COMMAND_EXIT"); + return State::EXIT_OK; + } + State handleRequestCurrentConfig() override + { + logg.logMessage("INVESTIGATE: Received unknown command type COMMAND_REQUEST_CURRENT_CONFIG"); + return State::PROCESS_COMMANDS; + } private: Sender & sender; diff --git a/daemon/Config.h b/daemon/Config.h index 5e90a8ad..647812bb 100644 --- a/daemon/Config.h +++ b/daemon/Config.h @@ -10,10 +10,6 @@ #define MAX_PERFORMANCE_COUNTERS 100 -// If debugfs is not mounted at /sys/kernel/debug, update TRACING_PATH -#define TRACING_PATH "/sys/kernel/debug/tracing" -#define EVENTS_PATH TRACING_PATH "/events" - // feature control options #ifndef CONFIG_PREFER_SYSTEM_WIDE_MODE #define CONFIG_PREFER_SYSTEM_WIDE_MODE 1 @@ -31,6 +27,10 @@ #define GATORD_BUILD_ID "oss" #endif +#ifndef GATOR_SELF_PROFILE +#define GATOR_SELF_PROFILE 0 +#endif + // assume /proc/sys/kernel/perf_event_paranoid == 2 if it cannot be read #ifndef CONFIG_ASSUME_PERF_HIGH_PARANOIA #define CONFIG_ASSUME_PERF_HIGH_PARANOIA 1 diff --git 
a/daemon/Configuration.h b/daemon/Configuration.h index 6d1c7efa..dbabe9d1 100644 --- a/daemon/Configuration.h +++ b/daemon/Configuration.h @@ -3,6 +3,8 @@ #ifndef CONFIGURATION_H_ #define CONFIGURATION_H_ +#include "EventCode.h" + #include #include #include @@ -46,7 +48,7 @@ namespace std { struct CounterConfiguration { std::string counterName {}; - int event = -1; + EventCode event {}; int count = 0; int cores = 0; }; diff --git a/daemon/ConfigurationXML.cpp b/daemon/ConfigurationXML.cpp index cf5f5b9f..36b263c0 100644 --- a/daemon/ConfigurationXML.cpp +++ b/daemon/ConfigurationXML.cpp @@ -40,13 +40,13 @@ static void appendError(std::ostream & error, const std::string & possibleError) namespace configuration_xml { static bool addCounter(const char * counterName, - int event, + const EventCode & event, int count, int cores, int mIndex, bool printWarningIfUnclaimed, lib::Span drivers, - const std::map & counterToEventMap); + const std::map & counterToEventMap); Contents getConfigurationXML(lib::Span clusters) { @@ -130,7 +130,7 @@ namespace configuration_xml { for (auto & mCounter : gSessionData.mCounters) { mCounter.setEnabled(false); } - const std::map counterToEventMap = + const std::map counterToEventMap = events_xml::getCounterToEventMap(drivers.getAllConst(), drivers.getPrimarySourceProvider().getCpuInfo().getClusters()); //Add counter @@ -219,22 +219,23 @@ namespace configuration_xml { } static bool addCounter(const char * counterName, - int event, + const EventCode & event, int count, int cores, int mIndex, bool printWarningIfUnclaimed, lib::Span drivers, - const std::map & counterToEventMap) + const std::map & counterToEventMap) { - const auto end = counterToEventMap.end(); - const auto it = - std::find_if(counterToEventMap.begin(), end, [&counterName](const std::pair & pair) { - return strcasecmp(pair.first.c_str(), counterName) == 0; - }); - const bool hasEventsXmlCounter = (it != end); - const int counterEvent = (hasEventsXmlCounter ? 
it->second : -1); + const auto eventsXmlCounterEnd = counterToEventMap.end(); + const auto eventsXmlCounterIterator = + std::find_if(counterToEventMap.begin(), + eventsXmlCounterEnd, + [&counterName](const std::pair & pair) { + return strcasecmp(pair.first.c_str(), counterName) == 0; + }); + // read attributes Counter & counter = gSessionData.mCounters[mIndex]; counter.clear(); @@ -245,13 +246,15 @@ namespace configuration_xml { // overriding anything from user map. This is necessary for cycle counters for example where // they have a name "XXX_ccnt" but also often an event code. If not the event code -1 is used // which is incorrect. - if (hasEventsXmlCounter) { - counter.setEvent(counterEvent); + if (eventsXmlCounterIterator != eventsXmlCounterEnd) { + if (eventsXmlCounterIterator->second.isValid()) { + counter.setEventCode(eventsXmlCounterIterator->second); + } } // the counter is not in events.xml. This usually means it is a PMU slot counter // the user specified the event code, use that - else if (event > -1) { - counter.setEvent(event); + else if (event.isValid()) { + counter.setEventCode(event); } // the counter is not in events.xml. This usually means it is a PMU slot counter, but since // the user has not specified an event code, this is probably incorrect. @@ -273,9 +276,10 @@ namespace configuration_xml { for (Driver * driver : drivers) { if (driver->claimCounter(counter)) { if ((counter.getDriver() != nullptr) && (counter.getDriver() != driver)) { - logg.logError("More than one driver has claimed %s:%i (%s vs %s)", + const auto & optionalEventCode = counter.getEventCode(); + logg.logError("More than one driver has claimed %s:0x%" PRIxEventCode " (%s vs %s)", counter.getType(), - counter.getEvent(), + (optionalEventCode.isValid() ? 
optionalEventCode.asU64() : 0), counter.getDriver()->getName(), driver->getName()); handleException(); @@ -286,7 +290,11 @@ namespace configuration_xml { // If no driver is associated with the counter, disable it if (counter.getDriver() == nullptr) { if (printWarningIfUnclaimed) { - logg.logWarning("No driver has claimed %s:%i", counter.getType(), counter.getEvent()); + const auto & optionalEventCode = counter.getEventCode(); + + logg.logWarning("No driver has claimed %s:0x%" PRIxEventCode, + counter.getType(), + (optionalEventCode.isValid() ? optionalEventCode.asU64() : 0)); } counter.setEnabled(false); } diff --git a/daemon/ConfigurationXMLParser.cpp b/daemon/ConfigurationXMLParser.cpp index d7f40560..ba00c3e5 100644 --- a/daemon/ConfigurationXMLParser.cpp +++ b/daemon/ConfigurationXMLParser.cpp @@ -75,14 +75,14 @@ int ConfigurationXMLParser::readCounter(mxml_node_t * node) } counter.cores = cores; } - int event; + long long event; if (eventStr != nullptr) { - if (!stringToInt(&event, eventStr, 16)) { + if (!stringToLongLong(&event, eventStr, 16)) { logg.logError("Configuration XML event must be an integer"); return PARSER_ERROR; } else { - counter.event = event; + counter.event = EventCode(event); } } counterConfigurations.push_back(counter); diff --git a/daemon/Constant.cpp b/daemon/Constant.cpp new file mode 100644 index 00000000..627565ef --- /dev/null +++ b/daemon/Constant.cpp @@ -0,0 +1,13 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. 
*/ + +#include "Constant.h" + +Constant::Constant(CounterKey key, std::string counterString, std::string title, std::string name, ConstantMode mode) + : mKey(key), mCounterString(std::move(counterString)), mTitle(std::move(title)), mName(std::move(name)), mMode(mode) +{ +} + +bool Constant::operator<(const Constant & rhs) const +{ + return mCounterString < rhs.mCounterString; +} diff --git a/daemon/Constant.h b/daemon/Constant.h new file mode 100644 index 00000000..c16a2797 --- /dev/null +++ b/daemon/Constant.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. */ + +#ifndef CONSTANT_H +#define CONSTANT_H + +#include "ConstantMode.h" +#include "GetEventKey.h" +#include "lib/Optional.h" + +#include + +/** + * Represents something that a Driver can send at the start of a capture. + * An instance of this class is intended to be associated with a separate value + * that shouldn't change for the lifetime of a capture. + * + * The key identifies the meaning of the constant's associated value + * when it is transmitted down to Streamline. + */ +class Constant { +public: + Constant(CounterKey key, std::string counterString, std::string title, std::string name, ConstantMode mode); + + CounterKey getKey() const { return mKey; } + + const std::string & getCounterString() const { return mCounterString; } + + const std::string & getTitle() const { return mTitle; } + + const std::string & getName() const { return mName; } + + ConstantMode getMode() const { return mMode; } + + bool operator<(const Constant & rhs) const; + +private: + CounterKey mKey; + std::string mCounterString; + std::string mTitle; + std::string mName; + ConstantMode mMode; +}; + +#endif // CONSTANT_H diff --git a/daemon/ConstantMode.h b/daemon/ConstantMode.h new file mode 100644 index 00000000..0b8fd427 --- /dev/null +++ b/daemon/ConstantMode.h @@ -0,0 +1,8 @@ +/* Copyright (C) 2013-2020 by Arm Limited. All rights reserved. 
*/ + +#ifndef CONSTANT_MODE_H +#define CONSTANT_MODE_H + +enum class ConstantMode { SystemWide, PerCore }; + +#endif // CONSTANT_MODE_H diff --git a/daemon/Counter.h b/daemon/Counter.h index d364182c..ef96b5df 100644 --- a/daemon/Counter.h +++ b/daemon/Counter.h @@ -3,6 +3,8 @@ #ifndef COUNTER_H #define COUNTER_H +#include "EventCode.h" + #include #include @@ -13,7 +15,7 @@ class Counter { static const size_t MAX_STRING_LEN = 80; static const size_t MAX_DESCRIPTION_LEN = 400; - Counter() : mType(), mEnabled(false), mEvent(-1), mCount(0), mCores(-1), mKey(0), mDriver(nullptr) + Counter() : mType(), mEnabled(false), mEvent(), mCount(0), mCores(-1), mKey(0), mDriver(nullptr) { mType[0] = '\0'; } @@ -30,7 +32,7 @@ class Counter { mType[sizeof(mType) - 1] = '\0'; } void setEnabled(const bool enabled) { mEnabled = enabled; } - void setEvent(const int event) { mEvent = event; } + void setEventCode(const EventCode event) { mEvent = event; } void setCount(const int count) { mCount = count; } void setCores(const int cores) { mCores = cores; } void setKey(const int key) { mKey = key; } @@ -38,7 +40,7 @@ class Counter { const char * getType() const { return mType; } bool isEnabled() const { return mEnabled; } - int getEvent() const { return mEvent; } + EventCode getEventCode() const { return mEvent; } int getCount() const { return mCount; } int getCores() const { return mCores; } int getKey() const { return mKey; } @@ -53,7 +55,7 @@ class Counter { char mType[MAX_STRING_LEN]; bool mEnabled; - int mEvent; + EventCode mEvent; int mCount; int mCores; int mKey; diff --git a/daemon/DiskIODriver.cpp b/daemon/DiskIODriver.cpp index f32aa094..88eaa22c 100644 --- a/daemon/DiskIODriver.cpp +++ b/daemon/DiskIODriver.cpp @@ -83,6 +83,7 @@ void DiskIODriver::doRead() int nameEnd = -1; uint64_t readBytes = -1; uint64_t writeBytes = -1; + // NOLINTNEXTLINE(cert-err34-c) const int count = sscanf(line, "%*d %*d %n%*s%n %*u %*u %" SCNu64 " %*u %*u %*u %" SCNu64, &nameStart, diff --git 
a/daemon/Driver.h b/daemon/Driver.h index 0167c7d5..883c14fe 100644 --- a/daemon/Driver.h +++ b/daemon/Driver.h @@ -4,10 +4,12 @@ #define DRIVER_H #include "CapturedSpe.h" +#include "Constant.h" #include "lib/Optional.h" #include "mxml/mxml.h" #include +#include class Counter; struct SpeConfiguration; @@ -27,6 +29,10 @@ class Driver { // Enables and prepares the counter for capture virtual void setupCounter(Counter & counter) = 0; + // Allow the driver the opportunity to insert a set of + // constants that it is capable of sending to Streamline + virtual void insertConstants(std::set &) {} + // Claims and prepares the SPE for capture virtual lib::Optional setupSpe(int /* sampleRate */, const SpeConfiguration & /* configuration */) { diff --git a/daemon/DriverCounter.cpp b/daemon/DriverCounter.cpp index b12e2f7d..01b63684 100644 --- a/daemon/DriverCounter.cpp +++ b/daemon/DriverCounter.cpp @@ -2,7 +2,7 @@ #include "DriverCounter.h" -#include "SessionData.h" +#include "GetEventKey.h" DriverCounter::DriverCounter(DriverCounter * const next, const char * const name) : mNext(next), mName(name), mKey(getEventKey()), mEnabled(false) diff --git a/daemon/Drivers.cpp b/daemon/Drivers.cpp index b691bc91..95d37648 100644 --- a/daemon/Drivers.cpp +++ b/daemon/Drivers.cpp @@ -7,12 +7,16 @@ #include "xml/EventsXML.h" static std::unique_ptr createPrimarySourceProvider(bool systemWide, + const TraceFsConstants & traceFsConstants, PmuXML && pmuXml, const char * maliFamilyName, bool disableCpuOnlining) { - std::unique_ptr primarySourceProvider = - PrimarySourceProvider::detect(systemWide, std::move(pmuXml), maliFamilyName, disableCpuOnlining); + std::unique_ptr primarySourceProvider = PrimarySourceProvider::detect(systemWide, + traceFsConstants, + std::move(pmuXml), + maliFamilyName, + disableCpuOnlining); if (!primarySourceProvider) { logg.logError( "Unable to initialize primary capture source:\n" @@ -24,14 +28,16 @@ static std::unique_ptr createPrimarySourceProvider(bool s return 
primarySourceProvider; } -Drivers::Drivers(bool systemWide, PmuXML && pmuXml, bool disableCpuOnlining) +Drivers::Drivers(bool systemWide, PmuXML && pmuXml, bool disableCpuOnlining, const TraceFsConstants & traceFsConstants) : mMaliHwCntrs {}, mPrimarySourceProvider {createPrimarySourceProvider(systemWide, + traceFsConstants, std::move(pmuXml), mMaliHwCntrs.getSupportedDeviceFamilyName(), disableCpuOnlining)}, mMidgard {}, - mFtraceDriver {!mPrimarySourceProvider->supportsTracepointCapture(), + mFtraceDriver {traceFsConstants, + !mPrimarySourceProvider->supportsTracepointCapture(), mPrimarySourceProvider->getCpuInfo().getCpuIds().size()}, mAtraceDriver {mFtraceDriver}, mTtraceDriver {mFtraceDriver}, diff --git a/daemon/Drivers.h b/daemon/Drivers.h index 57818436..97b84a86 100644 --- a/daemon/Drivers.h +++ b/daemon/Drivers.h @@ -19,7 +19,7 @@ class Drivers { public: - Drivers(bool systemWide, PmuXML && pmuXml, bool disableCpuOnlining); + Drivers(bool systemWide, PmuXML && pmuXml, bool disableCpuOnlining, const TraceFsConstants & traceFsConstants); armnn::Driver & getArmnnDriver() { return mArmnnDriver; } diff --git a/daemon/DynBuf.cpp b/daemon/DynBuf.cpp index 2de1924e..5b23234c 100644 --- a/daemon/DynBuf.cpp +++ b/daemon/DynBuf.cpp @@ -99,6 +99,7 @@ int DynBuf::readlink(const char * const path) return 0; } +// NOLINTNEXTLINE(cert-dcl50-cpp) bool DynBuf::printf(const char * format, ...) { va_list ap; @@ -112,6 +113,7 @@ bool DynBuf::printf(const char * format, ...) return result; } +// NOLINTNEXTLINE(cert-dcl50-cpp) bool DynBuf::append(const char * format, ...) { va_list ap; diff --git a/daemon/EventCode.h b/daemon/EventCode.h new file mode 100644 index 00000000..dbaf84af --- /dev/null +++ b/daemon/EventCode.h @@ -0,0 +1,52 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include +#include +#include + +class EventCode { +public: + static constexpr std::uint64_t INVALID_EVENT_CODE_VALUE = ~0ull; + + constexpr EventCode() noexcept : value(INVALID_EVENT_CODE_VALUE) {} + constexpr explicit EventCode(int value) noexcept : value(value & 0xffffffffull) {} + constexpr explicit EventCode(long long value) noexcept : value(value) {} + constexpr explicit EventCode(unsigned long long value) noexcept : value(value) {} + + constexpr EventCode(const EventCode & that) noexcept : value(that.value) {} + EventCode & operator=(const EventCode & that) noexcept + { + if (this != &that) { + value = that.value; + } + return *this; + } + EventCode(EventCode && that) noexcept + : value(that.value) // FIXME: C++14, make constexpr, using std::exchange(that.value, INVALID_EVENT_CODE_VALUE) + { + that.value = INVALID_EVENT_CODE_VALUE; + } + EventCode & operator=(EventCode && that) noexcept + { + if (this != &that) { + value = that.value; + that.value = INVALID_EVENT_CODE_VALUE; + } + return *this; + } + + constexpr bool isValid() const noexcept { return value != INVALID_EVENT_CODE_VALUE; } + constexpr std::uint64_t asU64() const noexcept { return value; } + constexpr std::int32_t asI32() const noexcept { return value; } + + constexpr bool operator==(const EventCode & that) const noexcept { return (value == that.value); } + constexpr bool operator<(const EventCode & that) const noexcept { return value < that.value; } + +private: + std::uint64_t value; +}; + +#define PRIxEventCode PRIx64 +#define PRIuEventCode PRIu64 diff --git a/daemon/Events.h b/daemon/Events.h index 0f5e4ece..7418d5f7 100644 --- a/daemon/Events.h +++ b/daemon/Events.h @@ -2,6 +2,7 @@ #pragma once +#include "EventCode.h" #include "lib/Optional.h" #include @@ -11,7 +12,7 @@ struct Event { enum Class { DELTA, INCIDENT, ABSOLUTE, ACTIVITY }; // at least one of eventNumber or counter should be present - lib::Optional eventNumber; + EventCode eventNumber; lib::Optional counter; 
Class clazz; double multiplier; diff --git a/daemon/ExitStatus.h b/daemon/ExitStatus.h new file mode 100644 index 00000000..826faa96 --- /dev/null +++ b/daemon/ExitStatus.h @@ -0,0 +1,15 @@ +/* Copyright (C) 2010-2020 by Arm Limited. All rights reserved. */ + +#ifndef EXITSTATUS_H_ +#define EXITSTATUS_H_ + +static constexpr int EXCEPTION_EXIT_CODE = 1; +static constexpr int SECOND_EXCEPTION_EXIT_CODE = 2; +// constexpr int secondSignalExitCode = 3; no longer used +// constexpr int alarmExitCode = 4; no longer used +static constexpr int NO_SINGLETON_EXIT_CODE = 5; +static constexpr int SIGNAL_FAILED_EXIT_CODE = 6; +// child will return this exit code on exit_ok command +static constexpr int OK_TO_EXIT_GATOR_EXIT_CODE = 7; + +#endif /* EXITSTATUS_H_ */ diff --git a/daemon/ExternalDriver.cpp b/daemon/ExternalDriver.cpp index 1908203f..8a008c30 100644 --- a/daemon/ExternalDriver.cpp +++ b/daemon/ExternalDriver.cpp @@ -59,17 +59,17 @@ static int readPackedInt(const char * const buf, const size_t bufSize, size_t * class ExternalCounter : public DriverCounter { public: ExternalCounter(DriverCounter * next, const char * name, int cores) - : DriverCounter(next, name), mCores(cores), mEvent(-1) + : DriverCounter(next, name), mCores(cores), mEvent() { } int getCores() const { return mCores; } - void setEvent(const int event) { mEvent = event; } - int getEvent() const { return mEvent; } + void setEvent(EventCode event) { mEvent = event; } + EventCode getEvent() const { return mEvent; } private: const int mCores; - int mEvent; + EventCode mEvent; // Intentionally undefined ExternalCounter(const ExternalCounter &) = delete; @@ -202,7 +202,7 @@ void ExternalDriver::start() } memcpy(buf + pos, counter->getName(), nameLen + 1); pos += nameLen + 1; - buffer_utils::packInt(buf, pos, counter->getEvent()); + buffer_utils::packInt(buf, pos, (counter->getEvent().isValid() ? 
counter->getEvent().asI32() : -1)); buffer_utils::packInt(buf, pos, counter->getKey()); } buffer_utils::writeLEInt(buf + 1, pos); @@ -258,10 +258,8 @@ void ExternalDriver::setupCounter(Counter & counter) return; } externalCounter->setEnabled(true); + externalCounter->setEvent(counter.getEventCode()); counter.setKey(externalCounter->getKey()); - if (counter.getEvent() != -1) { - externalCounter->setEvent(counter.getEvent()); - } if (externalCounter->getCores() > 0) { counter.setCores(externalCounter->getCores()); } diff --git a/daemon/ExternalSource.cpp b/daemon/ExternalSource.cpp index 5eb17e1a..e7f0ea2a 100644 --- a/daemon/ExternalSource.cpp +++ b/daemon/ExternalSource.cpp @@ -7,6 +7,7 @@ #include "Buffer.h" #include "BufferUtils.h" #include "Child.h" +#include "CommitTimeChecker.h" #include "Drivers.h" #include "Logging.h" #include "Monitor.h" @@ -32,11 +33,15 @@ static const char MALI_UTGARD_STARTUP[] = "\0mali-utgard-startup"; static const char FTRACE_V1[] = "FTRACE 1\n"; static const char FTRACE_V2[] = "FTRACE 2\n"; +static constexpr int BUFFER_SIZE = 1 * 1024 * 1024; + class ExternalSource : public Source { public: - ExternalSource(sem_t & senderSem, Drivers & mDrivers) + ExternalSource(sem_t & senderSem, Drivers & mDrivers, std::function getMonotonicTime) : mBufferSem(), - mBuffer(128 * 1024, senderSem), + mGetMonotonicTime(std::move(getMonotonicTime)), + mCommitChecker(gSessionData.mLiveRate), + mBuffer(BUFFER_SIZE, senderSem), mMonitor(), mMidgardStartupUds(MALI_GRAPHICS_STARTUP, sizeof(MALI_GRAPHICS_STARTUP)), mUtgardStartupUds(MALI_UTGARD_STARTUP, sizeof(MALI_UTGARD_STARTUP)), @@ -156,7 +161,7 @@ class ExternalSource : public Source { return true; } - void run(std::uint64_t /* monotonicStart */, std::function endSession) override + void run(std::uint64_t monotonicStart, std::function endSession) override { prctl(PR_SET_NAME, reinterpret_cast(&"gatord-external"), 0, 0, 0); @@ -235,7 +240,7 @@ class ExternalSource : public Source { * starve out the gator 
data. */ while (mSessionIsActive) { - if (!transfer(fd, endSession)) { + if (!transfer(monotonicStart, fd, endSession)) { break; } } @@ -247,17 +252,18 @@ class ExternalSource : public Source { const auto ftraceFds = mDrivers.getFtraceDriver().stop(); // Read any slop for (int fd : ftraceFds) { - transfer(fd, endSession); + transfer(monotonicStart, fd, endSession); close(fd); } mDrivers.getTtraceDriver().stop(); mDrivers.getAtraceDriver().stop(); } + mBuffer.flush(); mBuffer.setDone(); } - bool transfer(const int fd, const std::function & endSession) + bool transfer(const std::uint64_t monotonicStart, const int fd, const std::function & endSession) { // Wait until there is enough room for a header and two ints waitFor(IRawFrameBuilder::MAX_FRAME_HEADER_SIZE + 2 * buffer_utils::MAXSIZE_PACK32, endSession); @@ -278,14 +284,14 @@ class ExternalSource : public Source { mBuffer.packInt(fd); mBuffer.endFrame(); // Always force-flush the buffer as this frame don't work like others - mBuffer.flush(); + checkFlush(monotonicStart, true); close(fd); return false; } mBuffer.advanceWrite(bytes); mBuffer.endFrame(); - mBuffer.flush(); + checkFlush(monotonicStart, isBufferOverFull(mBuffer.contiguousSpaceAvailable())); // Short reads also mean nothing is left to read return bytes >= contiguous; @@ -312,6 +318,8 @@ class ExternalSource : public Source { private: sem_t mBufferSem; + std::function mGetMonotonicTime; + CommitTimeChecker mCommitChecker; Buffer mBuffer; Monitor mMonitor; OlyServerSocket mMidgardStartupUds; @@ -325,11 +333,26 @@ class ExternalSource : public Source { int mMidgardUds; Drivers & mDrivers; std::atomic_bool mSessionIsActive {true}; + + void checkFlush(std::uint64_t monotonicStart, bool force) + { + const auto delta = mGetMonotonicTime() - monotonicStart; + + if (mCommitChecker(delta, force)) { + mBuffer.flush(); + } + } + + static bool isBufferOverFull(int sizeAvailable) + { + // if less than a quarter left + return (sizeAvailable < (BUFFER_SIZE / 4)); + } 
}; std::unique_ptr createExternalSource(sem_t & senderSem, Drivers & drivers) { - auto source = lib::make_unique(senderSem, drivers); + auto source = lib::make_unique(senderSem, drivers, &getTime); if (!source->prepare()) { return {}; } diff --git a/daemon/FtraceDriver.cpp b/daemon/FtraceDriver.cpp index 6b2db402..2e2f2ad7 100644 --- a/daemon/FtraceDriver.cpp +++ b/daemon/FtraceDriver.cpp @@ -63,7 +63,10 @@ void Barrier::wait() class FtraceCounter : public DriverCounter { public: - FtraceCounter(DriverCounter * next, const char * name, const char * enable); + FtraceCounter(DriverCounter * next, + const TraceFsConstants & traceFsConstants, + const char * name, + const char * enable); ~FtraceCounter() override; bool readTracepointFormat(IPerfAttrsConsumer & attrsConsumer); @@ -72,6 +75,7 @@ class FtraceCounter : public DriverCounter { void stop(); private: + const TraceFsConstants & traceFsConstants; char * const mEnable; int mWasEnabled; @@ -82,8 +86,14 @@ class FtraceCounter : public DriverCounter { FtraceCounter & operator=(FtraceCounter &&) = delete; }; -FtraceCounter::FtraceCounter(DriverCounter * next, const char * name, const char * enable) - : DriverCounter(next, name), mEnable(enable == nullptr ? nullptr : strdup(enable)), mWasEnabled(0) +FtraceCounter::FtraceCounter(DriverCounter * next, + const TraceFsConstants & traceFsConstants, + const char * name, + const char * enable) + : DriverCounter(next, name), + traceFsConstants(traceFsConstants), + mEnable(enable == nullptr ? 
nullptr : strdup(enable)), + mWasEnabled(0) { } @@ -108,7 +118,7 @@ void FtraceCounter::prepare() } char buf[1 << 10]; - snprintf(buf, sizeof(buf), EVENTS_PATH "/%s/enable", mEnable); + snprintf(buf, sizeof(buf), "%s/%s/enable", traceFsConstants.path__events, mEnable); if ((lib::readIntFromFile(buf, mWasEnabled) != 0) || (lib::writeIntToFile(buf, 1) != 0)) { logg.logError("Unable to read or write to %s", buf); handleException(); @@ -122,13 +132,13 @@ void FtraceCounter::stop() } char buf[1 << 10]; - snprintf(buf, sizeof(buf), EVENTS_PATH "/%s/enable", mEnable); + snprintf(buf, sizeof(buf), "%s/%s/enable", traceFsConstants.path__events, mEnable); lib::writeIntToFile(buf, mWasEnabled); } bool FtraceCounter::readTracepointFormat(IPerfAttrsConsumer & attrsConsumer) { - return ::readTracepointFormat(attrsConsumer, mEnable); + return ::readTracepointFormat(attrsConsumer, traceFsConstants.path__events, mEnable); } static void handlerUsr1(int signum) @@ -314,8 +324,9 @@ void FtraceReader::run() // Intentionally don't close mPfd0 as it is used after this thread is exited to read the slop } -FtraceDriver::FtraceDriver(bool useForTracepoints, size_t numberOfCores) +FtraceDriver::FtraceDriver(const TraceFsConstants & traceFsConstants, bool useForTracepoints, size_t numberOfCores) : SimpleDriver("Ftrace"), + traceFsConstants(traceFsConstants), mBarrier(), mTracingOn(0), mSupported(false), @@ -346,7 +357,7 @@ void FtraceDriver::readEvents(mxml_node_t * const xml) mMonotonicRawSupport = kernelVersion >= KERNEL_VERSION(4, 2, 0); // Is debugfs or tracefs available? 
- if (access(TRACING_PATH, R_OK) != 0) { + if (access(traceFsConstants.path, R_OK) != 0) { mSupported = false; logg.logSetup("Ftrace is disabled\nUnable to locate the tracing directory"); return; @@ -392,7 +403,7 @@ void FtraceDriver::readEvents(mxml_node_t * const xml) } if (enable != nullptr) { char buf[1 << 10]; - snprintf(buf, sizeof(buf), EVENTS_PATH "/%s/enable", enable); + snprintf(buf, sizeof(buf), "%s/%s/enable", traceFsConstants.path__events, enable); if (access(buf, W_OK) != 0) { logg.logSetup("%s is disabled\n%s was not found", counter, buf); continue; @@ -400,7 +411,7 @@ void FtraceDriver::readEvents(mxml_node_t * const xml) } logg.logMessage("Using ftrace for %s", counter); - setCounters(new FtraceCounter(getCounters(), counter, enable)); + setCounters(new FtraceCounter(getCounters(), traceFsConstants, counter, enable)); } } @@ -408,7 +419,7 @@ std::pair, bool> FtraceDriver::prepare() { if (gSessionData.mFtraceRaw) { // Don't want the performace impact of sending all formats so gator only sends it for the enabled counters. 
This means other counters need to be disabled - if (lib::writeCStringToFile(TRACING_PATH "/events/enable", "0") != 0) { + if (lib::writeCStringToFile(traceFsConstants.path__events__enable, "0") != 0) { logg.logError("Unable to turn off all events"); handleException(); } @@ -422,12 +433,12 @@ std::pair, bool> FtraceDriver::prepare() counter->prepare(); } - if (lib::readIntFromFile(TRACING_PATH "/tracing_on", mTracingOn) != 0) { + if (lib::readIntFromFile(traceFsConstants.path__tracing_on, mTracingOn) != 0) { logg.logError("Unable to read if ftrace is enabled"); handleException(); } - if (lib::writeCStringToFile(TRACING_PATH "/tracing_on", "0") != 0) { + if (lib::writeCStringToFile(traceFsConstants.path__tracing_on, "0") != 0) { logg.logError("Unable to turn ftrace off before truncating the buffer"); handleException(); } @@ -435,7 +446,7 @@ std::pair, bool> FtraceDriver::prepare() { int fd; // The below call can be slow on loaded high-core count systems. - fd = open(TRACING_PATH "/trace", O_WRONLY | O_TRUNC | O_CLOEXEC, 0666); + fd = open(traceFsConstants.path__trace, O_WRONLY | O_TRUNC | O_CLOEXEC, 0666); if (fd < 0) { logg.logError("Unable truncate ftrace buffer: %s", strerror(errno)); handleException(); @@ -443,7 +454,6 @@ std::pair, bool> FtraceDriver::prepare() close(fd); } - const char * const trace_clock_path = TRACING_PATH "/trace_clock"; const char * const clock = mMonotonicRawSupport ? "mono_raw" : "perf"; const char * const clock_selected = mMonotonicRawSupport ? "[mono_raw]" : "[perf]"; const size_t max_trace_clock_file_length = 200; @@ -455,13 +465,13 @@ std::pair, bool> FtraceDriver::prepare() // core count systems. The idea is that hopefully only on the first // capture, the trace clock needs to be changed. On subsequent captures, // the right clock is already being used. 
- int fd = open(trace_clock_path, O_RDONLY | O_CLOEXEC); + int fd = open(traceFsConstants.path__trace_clock, O_RDONLY | O_CLOEXEC); if (fd < 0) { - logg.logError("Couldn't open %s", trace_clock_path); + logg.logError("Couldn't open %s", traceFsConstants.path__trace_clock); handleException(); } if ((trace_clock_file_length = ::read(fd, trace_clock_file_content, max_trace_clock_file_length - 1)) < 0) { - logg.logError("Couldn't read from %s", trace_clock_path); + logg.logError("Couldn't read from %s", traceFsConstants.path__trace_clock); close(fd); handleException(); } @@ -474,7 +484,7 @@ std::pair, bool> FtraceDriver::prepare() // Writing to trace_clock can be very slow on loaded high core count // systems. - if (must_switch_clock && lib::writeCStringToFile(TRACING_PATH "/trace_clock", clock) != 0) { + if (must_switch_clock && lib::writeCStringToFile(traceFsConstants.path__trace_clock, clock) != 0) { logg.logError("Unable to switch ftrace to the %s clock, please ensure you are running Linux %s or later", clock, mMonotonicRawSupport ? 
"4.2" : "3.10"); @@ -482,7 +492,7 @@ std::pair, bool> FtraceDriver::prepare() } if (!gSessionData.mFtraceRaw) { - const int fd = open(TRACING_PATH "/trace_pipe", O_RDONLY | O_CLOEXEC); + const int fd = open(traceFsConstants.path__trace_pipe, O_RDONLY | O_CLOEXEC); if (fd < 0) { logg.logError("Unable to open trace_pipe"); handleException(); @@ -515,7 +525,7 @@ std::pair, bool> FtraceDriver::prepare() } char buf[64]; - snprintf(buf, sizeof(buf), TRACING_PATH "/per_cpu/cpu%zu/trace_pipe_raw", cpu); + snprintf(buf, sizeof(buf), "%s/per_cpu/cpu%zu/trace_pipe_raw", traceFsConstants.path, cpu); const int tfd = open(buf, O_RDONLY | O_CLOEXEC); (new FtraceReader(&mBarrier, cpu, tfd, pfd[0], pfd[1]))->start(); result.first.push_back(pfd[0]); @@ -526,7 +536,7 @@ std::pair, bool> FtraceDriver::prepare() void FtraceDriver::start() { - if (lib::writeCStringToFile(TRACING_PATH "/tracing_on", "1") != 0) { + if (lib::writeCStringToFile(traceFsConstants.path__tracing_on, "1") != 0) { logg.logError("Unable to turn ftrace on"); handleException(); } @@ -538,7 +548,7 @@ void FtraceDriver::start() std::vector FtraceDriver::stop() { - lib::writeIntToFile(TRACING_PATH "/tracing_on", mTracingOn); + lib::writeIntToFile(traceFsConstants.path__tracing_on, mTracingOn); for (auto * counter = static_cast(getCounters()); counter != nullptr; counter = static_cast(counter->getNext())) { @@ -567,7 +577,7 @@ bool FtraceDriver::readTracepointFormats(IPerfAttrsConsumer & attrsConsumer, Dyn return true; } - if (!printb->printf(EVENTS_PATH "/header_page")) { + if (!printb->printf("%s/header_page", traceFsConstants.path__events)) { logg.logMessage("DynBuf::printf failed"); return false; } @@ -577,7 +587,7 @@ bool FtraceDriver::readTracepointFormats(IPerfAttrsConsumer & attrsConsumer, Dyn } attrsConsumer.marshalHeaderPage(b->getBuf()); - if (!printb->printf(EVENTS_PATH "/header_event")) { + if (!printb->printf("%s/header_event", traceFsConstants.path__events)) { logg.logMessage("DynBuf::printf failed"); 
return false; } @@ -587,7 +597,7 @@ bool FtraceDriver::readTracepointFormats(IPerfAttrsConsumer & attrsConsumer, Dyn } attrsConsumer.marshalHeaderEvent(b->getBuf()); - std::unique_ptr dir {opendir(EVENTS_PATH "/ftrace"), &closedir}; + std::unique_ptr dir {opendir(traceFsConstants.path__events__ftrace), &closedir}; if (dir == nullptr) { logg.logError("Unable to open events ftrace folder"); handleException(); @@ -597,7 +607,7 @@ bool FtraceDriver::readTracepointFormats(IPerfAttrsConsumer & attrsConsumer, Dyn if (dirent->d_name[0] == '.' || dirent->d_type != DT_DIR) { continue; } - if (!printb->printf(EVENTS_PATH "/ftrace/%s/format", dirent->d_name)) { + if (!printb->printf("%s/%s/format", traceFsConstants.path__events__ftrace, dirent->d_name)) { logg.logMessage("DynBuf::printf failed"); return false; } diff --git a/daemon/FtraceDriver.h b/daemon/FtraceDriver.h index 62e2b1ed..0a49af88 100644 --- a/daemon/FtraceDriver.h +++ b/daemon/FtraceDriver.h @@ -4,6 +4,7 @@ #define FTRACEDRIVER_H #include "SimpleDriver.h" +#include "Tracepoints.h" #include #include @@ -29,7 +30,7 @@ class Barrier { class FtraceDriver : public SimpleDriver { public: - FtraceDriver(bool useForTracepoint, size_t numberOfCores); + FtraceDriver(const TraceFsConstants & traceFsConstants, bool useForTracepoint, size_t numberOfCores); void readEvents(mxml_node_t * xml) override; @@ -41,6 +42,7 @@ class FtraceDriver : public SimpleDriver { bool isSupported() const { return mSupported; } private: + const TraceFsConstants & traceFsConstants; Barrier mBarrier; int mTracingOn; bool mSupported, mMonotonicRawSupport, mUseForTracepoints; diff --git a/daemon/GatorCLIParser.cpp b/daemon/GatorCLIParser.cpp index 5493eda9..5e2599e1 100644 --- a/daemon/GatorCLIParser.cpp +++ b/daemon/GatorCLIParser.cpp @@ -114,19 +114,22 @@ SampleRate getSampleRate(const std::string & value) void GatorCLIParser::addCounter(int startpos, int pos, std::string & counters) { std::string counterType; - std::string subStr = 
counters.substr(startpos, pos); - int event = -1; + std::string subStr = counters.substr(startpos, pos - startpos); + EventCode event; size_t eventpos = 0; //TODO : support for A53:Cycles:1:2:8:0x1 if ((eventpos = subStr.find(':')) != std::string::npos) { - if (!stringToInt(&event, subStr.substr(eventpos + 1, subStr.size()).c_str(), 10)) { //check for decimal - if (!stringToInt(&event, subStr.substr(eventpos + 1, subStr.size()).c_str(), 16)) { //check for hex + auto eventStr = subStr.substr(eventpos + 1, subStr.size()); + long long eventCode; + if (!stringToLongLong(&eventCode, eventStr.c_str(), 10)) { //check for decimal + if (!stringToLongLong(&eventCode, eventStr.c_str(), 16)) { //check for hex logg.logError("event must be an integer"); result.mode = ExecutionMode::EXIT; return; } } + event = EventCode(eventCode); } if (eventpos == std::string::npos) { counterType = subStr; @@ -331,8 +334,6 @@ void GatorCLIParser::parseCLIArguments(int argc, const int optionInt = optarg == nullptr ? -1 : parseBoolean(optarg); SampleRate sampleRate; std::string value; - int startpos = -1; - size_t counterSplitPos = 0; switch (c) { case 'N': if (!stringToInt(&result.mAndroidApiLevel, optarg, 10)) { @@ -459,6 +460,9 @@ void GatorCLIParser::parseCLIArguments(int argc, result.mStopGator = optionInt == 1; break; case 'C': //counter + { + int startpos = -1; + size_t counterSplitPos = 0; if (perfCounterCount > maxPerformanceCounter) { continue; } @@ -471,6 +475,7 @@ void GatorCLIParser::parseCLIArguments(int argc, //adding last counter in list addCounter(startpos + 1, value.length(), value); break; + } case 'X': // spe { parseAndUpdateSpe(); diff --git a/daemon/GatorCLIParser.h b/daemon/GatorCLIParser.h index f957ead1..6f1c9f14 100644 --- a/daemon/GatorCLIParser.h +++ b/daemon/GatorCLIParser.h @@ -71,7 +71,7 @@ class ParserResult { int64_t parameterSetFlag; - std::map events; + std::map events; ExecutionMode mode; std::set printables; diff --git a/daemon/GetEventKey.cpp 
b/daemon/GetEventKey.cpp new file mode 100644 index 00000000..c3a674ce --- /dev/null +++ b/daemon/GetEventKey.cpp @@ -0,0 +1,16 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. */ + +#include "GetEventKey.h" + +CounterKey getEventKey() +{ + // key 0 is reserved as a timestamp + // key 1 is reserved as the marker for thread specific counters + // key 2 is reserved as the marker for core + // Odd keys are assigned by the driver, even keys by the daemon + static CounterKey key = 4; + + const CounterKey ret = key; + key += 2; + return ret; +} diff --git a/daemon/GetEventKey.h b/daemon/GetEventKey.h new file mode 100644 index 00000000..4fa5df97 --- /dev/null +++ b/daemon/GetEventKey.h @@ -0,0 +1,9 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. */ + +#ifndef GET_EVENT_KEY_H +#define GET_EVENT_KEY_H + +using CounterKey = int; +CounterKey getEventKey(); + +#endif // GET_EVENT_KEY_H diff --git a/daemon/IBlockCounterFrameBuilder.h b/daemon/IBlockCounterFrameBuilder.h index ebd752fb..8fce69cc 100644 --- a/daemon/IBlockCounterFrameBuilder.h +++ b/daemon/IBlockCounterFrameBuilder.h @@ -46,4 +46,7 @@ class IBlockCounterFrameBuilder { */ // TODO: rename to commitIfNeeded virtual bool check(uint64_t time) = 0; + + /** force commit/flush if there is any data. 
used at end of capture */ + virtual bool flush() = 0; }; diff --git a/daemon/IRawFrameBuilder.h b/daemon/IRawFrameBuilder.h index 3fdaa34e..6cd0ec98 100644 --- a/daemon/IRawFrameBuilder.h +++ b/daemon/IRawFrameBuilder.h @@ -94,3 +94,13 @@ class IRawFrameBuilder { */ virtual void waitForSpace(int bytes) = 0; }; + +class IRawFrameBuilderWithDirectAccess : public IRawFrameBuilder { +public: + /** @return The raw write index */ + virtual int getWriteIndex() const = 0; + /** Skip the write index forward by 'bytes' */ + virtual void advanceWrite(int bytes) = 0; + /** Write directly into the buffer */ + virtual void writeDirect(int index, const void * data, std::size_t count) = 0; +}; diff --git a/daemon/ISender.h b/daemon/ISender.h index b4d62c39..79d2dd36 100644 --- a/daemon/ISender.h +++ b/daemon/ISender.h @@ -14,6 +14,7 @@ enum class ResponseType : char { APC_DATA = 3, ACK = 4, NAK = 5, + CURRENT_CONFIG = 6, ERROR = '\xFF' }; diff --git a/daemon/Logging.cpp b/daemon/Logging.cpp index e024cd9c..1348da89 100644 --- a/daemon/Logging.cpp +++ b/daemon/Logging.cpp @@ -84,6 +84,7 @@ static void format(char * const buf, vsnprintf(buf + len, bufSize - 1 - len, fmt, args); // subtract 1 for \0 } +// NOLINTNEXTLINE(cert-dcl50-cpp) void Logging::_logError(const char * function, const char * file, int line, const char * fmt, ...) { va_list args; @@ -97,6 +98,7 @@ void Logging::_logError(const char * function, const char * file, int line, cons fprintf(stderr, "%s\n", mErrBuf); } +// NOLINTNEXTLINE(cert-dcl50-cpp) void Logging::_logSetup(const char * function, const char * file, int line, const char * fmt, ...) { char logBuf[4096]; // Arbitrarily large buffer to hold a string @@ -116,6 +118,7 @@ void Logging::_logSetup(const char * function, const char * file, int line, cons } } +// NOLINTNEXTLINE(cert-dcl50-cpp) void Logging::_logMessage(const char * function, const char * file, int line, const char * fmt, ...) 
{ if (mDebug) { @@ -132,6 +135,7 @@ void Logging::_logMessage(const char * function, const char * file, int line, co } } +// NOLINTNEXTLINE(cert-dcl50-cpp) void Logging::_logWarning(const char * function, const char * file, int line, const char * fmt, ...) { char logBuf[4096]; // Arbitrarily large buffer to hold a string diff --git a/daemon/MidgardDriver.cpp b/daemon/MidgardDriver.cpp index 53a946c5..cc6a70fe 100644 --- a/daemon/MidgardDriver.cpp +++ b/daemon/MidgardDriver.cpp @@ -67,7 +67,7 @@ struct CounterData { class MidgardCounter : public DriverCounter { public: MidgardCounter(DriverCounter * next, const char * name, CounterData * const counterData) - : DriverCounter(next, name), mCounterData(*counterData), mEvent(-1) + : DriverCounter(next, name), mCounterData(*counterData), mEvent() { } @@ -79,12 +79,12 @@ class MidgardCounter : public DriverCounter { // ACTIVITY int getCores() const { return mCounterData.mCores; } - void setEvent(const int event) { mEvent = event; } - int getEvent() const { return mEvent; } + void setEvent(EventCode event) { mEvent = event; } + EventCode getEvent() const { return mEvent; } private: const CounterData mCounterData; - int mEvent; + EventCode mEvent; // Intentionally undefined MidgardCounter(const MidgardCounter &) = delete; @@ -305,11 +305,12 @@ bool MidgardDriver::start(const int uds) foundWindumpCounter = true; // MALI_GLES_WINDUMP + const int wdEventCode = counter->getEvent().asI32(); GLESWindump m; m.mDeclId = 1; - m.mSkipframes = counter->getEvent() & 0xff; - m.mMinWidth = (counter->getEvent() & 0xfff00000) >> 20; - m.mMinHeight = (counter->getEvent() & 0xfff00) >> 8; + m.mSkipframes = (wdEventCode & 0xff); + m.mMinWidth = (wdEventCode & 0xfff00000) >> 20; + m.mMinHeight = (wdEventCode & 0xfff00) >> 8; memcpy(buf + bufPos, &m, sizeof(m)); bufPos += sizeof(m); } @@ -352,10 +353,8 @@ void MidgardDriver::setupCounter(Counter & counter) return; } midgardCounter->setEnabled(true); + 
midgardCounter->setEvent(counter.getEventCode()); counter.setKey(midgardCounter->getKey()); - if (counter.getEvent() != -1) { - midgardCounter->setEvent(counter.getEvent()); - } if (midgardCounter->getType() == CounterData::ACTIVITY && midgardCounter->getCores() > 0) { counter.setCores(midgardCounter->getCores()); } diff --git a/daemon/Monitor.cpp b/daemon/Monitor.cpp index 51421fb7..6aa09cab 100644 --- a/daemon/Monitor.cpp +++ b/daemon/Monitor.cpp @@ -9,7 +9,7 @@ #include #include -Monitor::Monitor() : mFd(-1) +Monitor::Monitor() : mFd(-1), mSize(0) { } @@ -45,24 +45,45 @@ bool Monitor::init() if ((fdf == -1) || (fcntl(mFd, F_SETFD, fdf | FD_CLOEXEC) != 0)) { logg.logMessage("fcntl failed"); ::close(mFd); - return -1; + return false; } #endif + mSize = 0; + return true; } -bool Monitor::add(int fd) +static bool addOrRemove(int mFd, int fd, bool add) { + const int op = (add ? EPOLL_CTL_ADD : EPOLL_CTL_DEL); + struct epoll_event event; memset(&event, 0, sizeof(event)); event.data.fd = fd; event.events = EPOLLIN | EPOLLERR | EPOLLHUP; - if (epoll_ctl(mFd, EPOLL_CTL_ADD, fd, &event) != 0) { + if (epoll_ctl(mFd, op, fd, &event) != 0) { logg.logMessage("epoll_ctl failed"); return false; } + return true; +} + +bool Monitor::add(int fd) +{ + if (!addOrRemove(mFd, fd, true)) { + return false; + } + mSize += 1; + return true; +} +bool Monitor::remove(int fd) +{ + if (!addOrRemove(mFd, fd, false)) { + return false; + } + mSize -= 1; return true; } diff --git a/daemon/Monitor.h b/daemon/Monitor.h index 8f9a3c60..8b13a544 100644 --- a/daemon/Monitor.h +++ b/daemon/Monitor.h @@ -13,10 +13,13 @@ class Monitor { void close(); bool init(); bool add(int fd); + bool remove(int fd); int wait(struct epoll_event * events, int maxevents, int timeout) const; + int size() const { return mSize; } private: int mFd; + int mSize; // Intentionally unimplemented Monitor(const Monitor &) = delete; diff --git a/daemon/NetDriver.cpp b/daemon/NetDriver.cpp index 8a923e7e..bfa306e1 100644 --- 
a/daemon/NetDriver.cpp +++ b/daemon/NetDriver.cpp @@ -86,8 +86,12 @@ bool NetDriver::doRead() uint64_t receiveBytes; uint64_t transmitBytes; - const int count = - sscanf(colon + 1, " %" SCNu64 " %*u %*u %*u %*u %*u %*u %*u %" SCNu64, &receiveBytes, &transmitBytes); + + // NOLINTNEXTLINE(cert-err34-c) + const int count = sscanf(colon + 1, // + " %" SCNu64 " %*u %*u %*u %*u %*u %*u %*u %" SCNu64, + &receiveBytes, + &transmitBytes); if (count != 2) { return false; } diff --git a/daemon/PrimarySourceProvider.cpp b/daemon/PrimarySourceProvider.cpp index e6bf72c4..0bde7fbb 100644 --- a/daemon/PrimarySourceProvider.cpp +++ b/daemon/PrimarySourceProvider.cpp @@ -123,6 +123,7 @@ namespace { * @return */ static std::unique_ptr tryCreate(bool systemWide, + const TraceFsConstants & traceFsConstants, PmuXML & pmuXml, const char * maliFamilyName, Ids & ids, @@ -130,7 +131,7 @@ namespace { bool disableCpuOnlining) { std::unique_ptr configuration = - PerfDriverConfiguration::detect(systemWide, ids.getCpuIds(), pmuXml); + PerfDriverConfiguration::detect(systemWide, traceFsConstants.path__events, ids.getCpuIds(), pmuXml); if (configuration != nullptr) { std::vector clusters; for (const auto & perfCpu : configuration->cpus) { @@ -140,7 +141,8 @@ namespace { return std::unique_ptr {new PerfPrimarySource(std::move(*configuration), std::move(pmuXml), maliFamilyName, - std::move(cpuInfo))}; + std::move(cpuInfo), + traceFsConstants)}; } return nullptr; @@ -198,10 +200,11 @@ namespace { PerfPrimarySource(PerfDriverConfiguration && configuration, PmuXML && pmuXml, const char * maliFamilyName, - CpuInfo && cpuInfo) + CpuInfo && cpuInfo, + const TraceFsConstants & traceFsConstants) : PrimarySourceProvider(createPolledDrivers()), cpuInfo(std::move(cpuInfo)), - driver(std::move(configuration), std::move(pmuXml), maliFamilyName, this->cpuInfo) + driver(std::move(configuration), std::move(pmuXml), maliFamilyName, this->cpuInfo, traceFsConstants) { } @@ -330,6 +333,7 @@ const std::vector & 
PrimarySourceProvider::getAdditionalPolledDr } std::unique_ptr PrimarySourceProvider::detect(bool systemWide, + const TraceFsConstants & traceFsConstants, PmuXML && pmuXml, const char * maliFamilyName, bool disableCpuOnlining) @@ -355,7 +359,13 @@ std::unique_ptr PrimarySourceProvider::detect(bool system logg.logMessage("Trying perf API as non-root..."); } - result = PerfPrimarySource::tryCreate(systemWide, pmuXml, maliFamilyName, ids, modelNameToUse, disableCpuOnlining); + result = PerfPrimarySource::tryCreate(systemWide, + traceFsConstants, + pmuXml, + maliFamilyName, + ids, + modelNameToUse, + disableCpuOnlining); if (result != nullptr) { logg.logMessage("...Success"); logg.logSetup("Profiling Source\nUsing perf API for primary data source"); diff --git a/daemon/PrimarySourceProvider.h b/daemon/PrimarySourceProvider.h index d6561629..3e634977 100644 --- a/daemon/PrimarySourceProvider.h +++ b/daemon/PrimarySourceProvider.h @@ -17,6 +17,7 @@ class FtraceDriver; class PrimarySource; struct PmuXML; class ICpuInfo; +struct TraceFsConstants; /** * Interface for different primary source types. 
@@ -31,6 +32,7 @@ class PrimarySourceProvider { * Static initialization / detection */ static std::unique_ptr detect(bool systemWide, + const TraceFsConstants & traceFsConstants, PmuXML && pmuXml, const char * maliFamilyName, bool disableCpuOnlining); diff --git a/daemon/SessionData.cpp b/daemon/SessionData.cpp index 5415088a..7799ded2 100644 --- a/daemon/SessionData.cpp +++ b/daemon/SessionData.cpp @@ -65,7 +65,8 @@ SessionData::SessionData() parameterSetFlag(), mPerfMmapSizeInPages(), mSpeSampleRate(-1), - mCounters() + mCounters(), + mConstants() { } @@ -183,16 +184,3 @@ uint64_t getTime() } return (NS_PER_S * ts.tv_sec + ts.tv_nsec); } - -int getEventKey() -{ - // key 0 is reserved as a timestamp - // key 1 is reserved as the marker for thread specific counters - // key 2 is reserved as the marker for core - // Odd keys are assigned by the driver, even keys by the daemon - static int key = 4; - - const int ret = key; - key += 2; - return ret; -} diff --git a/daemon/SessionData.h b/daemon/SessionData.h index ed9b21e9..f93b0fd9 100644 --- a/daemon/SessionData.h +++ b/daemon/SessionData.h @@ -5,6 +5,7 @@ #include "Config.h" #include "Configuration.h" +#include "Constant.h" #include "Counter.h" #include "GatorCLIFlags.h" #include "lib/SharedMemory.h" @@ -20,7 +21,7 @@ #include //development version for PROTOCOL_VERSION is of format YYYYMMDD -#define PROTOCOL_VERSION 740 +#define PROTOCOL_VERSION 750 // Differentiates development versions (timestamp) from release versions #define PROTOCOL_DEV 10000000 @@ -98,6 +99,8 @@ class SessionData { // PMU Counters Counter mCounters[MAX_PERFORMANCE_COUNTERS]; + std::set mConstants; + private: // Intentionally unimplemented SessionData(const SessionData &) = delete; @@ -110,7 +113,7 @@ extern SessionData gSessionData; extern const char * const gSrcMd5; uint64_t getTime(); -int getEventKey(); + void logCpuNotFound(); #endif // SESSION_DATA_H diff --git a/daemon/Sources.mk b/daemon/Sources.mk index 33d2a64c..60d15ef1 100644 --- 
a/daemon/Sources.mk +++ b/daemon/Sources.mk @@ -31,6 +31,7 @@ GATORD_CXX_SRC_FILES := \ Command.cpp \ ConfigurationXML.cpp \ ConfigurationXMLParser.cpp \ + Constant.cpp \ CounterXML.cpp \ CpuUtils.cpp \ CpuUtils_Topology.cpp \ @@ -44,6 +45,7 @@ GATORD_CXX_SRC_FILES := \ FSDriver.cpp \ FtraceDriver.cpp \ GatorCLIParser.cpp \ + GetEventKey.cpp \ HwmonDriver.cpp \ BlockCounterFrameBuilder.cpp \ BlockCounterMessageConsumer.cpp \ @@ -114,6 +116,7 @@ GATORD_CXX_SRC_FILES := \ linux/perf/PerfSource.cpp \ linux/perf/PerfSyncThreadBuffer.cpp \ linux/perf/PerfSyncThread.cpp \ + linux/perf/PerfToMemoryBuffer.cpp \ linux/proc/ProcessChildren.cpp \ linux/proc/ProcessPollerBase.cpp \ linux/proc/ProcLoadAvgFileRecord.cpp \ @@ -139,6 +142,7 @@ GATORD_CXX_SRC_FILES := \ non_root/ProcessStateChangeHandler.cpp \ non_root/ProcessStateTracker.cpp \ non_root/ProcessStatsTracker.cpp \ + xml/CurrentConfigXML.cpp \ xml/EventsXML.cpp \ xml/EventsXMLProcessor.cpp \ xml/MxmlUtils.cpp \ diff --git a/daemon/StreamlineSetup.cpp b/daemon/StreamlineSetup.cpp index 5a2bf9ae..15b9641f 100644 --- a/daemon/StreamlineSetup.cpp +++ b/daemon/StreamlineSetup.cpp @@ -8,12 +8,15 @@ #include "CounterXML.h" #include "Driver.h" #include "Drivers.h" +#include "ExitStatus.h" #include "ICpuInfo.h" #include "Logging.h" #include "OlySocket.h" #include "OlyUtility.h" #include "Sender.h" #include "SessionData.h" +#include "lib/Syscall.h" +#include "xml/CurrentConfigXML.h" #include "xml/EventsXML.h" static const char TAG_SESSION[] = "session"; @@ -36,6 +39,10 @@ StreamlineSetup::StreamlineSetup(OlySocket & s, Drivers & drivers, lib::Span #include +#include class OlySocket; class Drivers; @@ -36,9 +37,12 @@ class StreamlineSetup : private IStreamlineCommandHandler { State handleApcStop() override; State handleDisconnect() override; State handlePing() override; + State handleExit() override; + State handleRequestCurrentConfig() override; void sendData(const char * data, uint32_t length, ResponseType type); void 
sendString(const char * string, ResponseType type) { sendData(string, strlen(string), type); } + void sendString(std::string & string, ResponseType type) { sendData(string.c_str(), string.size(), type); } void sendDefaults(); void writeConfiguration(char * xml); }; diff --git a/daemon/StreamlineSetupLoop.cpp b/daemon/StreamlineSetupLoop.cpp index 1f355867..4524888d 100644 --- a/daemon/StreamlineSetupLoop.cpp +++ b/daemon/StreamlineSetupLoop.cpp @@ -15,7 +15,11 @@ namespace { COMMAND_APC_START = 2, COMMAND_APC_STOP = 3, COMMAND_DISCONNECT = 4, - COMMAND_PING = 5 + COMMAND_PING = 5, + COMMAND_EXIT = 6, + // A request to get gatord configuration (in XML format) + // Not to be confused with configuration.xml + COMMAND_REQUEST_CURRENT_CONFIG = 7 }; struct ReadResult { @@ -41,8 +45,8 @@ namespace { return result; } - const char type = header[0]; - const int length = (header[1] << 0) | (header[2] << 8) | (header[3] << 16) | (header[4] << 24); + const auto type = header[0]; + const auto length = (header[1] << 0) | (header[2] << 8) | (header[3] << 16) | (header[4] << 24); // add artificial limit if ((length < 0) || length > 1024 * 1024) { @@ -50,7 +54,7 @@ namespace { return result; } - // alocate data for receive + // allocate data for receive result.data.resize(length + 1, 0); // receive data @@ -125,6 +129,15 @@ IStreamlineCommandHandler::State streamlineSetupCommandIteration( logg.logMessage("INVESTIGATE: Received PING command but with length = %zu", readResult.data.size()); } return handler.handlePing(); + case COMMAND_EXIT: + //No logging on length needed as there will be no additional data + return handler.handleExit(); + case COMMAND_REQUEST_CURRENT_CONFIG: + if (!readResult.data.empty()) { + logg.logMessage("INVESTIGATE: Received REQUEST_CONFIG command but with length = %zu", + readResult.data.size()); + } + return handler.handleRequestCurrentConfig(); default: logg.logError("Target error: Unknown command type, %d", readResult.commandType); return 
IStreamlineCommandHandler::State::EXIT_ERROR; diff --git a/daemon/StreamlineSetupLoop.h b/daemon/StreamlineSetupLoop.h index 7a6befac..4de129a7 100644 --- a/daemon/StreamlineSetupLoop.h +++ b/daemon/StreamlineSetupLoop.h @@ -15,6 +15,9 @@ class IStreamlineCommandHandler { enum class State { /** The loop should continue to process commands */ PROCESS_COMMANDS, + /** The loop should continue to process command to get current config, + * used only in main for secondary connections*/ + PROCESS_COMMANDS_CONFIG, /** The loop should terminate in a disconnect state */ EXIT_DISCONNECT, /** The loop should terminate in a no-capture state */ @@ -22,7 +25,9 @@ class IStreamlineCommandHandler { /** The loop should terminate in a start-capture state */ EXIT_APC_START, /** The loop terminated due to read failure */ - EXIT_ERROR + EXIT_ERROR, + /** The loop terminated on a request to exit*/ + EXIT_OK }; virtual ~IStreamlineCommandHandler() = default; @@ -33,6 +38,16 @@ class IStreamlineCommandHandler { virtual State handleApcStop() = 0; virtual State handleDisconnect() = 0; virtual State handlePing() = 0; + virtual State handleExit() = 0; + + /** + * Will send the configuration of gatord back to host as an XML string. + * (Not to be confused with configuration.xml) + * This will contain the following information about the current session: + * pid, uid, is system-wide, is waiting on a command, the capture working + * directory, the wait for process command, and the pids to capture. 
+ */ + virtual State handleRequestCurrentConfig() = 0; }; /** diff --git a/daemon/Tracepoints.cpp b/daemon/Tracepoints.cpp index 1fb48294..3233c9f3 100644 --- a/daemon/Tracepoints.cpp +++ b/daemon/Tracepoints.cpp @@ -5,19 +5,24 @@ #include "Config.h" #include "Logging.h" #include "lib/Format.h" +#include "lib/FsEntry.h" +#include "lib/Syscall.h" #include "lib/Utils.h" #include "linux/perf/IPerfAttrsConsumer.h" -#include +#include +#include +#include +#include -std::string getTracepointPath(const char * name, const char * file) +std::string getTracepointPath(const char * tracefsEventsPath, const char * name, const char * file) { - return lib::Format() << EVENTS_PATH "/" << name << "/" << file; + return lib::Format() << tracefsEventsPath << "/" << name << "/" << file; } -bool readTracepointFormat(IPerfAttrsConsumer & attrsConsumer, const char * const name) +bool readTracepointFormat(IPerfAttrsConsumer & attrsConsumer, const char * tracefsEventsPath, const char * name) { - const lib::FsEntry file = lib::FsEntry::create(getTracepointPath(name, "format")); + const lib::FsEntry file = lib::FsEntry::create(getTracepointPath(tracefsEventsPath, name, "format")); if (!file.canAccess(true, false, false)) { const std::string path = file.path(); @@ -31,13 +36,146 @@ bool readTracepointFormat(IPerfAttrsConsumer & attrsConsumer, const char * const return true; } -int64_t getTracepointId(const char * const name) +int64_t getTracepointId(const char * tracefsEventsPath, const char * const name) { int64_t result; - if (lib::readInt64FromFile(getTracepointPath(name, "id").c_str(), result) != 0) { + if (lib::readInt64FromFile(getTracepointPath(tracefsEventsPath, name, "id").c_str(), result) != 0) { logg.logMessage("Unable to read tracepoint id for %s", name); return UNKNOWN_TRACEPOINT_ID; } return result; } + +namespace { + static const std::array TRACEFS_CONFIGURATIONS {{// The usual configuration on most systems + {"/sys/kernel/debug/tracing", + "/sys/kernel/debug/tracing/events", + 
"/sys/kernel/debug/tracing/events/enable", + "/sys/kernel/debug/tracing/events/ftrace", + "/sys/kernel/debug/tracing/trace", + "/sys/kernel/debug/tracing/trace_clock", + "/sys/kernel/debug/tracing/trace_pipe", + "/sys/kernel/debug/tracing/tracing_on"}, + // Android R (no debugfs) + {"/sys/kernel/tracing", + "/sys/kernel/tracing/events", + "/sys/kernel/tracing/events/enable", + "/sys/kernel/tracing/events/ftrace", + "/sys/kernel/tracing/trace", + "/sys/kernel/tracing/trace_clock", + "/sys/kernel/tracing/trace_pipe", + "/sys/kernel/tracing/tracing_on"}}}; + + class TraceFsConstantsWrapper { + public: + TraceFsConstantsWrapper(std::string path) + : path(std::move(path)), + path__events(this->path + "/events"), + path__events__enable(this->path + "/events/enable"), + path__events__ftrace(this->path + "/events/ftrace"), + path__trace(this->path + "/trace"), + path__trace_clock(this->path + "/trace_clock"), + path__trace_pipe(this->path + "/trace_pipe"), + path__tracing_on(this->path + "/tracing_on"), + constants() + { + constants.path = this->path.c_str(); + constants.path__events = this->path__events.c_str(); + constants.path__events__enable = this->path__events__enable.c_str(); + constants.path__events__ftrace = this->path__events__ftrace.c_str(); + constants.path__trace = this->path__trace.c_str(); + constants.path__trace_clock = this->path__trace_clock.c_str(); + constants.path__trace_pipe = this->path__trace_pipe.c_str(); + constants.path__tracing_on = this->path__tracing_on.c_str(); + } + + private: + // TRACING_PATH + std::string path; + // TRACING_PATH "/events" + std::string path__events; + // TRACING_PATH "/events/enable" + std::string path__events__enable; + // TRACING_PATH "/events/ftrace" + std::string path__events__ftrace; + // TRACING_PATH "/trace" + std::string path__trace; + // TRACING_PATH "/trace_clock" + std::string path__trace_clock; + // TRACING_PATH "/trace_pipe" + std::string path__trace_pipe; + // TRACING_PATH "/tracing_on" + std::string 
path__tracing_on; + + public: + TraceFsConstants constants; + }; + + /** Parse /proc/mounts, looking for tracefs mount point */ + static const TraceFsConstants * findTraceFsMount() + { + static std::unique_ptr pointer; + + // check we've not been here before + if (pointer != nullptr) { + return &pointer->constants; + } + + logg.logMessage("Reading /proc/mounts"); + + // iterate each line of /proc/mounts + std::ifstream file("/proc/mounts", std::ios_base::in); + for (std::string line; std::getline(file, line);) { + logg.logMessage(" '%s'", line.c_str()); + + // find the mount point section of the string, provided it is a tracefs mount + const auto indexOfFirstSep = line.find(" /"); + if (indexOfFirstSep == std::string::npos) { + continue; + } + const auto indexOfTraceFs = line.find(" tracefs", indexOfFirstSep + 1); + if (indexOfTraceFs == std::string::npos) { + continue; + } + + // found it + auto mountPoint = line.substr(indexOfFirstSep + 1, indexOfTraceFs - (indexOfFirstSep + 1)); + logg.logMessage("Found tracefs at '%s'", mountPoint.c_str()); + + if (lib::access(mountPoint.c_str(), R_OK) == 0) { + // check it is not one of the baked in configurations, reuse it instead of constructing a new item + for (const auto & config : TRACEFS_CONFIGURATIONS) { + if (mountPoint == config.path) { + return &config; + } + } + + // OK, construct a new item + pointer.reset(new TraceFsConstantsWrapper(std::move(mountPoint))); + return &pointer->constants; + } + } + + return nullptr; + } +} + +const TraceFsConstants & TraceFsConstants::detect() +{ + // try to read from /proc/mounts first + const auto mountPoint = findTraceFsMount(); + if (mountPoint != nullptr) { + return *mountPoint; + } + + // try some defaults + for (const auto & config : TRACEFS_CONFIGURATIONS) { + if (lib::access(config.path, R_OK) == 0) { + return config; + } + } + + // just use the first one (usual for linux) as some placeholder default + return TRACEFS_CONFIGURATIONS[0]; +} diff --git a/daemon/Tracepoints.h 
b/daemon/Tracepoints.h index 899046cf..3c886b89 100644 --- a/daemon/Tracepoints.h +++ b/daemon/Tracepoints.h @@ -9,17 +9,62 @@ class IPerfAttrsConsumer; class DynBuf; +/** + * Contains the set of paths we care about within tracefs. + * Refer to https://www.kernel.org/doc/Documentation/trace/ftrace.txt + * for more information about ftrace / tracefs + */ +struct TraceFsConstants { + // TRACING_PATH + const char * path; + // TRACING_PATH "/events" + const char * path__events; + // TRACING_PATH "/events/enable" + const char * path__events__enable; + // TRACING_PATH "/events/ftrace" + const char * path__events__ftrace; + // TRACING_PATH "/trace" + const char * path__trace; + // TRACING_PATH "/trace_clock" + const char * path__trace_clock; + // TRACING_PATH "/trace_pipe" + const char * path__trace_pipe; + // TRACING_PATH "/tracing_on" + const char * path__tracing_on; + + // return the appropriate path set for this machine + static const TraceFsConstants & detect(); +}; + /** * * @param name tracepoint name * @param file name of file within tracepoint directory * @return the path of the file for this tracepoint */ -std::string getTracepointPath(const char * name, const char * file); +std::string getTracepointPath(const char * tracefsEventsPath, const char * name, const char * file); -bool readTracepointFormat(IPerfAttrsConsumer & attrsConsumer, const char * name); +inline std::string getTracepointPath(const TraceFsConstants & constants, const char * name, const char * file) +{ + return getTracepointPath(constants.path__events, name, file); +} + +bool readTracepointFormat(IPerfAttrsConsumer & attrsConsumer, const char * tracefsEventsPath, const char * name); + +inline bool readTracepointFormat(IPerfAttrsConsumer & attrsConsumer, + const TraceFsConstants & constants, + const char * name) +{ + return readTracepointFormat(attrsConsumer, constants.path__events, name); +} constexpr int64_t UNKNOWN_TRACEPOINT_ID = -1; -int64_t getTracepointId(const char * name); + +int64_t 
getTracepointId(const char * tracefsEventsPath, const char * name); + +inline int64_t getTracepointId(const TraceFsConstants & constants, const char * name) +{ + return getTracepointId(constants.path__events, name); +} #endif // TRACEPOINTS_H diff --git a/daemon/UEvent.cpp b/daemon/UEvent.cpp index bd5331f0..02a4bb40 100644 --- a/daemon/UEvent.cpp +++ b/daemon/UEvent.cpp @@ -5,6 +5,7 @@ #include "Logging.h" #include "OlySocket.h" +#include #include #include #include @@ -30,7 +31,7 @@ bool UEvent::init() { mFd = socket_cloexec(PF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT); if (mFd < 0) { - logg.logMessage("socket failed"); + logg.logMessage("Socket failed for uevents (%d - %s)", errno, strerror(errno)); return false; } @@ -40,7 +41,7 @@ bool UEvent::init() sockaddr.nl_groups = 1; // bitmask: (1 << 0) == kernel events, (1 << 1) == udev events sockaddr.nl_pid = 0; if (bind(mFd, reinterpret_cast(&sockaddr), sizeof(sockaddr)) != 0) { - logg.logMessage("bind failed"); + logg.logMessage("Bind failed for uevents (%d - %s)", errno, strerror(errno)); return false; } diff --git a/daemon/armnn/ArmNNDriver.cpp b/daemon/armnn/ArmNNDriver.cpp index e50a72bb..5adb052f 100644 --- a/daemon/armnn/ArmNNDriver.cpp +++ b/daemon/armnn/ArmNNDriver.cpp @@ -2,6 +2,7 @@ #include "armnn/ArmNNDriver.h" +#include "GetEventKey.h" #include "Logging.h" #include "SessionData.h" #include "xml/EventsXMLProcessor.h" @@ -37,10 +38,8 @@ namespace armnn { // Enables and prepares the counter for capture void Driver::setupCounter(Counter & counter) { - lib::Optional optionalEventNo = - counter.getEvent() != -1 ? 
lib::Optional(counter.getEvent()) : lib::Optional(); + const int key = mGlobalState.enableCounter(counter.getType(), counter.getEventCode()); - int key = mGlobalState.enableCounter(std::string(counter.getType()), optionalEventNo); counter.setKey(key); } diff --git a/daemon/armnn/DriverSourceIpc.cpp b/daemon/armnn/DriverSourceIpc.cpp index e090c130..806dfaa5 100644 --- a/daemon/armnn/DriverSourceIpc.cpp +++ b/daemon/armnn/DriverSourceIpc.cpp @@ -1,9 +1,9 @@ /* Copyright (C) 2020 by Arm Limited. All rights reserved. */ #include "armnn/DriverSourceIpc.h" + #include "BufferUtils.h" #include "IRawFrameBuilder.h" - #include "Logging.h" #include @@ -258,7 +258,8 @@ namespace armnn { return false; } - if (isOneShot && (getBufferBytesAvailable() < IRawFrameBuilder::MAX_FRAME_HEADER_SIZE + buffer_utils::MAXSIZE_PACK32 + data.size())) { + if (isOneShot && (getBufferBytesAvailable() < + IRawFrameBuilder::MAX_FRAME_HEADER_SIZE + buffer_utils::MAXSIZE_PACK32 + data.size())) { mOneShotModeEnabledAndEnded = true; return false; } diff --git a/daemon/armnn/GlobalState.cpp b/daemon/armnn/GlobalState.cpp index 379529c3..f12f38e7 100644 --- a/daemon/armnn/GlobalState.cpp +++ b/daemon/armnn/GlobalState.cpp @@ -23,7 +23,9 @@ namespace armnn { GlobalState::CategoryId GlobalState::CategoryId::fromEventId(const EventId & eventId) { - return {.category = eventId.category, .device = eventId.device, .counterSet = eventId.counterSet}; + return GlobalState::CategoryId {.category = eventId.category, + .device = eventId.device, + .counterSet = eventId.counterSet}; } bool GlobalState::CategoryId::operator<(const CategoryId & that) const @@ -62,12 +64,12 @@ namespace armnn { const auto & eventNumber = counterNameKeyAndEventNumber.eventNumber; const int key = counterNameKeyAndEventNumber.key; - if (eventNumber) { + if (eventNumber.isValid()) { const auto eventByNumberIter = programmableCountersToCategory.find(counterName); if (eventByNumberIter != programmableCountersToCategory.end()) { const auto & 
categoryId = eventByNumberIter->second; const auto & category = categories.at(categoryId); - const auto iter = category.eventsByNumber.find(*eventNumber); + const auto iter = category.eventsByNumber.find(eventNumber.asI32()); if (iter != category.eventsByNumber.end()) { const auto & eventName = iter->second; // check it wasn't removed due to conflicting properties @@ -76,7 +78,9 @@ namespace armnn { } } else { - logg.logError("Unknown event number %d for counter: %s", *eventNumber, counterName.c_str()); + logg.logError("Unknown event number 0x%" PRIxEventCode " for counter: %s", + eventNumber.asU64(), + counterName.c_str()); } } else { @@ -109,7 +113,7 @@ namespace armnn { ICounterDirectoryConsumer::Interpolation interpolation) { switch (clazz) { - case ICounterDirectoryConsumer::Class::DELTA: + case ICounterDirectoryConsumer::Class::DELTA: { switch (interpolation) { case ICounterDirectoryConsumer::Interpolation::LINEAR: return Event::Class::DELTA; @@ -117,17 +121,19 @@ namespace armnn { return Event::Class::INCIDENT; } break; - case ICounterDirectoryConsumer::Class::ABSOLUTE: + } + case ICounterDirectoryConsumer::Class::ABSOLUTE: { switch (interpolation) { + // we don't currently support linear interpolation + // for absolute, steps will have to do. case ICounterDirectoryConsumer::Interpolation::LINEAR: - // we don't currently support linear interpolation - // for absolute, steps will have to do. 
- return Event::Class::ABSOLUTE; case ICounterDirectoryConsumer::Interpolation::STEP: return Event::Class::ABSOLUTE; } break; + } } + assert(false && "unknown Class/Interpolation"); return Event::Class::DELTA; // just to keep the compiler happy } @@ -172,7 +178,7 @@ namespace armnn { EventId GlobalState::makeEventId(const CategoryId & id, const std::string & eventName) { - return {.category = id.category, .device = id.device, .counterSet = id.counterSet, .name = eventName}; + return EventId {.category = id.category, .device = id.device, .counterSet = id.counterSet, .name = eventName}; } /// @return empty if the event doesn't have a counter name (because it's part of a counter set) @@ -315,9 +321,9 @@ namespace armnn { continue; // removed because of conflict } - auto eventNumberOrEmpty = lib::Optional {}; + EventCode eventNumberOrEmpty; if (catId.counterSet) { - eventNumberOrEmpty = eventNumberByName.at(eventName); + eventNumberOrEmpty = EventCode(eventNumberByName.at(eventName)); } xmlEvents.push_back({.eventNumber = eventNumberOrEmpty, .counter = makeCounterNameIfFixed(catId, eventName), @@ -388,7 +394,7 @@ namespace armnn { return fixedCountersToEvent.count(counterName) != 0 || programmableCountersToCategory.count(counterName) != 0; } - int GlobalState::enableCounter(const std::string & counterName, lib::Optional eventNumber) + int GlobalState::enableCounter(const std::string & counterName, EventCode eventNumber) { if (enabledIdKeyAndEventNumbers->full()) { logg.logError("Could not enable %s, limit of ArmNN counters reached", counterName.c_str()); @@ -405,9 +411,8 @@ namespace armnn { const int key = getKey(counterName); - enabledIdKeyAndEventNumbers->push_back(CounterNameKeyAndEventNumber {.counterName = counterNameRef, - .key = key, - .eventNumber = std::move(eventNumber)}); + enabledIdKeyAndEventNumbers->push_back( + CounterNameKeyAndEventNumber {.counterName = counterNameRef, .key = key, .eventNumber = eventNumber}); return key; } diff --git 
a/daemon/armnn/GlobalState.h b/daemon/armnn/GlobalState.h index 240f8e1f..69c0c131 100644 --- a/daemon/armnn/GlobalState.h +++ b/daemon/armnn/GlobalState.h @@ -46,7 +46,7 @@ namespace armnn { /// @param eventNumber should be empty if not a programmable counter /// @return key assigned for counter /// - int enableCounter(const std::string & counterName, lib::Optional eventNumber); + int enableCounter(const std::string & counterName, EventCode eventNumber); void disableAllCounters(); @@ -74,7 +74,7 @@ namespace armnn { struct CounterNameKeyAndEventNumber { const std::string & counterName; int key; - lib::Optional eventNumber; + EventCode eventNumber; }; static std::string eventIdToString(const EventId & id); diff --git a/daemon/armnn/SocketIO.cpp b/daemon/armnn/SocketIO.cpp index 2affe118..3ff40576 100644 --- a/daemon/armnn/SocketIO.cpp +++ b/daemon/armnn/SocketIO.cpp @@ -272,7 +272,7 @@ namespace armnn { // Failure else { - return bytesSent; + return -1; } } diff --git a/daemon/defaults.xml b/daemon/defaults.xml index 4859b9be..f4189662 100644 --- a/daemon/defaults.xml +++ b/daemon/defaults.xml @@ -36,53 +36,233 @@ + + + + + + + + + - - - + + + + + + + + + + - - - + + + + - - - + + + + + + + + + + - - - + + + + + + + + + + + + + + + + - - - + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - diff --git a/daemon/events-CMN-600.xml b/daemon/events-CMN-600.xml new file mode 100644 index 00000000..4de6b072 --- /dev/null +++ b/daemon/events-CMN-600.xml @@ -0,0 +1,213 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/daemon/events-Linux.xml b/daemon/events-Linux.xml index 9667557c..02bb9ed6 100644 --- a/daemon/events-Linux.xml +++ b/daemon/events-Linux.xml @@ -8,7 +8,6 @@ - @@ -16,7 +15,6 @@ - diff --git a/daemon/events-Mali-Midgard.xml b/daemon/events-Mali-Midgard.xml index c3c22cdb..5263f0dd 100644 --- a/daemon/events-Mali-Midgard.xml +++ b/daemon/events-Mali-Midgard.xml @@ -3,26 +3,6 @@ - @@ -35,36 +15,6 @@ power management is disabled during profiling so these counters are not useful a - - - - - - - - - - - - diff --git a/daemon/events-Mali-Midgard_hw.xml b/daemon/events-Mali-Midgard_hw.xml deleted file mode 100644 index 75502991..00000000 --- a/daemon/events-Mali-Midgard_hw.xml +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/daemon/events-Mali-T60x_hw.xml b/daemon/events-Mali-T60x_hw.xml deleted file mode 100644 index 3804ab96..00000000 --- a/daemon/events-Mali-T60x_hw.xml +++ /dev/null @@ -1,85 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/daemon/events-Mali-T62x_hw.xml b/daemon/events-Mali-T62x_hw.xml deleted file mode 100644 index 065c508e..00000000 --- a/daemon/events-Mali-T62x_hw.xml +++ /dev/null @@ -1,86 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/daemon/events-Mali-T72x_hw.xml b/daemon/events-Mali-T72x_hw.xml index 8c778d58..4860491b 100644 --- a/daemon/events-Mali-T72x_hw.xml +++ 
b/daemon/events-Mali-T72x_hw.xml @@ -1,71 +1,71 @@ - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - + + + + + + + + diff --git a/daemon/events-Mali-T76x_hw.xml b/daemon/events-Mali-T76x_hw.xml index c5fbf8d3..744966c9 100644 --- a/daemon/events-Mali-T76x_hw.xml +++ b/daemon/events-Mali-T76x_hw.xml @@ -1,86 +1,86 @@ - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + diff --git a/daemon/events-Mali-T82x_hw.xml b/daemon/events-Mali-T82x_hw.xml index e07feb33..a93b659d 100644 --- a/daemon/events-Mali-T82x_hw.xml +++ b/daemon/events-Mali-T82x_hw.xml @@ -1,78 +1,78 @@ - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + diff --git a/daemon/events-Mali-T83x_hw.xml b/daemon/events-Mali-T83x_hw.xml index f1979776..d560437f 100644 --- a/daemon/events-Mali-T83x_hw.xml +++ b/daemon/events-Mali-T83x_hw.xml @@ -1,78 +1,78 @@ - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + diff --git a/daemon/events-Mali-T86x_hw.xml b/daemon/events-Mali-T86x_hw.xml index c70387dd..bc3af832 100644 --- a/daemon/events-Mali-T86x_hw.xml +++ b/daemon/events-Mali-T86x_hw.xml @@ -1,86 +1,86 @@ - - - - - - - - - - - + + + + + + + + + + + - - - - 
- - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + diff --git a/daemon/events-Mali-T88x_hw.xml b/daemon/events-Mali-T88x_hw.xml index b2dda9e5..c65a1afc 100644 --- a/daemon/events-Mali-T88x_hw.xml +++ b/daemon/events-Mali-T88x_hw.xml @@ -1,86 +1,86 @@ - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + diff --git a/daemon/events-SPE-Armv8_2.xml b/daemon/events-SPE-Armv8_2.xml index 7b0286a4..cdc2eca2 100644 --- a/daemon/events-SPE-Armv8_2.xml +++ b/daemon/events-SPE-Armv8_2.xml @@ -113,6 +113,8 @@ Valid enum values when 'class' is 'other': * 'conditional' - Matches only conditional operations. Equivalent to 'sub-class="0x01" sub-class-mask="0x01"' + * 'sve' - Matches only SVE instructions. + Equivalent to 'sub-class="0x08" sub-class-mask="0x89"' Valid enum values when 'class' is 'load-store': * 'load' - Matches only loads/swaps, not stores. Equivalent to 'sub-class="0x00" sub-class-mask="0x01"' @@ -124,6 +126,12 @@ Equivalent to 'sub-class="0x04" sub-class-mask="0xfe"' * 'extended' - Matches only extended loads/stores. Equivalent to 'sub-class="0x02" sub-class-mask="0xe2"' + * 'sve' - Matches only SVE loads/stores. + Equivalent to 'sub-class="0x08" sub-class-mask="0x0a"' + * 'xformed-sysreg' - Matches only ARMv8.4-NV transformed System register access. + Equivalent to 'sub-class="0x30" sub-class-mask="0xfe"' + * 'unspecified' - Matches only load/stores targeting unspecified registers. + Equivalent to 'sub-class="0x10" sub-class-mask="0xfe"' sub-class-mask: integer: (Optional) If set, then 'sub-class' must be an integer. 
If this attribute is not present but sub-class is an integer then it will be as if this attribute were set to '0xff'. When combined with sub-class creates a bitmask and test that is used to compare against the 'subclass' field of the Operation Type packet. @@ -199,13 +207,13 @@ - - + + - - + + diff --git a/daemon/lib/Optional.h b/daemon/lib/Optional.h index 637b9b02..f5d1c981 100644 --- a/daemon/lib/Optional.h +++ b/daemon/lib/Optional.h @@ -39,23 +39,27 @@ namespace lib { Optional & operator=(const Optional & t) { - if (t) { - set(t.get()); - } - else { - clear(); + if (&t != this) { + if (t) { + set(t.get()); + } + else { + clear(); + } } return *this; } Optional & operator=(Optional && t) noexcept { - if (t) { - set(std::move(t.get())); - t.clear(); - } - else { - clear(); + if (&t != this) { + if (t) { + set(std::move(t.get())); + t.clear(); + } + else { + clear(); + } } return *this; } diff --git a/daemon/lib/Span.h b/daemon/lib/Span.h index b0976176..6fb2eec1 100644 --- a/daemon/lib/Span.h +++ b/daemon/lib/Span.h @@ -24,8 +24,8 @@ namespace lib { using iterator = T *; using const_iterator = const T *; - T * data; - L length; + T * data = nullptr; + L length = 0; L size() const { return length; } diff --git a/daemon/lib/Syscall.cpp b/daemon/lib/Syscall.cpp index 00df997b..370eef0a 100644 --- a/daemon/lib/Syscall.cpp +++ b/daemon/lib/Syscall.cpp @@ -3,6 +3,7 @@ #include "Syscall.h" #include +#include #include #include #include @@ -48,4 +49,6 @@ namespace lib { int poll(struct pollfd * fds, nfds_t nfds, int timeout) { return ::poll(fds, nfds, timeout); } int access(const char * filename, int how) { return ::access(filename, how); } + + void exit(int status) { ::exit(status); } } diff --git a/daemon/lib/Syscall.h b/daemon/lib/Syscall.h index d0fc6015..aa8a883b 100644 --- a/daemon/lib/Syscall.h +++ b/daemon/lib/Syscall.h @@ -38,6 +38,7 @@ namespace lib { int poll(struct pollfd * __fds, nfds_t __nfds, int __timeout); int access(const char * filename, int how); + 
void exit(int status); } #endif // INCLUDE_LIB_SYSCALL_H diff --git a/daemon/lib/Utils.cpp b/daemon/lib/Utils.cpp index 4ed9c2f4..6aefad0d 100644 --- a/daemon/lib/Utils.cpp +++ b/daemon/lib/Utils.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -178,4 +179,33 @@ namespace lib { return result; } + + uint64_t roundDownToPowerOfTwo(uint64_t in) + { + if (in == 0) { + return 0; + } + + in |= (in >> 1); + in |= (in >> 2); + in |= (in >> 4); + in |= (in >> 8); + in |= (in >> 16); + in |= (in >> 32); + + return in - (in >> 1); + } + + int calculatePerfMmapSizeInPages(const std::uint64_t perfEventMlockKb, const std::uint64_t pageSizeBytes) + { + constexpr std::uint64_t maxPerfEventMlockKb = std::numeric_limits::max() / 1024ULL; + + if (perfEventMlockKb <= maxPerfEventMlockKb && pageSizeBytes > 0 && + perfEventMlockKb * 1024ULL > pageSizeBytes) { + const std::uint64_t bufferSize = roundDownToPowerOfTwo(perfEventMlockKb * 1024ULL - pageSizeBytes); + const std::uint64_t bufferPages = bufferSize / pageSizeBytes; + return int(std::min(bufferPages, std::numeric_limits::max())); + } + return 0; + } } diff --git a/daemon/lib/Utils.h b/daemon/lib/Utils.h index 5e55b1a0..09abfb90 100644 --- a/daemon/lib/Utils.h +++ b/daemon/lib/Utils.h @@ -25,6 +25,9 @@ namespace lib { int writeReadInt64InFile(const char * path, int64_t & value); std::set readCpuMaskFromFile(const char * path); + + uint64_t roundDownToPowerOfTwo(uint64_t in); + int calculatePerfMmapSizeInPages(const std::uint64_t perfEventMlockKb, const std::uint64_t pageSizeBytes); } #endif // INCLUDE_LIB_UTILS_H diff --git a/daemon/linux/perf/IPerfBufferConsumer.h b/daemon/linux/perf/IPerfBufferConsumer.h new file mode 100644 index 00000000..cf20b74b --- /dev/null +++ b/daemon/linux/perf/IPerfBufferConsumer.h @@ -0,0 +1,68 @@ +/* Copyright (C) 2013-2020 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "Config.h" +#include "lib/Span.h" + +#include +#include +#include + +class IPerfBufferConsumer { +public: + virtual ~IPerfBufferConsumer() = default; + + using data_word_t = std::uint64_t; + + /** + * A chunk of a perf aux record + */ + struct AuxRecordChunk { + /** The pointer to the first byte of the record */ + const char * chunkPointer; + /** The number of bytes in the record */ + std::size_t byteCount; + }; + + /** + * A chunk of a perf data record + */ + struct DataRecordChunk { + /** The pointer to the first word of the record (where each word is a U64) */ + const data_word_t * chunkPointer; + /** The number of U64 words (not bytes) in the record */ + std::size_t wordCount; + }; + + /** + * A tuple of {@link DataRecordChunk}s where the first chunk is required and the second is optional. + * Each chunk specifies a sequence of words that make up the record. + * + * The second chunk is used when the record is split across the end of the ring-buffer. When it is + * not used, it will have its length set to zero. 
+ */ + struct DataRecordChunkTuple { + DataRecordChunk firstChunk; + DataRecordChunk optionalSecondChunk; + }; + + /** + * Consume a chunk of aux data + * + * @param cpu The CPU the data came from + * @param auxTailValue The Initial 'tail' value for the aux data + * @param recordChunks The span of chunks that contains the data + */ + virtual void consumePerfAuxRecord(int cpu, + std::uint64_t auxTailValue, + lib::Span recordChunks) = 0; + + /** + * Consume a sequence of perf data record chunks + * + * @param cpu The CPU the records came from + * @param recordChunks The sequence of chunk-tuples + */ + virtual void consumePerfDataRecord(int cpu, lib::Span recordChunks) = 0; +}; diff --git a/daemon/linux/perf/PerfBuffer.cpp b/daemon/linux/perf/PerfBuffer.cpp index 25155eba..ec9b384c 100644 --- a/daemon/linux/perf/PerfBuffer.cpp +++ b/daemon/linux/perf/PerfBuffer.cpp @@ -191,131 +191,100 @@ bool PerfBuffer::isFull() return false; } -class PerfDataFrame { -public: - PerfDataFrame(ISender & sender) : mSender(sender), mWritePos(-1), mCpuSizePos(-1) {} - - void add(const int cpu, uint64_t head, uint64_t tail, const char * b, std::size_t length) - { - cpuHeader(cpu); - - const std::size_t bufferMask = length - 1; - - while (head > tail) { - const int count = - reinterpret_cast(b + (tail & bufferMask))->size / sizeof(uint64_t); - // Can this whole message be written as Streamline assumes events are not split between frames - if (int(sizeof(mBuf)) <= mWritePos + count * buffer_utils::MAXSIZE_PACK64) { - send(); - cpuHeader(cpu); - } - for (int i = 0; i < count; ++i) { - // Must account for message size - buffer_utils::packInt64(mBuf, mWritePos, *reinterpret_cast(b + (tail & bufferMask))); - tail += sizeof(uint64_t); - } - } - } +static void sendAuxFrame(IPerfBufferConsumer & bufferConsumer, + int cpu, + uint64_t headerTail, + uint64_t headerHead, + const char * buffer, + std::size_t length) +{ + const std::size_t bufferMask = length - 1; - void send() - { - if (mWritePos > 0) 
{ - writeCpuSize(); - mSender.writeData(mBuf, mWritePos, ResponseType::APC_DATA); - mWritePos = -1; - mCpuSizePos = -1; - } - } + // will be 'length' at most otherwise somehow wrapped many times + const std::size_t totalDataSize = std::min(headerHead - headerTail, length); + const std::uint64_t head = headerHead; + // will either be the same as 'tail' or will be > if somehow wrapped multiple times + const std::uint64_t tail = (headerHead - totalDataSize); -private: - void frameHeader() - { - if (mWritePos < 0) { - mWritePos = 0; - mCpuSizePos = -1; - buffer_utils::packInt(mBuf, mWritePos, static_cast(FrameType::PERF_DATA)); - } - } + const std::size_t tailMasked = (tail & bufferMask); + const std::size_t headMasked = (head & bufferMask); - void writeCpuSize() - { - if (mCpuSizePos >= 0) { - buffer_utils::writeLEInt(mBuf + mCpuSizePos, mWritePos - mCpuSizePos - sizeof(uint32_t)); - } - } + const bool haveWrapped = headMasked < tailMasked; - void cpuHeader(const int cpu) - { - if (sizeof(mBuf) <= mWritePos + buffer_utils::MAXSIZE_PACK32 + sizeof(uint32_t)) { - send(); - } - frameHeader(); - writeCpuSize(); - buffer_utils::packInt(mBuf, mWritePos, cpu); - mCpuSizePos = mWritePos; - // Reserve space for cpu size - mWritePos += sizeof(uint32_t); - } + const std::size_t firstSize = (haveWrapped ? (length - tailMasked) : totalDataSize); + const std::size_t secondSize = (haveWrapped ? 
headMasked : 0); - // Pick a big size but something smaller than the chunkSize in Sender::writeData which is 100k - char mBuf[1 << 16]; - ISender & mSender; - int mWritePos; - int mCpuSizePos; + const IPerfBufferConsumer::AuxRecordChunk chunks[2] = {{buffer + tailMasked, firstSize}, {buffer, secondSize}}; - // Intentionally unimplemented - PerfDataFrame(const PerfDataFrame &) = delete; - PerfDataFrame & operator=(const PerfDataFrame &) = delete; - PerfDataFrame(PerfDataFrame &&) = delete; - PerfDataFrame & operator=(PerfDataFrame &&) = delete; -}; + bufferConsumer.consumePerfAuxRecord(cpu, tail, chunks); +} -static void sendAuxFrame(ISender & sender, - int cpu, - uint64_t tail, - uint64_t head, - const char * buffer, - std::size_t length) +template +static inline const T * ringBufferPtr(const char * base, std::size_t positionMasked) +{ + return reinterpret_cast(base + positionMasked); +} + +template +static inline const T * ringBufferPtr(const char * base, std::uint64_t position, std::size_t sizeMask) { + return ringBufferPtr(base, (position & sizeMask)); +} + +static void sendDataFrame(IPerfBufferConsumer & bufferConsumer, + int cpu, + uint64_t head, + uint64_t tail, + const char * b, + std::size_t length) +{ + static constexpr std::size_t CHUNK_BUFFER_SIZE = 256; // arbitrary, roughly 4k size stack allocation on 64-bit + static constexpr std::size_t CHUNK_WORD_SIZE = sizeof(IPerfBufferConsumer::data_word_t); + const std::size_t bufferMask = length - 1; - constexpr std::size_t maxHeaderSize = buffer_utils::MAXSIZE_PACK32 // frame type - + buffer_utils::MAXSIZE_PACK32 // cpu - + buffer_utils::MAXSIZE_PACK64 // tail - + buffer_utils::MAXSIZE_PACK32; // size - - while (tail < head) { - // frame size must fit in int - const uint64_t thisHead = std::min(tail + ISender::MAX_RESPONSE_LENGTH - maxHeaderSize, head); - const int size = thisHead - tail; - - const std::size_t tailMasked = tail & bufferMask; - const std::size_t headMasked = thisHead & bufferMask; - - const 
bool haveWrapped = headMasked < tailMasked; - - const int firstSize = haveWrapped ? length - tailMasked : size; - const int secondSize = haveWrapped ? headMasked : 0; - - char header[maxHeaderSize]; - int pos = 0; - buffer_utils::packInt(header, pos, static_cast(FrameType::PERF_AUX)); - buffer_utils::packInt(header, pos, cpu); - buffer_utils::packInt64(header, pos, tail); - buffer_utils::packInt(header, pos, size); - - constexpr std::size_t numberOfParts = 3; - const lib::Span parts[numberOfParts] = {{header, pos}, - {buffer + tailMasked, firstSize}, - {buffer, secondSize}}; - sender.writeDataParts({parts, numberOfParts}, ResponseType::APC_DATA); - tail = thisHead; + + std::size_t numChunksInBuffer = 0; + IPerfBufferConsumer::DataRecordChunkTuple chunkBuffer[CHUNK_BUFFER_SIZE]; + + while (head > tail) { + // write the chunks we have so far, so we can reuse the buffer + if (numChunksInBuffer == CHUNK_BUFFER_SIZE) { + bufferConsumer.consumePerfDataRecord(cpu, {chunkBuffer, numChunksInBuffer}); + numChunksInBuffer = 0; + } + + // create the next chunk + const auto * recordHeader = ringBufferPtr(b, tail, bufferMask); + const auto recordSize = (recordHeader->size + CHUNK_WORD_SIZE - 1) & ~(CHUNK_WORD_SIZE - 1); + const auto recordEnd = tail + recordSize; + const std::size_t baseMasked = (tail & bufferMask); + const std::size_t endMasked = (recordEnd & bufferMask); + + const bool haveWrapped = endMasked < baseMasked; + + const std::size_t firstSize = (haveWrapped ? (length - baseMasked) : recordSize); + const std::size_t secondSize = (haveWrapped ? 
endMasked : 0); + + // set chunk + chunkBuffer[numChunksInBuffer].firstChunk.chunkPointer = + ringBufferPtr(b, baseMasked); + chunkBuffer[numChunksInBuffer].firstChunk.wordCount = firstSize / CHUNK_WORD_SIZE; + chunkBuffer[numChunksInBuffer].optionalSecondChunk.chunkPointer = + ringBufferPtr(b, 0); + chunkBuffer[numChunksInBuffer].optionalSecondChunk.wordCount = secondSize / CHUNK_WORD_SIZE; + + numChunksInBuffer += 1; + tail = recordEnd; + } + + // write the remaining chunks + if (numChunksInBuffer > 0) { + bufferConsumer.consumePerfDataRecord(cpu, {chunkBuffer, numChunksInBuffer}); } } -bool PerfBuffer::send(ISender & sender) +bool PerfBuffer::send(IPerfBufferConsumer & bufferConsumer) { - PerfDataFrame frame(sender); - const std::size_t dataBufferLength = getDataBufferLength(); const std::size_t auxBufferLength = getAuxBufferLength(); @@ -345,7 +314,7 @@ bool PerfBuffer::send(ISender & sender) if (auxHead > auxTail) { const char * const b = static_cast(auxBuf); - sendAuxFrame(sender, cpu, auxTail, auxHead, b, auxBufferLength); + sendAuxFrame(bufferConsumer, cpu, auxTail, auxHead, b, auxBufferLength); // Update tail with the aux read and synchronize with the buffer writer __atomic_store_n(&pemp->aux_tail, auxHead, __ATOMIC_RELEASE); @@ -365,7 +334,7 @@ bool PerfBuffer::send(ISender & sender) if (dataHead > dataTail) { const char * const b = static_cast(dataBuf) + mConfig.pageSize; - frame.add(cpu, dataHead, dataTail, b, dataBufferLength); + sendDataFrame(bufferConsumer, cpu, dataHead, dataTail, b, dataBufferLength); // Update tail with the data read and synchronize with the buffer writer __atomic_store_n(&pemp->data_tail, dataHead, __ATOMIC_RELEASE); @@ -384,7 +353,6 @@ bool PerfBuffer::send(ISender & sender) ++cpuAndBufIt; } } - frame.send(); return true; } diff --git a/daemon/linux/perf/PerfBuffer.h b/daemon/linux/perf/PerfBuffer.h index a14c7d90..cdc3a3ad 100644 --- a/daemon/linux/perf/PerfBuffer.h +++ b/daemon/linux/perf/PerfBuffer.h @@ -4,13 +4,13 @@ #define 
PERF_BUFFER #include "Config.h" +#include "lib/Span.h" +#include "linux/perf/IPerfBufferConsumer.h" #include #include #include -class ISender; - class PerfBuffer { public: struct Config { @@ -28,7 +28,7 @@ class PerfBuffer { bool useFd(int fd, int cpu, bool collectAuxTrace = false); void discard(int cpu); bool isFull(); - bool send(ISender & sender); + bool send(IPerfBufferConsumer & bufferConsumer); std::size_t getDataBufferLength() const; std::size_t getAuxBufferLength() const; diff --git a/daemon/linux/perf/PerfDriver.cpp b/daemon/linux/perf/PerfDriver.cpp index ddbc9d95..30a40a3a 100644 --- a/daemon/linux/perf/PerfDriver.cpp +++ b/daemon/linux/perf/PerfDriver.cpp @@ -201,11 +201,13 @@ inline static T & neverNull(T * t) return *t; } -static long long getTracepointId(const char * const counter, const char * const name) +static long long _getTracepointId(const TraceFsConstants & traceFsConstants, const char * counter, const char * name) { - long long result = getTracepointId(name); + long long result = getTracepointId(traceFsConstants, name); if (result <= 0) { - logg.logSetup("%s is disabled\n%s was not found", counter, getTracepointPath(name, "id").c_str()); + logg.logSetup("%s is disabled\n%s was not found", + counter, + getTracepointPath(traceFsConstants, name, "id").c_str()); } return result; } @@ -213,8 +215,14 @@ static long long getTracepointId(const char * const counter, const char * const PerfDriver::PerfDriver(PerfDriverConfiguration && configuration, PmuXML && pmuXml, const char * maliFamilyName, - const ICpuInfo & cpuInfo) - : SimpleDriver("Perf"), mTracepoints(nullptr), mConfig(std::move(configuration)), mPmuXml(pmuXml), mCpuInfo(cpuInfo) + const ICpuInfo & cpuInfo, + const TraceFsConstants & traceFsConstants) + : SimpleDriver("Perf"), + traceFsConstants(traceFsConstants), + mTracepoints(nullptr), + mConfig(std::move(configuration)), + mPmuXml(pmuXml), + mCpuInfo(cpuInfo) { // add CPU PMUs for (const auto & perfCpu : mConfig.cpus) { @@ -248,7 +256,7 
@@ PerfDriver::PerfDriver(PerfDriverConfiguration && configuration, char buf[40]; if (getConfig().can_access_tracepoints) { - id = getTracepointId("Interrupts: SoftIRQ", "irq/softirq_exit"); + id = _getTracepointId(traceFsConstants, "Interrupts: SoftIRQ", "irq/softirq_exit"); if (id >= 0) { for (const auto & perfCpu : mConfig.cpus) { snprintf(buf, sizeof(buf), "%s_softirq", perfCpu.gator_cpu.getId()); @@ -262,7 +270,7 @@ PerfDriver::PerfDriver(PerfDriverConfiguration && configuration, } } - id = getTracepointId("Interrupts: IRQ", "irq/irq_handler_exit"); + id = _getTracepointId(traceFsConstants, "Interrupts: IRQ", "irq/irq_handler_exit"); if (id >= 0) { for (const auto & perfCpu : mConfig.cpus) { snprintf(buf, sizeof(buf), "%s_irq", perfCpu.gator_cpu.getId()); @@ -276,7 +284,7 @@ PerfDriver::PerfDriver(PerfDriverConfiguration && configuration, } } - id = getTracepointId("Scheduler: Switch", SCHED_SWITCH); + id = _getTracepointId(traceFsConstants, "Scheduler: Switch", SCHED_SWITCH); if (id >= 0) { for (const auto & perfCpu : mConfig.cpus) { snprintf(buf, sizeof(buf), "%s_switch", perfCpu.gator_cpu.getId()); @@ -290,7 +298,7 @@ PerfDriver::PerfDriver(PerfDriverConfiguration && configuration, } } - id = getTracepointId("Clock: Frequency", CPU_FREQUENCY); + id = _getTracepointId(traceFsConstants, "Clock: Frequency", CPU_FREQUENCY); if (id >= 0 && access("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq", R_OK) == 0) { for (const auto & perfCpu : mConfig.cpus) { snprintf(buf, sizeof(buf), "%s_freq", perfCpu.gator_cpu.getId()); @@ -531,7 +539,7 @@ void PerfDriver::readEvents(mxml_node_t * const xml) const char * arg = mxmlElementGetAttr(node, "arg"); - long long id = getTracepointId(counter, tracepoint); + long long id = _getTracepointId(traceFsConstants, counter, tracepoint); if (id >= 0) { logg.logMessage("Using perf for %s", counter); setCounters(new PerfCounter(getCounters(), @@ -598,7 +606,7 @@ void PerfDriver::addMidgardHwTracepoints(const char * const 
maliFamilyName) "PM_L2_0", "PM_L2_1"}; - id = getTracepointId("Mali: PM Status", "mali/mali_pm_status"); + id = _getTracepointId(traceFsConstants, "Mali: PM Status", "mali/mali_pm_status"); if (id >= 0) { for (const auto * i : MALI_MIDGARD_PM_STATUS_EVENTS) { snprintf(buf, sizeof(buf), "ARM_Mali-%s_%s", maliFamilyName, i); @@ -608,9 +616,9 @@ void PerfDriver::addMidgardHwTracepoints(const char * const maliFamilyName) } } - id = getTracepointId(MALI_MMU_IN_USE, MALI_TRC_PNT_PATH[MALI_MMU_IN_USE]); + id = _getTracepointId(traceFsConstants, MALI_MMU_IN_USE, MALI_TRC_PNT_PATH[MALI_MMU_IN_USE]); if (id >= 0) { - const int id2 = getTracepointId(MALI_PM_STATUS, MALI_TRC_PNT_PATH[MALI_PM_STATUS]); + const int id2 = _getTracepointId(traceFsConstants, MALI_PM_STATUS, MALI_TRC_PNT_PATH[MALI_PM_STATUS]); for (const auto * i : MALI_MIDGARD_AS_IN_USE_RELEASED) { snprintf(buf, sizeof(buf), "ARM_Mali-%s_%s", maliFamilyName, i); addCounterWithConfigId2(buf, id, id2); @@ -619,7 +627,7 @@ void PerfDriver::addMidgardHwTracepoints(const char * const maliFamilyName) } } - id = getTracepointId(MALI_MMU_PAGE_FAULT, MALI_TRC_PNT_PATH[MALI_MMU_PAGE_FAULT]); + id = _getTracepointId(traceFsConstants, MALI_MMU_PAGE_FAULT, MALI_TRC_PNT_PATH[MALI_MMU_PAGE_FAULT]); if (id >= 0) { for (const auto * i : MALI_MIDGARD_PAGE_FAULT_INSERT_PAGES) { snprintf(buf, sizeof(buf), "ARM_Mali-%s_%s", maliFamilyName, i); @@ -628,7 +636,7 @@ void PerfDriver::addMidgardHwTracepoints(const char * const maliFamilyName) } } - id = getTracepointId(MALI_MMU_TOTAL_ALLOC, MALI_TRC_PNT_PATH[MALI_MMU_TOTAL_ALLOC]); + id = _getTracepointId(traceFsConstants, MALI_MMU_TOTAL_ALLOC, MALI_TRC_PNT_PATH[MALI_MMU_TOTAL_ALLOC]); if (id >= 0) { snprintf(buf, sizeof(buf), "ARM_Mali-%s_%s", maliFamilyName, MALI_MIDGARD_TOTAL_ALLOC_PAGES); addCounter(buf, id); @@ -636,7 +644,7 @@ void PerfDriver::addMidgardHwTracepoints(const char * const maliFamilyName) } // for activity counters - id = getTracepointId(MALI_JOB_SLOT, 
MALI_TRC_PNT_PATH[MALI_JOB_SLOT]); + id = _getTracepointId(traceFsConstants, MALI_JOB_SLOT, MALI_TRC_PNT_PATH[MALI_JOB_SLOT]); if (id >= 0) { snprintf(buf, sizeof(buf), "ARM_Mali-%s_fragment", maliFamilyName); addCounter(buf, id); @@ -740,11 +748,15 @@ void PerfDriver::setupCounter(Counter & counter) return; } - logg.logMessage("Configuring perf counter %s with event (%d)", perfCounter->getName(), counter.getEvent()); + const auto & optionalEventCode = counter.getEventCode(); + + logg.logMessage("Configuring perf counter %s with event (0x%" PRIxEventCode ")", + perfCounter->getName(), + (optionalEventCode.isValid() ? optionalEventCode.asU64() : 0)); // Don't use the config from counters XML if it's not set, ex: software counters - if (counter.getEvent() != -1) { - perfCounter->setConfig(counter.getEvent()); + if (optionalEventCode.isValid()) { + perfCounter->setConfig(optionalEventCode.asU64()); } if (counter.getCount() > 0) { // EBS @@ -810,7 +822,7 @@ lib::Optional PerfDriver::setupSpe(int sampleRate, const SpeConfigu bool PerfDriver::enable(IPerfGroups & group, IPerfAttrsConsumer & attrsConsumer) const { const uint64_t id = getConfig().can_access_tracepoints - ? getTracepointId("Mali: Job slot events", "mali/mali_job_slots_event") + ? 
_getTracepointId(traceFsConstants, "Mali: Job slot events", "mali/mali_job_slots_event") : 0 /* never used */; bool sentMaliJobSlotEvents = false; @@ -874,14 +886,15 @@ void PerfDriver::read(IPerfAttrsConsumer & attrsConsumer, const int cpu) bool PerfDriver::sendTracepointFormats(IPerfAttrsConsumer & attrsConsumer) { - if (!readTracepointFormat(attrsConsumer, SCHED_SWITCH) || !readTracepointFormat(attrsConsumer, CPU_IDLE) || - !readTracepointFormat(attrsConsumer, CPU_FREQUENCY)) { + if (!readTracepointFormat(attrsConsumer, traceFsConstants, SCHED_SWITCH) || + !readTracepointFormat(attrsConsumer, traceFsConstants, CPU_IDLE) || + !readTracepointFormat(attrsConsumer, traceFsConstants, CPU_FREQUENCY)) { return false; } for (PerfTracepoint * tracepoint = mTracepoints; tracepoint != nullptr; tracepoint = tracepoint->getNext()) { if (tracepoint->getCounter()->isEnabled() && - !readTracepointFormat(attrsConsumer, tracepoint->getTracepoint())) { + !readTracepointFormat(attrsConsumer, traceFsConstants, tracepoint->getTracepoint())) { return false; } } diff --git a/daemon/linux/perf/PerfDriver.h b/daemon/linux/perf/PerfDriver.h index 81db1ff1..b69fb105 100644 --- a/daemon/linux/perf/PerfDriver.h +++ b/daemon/linux/perf/PerfDriver.h @@ -24,6 +24,7 @@ class IPerfAttrsConsumer; class PerfTracepoint; class UncorePmu; class ICpuInfo; +struct TraceFsConstants; static const char * MALI_MMU_IN_USE = "Mali: MMU address space in use"; static const char * MALI_PM_STATUS = "Mali: PM Status"; @@ -43,7 +44,8 @@ class PerfDriver : public SimpleDriver { PerfDriver(PerfDriverConfiguration && configuration, PmuXML && pmuXml, const char * maliFamilyName, - const ICpuInfo & cpuInfo); + const ICpuInfo & cpuInfo, + const TraceFsConstants & traceFsConstants); ~PerfDriver() override; const PerfConfig & getConfig() const { return mConfig.config; } @@ -59,9 +61,10 @@ class PerfDriver : public SimpleDriver { void read(IPerfAttrsConsumer & attrsConsumer, int cpu); bool 
sendTracepointFormats(IPerfAttrsConsumer & attrsConsumer); + const TraceFsConstants & getTraceFsConstants() const { return traceFsConstants; }; + private: - void addCpuCounters(const PerfCpu & cpu); - void addUncoreCounters(const PerfUncore & uncore); + const TraceFsConstants & traceFsConstants; PerfTracepoint * mTracepoints; PerfDriverConfiguration mConfig; PmuXML mPmuXml; @@ -73,6 +76,8 @@ class PerfDriver : public SimpleDriver { PerfDriver(PerfDriver &&) = delete; PerfDriver & operator=(PerfDriver &&) = delete; + void addCpuCounters(const PerfCpu & cpu); + void addUncoreCounters(const PerfUncore & uncore); void addMidgardHwTracepoints(const char * maliFamilyName); }; diff --git a/daemon/linux/perf/PerfDriverConfiguration.cpp b/daemon/linux/perf/PerfDriverConfiguration.cpp index 74b081cc..70c49d7a 100644 --- a/daemon/linux/perf/PerfDriverConfiguration.cpp +++ b/daemon/linux/perf/PerfDriverConfiguration.cpp @@ -3,6 +3,7 @@ #include "linux/perf/PerfDriverConfiguration.h" #include "Logging.h" +#include "PerfUtils.h" #include "SessionData.h" #include "k/perf_event.h" #include "lib/FileDescriptor.h" @@ -23,11 +24,14 @@ constexpr int PerfDriverConfiguration::UNKNOWN_CPUID; constexpr char PerfDriverConfiguration::ARMV82_SPE[]; +static const std::string debugPerfEventMlockKbPropString = "debug.perf_event_mlock_kb"; +static const std::string securityPerfHardenPropString = "security.perf_harden"; + using lib::FsEntry; static bool getPerfHarden() { - const char * const command[] = {"getprop", "security.perf_harden", nullptr}; + const char * const command[] = {"getprop", securityPerfHardenPropString.c_str(), nullptr}; const lib::PopenResult getprop = lib::popen(command); if (getprop.pid < 0) { logg.logMessage("lib::popen(%s %s) failed: %s. 
Probably not android", @@ -43,17 +47,21 @@ static bool getPerfHarden() return value == '1'; } -static void setPerfHarden(bool on) +static void setProp(const std::string & prop, const std::string & value) { - const char * const command[] = {"setprop", "security.perf_harden", on ? "1" : "0", nullptr}; - - const lib::PopenResult setprop = lib::popen(command); - if (setprop.pid < 0) { - logg.logError("lib::popen(%s %s %s) failed: %s", command[0], command[1], command[2], strerror(-setprop.pid)); + const char * const command[] = {"setprop", prop.c_str(), value.c_str(), nullptr}; + + const lib::PopenResult setPropResult = lib::popen(command); + if (setPropResult.pid < 0) { + logg.logError("lib::popen(%s %s %s) failed: %s", + command[0], + command[1], + command[2], + strerror(-setPropResult.pid)); return; } - const int status = lib::pclose(setprop); + const int status = lib::pclose(setPropResult); if (!WIFEXITED(status)) { logg.logError("'%s %s %s' exited abnormally", command[0], command[1], command[2]); return; @@ -65,6 +73,50 @@ static void setPerfHarden(bool on) } } +static void setPerfHarden(bool on) +{ + setProp(securityPerfHardenPropString, on ? "1" : "0"); +} + +static bool setPerfEventMlockKb(int newValue) +{ + lib::Optional fileValue = perf_utils::readPerfEventMlockKb(); + + if (fileValue.valid() && fileValue.get() == newValue) { + return true; + } + + logg.logWarning("setting property %s to %d", debugPerfEventMlockKbPropString.c_str(), newValue); + setProp(debugPerfEventMlockKbPropString, std::to_string(newValue)); + + // Trigger debug property update + setPerfHarden(false); + + // Give time for the debug property update to finish + sleep(1); + + fileValue = perf_utils::readPerfEventMlockKb(); + const bool result = fileValue.valid() ? 
fileValue.get() == newValue : false; + + if (!result) { + logg.logWarning("failed to set property %s to %d", debugPerfEventMlockKbPropString.c_str(), newValue); + } + + return result; +} + +static void setSuitablePerfEventMlockKbValue(int numCpus) +{ + // This function needs further consideration because "largeBufferSize" might be less than the default + const int largeBufferSize = (1 + numCpus * 64) * (gSessionData.mPageSize / 1024); + + if (!setPerfEventMlockKb(largeBufferSize)) { + const int smallerBufferSize = 129 * (gSessionData.mPageSize / 1024); + + setPerfEventMlockKb(smallerBufferSize); + } +} + /** * @return true if perf harden in now off */ @@ -74,7 +126,7 @@ static bool disablePerfHarden() return true; } - logg.logWarning("disabling property security.perf_harden"); + logg.logWarning("disabling property %s", securityPerfHardenPropString.c_str()); setPerfHarden(false); @@ -103,6 +155,7 @@ void logCpuNotFound() } std::unique_ptr PerfDriverConfiguration::detect(bool systemWide, + const char * tracefsEventsPath, lib::Span cpuIds, const PmuXML & pmuXml) { @@ -130,10 +183,14 @@ std::unique_ptr PerfDriverConfiguration::detect(bool sy const bool isRoot = (lib::geteuid() == 0); if (!isRoot && !disablePerfHarden()) { - logg.logSetup("Failed to disable property security.perf_harden\n" // - "Try 'adb shell setprop security.perf_harden 0'"); - logg.logError("Failed to disable property security.perf_harden\n" // - "Try 'setprop security.perf_harden 0' as the shell or root user."); + logg.logSetup("Failed to disable property %s\n" // + "Try 'adb shell setprop %s 0'", + securityPerfHardenPropString.c_str(), + securityPerfHardenPropString.c_str()); + logg.logError("Failed to disable property %s\n" // + "Try 'setprop %s 0' as the shell or root user.", + securityPerfHardenPropString.c_str(), + securityPerfHardenPropString.c_str()); return nullptr; } @@ -185,7 +242,7 @@ std::unique_ptr PerfDriverConfiguration::detect(bool sy return nullptr; } - const bool 
can_access_tracepoints = (lib::access(EVENTS_PATH, R_OK) == 0); + const bool can_access_tracepoints = (lib::access(tracefsEventsPath, R_OK) == 0); const bool can_access_raw_tracepoints = can_access_tracepoints && (isRoot || perf_event_paranoid == -1); if (can_access_tracepoints) { logg.logMessage("Have access to tracepoints"); @@ -208,22 +265,24 @@ std::unique_ptr PerfDriverConfiguration::detect(bool sy } else { if (isRoot) { - logg.logSetup(EVENTS_PATH - " does not exist\nIs CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER enabled?"); - logg.logError(EVENTS_PATH " is not available.\n" - "Try:\n" - " - mount -t debugfs none /sys/kernel/debug"); + logg.logSetup("%s does not exist\nIs CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER enabled?", + tracefsEventsPath); + logg.logError("%s is not available.\n" + "Try:\n" + " - mount -t debugfs none /sys/kernel/debug", + tracefsEventsPath); } else { - logg.logSetup(EVENTS_PATH - " does not exist\nIs CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER enabled?"); - logg.logError(EVENTS_PATH " is not available.\n" - "Try:\n" - " * --system-wide=no,\n" - " * run gatord as root,\n" - " * or (as root):\n" - " - mount -o remount,mode=755 /sys/kernel/debug\n" - " - mount -o remount,mode=755 /sys/kernel/debug/tracing"); + logg.logSetup("%s does not exist\nIs CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER enabled?", + tracefsEventsPath); + logg.logError("%s is not available.\n" + "Try:\n" + " * --system-wide=no,\n" + " * run gatord as root,\n" + " * or (as root):\n" + " - mount -o remount,mode=755 /sys/kernel/debug\n" + " - mount -o remount,mode=755 /sys/kernel/debug/tracing", + tracefsEventsPath); } } return nullptr; @@ -440,5 +499,7 @@ std::unique_ptr PerfDriverConfiguration::detect(bool sy "configured in the target device tree."); } + setSuitablePerfEventMlockKbValue(cpuIds.size()); + return configuration; } diff --git a/daemon/linux/perf/PerfDriverConfiguration.h b/daemon/linux/perf/PerfDriverConfiguration.h index d12bc9ec..68787258 
100644 --- a/daemon/linux/perf/PerfDriverConfiguration.h +++ b/daemon/linux/perf/PerfDriverConfiguration.h @@ -34,6 +34,7 @@ struct PerfDriverConfiguration { PerfConfig config {false, false, false, false, false, false, false, false, false, false, false, false, false}; static std::unique_ptr detect(bool systemWide, + const char * tracefsEventsPath, lib::Span cpuIds, const PmuXML & pmuXml); diff --git a/daemon/linux/perf/PerfEventGroup.cpp b/daemon/linux/perf/PerfEventGroup.cpp index 612401d0..19d944d5 100644 --- a/daemon/linux/perf/PerfEventGroup.cpp +++ b/daemon/linux/perf/PerfEventGroup.cpp @@ -386,10 +386,10 @@ std::pair PerfEventGroup::onlineCPU(int cpu, case PerfEventGroupIdentifier::Type::UNCORE_PMU: { groupLabel = uncorePmu->getCoreName(); const std::set cpuMask = perf_utils::readCpuMask(uncorePmu->getId()); - if ((!cpuMask.empty()) && (cpuMask.count(cpu) == 0)) { - return std::make_pair(OnlineResult::SUCCESS, ""); - } - else if (cpuMask.empty() && (cpu != 0)) { + const bool currentCpuNotInMask = ((!cpuMask.empty()) && (cpuMask.count(cpu) == 0)); + const bool maskIsEmptyAndCpuNotDefault = (cpuMask.empty() && (cpu != 0)); + if (currentCpuNotInMask || maskIsEmptyAndCpuNotDefault) { + // SKIP this core without marking an error return std::make_pair(OnlineResult::SUCCESS, ""); } break; diff --git a/daemon/linux/perf/PerfSource.cpp b/daemon/linux/perf/PerfSource.cpp index 1f9c9646..6ae9aed6 100644 --- a/daemon/linux/perf/PerfSource.cpp +++ b/daemon/linux/perf/PerfSource.cpp @@ -1,9 +1,11 @@ /* Copyright (C) 2010-2020 by Arm Limited. All rights reserved. 
*/ +#define BUFFER_USE_SESSION_DATA #include "linux/perf/PerfSource.h" #include "Child.h" #include "DynBuf.h" +#include "FtraceDriver.h" #include "ICpuInfo.h" #include "Logging.h" #include "OlyUtility.h" @@ -55,6 +57,8 @@ PerfSource::PerfSource(PerfDriver & driver, bool enableOnCommandExec, ICpuInfo & cpuInfo) : mSummary(1024 * 1024, senderSem), + mMemoryBuffer(16 * 1024 * 1024, senderSem), + mPerfToMemoryBuffer(mMemoryBuffer, gSessionData.mOneShot), mCountersBuf(createPerfBufferConfig()), mCountersGroup(driver.getConfig(), mCountersBuf.getDataBufferLength(), @@ -66,7 +70,7 @@ PerfSource::PerfSource(PerfDriver & driver, !gSessionData.mIsEBS, cpuInfo.getClusters(), cpuInfo.getClusterIds(), - getTracepointId(SCHED_SWITCH)), + getTracepointId(driver.getTraceFsConstants(), SCHED_SWITCH)), mMonitor(), mUEvent(), mAppTids(std::move(appTids)), @@ -88,10 +92,23 @@ PerfSource::PerfSource(PerfDriver & driver, mAppTids.insert(getpid()); } + // allow self profiling +#if (defined(GATOR_SELF_PROFILE) && (GATOR_SELF_PROFILE != 0)) + const bool profileGator = true; +#else + const bool profileGator = (mAppTids.erase(0) != 0); // user can set --pid 0 to dynamically enable this feature +#endif + if (profileGator) { + // track child and parent process + mAppTids.insert(getpid()); + mAppTids.insert(getppid()); + } + // was !enableOnCommandExec but this causes us to miss the exec comm record associated with the // enable on exec doesn't work for cpu-wide events. 
+ // additionally, when profiling gator, must be turned off this->enableOnCommandExec = (enableOnCommandExec && !mConfig.is_system_wide && mConfig.has_attr_clockid_support && - mConfig.has_attr_comm_exec); + mConfig.has_attr_comm_exec && !profileGator); } bool PerfSource::prepare() @@ -122,7 +139,8 @@ bool PerfSource::prepare() return false; } - if (mConfig.is_system_wide && (!mUEvent.init() || !mMonitor.add(mUEvent.getFd()))) { + // always try uevents, event as non-root, but continue if not supported + if (mUEvent.init() && !mMonitor.add(mUEvent.getFd())) { logg.logMessage("uevent setup failed"); return false; } @@ -227,6 +245,8 @@ static const char CPU_DEVPATH[] = "/devices/system/cpu/cpu"; void PerfSource::run(std::uint64_t monotonicStart, std::function endSession) { + prctl(PR_SET_NAME, reinterpret_cast(&"gatord-perf"), 0, 0, 0); + pthread_t procThread; ProcThreadArgs procThreadArgs; @@ -293,52 +313,89 @@ void PerfSource::run(std::uint64_t monotonicStart, std::function endSess // start profiling mProfilingStartedCallback(); - const uint64_t NO_RATE = ~0ULL; - const uint64_t rate = gSessionData.mLiveRate > 0 && gSessionData.mSampleRate > 0 ? gSessionData.mLiveRate : NO_RATE; - uint64_t nextTime = 0; - int timeout = rate != NO_RATE ? 0 : -1; - while (true) { - // +1 for uevents, +1 for pipe - std::vector events {mCpuInfo.getNumberOfCores() + 2}; - int ready = mMonitor.wait(events.data(), events.size(), timeout); + static constexpr uint64_t NO_RATE = ~0ULL; + const bool isLive = (gSessionData.mLiveRate > 0 && gSessionData.mSampleRate > 0); + const uint64_t rate = (isLive ? gSessionData.mLiveRate : NO_RATE); + int timeout = (rate != NO_RATE ? 
0 : -1); + bool complete = false; + std::vector events; + while (!complete) { + // allocate enough space for all the FDs in the monitor + events.resize(std::min(2, mMonitor.size())); + + // wait for some events + const int ready = mMonitor.wait(events.data(), events.size(), timeout); if (ready < 0) { logg.logError("Monitor::wait failed"); handleException(); } - const uint64_t currTime = getTime() - monotonicStart; + const uint64_t currTimeMonotonicDelta = (getTime() - monotonicStart); + + // validate the events + bool hasCoreData = false; for (int i = 0; i < ready; ++i) { if (events[i].data.fd == mUEvent.getFd()) { - if (!handleUEvent(currTime)) { + if (!handleUEvent(currTimeMonotonicDelta)) { logg.logError("PerfSource::handleUEvent failed"); handleException(); } - break; } else if (events[i].data.fd == *mInterruptRead) { - goto exitOuterLoop; + complete = true; + break; + } + else { + // at least one core has overflowed its watermark + hasCoreData |= ((events[i].events & EPOLLIN) == EPOLLIN); + + // remove error or expired items + if (((events[i].events & EPOLLHUP) == EPOLLHUP) || ((events[i].events & EPOLLERR) == EPOLLERR)) { + mMonitor.remove(events[i].data.fd); + } } } + const bool liveTimedOut = (isLive && !hasCoreData); + // send a notification that data is ready - sem_post(&mSenderSem); + // in live mode, we flush the perf ring buffer periodically so that the UI can + // show data in a timely manner. 
+ // when complete, perform one final flush, regardless of whether or not the + // watermark is met + // otherwise just flush when a buffer watermark notification happens + if (liveTimedOut || complete || hasCoreData) { + if (!mCountersBuf.send(mPerfToMemoryBuffer)) { + logg.logError("PerfBuffer::send failed"); + handleException(); + } - // In one shot mode, stop collection once all the buffers are filled - if (gSessionData.mOneShot && ((mSummary.bytesAvailable() <= 0) || (mAttrsBuffer->bytesAvailable() <= 0) || - (mProcBuffer->bytesAvailable() <= 0) || mCountersBuf.isFull())) { - logg.logMessage("One shot (perf)"); - endSession(); + if (isLive) { + mMemoryBuffer.flush(); + } } - if (rate != NO_RATE) { - while (currTime > nextTime) { - nextTime += rate; + // In one shot mode, stop collection once all the buffers are filled + if (!complete) { + if (gSessionData.mOneShot && ((mSummary.bytesAvailable() <= 0) || (mAttrsBuffer->bytesAvailable() <= 0) || + (mProcBuffer->bytesAvailable() <= 0) || mPerfToMemoryBuffer.isFull())) { + logg.logMessage("One shot (perf)"); + endSession(); + } + + if (rate != NO_RATE) { + const auto nowMonotonicDelta = (getTime() - monotonicStart); + const auto nextExpectedMonotonicDelta = ((currTimeMonotonicDelta + rate - 1) / rate) * rate; + const auto nowMonotonicDeltaRoundedToRate = ((nowMonotonicDelta + rate - 1) / rate) * rate; + const auto nextMonotonicDelta = + (nextExpectedMonotonicDelta > nowMonotonicDelta ? 
nextExpectedMonotonicDelta + : nowMonotonicDeltaRoundedToRate); + + // + NS_PER_MS - 1 to ensure always rounding up + timeout = std::max(0, ((nextMonotonicDelta + NS_PER_MS - 1) - nowMonotonicDelta) / NS_PER_MS); } - // + NS_PER_MS - 1 to ensure always rounding up - timeout = std::max(0, (nextTime + NS_PER_MS - 1 - getTime() + monotonicStart) / NS_PER_MS); } } -exitOuterLoop: if (onlineMonitorThread) { onlineMonitorThread->terminate(); @@ -346,13 +403,25 @@ void PerfSource::run(std::uint64_t monotonicStart, std::function endSess procThreadArgs.mIsDone = true; pthread_join(procThread, nullptr); + + // stop all the perf events mCountersGroup.stop(); + // send any final remaining data now that the events are stopped + if (!mCountersBuf.send(mPerfToMemoryBuffer)) { + logg.logError("PerfBuffer::send failed"); + handleException(); + } + // terminate all remaining sync threads if (mSyncThread != nullptr) { mSyncThread->terminate(); } + // close off the buffer + mMemoryBuffer.flush(); + mPerfToMemoryBuffer.setDone(); + mIsDone = true; // send a notification that data is ready @@ -462,10 +531,8 @@ bool PerfSource::write(ISender & sender) mSummary.write(sender); mAttrsBuffer->write(sender); mProcBuffer->write(sender); - if (!mCountersBuf.send(sender)) { - logg.logError("PerfBuffer::send failed"); - handleException(); - } + mPerfToMemoryBuffer.write(sender); + // This is racey, unless we assume no one posts reader sem before profiling started if (mSyncThread != nullptr) { mSyncThread->send(sender); diff --git a/daemon/linux/perf/PerfSource.h b/daemon/linux/perf/PerfSource.h index 92443fec..d8fccb9c 100644 --- a/daemon/linux/perf/PerfSource.h +++ b/daemon/linux/perf/PerfSource.h @@ -3,6 +3,7 @@ #ifndef PERFSOURCE_H #define PERFSOURCE_H +#include "Buffer.h" #include "Monitor.h" #include "Source.h" #include "SummaryBuffer.h" @@ -12,6 +13,7 @@ #include "linux/perf/PerfBuffer.h" #include "linux/perf/PerfGroups.h" #include "linux/perf/PerfSyncThreadBuffer.h" +#include 
"linux/perf/PerfToMemoryBuffer.h" #include #include @@ -45,6 +47,8 @@ class PerfSource : public PrimarySource { bool handleCpuOffline(uint64_t currTime, unsigned cpu); SummaryBuffer mSummary; + Buffer mMemoryBuffer; + PerfToMemoryBuffer mPerfToMemoryBuffer; PerfBuffer mCountersBuf; PerfGroups mCountersGroup; Monitor mMonitor; diff --git a/daemon/linux/perf/PerfToMemoryBuffer.cpp b/daemon/linux/perf/PerfToMemoryBuffer.cpp new file mode 100644 index 00000000..bf84ef37 --- /dev/null +++ b/daemon/linux/perf/PerfToMemoryBuffer.cpp @@ -0,0 +1,177 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. */ + +#include "linux/perf/PerfToMemoryBuffer.h" + +#include "BufferUtils.h" +#include "ISender.h" +#include "lib/Assert.h" + +PerfToMemoryBuffer::PerfToMemoryBuffer(IRawFrameBuilderWithDirectAccess & builder, + IBufferControl & controller, + bool oneShot) + : builder(builder), controller(controller), bufferSem(), full(false), done(false), oneShot(oneShot) +{ + sem_init(&bufferSem, 0, 0); +} + +bool PerfToMemoryBuffer::waitFor(std::size_t bytes) +{ + while (std::size_t(builder.bytesAvailable()) <= bytes) { + if (oneShot || done) { + full = true; + return false; + } + builder.flush(); + sem_wait(&bufferSem); + } + return true; +} + +bool PerfToMemoryBuffer::isFull() const +{ + return full || controller.isFull(); +} + +void PerfToMemoryBuffer::setDone() +{ + controller.setDone(); + done = true; + sem_post(&bufferSem); +} + +bool PerfToMemoryBuffer::write(ISender & sender) +{ + const auto result = controller.write(sender); + sem_post(&bufferSem); + return result; +} + +void PerfToMemoryBuffer::consumePerfAuxRecord(int cpu, + std::uint64_t auxTailValue, + lib::Span recordChunks) +{ + static constexpr int MAX_HEADER_SIZE = buffer_utils::MAXSIZE_PACK32 // frame type + + buffer_utils::MAXSIZE_PACK32 // cpu + + buffer_utils::MAXSIZE_PACK64 // tail + + buffer_utils::MAXSIZE_PACK32; // size + static constexpr int MAX_FRAME_SIZE = ISender::MAX_RESPONSE_LENGTH - MAX_HEADER_SIZE; 
+ + // skip if complete + if (full) { + return; + } + + for (auto & recordChunk : recordChunks) { + for (std::size_t offset = 0; offset < recordChunk.byteCount;) { + if (!waitFor(MAX_HEADER_SIZE)) { + return; + } + + const std::size_t bytesRemaining = recordChunk.byteCount - offset; + const int maxWriteLength = std::min(bytesRemaining, MAX_FRAME_SIZE); + const int actualWriteLength = std::min(maxWriteLength, builder.bytesAvailable() - MAX_HEADER_SIZE); + + if (actualWriteLength <= 0) { + runtime_assert(actualWriteLength == 0, "Negative write length???"); + continue; + } + + builder.beginFrame(FrameType::PERF_AUX); + builder.packInt(cpu); + builder.packInt64(auxTailValue); + builder.packInt(actualWriteLength); + builder.writeBytes(recordChunk.chunkPointer + offset, actualWriteLength); + builder.endFrame(); + + offset += actualWriteLength; + auxTailValue += actualWriteLength; + } + } +} + +void PerfToMemoryBuffer::consumePerfDataRecord(int cpu, lib::Span recordChunks) +{ + static constexpr int MAX_HEADER_SIZE = buffer_utils::MAXSIZE_PACK32 // frame type + + buffer_utils::MAXSIZE_PACK32 // cpu + + 4; // blob length + + // skip if complete + if (full) { + return; + } + + static_assert(sizeof(IPerfBufferConsumer::data_word_t) == 8, "Expected word size is 64-bit"); + + bool inFrame = false; + int lengthWriteIndex = 0; + std::uint32_t totalWrittenSinceFrameEnd = 0; + for (auto & recordChunk : recordChunks) { + const std::size_t totalWordCount = + recordChunk.firstChunk.wordCount + + (recordChunk.optionalSecondChunk.chunkPointer != nullptr ? recordChunk.optionalSecondChunk.wordCount : 0); + const std::size_t requiredBytesForRecord = totalWordCount * buffer_utils::MAXSIZE_PACK64; + + // are we in a frame, is there space to push another record? 
+ if (inFrame) { + if (std::size_t(builder.bytesAvailable()) >= requiredBytesForRecord) { + // yes, append the frame data and continue + totalWrittenSinceFrameEnd += appendData(recordChunk); + continue; + } + else { + // no, just end the current frame + endDataFrame(lengthWriteIndex, totalWrittenSinceFrameEnd); + inFrame = false; + totalWrittenSinceFrameEnd = 0; + } + } + + const std::size_t totalRequiredBytes = MAX_HEADER_SIZE + requiredBytesForRecord; + if (!waitFor(totalRequiredBytes)) { + return; + } + + // write the header + builder.beginFrame(FrameType::PERF_DATA); + builder.packInt(cpu); + lengthWriteIndex = builder.getWriteIndex(); + builder.advanceWrite(4); // skip the length field for now + + // write the record + inFrame = true; + totalWrittenSinceFrameEnd = appendData(recordChunk); + } + + if (inFrame) { + endDataFrame(lengthWriteIndex, totalWrittenSinceFrameEnd); + } +} + +void PerfToMemoryBuffer::endDataFrame(int lengthWriteIndex, std::uint32_t totalWrittenSinceFrameEnd) +{ + const char lengthBuffer[4] = {char(totalWrittenSinceFrameEnd >> 0), + char(totalWrittenSinceFrameEnd >> 8), + char(totalWrittenSinceFrameEnd >> 16), + char(totalWrittenSinceFrameEnd >> 24)}; + + builder.writeDirect(lengthWriteIndex, lengthBuffer, 4); + builder.endFrame(); +} + +std::uint32_t PerfToMemoryBuffer::appendData(const DataRecordChunkTuple & recordChunk) +{ + return appendData(recordChunk.firstChunk) + appendData(recordChunk.optionalSecondChunk); +} + +std::uint32_t PerfToMemoryBuffer::appendData(const DataRecordChunk & recordChunk) +{ + std::uint32_t result = 0; + + if (recordChunk.chunkPointer != nullptr) { + for (std::size_t index = 0; index < recordChunk.wordCount; ++index) { + result += builder.packInt64(recordChunk.chunkPointer[index]); + } + } + + return result; +} diff --git a/daemon/linux/perf/PerfToMemoryBuffer.h b/daemon/linux/perf/PerfToMemoryBuffer.h new file mode 100644 index 00000000..75a9239b --- /dev/null +++ b/daemon/linux/perf/PerfToMemoryBuffer.h @@ 
-0,0 +1,39 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "Buffer.h" +#include "IBufferControl.h" +#include "IRawFrameBuilder.h" +#include "linux/perf/IPerfBufferConsumer.h" + +#include +#include + +class PerfToMemoryBuffer : public IPerfBufferConsumer, public IBufferControl { +public: + PerfToMemoryBuffer(Buffer & buffer, bool oneShot) : PerfToMemoryBuffer(buffer, buffer, oneShot) {} + PerfToMemoryBuffer(IRawFrameBuilderWithDirectAccess & builder, IBufferControl & controller, bool oneShot); + + void consumePerfAuxRecord(int cpu, + std::uint64_t auxTailValue, + lib::Span recordChunks) override; + void consumePerfDataRecord(int cpu, lib::Span recordChunks) override; + + bool write(ISender & sender) override; + bool isFull() const override; + void setDone() override; + +private: + IRawFrameBuilderWithDirectAccess & builder; + IBufferControl & controller; + sem_t bufferSem; + std::atomic full; + std::atomic done; + bool oneShot; + + bool waitFor(std::size_t bytes); + void endDataFrame(int lengthWriteIndex, std::uint32_t totalWrittenSinceFrameEnd); + std::uint32_t appendData(const DataRecordChunkTuple & recordChunk); + std::uint32_t appendData(const DataRecordChunk & recordChunk); +}; diff --git a/daemon/linux/perf/PerfUtils.h b/daemon/linux/perf/PerfUtils.h index 2cfd3edf..dc4c7d7a 100644 --- a/daemon/linux/perf/PerfUtils.h +++ b/daemon/linux/perf/PerfUtils.h @@ -4,6 +4,7 @@ #define PERF_UTILS_H #include "lib/Format.h" +#include "lib/Optional.h" #include "lib/Utils.h" #include @@ -15,6 +16,18 @@ namespace perf_utils { std::string path = lib::Format() << "/sys/bus/event_source/devices/" << pmncName << "/cpumask"; return lib::readCpuMaskFromFile(path.c_str()); } + + inline lib::Optional readPerfEventMlockKb() + { + std::int64_t perfEventMlockKb = 0; + + if (lib::readInt64FromFile("/proc/sys/kernel/perf_event_mlock_kb", perfEventMlockKb) == 0) { + return lib::Optional(perfEventMlockKb); + } + else { + return lib::Optional(); 
+ } + } } #endif // PERF_UTILS_H diff --git a/daemon/linux/proc/ProcessChildren.cpp b/daemon/linux/proc/ProcessChildren.cpp index a71fc5b4..cc600664 100644 --- a/daemon/linux/proc/ProcessChildren.cpp +++ b/daemon/linux/proc/ProcessChildren.cpp @@ -38,7 +38,7 @@ namespace lnx { while ((taskEntry = readdir(taskDir.get())) != nullptr) { // no point recursing if we're relying on the fall back if (std::strcmp(taskEntry->d_name, ".") != 0 && std::strcmp(taskEntry->d_name, "..") != 0) { - const int child = atoi(taskEntry->d_name); + const auto child = std::strtol(taskEntry->d_name, nullptr, 10); if (child > 0) { tids.insert(child); } diff --git a/daemon/main.cpp b/daemon/main.cpp index c9e1c9f8..d12fe7db 100644 --- a/daemon/main.cpp +++ b/daemon/main.cpp @@ -5,6 +5,7 @@ #include "ConfigurationXML.h" #include "CounterXML.h" #include "Drivers.h" +#include "ExitStatus.h" #include "GatorCLIParser.h" #include "ICpuInfo.h" #include "Logging.h" @@ -17,6 +18,8 @@ #include "lib/FileDescriptor.h" #include "lib/Memory.h" #include "lib/Utils.h" +#include "xml/CurrentConfigXML.h" +#include "linux/perf/PerfUtils.h" #include "xml/EventsXML.h" #include "xml/PmuXMLParser.h" @@ -97,6 +100,11 @@ static StateAndPid handleSigchld(StateAndPid currentStateAndChildPid, Drivers & if (WIFEXITED(status)) { exitStatus = WEXITSTATUS(status); logg.logMessage("Child process %d terminated normally with status %d", pid, exitStatus); + if (exitStatus == OK_TO_EXIT_GATOR_EXIT_CODE) { + logg.logMessage("Received EXIT_OK command. 
exiting gatord"); + cleanUp(); + exit(0); + } } else { assert(WIFSIGNALED(status)); @@ -287,6 +295,16 @@ namespace { logg.logMessage("INVESTIGATE: Received unknown command type COMMAND_PING"); return State::PROCESS_COMMANDS; } + State handleExit() override + { + logg.logMessage("INVESTIGATE: Received unknown command type COMMAND_EXIT_OK"); + return State::EXIT_OK; + } + State handleRequestCurrentConfig() override + { + logg.logMessage("INVESTIGATE: Received unknown command type COMMAND_REQUEST_CURRENT_CONFIG"); + return State::PROCESS_COMMANDS_CONFIG; + } }; /** @@ -309,7 +327,19 @@ namespace { // Wait to receive a single command StreamlineCommandHandler commandHandler; const auto result = streamlineSetupCommandIteration(client, commandHandler, [](bool) -> void {}); - if (result != IStreamlineCommandHandler::State::EXIT_DISCONNECT) { + + if (result == IStreamlineCommandHandler::State::PROCESS_COMMANDS_CONFIG) { + auto currentConfigXML = + current_config_xml::generateCurrentConfigXML(getpid(), // since its main get the pid, instead of ppid + getuid(), + gSessionData.mSystemWide, + gSessionData.mWaitingOnCommand, + gSessionData.mWaitForProcessCommand, + gSessionData.mCaptureWorkingDir, + gSessionData.mPids); + sender.writeData(currentConfigXML.data(), currentConfigXML.size(), ResponseType::CURRENT_CONFIG, true); + } + else if (result != IStreamlineCommandHandler::State::EXIT_DISCONNECT) { // the expectation is that the user sends COMMAND_DISCONNECT, so anything else is an error logg.logError("Session already in progress"); sender.writeData(logg.getLastError(), strlen(logg.getLastError()), ResponseType::ERROR, true); @@ -464,27 +494,6 @@ void updateSessionData(const ParserResult & result) gSessionData.mPerfMmapSizeInPages = result.mPerfMmapSizeInPages; gSessionData.mSpeSampleRate = result.mSpeSampleRate; - // use value from perf_event_mlock_kb - if ((gSessionData.mPerfMmapSizeInPages <= 0) && (geteuid() != 0) && (gSessionData.mPageSize >= 1024)) { - std::int64_t 
perfEventMlockKb = 0; - if (lib::readInt64FromFile("/proc/sys/kernel/perf_event_mlock_kb", perfEventMlockKb) == 0) { - if (perfEventMlockKb > 0) { - const std::uint64_t perfEventMlockPages = (perfEventMlockKb / (gSessionData.mPageSize / 1024)); - gSessionData.mPerfMmapSizeInPages = int(std::min(perfEventMlockPages - 1, INT_MAX)); - logg.logMessage("Default perf mmap size set to %d pages (%llukb)", - gSessionData.mPerfMmapSizeInPages, - gSessionData.mPerfMmapSizeInPages * gSessionData.mPageSize / 1024ULL); - } - } - else { - // the default seen on most setups is 516kb, if user cannot read the file it is probably - // because they are on Android in locked down setup so use default value of 128 pages - gSessionData.mPerfMmapSizeInPages = 128; - logg.logMessage("Default perf mmap size set to %d pages (%llukb)", - gSessionData.mPerfMmapSizeInPages, - gSessionData.mPerfMmapSizeInPages * gSessionData.mPageSize / 1024ULL); - } - } //These values are set from command line and are alos part of session.xml //and hence cannot be modified during parse session if ((result.parameterSetFlag & USE_CMDLINE_ARG_SAMPLE_RATE) != 0) { @@ -507,6 +516,32 @@ void updateSessionData(const ParserResult & result) } } +void updatePerfMmapSize() +{ + // use value from perf_event_mlock_kb + if ((gSessionData.mPerfMmapSizeInPages <= 0) && (geteuid() != 0) && (gSessionData.mPageSize >= 1024)) { + + // the default seen on most setups is 516kb, if user cannot read the file it is probably + // because they are on Android in locked down setup so use default value of 128 pages + gSessionData.mPerfMmapSizeInPages = 128; + + const lib::Optional perfEventMlockKb = perf_utils::readPerfEventMlockKb(); + + if (perfEventMlockKb.valid() && perfEventMlockKb.get() > 0) { + const int perfMmapSizeInPages = lib::calculatePerfMmapSizeInPages(std::uint64_t(perfEventMlockKb.get()), + std::uint64_t(gSessionData.mPageSize)); + + if (perfMmapSizeInPages > 0) { + gSessionData.mPerfMmapSizeInPages = perfMmapSizeInPages; + } 
+ } + + logg.logMessage("Default perf mmap size set to %d pages (%llukb)", + gSessionData.mPerfMmapSizeInPages, + gSessionData.mPerfMmapSizeInPages * gSessionData.mPageSize / 1024ULL); + } +} + // Gator data flow: collector -> collector fifo -> sender int main(int argc, char ** argv) { @@ -586,13 +621,14 @@ int main(int argc, char ** argv) if (result.mode == ParserResult::ExecutionMode::EXIT) { handleException(); } + updateSessionData(result); - PmuXML pmuXml = readPmuXml(result.pmuPath); // detect the primary source // Call before setting up the SIGCHLD handler, as system() spawns child processes + Drivers drivers {result.mSystemWide, readPmuXml(result.pmuPath), result.mDisableCpuOnlining, TraceFsConstants::detect()}; - Drivers drivers {result.mSystemWide, std::move(pmuXml), result.mDisableCpuOnlining}; + updatePerfMmapSize(); if (result.mode == ParserResult::ExecutionMode::PRINT) { if (result.printables.count(ParserResult::Printable::EVENTS_XML) == 1) { @@ -674,6 +710,17 @@ int main(int argc, char ** argv) } } + // This line has to be printed because Streamline needs to detect when + // gator is ready to listen and accept socket connections via adb forwarding. Without this + // print out there is a chance that Streamline establishes a connection to the adb forwarder, + // but the forwarder cannot establish a connection to a gator, because gator is not up and listening + // for sockets yet. If the adb forwarder cannot establish a connection to gator, what streamline + // experiences is a successful socket connection, but when it attempts to read from the socket + // it reads an empty line when attempting to read the gator protocol header, and terminates the + // connection. 
+ std::cout << "Gator ready" << std::endl; + std::cout.flush(); + // Forever loop, can be exited via a signal or exception while (1) { struct epoll_event events[3]; diff --git a/daemon/mali_userspace/MaliDevice.cpp b/daemon/mali_userspace/MaliDevice.cpp index f1fba146..b26cf159 100644 --- a/daemon/mali_userspace/MaliDevice.cpp +++ b/daemon/mali_userspace/MaliDevice.cpp @@ -2,6 +2,7 @@ #include "mali_userspace/MaliDevice.h" +#include "GetEventKey.h" #include "Logging.h" #include "lib/Assert.h" #include "mali_userspace/MaliHwCntrNames.h" @@ -12,6 +13,23 @@ #include namespace mali_userspace { + + static const Constant maliBusWidthBits = Constant(getEventKey(), + "ARM_Mali-CONST_BUS_WIDTH_BITS", + "Mali Constants", + "Bus Width Bits", + ConstantMode::PerCore); + static const Constant maliCacheSliceCount = Constant(getEventKey(), + "ARM_Mali-CONST_L2_SLICE_COUNT", + "Mali Constants", + "L2 Slice Count", + ConstantMode::PerCore); + static const Constant maliShaderCoreCount = Constant(getEventKey(), + "ARM_Mali-CONST_SHADER_CORE_COUNT", + "Mali Constants", + "Shader Core Count", + ConstantMode::PerCore); + enum class MaliCounterBlockName : uint32_t { JM = 0, TILER = 1, @@ -635,4 +653,18 @@ namespace mali_userspace { } } } + + void MaliDevice::insertConstants(std::set & dest) + { + dest.insert(maliBusWidthBits); + dest.insert(maliCacheSliceCount); + dest.insert(maliShaderCoreCount); + }; + + std::map MaliDevice::getConstantValues() const + { + return {{maliBusWidthBits.getKey(), deviceApi->getExternalBusWidth()}, + {maliCacheSliceCount.getKey(), deviceApi->getNumberOfL2Slices()}, + {maliShaderCoreCount.getKey(), deviceApi->getNumberOfUsableShaderCores()}}; + }; } diff --git a/daemon/mali_userspace/MaliDevice.h b/daemon/mali_userspace/MaliDevice.h index c1b6425b..99329d62 100644 --- a/daemon/mali_userspace/MaliDevice.h +++ b/daemon/mali_userspace/MaliDevice.h @@ -3,6 +3,7 @@ #ifndef NATIVE_GATOR_DAEMON_MALI_USERSPACE_MALIDEVICE_H_ #define 
NATIVE_GATOR_DAEMON_MALI_USERSPACE_MALIDEVICE_H_ +#include "Constant.h" #include "IBlockCounterFrameBuilder.h" #include "lib/AutoClosingFd.h" #include "mali_userspace/MaliDeviceApi.h" @@ -10,7 +11,9 @@ #include #include #include +#include #include +#include #include namespace mali_userspace { @@ -223,6 +226,10 @@ namespace mali_userspace { std::uint32_t mmuL2Bitmask, bool & failedDueToBufferCount) const; + static void insertConstants(std::set & dest); + + std::map getConstantValues() const; + private: /** Init a block in the enable list */ static void initCounterList(uint32_t gpuId, diff --git a/daemon/mali_userspace/MaliDeviceApi.cpp b/daemon/mali_userspace/MaliDeviceApi.cpp index 7fcc5886..3cc26c0c 100644 --- a/daemon/mali_userspace/MaliDeviceApi.cpp +++ b/daemon/mali_userspace/MaliDeviceApi.cpp @@ -10,6 +10,7 @@ #include "mali_userspace/MaliDeviceApi_DdkDefines.h" #include +#include #include #include #include @@ -50,6 +51,7 @@ namespace mali_userspace { uint32_t minor, uint32_t frequency, uint32_t l2Slices, + uint32_t busWidth, uint64_t shaderCoreMask) { const char * const productName = findMaliProductNameFromId(productId); @@ -69,13 +71,16 @@ namespace mali_userspace { if (frequency > 0) { formatter << " clocked at " << frequency << "MHz"; } - formatter << ", " << l2Slices << " L2 Slices, " << shaderCores << " Shader Cores"; } else { formatter << " but it is not recognized (id: 0x" << std::hex << productId << std::dec << " r" << major - << "p" << minor << ", " << l2Slices << " L2 slices, " << shaderCores << " Shader Cores"; + << "p" << minor; } + formatter << ", " << l2Slices << " L2 Slices, "; + formatter << busWidth << "-bit Bus, "; + formatter << shaderCores << " Shader Cores"; + if (shaderCoreMask != ((1ull << shaderCores) - 1)) { formatter << " (sparse layout, mask is 0x" << std::hex << shaderCoreMask << std::dec << ")"; } @@ -89,6 +94,17 @@ namespace mali_userspace { logg.logSetup("%s", std::string(formatter).c_str()); } + + static uint32_t 
extractBusWidth(uint32_t raw_l2_features) + { + uint32_t log2_bus_width = raw_l2_features >> 24; + + // If the log2 is >31 then the exp2 of it will not fit in our 32-bit result + runtime_assert(log2_bus_width <= 31, "Unexpectedly large bus width value"); + + // The value is log2 of the real value, so use a bitshift to invert that + return (1u << log2_bus_width); + } } /** @@ -122,7 +138,8 @@ namespace mali_userspace { numberOfL2Slices(props.props.l2_props.num_l2_slices), gpuId(props.props.core_props.product_id), hwVersion((uint32_t(props.props.core_props.major_revision) << 16) | - props.props.core_props.minor_revision) + props.props.core_props.minor_revision), + busWidth(extractBusWidth(props.props.raw_props.l2_features)) { logDetectedMaliDevice(maliDevicePath, props.props.core_props.product_id, @@ -130,6 +147,7 @@ namespace mali_userspace { props.props.core_props.minor_revision, props.props.core_props.gpu_speed_mhz, props.props.l2_props.num_l2_slices, + busWidth, shaderCoreAvailabilityMask); } @@ -191,6 +209,8 @@ namespace mali_userspace { virtual uint32_t getHwVersion() const override { return hwVersion; } + virtual uint32_t getExternalBusWidth() const override { return busWidth; } + private: static uint64_t calcShaderCoreMask(const kbase_uk_gpuprops & props) { @@ -206,6 +226,7 @@ namespace mali_userspace { const uint32_t numberOfL2Slices; const uint32_t gpuId; const uint32_t hwVersion; + const uint32_t busWidth; }; std::unique_ptr probe(const char * maliDevicePath, lib::AutoClosingFd devFd) @@ -336,15 +357,15 @@ namespace mali_userspace { * @param size * @return The useful decoded fields */ - static gpu_propeties decodeProperties(uint8_t * buffer, int size) + static gpu_properties decodeProperties(uint8_t * buffer, int size) { - gpu_propeties result {}; + gpu_properties result {}; for (int pos = 0; pos < size;) { const uint32_t token = readU32(buffer, pos, size); const auto key = KBaseGpuPropKey(token >> 2); - const auto value_type = KBaseGpuPropValueSize(token 
& 3); - const uint64_t value = readValue(value_type, buffer, pos, size); + const auto value_size = KBaseGpuPropValueSize(token & 3); + const uint64_t value = readValue(value_size, buffer, pos, size); switch (key) { case KBaseGpuPropKey::PRODUCT_ID: @@ -356,6 +377,10 @@ namespace mali_userspace { case KBaseGpuPropKey::MAJOR_REVISION: result.major_revision = value; break; + case KBaseGpuPropKey::RAW_L2_FEATURES: + runtime_assert(value_size == KBaseGpuPropValueSize::U32, "Unexpected L2 features size"); + result.bus_width = extractBusWidth(value); + break; case KBaseGpuPropKey::COHERENCY_NUM_CORE_GROUPS: runtime_assert(value <= BASE_MAX_COHERENT_GROUPS, "Too many core groups"); result.num_core_groups = value; @@ -424,12 +449,13 @@ namespace mali_userspace { */ class MaliDeviceApi final : public IMaliDeviceApi { public: - MaliDeviceApi(const char * maliDevicePath, lib::AutoClosingFd devFd, const gpu_propeties & props) + MaliDeviceApi(const char * maliDevicePath, lib::AutoClosingFd devFd, const gpu_properties & props) : devFd(std::move(devFd)), shaderCoreAvailabilityMask(calcShaderCoreMask(props)), numberOfL2Slices(props.num_l2_slices), gpuId(props.product_id), - hwVersion((uint32_t(props.major_revision) << 16) | props.minor_revision) + hwVersion((uint32_t(props.major_revision) << 16) | props.minor_revision), + busWidth(props.bus_width) { logDetectedMaliDevice(maliDevicePath, props.product_id, @@ -437,6 +463,7 @@ namespace mali_userspace { props.minor_revision, 0, props.num_l2_slices, + busWidth, shaderCoreAvailabilityMask); } @@ -484,8 +511,10 @@ namespace mali_userspace { virtual uint32_t getHwVersion() const override { return hwVersion; } + virtual uint32_t getExternalBusWidth() const override { return busWidth; } + private: - static uint64_t calcShaderCoreMask(const gpu_propeties & props) + static uint64_t calcShaderCoreMask(const gpu_properties & props) { uint64_t core_mask = 0; for (uint32_t i = 0; i < props.num_core_groups; i++) { @@ -499,6 +528,7 @@ namespace 
mali_userspace { const uint32_t numberOfL2Slices; const uint32_t gpuId; const uint32_t hwVersion; + const uint32_t busWidth; }; std::unique_ptr probe(const char * maliDevicePath, lib::AutoClosingFd devFd) @@ -514,7 +544,7 @@ namespace mali_userspace { logg.logMessage("MaliDeviceApi: Failed setting ABI version ioctl"); return {}; } - else if (version_check.major < 11) { + else if ((version_check.major != 1) && (version_check.major != 11)) { logg.logMessage("MaliDeviceApi: Unsupported ABI version %u.%u", version_check.major, version_check.minor); @@ -559,7 +589,7 @@ namespace mali_userspace { // decode the properties data { - const gpu_propeties properties = decodeProperties(buffer.get(), size); + const gpu_properties properties = decodeProperties(buffer.get(), size); return std::unique_ptr { new MaliDeviceApi(maliDevicePath, std::move(devFd), properties)}; } diff --git a/daemon/mali_userspace/MaliDeviceApi.h b/daemon/mali_userspace/MaliDeviceApi.h index 2cf12bd0..b7dcc0b3 100644 --- a/daemon/mali_userspace/MaliDeviceApi.h +++ b/daemon/mali_userspace/MaliDeviceApi.h @@ -59,6 +59,8 @@ namespace mali_userspace { virtual std::uint32_t getGpuId() const = 0; /** @return The hardware version of the device */ virtual std::uint32_t getHwVersion() const = 0; + /** @return The cache's external data bus size */ + virtual std::uint32_t getExternalBusWidth() const = 0; }; } diff --git a/daemon/mali_userspace/MaliDeviceApi_DdkDefines.h b/daemon/mali_userspace/MaliDeviceApi_DdkDefines.h index eb7ce214..12c546db 100644 --- a/daemon/mali_userspace/MaliDeviceApi_DdkDefines.h +++ b/daemon/mali_userspace/MaliDeviceApi_DdkDefines.h @@ -194,13 +194,14 @@ namespace mali_userspace { static constexpr std::size_t BASE_MAX_COHERENT_GROUPS = 16; /** GPU properties decoded from data blob */ - struct gpu_propeties { + struct gpu_properties { uint32_t product_id; uint32_t minor_revision; uint32_t major_revision; uint32_t num_core_groups; uint32_t num_l2_slices; uint32_t 
core_mask[BASE_MAX_COHERENT_GROUPS]; + uint32_t bus_width; }; /** Identify the size of a gpuprop value */ @@ -211,6 +212,7 @@ namespace mali_userspace { PRODUCT_ID = 1, MINOR_REVISION = 3, MAJOR_REVISION = 4, + RAW_L2_FEATURES = 29, COHERENCY_NUM_CORE_GROUPS = 62, COHERENCY_GROUP_0 = 64, COHERENCY_GROUP_1 = 65, diff --git a/daemon/mali_userspace/MaliHwCntrDriver.cpp b/daemon/mali_userspace/MaliHwCntrDriver.cpp index faf1fa3a..58cb800b 100644 --- a/daemon/mali_userspace/MaliHwCntrDriver.cpp +++ b/daemon/mali_userspace/MaliHwCntrDriver.cpp @@ -117,6 +117,8 @@ namespace mali_userspace { counter.setKey(malihwcCounter->getKey()); } + void MaliHwCntrDriver::insertConstants(std::set & dest) { MaliDevice::insertConstants(dest); } + int MaliHwCntrDriver::getCounterKey(uint32_t nameBlockIndex, uint32_t counterIndex, uint32_t gpuId) const { if (counterIndex < MaliDevice::NUM_COUNTERS_PER_BLOCK) { diff --git a/daemon/mali_userspace/MaliHwCntrDriver.h b/daemon/mali_userspace/MaliHwCntrDriver.h index 91836deb..c0f86984 100644 --- a/daemon/mali_userspace/MaliHwCntrDriver.h +++ b/daemon/mali_userspace/MaliHwCntrDriver.h @@ -4,13 +4,14 @@ #define NATIVE_GATOR_DAEMON_MIDGARDHWCOUNTERDRIVER_H_ #include "PolledDriver.h" +#include "SessionData.h" #include "SimpleDriver.h" #include "lib/Optional.h" #include "mali_userspace/MaliHwCntrReader.h" #include #include -#include +#include namespace mali_userspace { /** * Implements a counter driver for all Mali Midgard devices with r8p0 or later driver installed. 
@@ -34,6 +35,7 @@ namespace mali_userspace { inline const std::map> & getDevices() const { return mDevices; } + void insertConstants(std::set & dest) override; int getCounterKey(uint32_t nameBlockIndex, uint32_t counterIndex, uint32_t gpuId) const; const char * getSupportedDeviceFamilyName() const; diff --git a/daemon/mali_userspace/MaliHwCntrSource.cpp b/daemon/mali_userspace/MaliHwCntrSource.cpp index 2bd62045..f598009b 100644 --- a/daemon/mali_userspace/MaliHwCntrSource.cpp +++ b/daemon/mali_userspace/MaliHwCntrSource.cpp @@ -66,7 +66,8 @@ namespace mali_userspace { std::move(frameBuilder), deviceNumber, *this, - readerRef)); + readerRef, + device.getConstantValues())); tasks.push_back(std::move(task)); } } diff --git a/daemon/mali_userspace/MaliHwCntrTask.cpp b/daemon/mali_userspace/MaliHwCntrTask.cpp index dfcf9697..b8f0a26d 100644 --- a/daemon/mali_userspace/MaliHwCntrTask.cpp +++ b/daemon/mali_userspace/MaliHwCntrTask.cpp @@ -16,12 +16,14 @@ namespace mali_userspace { std::unique_ptr frameBuilder, std::int32_t deviceNumber, IMaliDeviceCounterDumpCallback & callback_, - IMaliHwCntrReader & reader) + IMaliHwCntrReader & reader, + const std::map & constantValues) : mBuffer(std::move(buffer)), mFrameBuilder(std::move(frameBuilder)), mCallback(callback_), mReader(reader), - deviceNumber(deviceNumber) + deviceNumber(deviceNumber), + mConstantValues(constantValues) { } @@ -35,10 +37,21 @@ namespace mali_userspace { const uint32_t sampleIntervalNs = (sampleRate > 0 ? (sampleRate < 1000000000 ? 
(1000000000U / sampleRate) : 1U) : 10000000U); + if (mConstantValues.size() > 0) { + bool wroteConstants = writeConstants(); + if (!wroteConstants) { + logg.logError("Failed to send constants for device %d", deviceNumber); + mFrameBuilder->flush(); + mBuffer->setDone(); + return; + } + } + if (!mReader.startPeriodicSampling(sampleIntervalNs)) { logg.logError("Could not enable periodic sampling"); terminated = true; } + // create the list of enabled counters const MaliDeviceCounterList countersList(mReader.getDevice().createCounterList(mCallback)); while (!terminated) { @@ -81,8 +94,28 @@ namespace mali_userspace { if (!mReader.startPeriodicSampling(0)) { logg.logError("Could not disable periodic sampling"); } + + mFrameBuilder->flush(); mBuffer->setDone(); } bool MaliHwCntrTask::write(ISender & sender) { return mBuffer->write(sender); } + + bool MaliHwCntrTask::writeConstants() + { + constexpr uint64_t constantsTimestamp = 0; + if (mFrameBuilder->eventHeader(constantsTimestamp) && mFrameBuilder->eventCore(deviceNumber)) { + for (const auto & pair : mConstantValues) { + const auto & keyOfConstant = pair.first; + const int64_t value = pair.second; + + if (!mFrameBuilder->event64(keyOfConstant, value)) { + return false; + } + } + mFrameBuilder->flush(); + return true; + } + return false; + } } diff --git a/daemon/mali_userspace/MaliHwCntrTask.h b/daemon/mali_userspace/MaliHwCntrTask.h index 876081a3..918dc57b 100644 --- a/daemon/mali_userspace/MaliHwCntrTask.h +++ b/daemon/mali_userspace/MaliHwCntrTask.h @@ -24,7 +24,8 @@ namespace mali_userspace { std::unique_ptr frameBuilder, std::int32_t deviceNumber, IMaliDeviceCounterDumpCallback & callback, - IMaliHwCntrReader & reader); + IMaliHwCntrReader & reader, + const std::map & constants); void execute(int sampleRate, bool isOneShot, std::uint64_t monotonicStart, std::function endSession); bool write(ISender & sender); @@ -34,12 +35,15 @@ namespace mali_userspace { IMaliDeviceCounterDumpCallback & mCallback; 
IMaliHwCntrReader & mReader; std::int32_t deviceNumber; + const std::map mConstantValues; // Intentionally unimplemented MaliHwCntrTask(const MaliHwCntrTask &) = delete; MaliHwCntrTask & operator=(const MaliHwCntrTask &) = delete; MaliHwCntrTask(MaliHwCntrTask &&) = delete; MaliHwCntrTask & operator=(MaliHwCntrTask &&) = delete; + + bool writeConstants(); }; } diff --git a/daemon/mali_userspace/MaliInstanceLocator.cpp b/daemon/mali_userspace/MaliInstanceLocator.cpp index 7bda92ac..afd63d67 100644 --- a/daemon/mali_userspace/MaliInstanceLocator.cpp +++ b/daemon/mali_userspace/MaliInstanceLocator.cpp @@ -44,6 +44,7 @@ namespace mali_userspace { if (childStats.type() == lib::FsEntry::Type::DIR) { // check name is 'mali#' unsigned int id = 0; + // NOLINTNEXTLINE(cert-err34-c) if (dirIsCalledMisc && sscanf(childEntry->name().c_str(), "mali%u", &id) == 1) { // don't repeat your self if (gpuClockPaths.count(id) > 0) { diff --git a/daemon/non_root/NonRootSource.cpp b/daemon/non_root/NonRootSource.cpp index d11cec88..1aed3a19 100644 --- a/daemon/non_root/NonRootSource.cpp +++ b/daemon/non_root/NonRootSource.cpp @@ -60,7 +60,7 @@ namespace non_root { // process related stuff BlockCounterFrameBuilder processCounterBuilder {mProcessCounterBuffer, gSessionData.mLiveRate}; - BlockCounterMessageConsumer processCounterConsumer {globalCounterBuilder}; + BlockCounterMessageConsumer processCounterConsumer {processCounterBuilder}; ProcessStateChangeHandler processChangeHandler(processCounterConsumer, mMiscBuffer, mSwitchBuffers, @@ -95,6 +95,9 @@ namespace non_root { usleep(sleepUs); } + processCounterBuilder.flush(); + globalCounterBuilder.flush(); + mGlobalCounterBuffer.setDone(); mProcessCounterBuffer.setDone(); mMiscBuffer.setDone(); diff --git a/daemon/pmus.xml b/daemon/pmus.xml index 43e066e1..3762b469 100644 --- a/daemon/pmus.xml +++ b/daemon/pmus.xml @@ -143,6 +143,7 @@ + diff --git a/daemon/xml/CurrentConfigXML.cpp b/daemon/xml/CurrentConfigXML.cpp new file mode 100644 index 
00000000..e0e5af7e --- /dev/null +++ b/daemon/xml/CurrentConfigXML.cpp @@ -0,0 +1,69 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. */ + +#include "CurrentConfigXML.h" + +#include "MxmlUtils.h" + +namespace current_config_xml { + static constexpr char TAG_CURRENT_CONFIG[] = "current_config"; + static constexpr char TAG_PIDS_TO_CAPTURE[] = "pids_to_capture"; + static constexpr char TAG_STATE[] = "state"; + + static constexpr char PID[] = "pid"; + static constexpr char ATTR_UID[] = "uid"; + static constexpr char ATTR_VALUE[] = "value"; + static constexpr char ATTR_SYSTEM_WIDE[] = "is_system_wide"; + static constexpr char ATTR_WAITING_ON_CMD[] = "is_waiting_on_command"; + static constexpr char ATTR_WAIT_FOR_PROCESS[] = "wait_for_process"; + static constexpr char ATTR_CAPTURE_WORKING_DIR[] = "capture_working_directory"; + + std::string generateCurrentConfigXML(std::int32_t pid, + std::uint32_t uid, + bool isSystemWide, + bool isWaitingOnCommand, + const char * waitForProcessCommand, + const char * captureWorkingDir, + std::set & pidsToCapture) + { + // Construct current config XML + mxml_unique_ptr currentConfigNode {makeMxmlUniquePtr(mxmlNewElement(MXML_NO_PARENT, TAG_CURRENT_CONFIG))}; + + mxmlElementSetAttrf(currentConfigNode.get(), PID, "%i", pid); + mxmlElementSetAttrf(currentConfigNode.get(), ATTR_UID, "%u", uid); + + mxml_unique_ptr stateNode {makeMxmlUniquePtr(mxmlNewElement(currentConfigNode.get(), TAG_STATE))}; + if (isSystemWide) { + mxmlElementSetAttr(stateNode.get(), ATTR_SYSTEM_WIDE, "yes"); + } + else { + mxmlElementSetAttr(stateNode.get(), ATTR_SYSTEM_WIDE, "no"); + } + + if (isWaitingOnCommand) { + mxmlElementSetAttr(stateNode.get(), ATTR_WAITING_ON_CMD, "yes"); + } + else { + mxmlElementSetAttr(stateNode.get(), ATTR_WAITING_ON_CMD, "no"); + } + + if (waitForProcessCommand != nullptr) { + mxmlElementSetAttr(stateNode.get(), ATTR_WAIT_FOR_PROCESS, waitForProcessCommand); + } + + if (captureWorkingDir != nullptr) { + 
mxmlElementSetAttr(stateNode.get(), ATTR_CAPTURE_WORKING_DIR, captureWorkingDir); + } + + // Loop through the pids to capture and add to XML + mxml_unique_ptr pidsToCapNode {makeMxmlUniquePtr(nullptr)}; + if (!pidsToCapture.empty()) { + pidsToCapNode.reset(mxmlNewElement(currentConfigNode.get(), TAG_PIDS_TO_CAPTURE)); + for (int pid : pidsToCapture) { + auto pidToCap = mxmlNewElement(pidsToCapNode.get(), PID); + mxmlElementSetAttrf(pidToCap, ATTR_VALUE, "%i", pid); + } + } + + return mxmlSaveAsStdString(currentConfigNode.get(), mxmlWhitespaceCB); + } +}; \ No newline at end of file diff --git a/daemon/xml/CurrentConfigXML.h b/daemon/xml/CurrentConfigXML.h new file mode 100644 index 00000000..6467eea5 --- /dev/null +++ b/daemon/xml/CurrentConfigXML.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2020 by Arm Limited. All rights reserved. */ +#pragma once + +#include +#include + +namespace current_config_xml { + + /** + * Generates the current config XML of gatord. + * Not to be confused with configuration.xml. This XML is sent + * directly to Streamline to inform it of gatord's current configuration + * so it can determine whether gatord should be killed. 
+ * + * @param gatorMainPid the pid of gator-main not child + * @param uid + * @param isSystemWide + * @param isWaitingOnCommand + * @param waitForProcessCommand is the command used with -Q + * @param captureWorkingDir + * @param pidsToCapture what PIDs have been specified to profile + * @returns XML string + */ + std::string generateCurrentConfigXML(std::int32_t gatorMainPid, + std::uint32_t uid, + bool isSystemWide, + bool isWaitingOnCommand, + const char * waitForProcessCommand, + const char * captureWorkingDir, + std::set & pidsToCapture); +}; \ No newline at end of file diff --git a/daemon/xml/EventsXML.cpp b/daemon/xml/EventsXML.cpp index e064ffb6..344aa05a 100644 --- a/daemon/xml/EventsXML.cpp +++ b/daemon/xml/EventsXML.cpp @@ -88,10 +88,10 @@ namespace events_xml { return {mxmlSaveAllocString(xml.get(), mxmlWhitespaceCB), &free}; } - std::map getCounterToEventMap(lib::Span drivers, - lib::Span clusters) + std::map getCounterToEventMap(lib::Span drivers, + lib::Span clusters) { - std::map counterToEventMap {}; + std::map counterToEventMap {}; auto xml = events_xml::getDynamicTree(drivers, clusters); @@ -109,11 +109,11 @@ namespace events_xml { } if (event != nullptr) { - const int eventNo = (int) strtol(event, nullptr, 0); - counterToEventMap[counter] = eventNo; + const auto eventNo = strtoull(event, nullptr, 0); + counterToEventMap[counter] = EventCode(eventNo); } else { - counterToEventMap[counter] = -1; + counterToEventMap[counter] = EventCode(); } } return counterToEventMap; diff --git a/daemon/xml/EventsXML.h b/daemon/xml/EventsXML.h index a2184133..291582fb 100644 --- a/daemon/xml/EventsXML.h +++ b/daemon/xml/EventsXML.h @@ -3,6 +3,7 @@ #ifndef EVENTS_XML_H #define EVENTS_XML_H +#include "EventCode.h" #include "lib/Span.h" #include "mxml/mxml.h" @@ -21,8 +22,8 @@ namespace events_xml { std::unique_ptr getDynamicXML(lib::Span drivers, lib::Span clusters); - std::map getCounterToEventMap(lib::Span drivers, - lib::Span clusters); + std::map 
getCounterToEventMap(lib::Span drivers, + lib::Span clusters); void write(const char * path, lib::Span drivers, lib::Span clusters); }; diff --git a/daemon/xml/EventsXMLProcessor.cpp b/daemon/xml/EventsXMLProcessor.cpp index 1c43a3c4..359775c6 100644 --- a/daemon/xml/EventsXMLProcessor.cpp +++ b/daemon/xml/EventsXMLProcessor.cpp @@ -28,6 +28,7 @@ namespace events_xml { const char ATTR_NAME[] = "name"; const char ATTR_TITLE[] = "title"; const char ATTR_UNITS[] = "units"; + const char ATTR_EVENT[] = "event"; const char CLUSTER_VAR[] = "${cluster}"; @@ -456,8 +457,8 @@ namespace events_xml { for (auto event : category.events) { mxml_node_t * eventNode {mxmlNewElement(categoryNode.get(), TAG_EVENT)}; - if (event.eventNumber) { - mxmlElementSetAttrf(eventNode, TAG_EVENT, "0x%x", event.eventNumber.get()); + if (event.eventNumber.isValid()) { + mxmlElementSetAttrf(eventNode, ATTR_EVENT, "0x%" PRIxEventCode, event.eventNumber.asU64()); } if (event.counter) { mxmlElementSetAttr(eventNode, ATTR_COUNTER, event.counter.get().c_str()); diff --git a/daemon/xml/MxmlUtils.cpp b/daemon/xml/MxmlUtils.cpp index caeba6e0..1ded633c 100644 --- a/daemon/xml/MxmlUtils.cpp +++ b/daemon/xml/MxmlUtils.cpp @@ -62,3 +62,26 @@ const char * mxmlWhitespaceCB(mxml_node_t * node, int loc) return nullptr; } + +std::string mxmlSaveAsStdString(mxml_node_t * node, mxml_save_cb_t whiteSpaceCB) +{ + std::string result; + result.resize(8192); + + // Try writing to string data + int length = mxmlSaveString(node, &result.front(), result.size(), whiteSpaceCB); + + if (length < static_cast(result.size()) - 1) { + // The node fits inside the buffer, shrink and return + result.resize(length); + return result; + } + + // Node is too large so change the string size and return that. 
+ result.resize(length + 1); + mxmlSaveString(node, &result.front(), result.size(), whiteSpaceCB); + // mxmlSaveString will replace the last character will null terminator, + // so we need to resize again + result.resize(length); + return result; +} diff --git a/daemon/xml/MxmlUtils.h b/daemon/xml/MxmlUtils.h index fe8d04bd..80e03a44 100644 --- a/daemon/xml/MxmlUtils.h +++ b/daemon/xml/MxmlUtils.h @@ -6,6 +6,7 @@ #include "mxml/mxml.h" #include +#include /** unique_ptr for mxml nodes */ using mxml_unique_ptr = std::unique_ptr; @@ -21,6 +22,17 @@ inline mxml_unique_ptr makeMxmlUniquePtr(mxml_node_t * node) const char * mxmlWhitespaceCB(mxml_node_t * node, int loc); void copyMxmlElementAttrs(mxml_node_t * dest, mxml_node_t * src); +/** + * Save an XML tree to a std::string + * Similar implementation to mxml-file::mxmlSaveAllocString but returns a std::string + * rather than a char* + * + * @param node to write + * @param whiteSpaceCB the callback or MXML_NO_CALLBACK + * @returns the std::string containing the XML + */ +std::string mxmlSaveAsStdString(mxml_node_t * node, mxml_save_cb_t whiteSpaceCB); + /** * Forward iterator that calls mxmlFindElement */ diff --git a/gator_me.py b/gator_me.py index e5d07f2a..519ff7df 100755 --- a/gator_me.py +++ b/gator_me.py @@ -187,11 +187,10 @@ def adb_async(self, *args): if DEBUG_GATORD: stde = sys.stderr process = sp.Popen(commands, universal_newlines=True, - stdin=stde, stdout=stde, encoding="utf-8") + stdin=stde, stdout=stde) else: devn = sp.DEVNULL - process = sp.Popen(commands, stdin=devn, stdout=devn, stderr=devn, - encoding="utf-8") + process = sp.Popen(commands, stdin=devn, stdout=devn, stderr=devn) return process @@ -208,8 +207,7 @@ def adb_quiet(self, *args): commands.extend(args) # Note do not use shell=True; arguments are not safely escaped - ret = sp.run(commands, stdout=sp.DEVNULL, stderr=sp.DEVNULL, - encoding="utf-8") + ret = sp.run(commands, stdout=sp.DEVNULL, stderr=sp.DEVNULL) def adb(self, *args, **kwargs): """ 
@@ -244,7 +242,7 @@ def adb(self, *args, **kwargs): commands = " ".join(quotedCommands) rep = sp.run(commands, check=True, shell=shell, stdout=sp.PIPE, - stderr=sp.PIPE, universal_newlines=text, encoding="utf-8") + stderr=sp.PIPE, universal_newlines=text) return rep.stdout @@ -665,6 +663,8 @@ def run_gatord_headless(device, package, outputName, timeout): "--wait-process", package, "--stop-on-exit", "yes", "--max-duration", "%u" % timeout, "--output", apcName) + print(" Capture complete, downloading from target") + with tempfile.NamedTemporaryFile() as fileHandle: # Fetch the results by streaming a tar file; we can't "adb pull" # directly for new Android applications due to SELinux policy @@ -783,6 +783,12 @@ def is_a_directory(device, path_to_test): is_directory = device.adb("shell", "if [ -d %s ] ; then echo d ; fi" % path_to_test) return len(is_directory) > 0 +def has_adb(): + """ + Check that the user has adb on PATH + """ + return shutil.which("adb") is not None + def main(): """ Script main function. @@ -827,6 +833,11 @@ def main(): else: shutil.rmtree(args.headless) + # Now check that adb is present + if not has_adb(): + print("ERROR: adb not found. Make sure adb is installed and on your PATH") + return 1 + # Select a specific target device, or fail if we cannot deviceName = get_device_name(args.device, not args.headless) if not deviceName: diff --git a/python/gator.py b/python/gator.py index e3e405bb..8a8ca94d 100644 --- a/python/gator.py +++ b/python/gator.py @@ -260,7 +260,7 @@ def __pack_int(self, n): def run(self, script_or_code): """Equivalent to `runctx(script_or_code, __main__.__dict__, __main__.__dict__)`""" - self.runctx(script_or_code, __main__.__dict__, __main__.__dict__) + self.runctx(script_or_code, dict, dict) def runctx(self, script_or_code, globals = {}, locals = {}): """