Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added long running commands runtime to log files. #5260

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
2 changes: 2 additions & 0 deletions src/drt/src/TritonRoute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "sta/StaMain.hh"
#include "stt/SteinerTreeBuilder.h"
#include "ta/FlexTA.h"
#include "utl/timer.h"

namespace sta {
// Tcl files encoded into strings.
Expand Down Expand Up @@ -917,6 +918,7 @@ void TritonRoute::sendDesignUpdates(const std::string& globals_path)

int TritonRoute::main()
{
utl::ScopedStatistics stat(logger_);
if (DBPROCESSNODE == "GF14_13M_3Mx_2Cx_4Kx_2Hx_2Gx_LB") {
USENONPREFTRACKS = false;
}
Expand Down
2 changes: 2 additions & 0 deletions src/gpl/src/nesterovPlace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "routeBase.h"
#include "timingBase.h"
#include "utl/Logger.h"
#include "utl/timer.h"

namespace gpl {
using utl::GPL;
Expand All @@ -62,6 +63,7 @@ NesterovPlace::NesterovPlace(const NesterovPlaceVars& npVars,
utl::Logger* log)
: NesterovPlace()
{
utl::ScopedStatistics stat(log);
npVars_ = npVars;
pbc_ = pbc;
nbc_ = nbc;
Expand Down
2 changes: 2 additions & 0 deletions src/grt/src/GlobalRouter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
#include "stt/SteinerTreeBuilder.h"
#include "utl/Logger.h"
#include "utl/algorithms.h"
#include "utl/timer.h"

namespace grt {

Expand Down Expand Up @@ -248,6 +249,7 @@ void GlobalRouter::globalRoute(bool save_guides,
bool start_incremental,
bool end_incremental)
{
utl::ScopedStatistics stat(logger_);
if (start_incremental && end_incremental) {
logger_->error(GRT,
251,
Expand Down
2 changes: 2 additions & 0 deletions src/mpl2/src/rtl_mp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "hier_rtlmp.h"
#include "object.h"
#include "odb/db.h"
#include "utl/timer.h"

namespace mpl2 {
using odb::dbDatabase;
Expand Down Expand Up @@ -94,6 +95,7 @@ bool MacroPlacer2::place(const int num_threads,
const bool bus_planning_on,
const char* report_directory)
{
utl::ScopedStatistics stat(logger_);
hier_rtlmp_->setClusterSize(
max_num_macro, min_num_macro, max_num_inst, min_num_inst);
hier_rtlmp_->setClusterSizeTolerance(tolerance);
Expand Down
2 changes: 2 additions & 0 deletions src/rsz/src/RepairDesign.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "sta/Search.hh"
#include "sta/SearchPred.hh"
#include "sta/Units.hh"
#include "utl/timer.h"

namespace rsz {

Expand Down Expand Up @@ -93,6 +94,7 @@ void RepairDesign::repairDesign(double max_wire_length,
bool verbose)
{
init();
utl::ScopedStatistics stat(logger_);
int repaired_net_count, slew_violations, cap_violations;
int fanout_violations, length_violations;
repairDesign(max_wire_length,
Expand Down
3 changes: 3 additions & 0 deletions src/rsz/src/RepairHold.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "sta/TimingArc.hh"
#include "sta/Units.hh"
#include "utl/Logger.h"
#include "utl/timer.h"

namespace rsz {

Expand Down Expand Up @@ -93,6 +94,7 @@ void RepairHold::repairHold(
const bool verbose)
{
init();
utl::ScopedStatistics stat(logger_);
sta_->checkSlewLimitPreamble();
sta_->checkCapacitanceLimitPreamble();
LibertyCell* buffer_cell = findHoldBuffer();
Expand Down Expand Up @@ -132,6 +134,7 @@ void RepairHold::repairHold(const Pin* end_pin,
const int max_passes)
{
init();
utl::ScopedStatistics stat(logger_);
sta_->checkSlewLimitPreamble();
sta_->checkCapacitanceLimitPreamble();
LibertyCell* buffer_cell = findHoldBuffer();
Expand Down
13 changes: 13 additions & 0 deletions src/utl/include/utl/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@

#pragma once

#include <unistd.h>

#include <chrono>
#include <iostream>

Expand Down Expand Up @@ -78,4 +80,15 @@ class DebugScopedTimer : public Timer

std::ostream& operator<<(std::ostream& os, const Timer& t);

class ScopedStatistics : public Timer
{
public:
ScopedStatistics(utl::Logger* logger);
~ScopedStatistics() override;

private:
size_t getPeakMemoryUsage();
utl::Logger* logger_;
};

} // namespace utl
21 changes: 21 additions & 0 deletions src/utl/src/timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@

#include "utl/timer.h"

#include <sys/resource.h>

namespace utl {

void Timer::reset()
Expand Down Expand Up @@ -74,4 +76,23 @@ DebugScopedTimer::~DebugScopedTimer()
debugPrint(logger_, tool_, group_.c_str(), level_, msg_, *this);
}

ScopedStatistics::ScopedStatistics(utl::Logger* logger)
: Timer(), logger_(logger)
{
}

size_t ScopedStatistics::getPeakMemoryUsage()
{
struct rusage rusage;
getrusage(RUSAGE_SELF, &rusage);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

error handling for getrusage missing. There may not be access rights to get this information or otherwise it is not available?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, what I want to know is maximum virtual memory, not maximum resident memory set. The only reason why ORFS is settling for maximum resident memory set is because ps doesn't measure maximum virtual memory.

A discussion on cores and max virtual memory vs. maximum resident memory set.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you can't measure it you can't report it so I'm not sure what is requested here.

I would argue that max virtual memory is not very interesting in 64bit computing. You'll never run out of virtual address space and thrashing is a resident set size issue.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

max virtual address space can be measured in C++ easily enough, it just the time command that doesn't support it, which makes it awkward for ORFS to report it.

I disagree that virtual memory size isn't interesting. resident set size will always be smaller than physical memory and and doesn't give any indication that you have too large a working set and that the system could be thrashing. That said, even if max virtual memory is larger than physical memory, doesn't mean that there will be thrashing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is it easily measured?

rss ~= physical memory is a warning sign. vsz > physical memory doesn't say much.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rss says something about what is happening on the computer it is running on, vsz says something about how much memory is required. As an OpenROAD developer, I don't care about my own computer, I only care about how much memory OpenROAD is using. As a user of OpenROAD, I don't care about the computer that is failing, I already know it is failing, I want to know how much memory is needed.

So, I'm interested in knowing vsz, more than rss. I did run tests with rss and it only tells me something about how much memory is used when I'm not overcommitted. If I'm overcommitted, rss doesn't tell me anything as it will be somewhat close to physical memory. It doesn't tell me how much memory I need.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a mac, but when I compile and run this on Ubuntu, I get the output below:

#include <iostream>
#include <unistd.h>

#ifdef __linux__
#include <fstream>
#elif __APPLE__
#include <mach/mach.h>
#endif

void getMemoryUsage(size_t &rss, size_t &vsz) {
#ifdef __linux__
    std::ifstream statm("/proc/self/statm");
    if (!statm.is_open()) {
        std::cerr << "Error opening /proc/self/statm" << std::endl;
        return;
    }

    size_t pageSize = sysconf(_SC_PAGESIZE);
    size_t size, resident, share, text, lib, data, dt;

    statm >> size >> resident >> share >> text >> lib >> data >> dt;

    rss = resident * pageSize; // Convert from pages to bytes
    vsz = size * pageSize;     // Convert from pages to bytes

    statm.close();
#elif __APPLE__
    mach_task_basic_info info;
    mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;

    if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) != KERN_SUCCESS) {
        std::cerr << "Error getting task info" << std::endl;
        return;
    }

    rss = info.resident_size;
    vsz = info.virtual_size;
#endif
}

int main() {
    size_t rss = 0, vsz = 0;
    getMemoryUsage(rss, vsz);
    std::cout << "RSS: " << rss << " bytes" << std::endl;
    std::cout << "VSZ: " << vsz << " bytes" << std::endl;
    return 0;
}

Output:

$ ./a.out
RSS: 3440640 bytes
VSZ: 6377472 bytes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be more convenient to have both RSS and VSZ information?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my part, I think we need both. Over time, we may develop better understanding of how they interact with OpenROAD and decide we only need rsz or rss and why only one is sufficient, but for now I'm definitely missing rsz when analyzing how much more resources I need to give a build server...

return (size_t) (rusage.ru_maxrss * 1024L);
}
annapetrosyan26 marked this conversation as resolved.
Show resolved Hide resolved

ScopedStatistics::~ScopedStatistics()
{
logger_->report("Runtime {} seconds, memory used {} KB.",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This output leaves me with questions...

Scope? What took this long and used this much memory. What was the memory usage at start of scope compared to the peak usage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The request to add runtime info is originated from #3539 (comment) , so the scope is going to be clear, however I agree with your note about start memory consumption, so I suggest the following format for the log:
Runtime: 0.000545613 seconds, memory: start - 96123222 KB, peak - 105279488 KB.

Will this work?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't describe the scope, what was run during this time.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I mostly would care to know the peak on top of the starting memory consumption and in megabytes. kilobytes is too small a unit in 2024 for humans...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DRT already outputs in this format

[INFO DRT-0267] cpu time = 00:00:04, elapsed time = 00:00:01, memory = 2197.27 (MB), peak = 4031.35 (MB)

Is there a reason to have another?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That prefix is nice and gives some context, but there should be a human description of the scope of what is being measured. In some cases it would be unmabigious from that prefix, but e.g. detailed routing measures for each iteration, for instance. So some further description of context could be useful.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can also add the command name before reporting its runtime to be clear about the scope.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine to add a prefix

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The command name before reporting its runtime is already added. But by adding prefix like this [INFO DRT-0267] we should also create a unique message ID for each tool as Logger::info requires it, is this your suggestion?

Timer::elapsed(),
getPeakMemoryUsage());
}

} // namespace utl
1 change: 1 addition & 0 deletions src/utl/test/regression_tests.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ record_tests {
test_error
test_suppress_message
test_metrics
test_statistics
#utl_man_tcl_check
#utl_readme_msgs_check
#test_error_exception
Expand Down
1 change: 1 addition & 0 deletions src/utl/test/test_statistics.ok
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Passed: Statistics are logged.
42 changes: 42 additions & 0 deletions src/utl/test/test_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hook up to CI?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't using pathlib or os a matter of taste ?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I mean with "CI" is continuous integration, I wanted to know if this test is hooked up to the tests run on pull requests in github.

Regarding os vs pathlib, I think pathlib is more recent and a bit more elegant(but a little bit more complex than os) in the Python world but that os is more familiar(I know it better). I have no strong opinion on either for such simple scripts as these. It depends on what my colleagues prefer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is included in the regression, so it's in CI.

import subprocess
import re

def run_regression_script(regression_script_path, argument):
try:
subprocess.run([regression_script_path, argument], check=True)
except subprocess.CalledProcessError as e:
print(f"Error running regression script: {e}")
return False
return True

def check_log_file(log_path):
pattern = re.compile(r"Runtime\s+\d+\.\d+\s+seconds,\s+memory used\s+\d+\s+KB")
runtime_count = 0

with open(log_path, 'r') as log_file:
for line in log_file:
if pattern.search(line):
runtime_count += 1
if runtime_count == 2:
print("Passed: Statistics are logged.")
exit(0)
else:
print("Statistics are not logged enough times.")
exit(1)

def main():
script_dir = os.path.dirname(os.path.abspath(__file__))
base_dir = os.path.abspath(os.path.join(script_dir, "../../rsz/test"))
log_path = os.path.join(base_dir, "results", "repair_design4-tcl.log")
regression_script_path = os.path.join(base_dir, "regression")

if not os.path.exists(log_path):
if not run_regression_script(regression_script_path, "repair_design4"):
exit(1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is a regression test calling regression again? This will create a conflict with the results of running repair_design4 (especially if run in parallel with this test). It is better to make this test self-contained.


check_log_file(log_path)

if __name__ == "__main__":
main()

28 changes: 22 additions & 6 deletions test/regression.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -230,14 +230,30 @@ proc run_test_lang { test lang } {
set tmp_file [file join $result_dir $test.tmp]
exec tr -d "\r" < $log_file > $tmp_file
file rename -force $tmp_file $log_file
if [catch [concat exec diff $diff_options $ok_file $log_file \
>> $diff_file]] {
puts " *FAIL*$error_msg"
append_failure $test
incr errors(fail)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this proc is already quite long. Break out a new proc and give the proc a good name so that it is clear what is being tested. Testing for regressions in maximum resident set is bound to have very significant variations...

# Create a temporary filtered log file that ignores lines with the specific pattern
set tmp_log_file [file join $result_dir "${test}_log.tmp"]

set sed_result [catch {exec sed {/Runtime.*seconds.*memory.*used.*KB.*/d} \
$log_file > $tmp_log_file} sed_error]
if {$sed_result != 0} {
puts "Error running sed"
puts "Error message: $sed_error"
incr errors(fail)
} else {
puts " pass$error_msg"
# Proceed with the diff operation
set result [catch {exec diff $diff_options $ok_file $tmp_log_file >> $diff_file}]
if {$result != 0} {
exec cat $diff_file
puts " *FAIL*$error_msg"
append_failure $test
incr errors(fail)
} else {
puts " pass$error_msg"
}
}
# Clean up the temporary filtered log file.
file delete -force $tmp_log_file
} else {
puts " *NO OK FILE*$error_msg"
append_failure $test
Expand Down
Loading