Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(PE-38408) Remove expensive Regexes from puppet profiler Java impl #2880

Merged
merged 3 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/clj/puppetlabs/services/jruby/jruby_metrics_core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@
(do
(.update wait-timer
(- (System/currentTimeMillis) (:time ta))
(TimeUnit/MILLISECONDS))
TimeUnit/MILLISECONDS)
(swap! requested-instances dissoc requested-event))
(log/warn (trs "Unable to find request event for borrowed JRuby instance: {0}" event))))

Expand All @@ -281,8 +281,8 @@
(if-let [ta (get @borrowed-instances worker-id)]
(let [elapsed-time (- (System/currentTimeMillis) (:time ta))
per-reason-timer (timer-for-borrow-reason metrics ta)]
(.update borrow-timer elapsed-time (TimeUnit/MILLISECONDS))
(.update per-reason-timer elapsed-time (TimeUnit/MILLISECONDS))
(.update borrow-timer elapsed-time TimeUnit/MILLISECONDS)
(.update per-reason-timer elapsed-time TimeUnit/MILLISECONDS)
(MDC/put "jruby.borrow-time" (Long/toString elapsed-time))
(swap! borrowed-instances dissoc worker-id))
(log/warn (trs "JRuby instance ''{0}'' returned, but no record of when it was borrowed!" worker-id)))))
Expand Down Expand Up @@ -314,7 +314,7 @@
(do
(.update lock-wait-timer
(- (System/currentTimeMillis) (:time lock-request))
(TimeUnit/MILLISECONDS))
TimeUnit/MILLISECONDS)
(swap! lock-requests assoc
lock-request-id
{:state :acquired
Expand All @@ -330,7 +330,7 @@
(do
(.update lock-held-timer
(- (System/currentTimeMillis) (:time lock-request))
(TimeUnit/MILLISECONDS))
TimeUnit/MILLISECONDS)
(swap! lock-requests dissoc lock-request-id))
(log/warn (trs "Lock request ''{0}'' released, but no record of when it was acquired!"
lock-request-id)))
Expand Down
127 changes: 101 additions & 26 deletions src/java/com/puppetlabs/puppetserver/MetricsPuppetProfiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.apache.commons.lang.StringUtils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -21,17 +22,21 @@ public class MetricsPuppetProfiler implements PuppetProfiler {
private final MetricRegistry registry;
private final Set<String> metric_ids;

private static final Pattern FUNCTION_PATTERN = Pattern.compile(".*\\.functions\\.([\\w\\d_]+)$");
private static final Pattern RESOURCE_PATTERN = Pattern.compile(".*\\.compiler\\.evaluate_resource\\.([\\w\\d_]+\\[([\\w\\d_]+::)*[\\w\\d_]+\\])$");
private static final Pattern CATALOG_PATTERN = Pattern.compile(".*\\.compiler\\.(static_compile_postprocessing|static_compile|compile|find_node)$");
private static final Pattern INLINING_PATTERN = Pattern.compile(".*\\.compiler\\.static_compile_inlining\\.(.*)$");
private static final Pattern PUPPETDB_PATTERN = Pattern.compile(".*\\.puppetdb\\.(resource\\.search|facts\\.encode|command\\.submit\\.replace facts|catalog\\.munge|command\\.submit\\.replace catalog|report\\.convert_to_wire_format_hash|command\\.submit\\.store report|query)$");

private final Map<String, Timer> function_timers;
private final Map<String, Timer> resource_timers;
private final Map<String, Timer> catalog_timers;
private final Map<String, Timer> inlining_timers;
private final Map<String, Timer> puppetdb_timers;

public MetricsPuppetProfiler(String hostname, MetricRegistry registry) {
this.hostname = hostname;
this.registry = registry;
this.metric_ids = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
this.function_timers = new ConcurrentHashMap<String, Timer>();
this.resource_timers = new ConcurrentHashMap<String, Timer>();
this.catalog_timers = new ConcurrentHashMap<String, Timer>();
this.inlining_timers = new ConcurrentHashMap<String, Timer>();
this.puppetdb_timers = new ConcurrentHashMap<String, Timer>();
}

@Override
Expand All @@ -43,9 +48,12 @@ public Object start(String message, String[] metric_id) {
public void finish(Object context, String message, String[] metric_id) {
if (shouldTime(metric_id)) {
Long elapsed = System.currentTimeMillis() - (Long)context;
for (Timer t : getTimers(metric_id)) {
Map<String, Timer> metricsByID = getOrCreateTimersByIDs(metric_id);
for (Timer t : metricsByID.values()) {
t.update(elapsed, TimeUnit.MILLISECONDS);
}

updateMetricsTrackers(metric_id, metricsByID);
}
}

Expand All @@ -54,29 +62,107 @@ public Set<String> getAllMetricIds() {
}

public Map<String, Timer> getFunctionTimers() {
return getTimers(FUNCTION_PATTERN);
return this.function_timers;
}

public Map<String, Timer> getResourceTimers() {
return getTimers(RESOURCE_PATTERN);
return this.resource_timers;
}

public Map<String, Timer> getCatalogTimers() {
return getTimers(CATALOG_PATTERN);
return this.catalog_timers;
}

public Map<String, Timer> getInliningTimers() {
return getTimers(INLINING_PATTERN);
return this.inlining_timers;
}

public Map<String, Timer> getPuppetDBTimers() {
return getTimers(PUPPETDB_PATTERN);
return this.puppetdb_timers;
}

@Override
public void shutdown() {
}

private List<String> sliceOfArrayToList(String[] idSegments, int lengthOfID) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cant see how subList does not work for this?

Seems like for example the getMetricName could be called with the result of a metricId.subList(0,2) as the result of subList I think would be mutable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We start with a String[] which doesn't have the subList() method defined, but we can turn an array into a List with Arrays.asList().

I originally tried Arrays.asList(ids).subList(0,2). But the problem is asList() returns a fixed sized List and subList() simply returns a view of the original fixed size list. So we end up erring when getMetricName attempts to prepend the hostname to the list here: https://github.com/puppetlabs/puppetserver/pull/2880/files#diff-300731d432bcbd4678957c962c2aa20d068698fff7a4441434c3d1ac5e55fdfdR196

I should probably clean up getMetricName() but I didn't want to lead with a larger refactor than was strictly necessary (at least after I had changed getTimers)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, i see, i misread that parameter type.

// Callers expect a mutable List returned, but Arrays.asList() returns a
// fix length array, which is why we have to create a List and then add to it.
List<String> idList = new ArrayList<String>();
idList.addAll(Arrays.asList(Arrays.copyOf(idSegments, lengthOfID)));

return idList;
}

private String safeGet(String[] collection, int i) {
try {
return collection[i];
} catch (IndexOutOfBoundsException _ex) {
return "";
}
}

private void updateMetricsTrackers(String[] metricId, Map<String, Timer> metricsByID) {
String firstElement = safeGet(metricId, 0);
String secondElement = safeGet(metricId, 1);

if ("functions".equals(firstElement)) {
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 2)));
this.function_timers.put(secondElement, metric);

} else if ("compiler".equals(firstElement)) {
String thirdElemet = safeGet(metricId, 2);

if ("evaluate_resource".equals(secondElement)) {
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 3)));
this.resource_timers.put(thirdElemet, metric);

} else if ("static_compile_inlining".equals(secondElement)) {
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 3)));
this.inlining_timers.put(thirdElemet, metric);

} else {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous regex based selection explicitly grabbed these submetrics: static_compile_postprocessing, static_compile, compile, and find_node. This else will grab all metrics that aren't evaluate_resource or static_compile_inlining, don't know if that is a good thing or a bad thing. Thoughts @Sharpie ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean these more specific metrics will no longer be counted in catalog timers? Does that matter?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They will still be there, but we'll report a lot more now. I don't know if that will be helpful or just noise. Here are the ones I can easily find with grep:

init_server_facts
find_facts
iterate_on_generators
evaluate_definitions
evaluate_collections
validate_final
finish_catalog
validate_pre_finish
evaluate_generators
evaluate_node_classes
evaluate_ast_node
evaluate_main
create_settings_scope
set_node_params

Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 2)));
this.catalog_timers.put(secondElement, metric);
}

} else if ("puppetdb".equals(firstElement)) {
if ("query".equals(secondElement)) {
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 2)));
this.puppetdb_timers.put(secondElement, metric);

} else {
String thirdElemet = safeGet(metricId, 2);

if (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its probably overkill, but i wonder if it would be worth comparing metrics from two reference workflows. Matching the regex to this logic is hurting my brain. It may not matter too much though... These are hard to reason about 🫠

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex matching definitely hurt my brain as well! I got it wrong a couple of times. I can run the acceptance test and collect what metrics are reported before and after this change. I've ran it after to make sure they look right (see this comment #2880 (comment)) , but haven't done a full comparison. I'll do that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here are the previous metrics from 7.17.2:

{"service_version":"7.17.2","service_status_version":1,"detail_level":"debug","state":"running","status":{"experimental":{"function-metrics":[{"function":"puppetdb_query","count":1,"mean":369,"aggregate":369},{"function":"lookup","count":2,"mean":45,"aggregate":90}],"resource-metrics":[{"resource":"Class[main]","count":2,"mean":193,"aggregate":386}],"catalog-metrics":[{"metric":"compile","count":2,"mean":886,"aggregate":1772},{"metric":"find_node","count":2,"mean":11,"aggregate":22}],"puppetdb-metrics":[{"metric":"query","count":1,"mean":366,"aggregate":366},{"metric":"resource_search","count":8,"mean":31,"aggregate":248},{"metric":"command_submit_replace_catalog","count":2,"mean":68,"aggregate":136},{"metric":"command_submit_store_report","count":2,"mean":51,"aggregate":102},{"metric":"command_submit_replace_facts","count":2,"mean":43,"aggregate":86},{"metric":"catalog_munge","count":2,"mean":9,"aggregate":18},{"metric":"report_convert_to_wire_format_hash","count":2,"mean":5,"aggregate":10},{"metric":"facts_encode","count":2,"mean":2,"aggregate":4}],"inline-metrics":[]}},"active_alerts":[],"service_name":"puppet-profiler"}

and here are the metrics returned after this pr:

{"service_version":"7.17.3-SNAPSHOT","service_status_version":1,"detail_level":"debug","state":"running","status":{"experimental":{"function-metrics":[{"function":"puppetdb_query","count":1,"mean":411,"aggregate":411},{"function":"lookup","count":2,"mean":46,"aggregate":92}],"resource-metrics":[{"resource":"Node[weakest-byword.delivery.puppetlabs.net]","count":1,"mean":421,"aggregate":421},{"resource":"Class[main]","count":2,"mean":184,"aggregate":368},{"resource":"Node[resource-exporter.test]","count":1,"mean":208,"aggregate":208}],"catalog-metrics":[{"metric":"compile","count":2,"mean":912,"aggregate":1824},{"metric":"evaluate_ast_node","count":2,"mean":321,"aggregate":642},{"metric":"evaluate_main","count":2,"mean":188,"aggregate":376},{"metric":"evaluate_generators","count":2,"mean":115,"aggregate":230},{"metric":"iterate_on_generators","count":3,"mean":75,"aggregate":225},{"metric":"evaluate_collections","count":2,"mean":108,"aggregate":216},{"metric":"create_settings_scope","count":2,"mean":54,"aggregate":108},{"metric":"set_node_params","count":2,"mean":14,"aggregate":28},{"metric":"find_facts","count":2,"mean":7,"aggregate":14},{"metric":"find_node","count":2,"mean":6,"aggregate":12},{"metric":"finish_catalog","count":2,"mean":1,"aggregate":2},{"metric":"init_server_facts","count":1,"mean":0,"aggregate":0},{"metric":"validate_pre_finish","count":2,"mean":0,"aggregate":0},{"metric":"evaluate_node_classes","count":2,"mean":0,"aggregate":0},{"metric":"validate_final","count":2,"mean":0,"aggregate":0},{"metric":"evaluate_definitions","count":3,"mean":0,"aggregate":0}],"puppetdb-metrics":[{"metric":"query","count":1,"mean":409,"aggregate":409},{"metric":"resource_search","count":8,"mean":36,"aggregate":288},{"metric":"facts_find","count":2,"mean":139,"aggregate":278},{"metric":"catalog_save","count":2,"mean":131,"aggregate":262},{"metric":"facts_save","count":2,"mean":91,"aggregate":182},{"metric":"command_submit_replace_catalog","count":2,"mean":67,"aggregate":134},{"metric":"command_submit_replace_facts","count":2,"mean":56,"aggregate":112},{"metric":"report_process","count":2,"mean":50,"aggregate":100},{"metric":"command_submit_store_report","count":2,"mean":39,"aggregate":78},{"metric":"payload_format","count":8,"mean":3,"aggregate":24},{"metric":"facts_encode","count":2,"mean":10,"aggregate":20},{"metric":"catalog_munge","count":2,"mean":10,"aggregate":20},{"metric":"report_convert_to_wire_format_hash","count":2,"mean":4,"aggregate":8}],"inline-metrics":[]}},"active_alerts":[],"service_name":"puppet-profiler"}

These aren't pretty printed, not as an f-u, but because I think putting them pretty printed in a github comment would make the content very hard to compare. I put them into files, piped them through jq, and compared two terminals side by side. I think they look correct.

("resource".equals(secondElement) && "search".equals(thirdElemet)) ||
("payload".equals(secondElement) && "format".equals(thirdElemet)) ||
// Set.of would be preferrable but 7.x still support Java 8, which does not have Set.of
("facts".equals(secondElement) && Arrays.asList("save", "find", "search", "encode").contains(thirdElemet)) ||
("catalog".equals(secondElement) && Arrays.asList("save", "munge").contains(thirdElemet)) ||
("report".equals(secondElement) && Arrays.asList("convert_to_wire_format_hash", "process").contains(thirdElemet))
) {
String key = String.join(".", secondElement, thirdElemet);
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 3)));
this.puppetdb_timers.put(key, metric);

} else if ("command".equals(secondElement) && "submit".equals(thirdElemet)) {
String fourthElement = safeGet(metricId, 3);

if (
"store report".equals(fourthElement) ||
"replace facts".equals(fourthElement) ||
"replace catalog".equals(fourthElement)
) {
String key = String.join(".", secondElement, thirdElemet, fourthElement);
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 4)));
this.puppetdb_timers.put(key, metric);
}
}
}
}
}

private boolean shouldTime(String[] metric_id) {
if (metric_id == null) {
return false;
Expand All @@ -90,8 +176,8 @@ private boolean shouldTime(String[] metric_id) {
return true;
}

private List<Timer> getTimers(String[] metric_id) {
List<Timer> timers = new ArrayList<Timer>();
private Map<String, Timer> getOrCreateTimersByIDs(String[] metric_id) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This name is not used anywhere else right? Its not important for

?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure i'm following the "or" in the name here. It seems like this just creates a new map and returns it unconditionally. Am I missing something?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This name is not used anywhere else right? Its not important for

It's not, the method is private and external callers are calling this method on the MetricRegistry https://www.javadoc.io/doc/io.dropwizard.metrics/metrics-core/4.1.0-rc2/com/codahale/metrics/MetricRegistry.html#getTimers--

I think the two cases of "getTimers()" here where inspired by that method. But, the call to MetricRegister.timer() in this method is actually the place in this class were we create timers (if they don't exist). See https://www.javadoc.io/doc/io.dropwizard.metrics/metrics-core/4.1.0-rc2/com/codahale/metrics/MetricRegistry.html#timer-java.lang.String-

The getOrCreateTimers is in reference to that. A more accurate name would probably be getOrCreateTimersAndReturnThemById , maybe?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thanks for the reference. I think leaving it as is will be fine!

Map<String, Timer> timers = new HashMap<String, Timer>();
// If this is turns out to be a performance hit, we could cache these in a
// map or something.
for (int i = 0; i < metric_id.length; i++) {
Expand All @@ -101,7 +187,7 @@ private List<Timer> getTimers(String[] metric_id) {
}
String metric_name = getMetricName(current_id);
registerMetricName(metric_name);
timers.add(registry.timer(metric_name));
timers.put(metric_name, registry.timer(metric_name));
}
return timers;
}
Expand All @@ -114,15 +200,4 @@ private String getMetricName(List<String> metric_id) {
private void registerMetricName(String metric_name) {
this.metric_ids.add(metric_name);
}

private Map<String, Timer> getTimers(Pattern pattern) {
Map<String, Timer> rv = new HashMap<>();
for (String metric_id : this.metric_ids) {
Matcher matcher = pattern.matcher(metric_id);
if (matcher.matches()) {
rv.put(matcher.group(1), registry.timer(metric_id));
}
}
return rv;
}
}
Loading