Skip to content

Handle OutOfMemoryError thrown when TransportContext spawns a thread #4082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -136,29 +136,51 @@ private void handleShutdownEvent() {
endRequest();
}

private long lastExplicitGcTimestampInMilliseconds;
private void handleIntakeEvent(ReportingEvent event, long sequence, boolean endOfBatch) {
processorEventHandler.onEvent(event, sequence, endOfBatch);
try {
inflightEvents.increment(event.getType());
if (connection == null) {
connection = startRequest(INTAKE_V2_URL);
}
if (connection != null) {
writeEvent(event);
} else {
if (logger.isDebugEnabled()) {
logger.debug("Failed to get APM server connection, dropping event: {}", event);
processorEventHandler.onEvent(event, sequence, endOfBatch);
try {
inflightEvents.increment(event.getType());
if (connection == null) {
connection = startRequest(INTAKE_V2_URL);
}
dropped++;
if (reporter != null) {
inflightEvents.reset(); //we never actually created a request when connection is null
reporter.getReporterMonitor().eventDroppedAfterDequeue(event.getType());
if (connection != null) {
writeEvent(event);
} else {
if (logger.isDebugEnabled()) {
logger.debug("Failed to get APM server connection, dropping event: {}", event);
}
dropped++;
if (reporter != null) {
inflightEvents.reset(); //we never actually created a request when connection is null
reporter.getReporterMonitor().eventDroppedAfterDequeue(event.getType());
}
}
} catch (Exception e) {
handleConnectionError(event, e);
}
} catch (OutOfMemoryError e) {
//sun.security.ssl.TransportContext.finishHandshake spawns a thread on each
//connection attempt; the thread is only used to notify listeners and then dies.
//The hypothesis is that if the GC frequency is very low, the spawned dead threads
//could accumulate and cause native memory exhaustion. Here we just catch the error,
//trigger a GC (which should clean up the dead threads), and drop the event.
long nowInMilliseconds = System.currentTimeMillis();
//don't call the GC more than once in any 10 second window
if ( (nowInMilliseconds - lastExplicitGcTimestampInMilliseconds) > 10_000L) {
lastExplicitGcTimestampInMilliseconds = nowInMilliseconds;
System.gc();
}
if (logger.isDebugEnabled()) {
logger.error("OutOfMemoryError trying to get APM server connection, called System.gc() and dropping event: {}", event);
}
dropped++;
if (reporter != null) {
inflightEvents.reset(); //there was no request when the connection failed to create
reporter.getReporterMonitor().eventDroppedAfterDequeue(event.getType());
}
} catch (Exception e) {
handleConnectionError(event, e);
}

if (shouldEndRequest()) {
endRequest();
}
Expand Down
Loading