From 6e8ffaed6a2271ba78e478c8be4c080168d07c1c Mon Sep 17 00:00:00 2001 From: Jesse Glick Date: Tue, 12 Dec 2017 18:21:45 -0500 Subject: [PATCH 1/2] Make Timeout repeatedly interrupt a stuck thread, and note that this is happening. --- pom.xml | 5 +- .../workflow/support/concurrent/Timeout.java | 64 ++++++++++++++----- .../support/concurrent/TimeoutTest.java | 36 +++++++++++ 3 files changed, 87 insertions(+), 18 deletions(-) diff --git a/pom.xml b/pom.xml index 639d0d7f..0ec54fcd 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ org.jenkins-ci.plugins plugin - 2.33 + 3.2 org.jenkins-ci.plugins.workflow @@ -36,7 +36,7 @@ 2.17-SNAPSHOT hpi Pipeline: Supporting APIs - https://wiki.jenkins-ci.org/display/JENKINS/Pipeline+Supporting+APIs+Plugin + https://wiki.jenkins.io/display/JENKINS/Pipeline+Supporting+APIs+Plugin MIT License @@ -63,6 +63,7 @@ 2.60.2 + 8 false 3.0.5 2.4 diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/concurrent/Timeout.java b/src/main/java/org/jenkinsci/plugins/workflow/support/concurrent/Timeout.java index 1dc41c48..3d774337 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/concurrent/Timeout.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/concurrent/Timeout.java @@ -25,8 +25,8 @@ package org.jenkinsci.plugins.workflow.support.concurrent; import hudson.FilePath; +import hudson.Util; import hudson.remoting.VirtualChannel; -import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; @@ -41,28 +41,60 @@ public class Timeout implements AutoCloseable { private static final Logger LOGGER = Logger.getLogger(Timeout.class.getName()); - private final ScheduledFuture task; + private final Thread thread; + private volatile boolean completed; + private long endTime; + /* + private final String originalName; + */ - private Timeout(ScheduledFuture task) { - this.task = task; + private Timeout(long time, TimeUnit unit) { + thread = Thread.currentThread(); + LOGGER.log(Level.FINER, "Might interrupt {0} after {1} {2}", new Object[] {thread.getName(), time, unit}); + /* see below: + originalName = thread.getName(); + thread.setName(String.format("%s (Timeout@%h: %s)", originalName, this, Util.getTimeSpanString(unit.toMillis(time)))); + */ + ping(time, unit); } @Override public void close() { - task.cancel(false); + completed = true; + /* + thread.setName(originalName); + */ + LOGGER.log(Level.FINER, "completed {0}", thread.getName()); } - public static Timeout limit(final long time, final TimeUnit unit) { - final Thread thread = Thread.currentThread(); - return new Timeout(Timer.get().schedule(new Runnable() { - @Override public void run() { - if (LOGGER.isLoggable(Level.FINE)) { - Throwable t = new Throwable(); - t.setStackTrace(thread.getStackTrace()); - LOGGER.log(Level.FINE, "Interrupting " + thread + " after " + time + " " + unit, t); - } - thread.interrupt(); + private void ping(final long time, final TimeUnit unit) { + Timer.get().schedule(() -> { + if (completed) { + LOGGER.log(Level.FINER, "{0} already finished, no need to interrupt", thread.getName()); + return; + } + if (LOGGER.isLoggable(Level.FINE)) { + Throwable t = new Throwable(); + t.setStackTrace(thread.getStackTrace()); + LOGGER.log(Level.FINE, "Interrupting " + thread.getName() + " after " + time + " " + unit, t); + } + thread.interrupt(); + if (endTime == 0) { + // First interruption. + endTime = System.nanoTime(); + } else { + // Not dead yet? + String unresponsiveness = Util.getTimeSpanString((System.nanoTime() - endTime) / 1_000_000); + LOGGER.log(Level.INFO, "{0} unresponsive for {1}", new Object[] {thread.getName(), unresponsiveness}); + /* TODO does not work; thread.getName() does not seem to return the current value when called from another thread, even w/ synchronized access, and running with -Xint + thread.setName(thread.getName().replaceFirst(String.format("(Timeout@%h: )[^)]+", this), "$1unresponsive for " + unresponsiveness)); + */ } - }, time, unit)); + ping(5, TimeUnit.SECONDS); + }, time, unit); + } + + public static Timeout limit(final long time, final TimeUnit unit) { + return new Timeout(time, unit); } // TODO JENKINS-32986 offer a variant that will escalate to Thread.stop diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/concurrent/TimeoutTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/concurrent/TimeoutTest.java index d360df45..2745d2ae 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/concurrent/TimeoutTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/concurrent/TimeoutTest.java @@ -25,10 +25,15 @@ package org.jenkinsci.plugins.workflow.support.concurrent; import java.util.concurrent.TimeUnit; +import java.util.logging.Level; import org.junit.Test; import static org.junit.Assert.*; +import org.junit.Rule; +import org.jvnet.hudson.test.LoggerRule; public class TimeoutTest { + + @Rule public LoggerRule logging = new LoggerRule().record(Timeout.class, Level.FINER); @Test public void passed() throws Exception { try (Timeout timeout = Timeout.limit(5, TimeUnit.SECONDS)) { @@ -47,4 +52,35 @@ public class TimeoutTest { Thread.sleep(1_000); } + @Test public void hung() throws Exception { + /* see disabled code in Timeout: + final AtomicBoolean stop = new AtomicBoolean(); + Thread t = Thread.currentThread(); + Timer.get().submit(() -> { + while (!stop.get()) { + System.err.println(t.getName()); + try { + Thread.sleep(1_000); + } catch (InterruptedException x) { + x.printStackTrace(); + } + } + }); + */ + try (Timeout timeout = Timeout.limit(1, TimeUnit.SECONDS)) { + for (int i = 0; i < 5; i++) { + try /* (WithThreadName naming = new WithThreadName(" cycle #" + i)) */ { + Thread.sleep(10_000); + fail("should have timed out"); + } catch (InterruptedException x) { + // OK + } + } + } + Thread.sleep(6_000); + /* + stop.set(true); + */ + } + } From 0c9d73ddc1a56ad8cf0d870b738e7b17b8cc5a02 Mon Sep 17 00:00:00 2001 From: Jesse Glick Date: Thu, 14 Dec 2017 16:12:19 -0500 Subject: [PATCH 2/2] Switching a different executor service for delivering interruptions, as a support bundle showed that all Timer threads were occupied. --- .../workflow/support/concurrent/Timeout.java | 9 ++++++-- .../support/concurrent/TimeoutTest.java | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/concurrent/Timeout.java b/src/main/java/org/jenkinsci/plugins/workflow/support/concurrent/Timeout.java index 3d774337..e99271bc 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/concurrent/Timeout.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/concurrent/Timeout.java @@ -27,10 +27,13 @@ import hudson.FilePath; import hudson.Util; import hudson.remoting.VirtualChannel; +import hudson.util.DaemonThreadFactory; +import hudson.util.NamingThreadFactory; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; -import jenkins.util.Timer; /** * Allows operations to be limited in execution time. @@ -41,6 +44,8 @@ public class Timeout implements AutoCloseable { private static final Logger LOGGER = Logger.getLogger(Timeout.class.getName()); + private static final ScheduledExecutorService interruptions = Executors.newSingleThreadScheduledExecutor(new NamingThreadFactory(new DaemonThreadFactory(), "Timeout.interruptions")); + private final Thread thread; private volatile boolean completed; private long endTime; @@ -67,7 +72,7 @@ private Timeout(long time, TimeUnit unit) { } private void ping(final long time, final TimeUnit unit) { - Timer.get().schedule(() -> { + interruptions.schedule(() -> { if (completed) { LOGGER.log(Level.FINER, "{0} already finished, no need to interrupt", thread.getName()); return; diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/concurrent/TimeoutTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/concurrent/TimeoutTest.java index 2745d2ae..4ddeea19 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/concurrent/TimeoutTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/concurrent/TimeoutTest.java @@ -24,8 +24,13 @@ package org.jenkinsci.plugins.workflow.support.concurrent; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.logging.Level; +import java.util.stream.IntStream; +import jenkins.util.Timer; import org.junit.Test; import static org.junit.Assert.*; import org.junit.Rule; @@ -83,4 +88,22 @@ public class TimeoutTest { */ } + @Test public void starvation() throws Exception { + Map> hangers = new TreeMap<>(); + IntStream.range(0, 15).forEachOrdered(i -> hangers.put(i, Timer.get().submit(() -> { + try (Timeout timeout = Timeout.limit(5, TimeUnit.SECONDS)) { + System.err.println("starting #" + i); + Thread.sleep(Long.MAX_VALUE); + fail("should have timed out"); + } catch (InterruptedException x) { + System.err.println("interrupted #" + i); + } + }))); + for (Map.Entry> hanger : hangers.entrySet()) { + System.err.println("joining #" + hanger.getKey()); + hanger.getValue().get(30, TimeUnit.SECONDS); + System.err.println("joined #" + hanger.getKey()); + } + } + }