66import static org .dcache .pinmanager .model .Pin .State .FAILED_TO_UNPIN ;
77import static org .dcache .pinmanager .model .Pin .State .PINNED ;
88import static org .dcache .pinmanager .model .Pin .State .PINNING ;
9+ import static org .dcache .pinmanager .model .Pin .State .READY_TO_PIN ;
910import static org .dcache .pinmanager .model .Pin .State .READY_TO_UNPIN ;
1011import static org .dcache .pinmanager .model .Pin .State .UNPINNING ;
1112
@@ -35,6 +36,7 @@ public class PinManager implements CellMessageReceiver, LeaderLatchListener, Cel
3536
3637 private static final Logger LOGGER = LoggerFactory .getLogger (PinManager .class );
3738 private static final long INITIAL_EXPIRATION_DELAY = SECONDS .toMillis (15 );
39+ private static final long INITIAL_PIN_DELAY = SECONDS .toMillis (30 );
3840 private static final long INITIAL_UNPIN_DELAY = SECONDS .toMillis (30 );
3941
4042 private ScheduledExecutorService executor ;
@@ -44,7 +46,8 @@ public class PinManager implements CellMessageReceiver, LeaderLatchListener, Cel
4446
4547 private long expirationPeriod ;
4648 private TimeUnit expirationPeriodUnit ;
47- // Period in which to reset all pins that failed to be unpinned from state FAILED_TO_UNPIN to READY_TO_UNPIN
49+
50+ /* Period in which to reset all pins that failed to be unpinned from state FAILED_TO_UNPIN to READY_TO_UNPIN */
4851 private Duration resetFailedUnpinsPeriod ;
4952 private int maxUnpinsPerRun = -1 ;
5053
@@ -94,11 +97,32 @@ public PnfsDeleteEntryNotificationMessage messageArrived(
9497 return message ;
9598 }
9699
100+ /**
101+ * Resets all pins in state PINNING to READY_TO_PIN and clears their pool, because after a PinManager HA master
102+ * change, these are abandoned and need to be retried by the new HA master. Database and runtime failures are logged, not rethrown.
103+ */
104+ private void resetTransitivePinningStates () {
105+ try {
106+ // TODO: cancel these pins in PoolManager!
107+ dao .update (dao .where ()
108+ .state (PINNING ),
109+ dao .set ()
110+ .state (READY_TO_PIN )
111+ .pool (null )); // clear the recorded pool together with the state change
112+ } catch (JDOException | DataAccessException e ) {
113+ LOGGER .error ("Database failure while trying to reset failed PINNING: {}" ,
114+ e .getMessage ()); // only the message is logged for database failures
115+ } catch (RuntimeException e ) {
116+ LOGGER .error ("Unexpected failure while resetting PINNING pins" , e ); // the exception itself is passed, so the stack trace is logged
117+ }
118+ }
119+
97120 /**
98121 * Resets all pins in state UNPINNING and FAILED_TO_UNPIN to READY_TO_UNPIN.
99122 */
100- private void markAllExpiredPinsReadyToUnpin () {
123+ private void resetTransitiveUnpinningStates () {
101124 dao .update (dao .where ()
125+ .stateIsNot (READY_TO_PIN )
102126 .stateIsNot (PINNING )
103127 .stateIsNot (PINNED )
104128 .stateIsNot (READY_TO_UNPIN ),
@@ -107,18 +131,19 @@ private void markAllExpiredPinsReadyToUnpin() {
107131 }
108132
109133 /**
110- * This task transitions all pins that have exceeded their lifetime and are in state PINNING or
111- * PINNED to state READY_TO_UNPIN. It removes the pool, which expires pins on its own and does
112- * not need to be contacted for regular expiries. As PoolManager is aware of the timeout for
113- * pins in state PINNING, it should also delete the request on its own if it is still ongoing.
134+ * This task transitions all pins that have exceeded their lifetime and are in state
135+ * READY_TO_PIN, PINNING or PINNED to state READY_TO_UNPIN. It removes the pool field, as the
136+ * pool will remove these sticky bits on its own and does not need to be contacted for regular
137+ * expiries. As PoolManager is aware of the timeout for PINNING requests, PoolManager should
138+ * delete these ongoing requests on its own as well.
114139 */
115140 private class ExpirationTask implements Runnable {
116141
117- private AtomicInteger count = new AtomicInteger ();
142+ private final AtomicInteger count = new AtomicInteger ();
118143
119144 @ Override
120145 public void run () {
121- NDC .push ("BackgroundExpiration -" + count .incrementAndGet ());
146+ NDC .push ("PinExpiration -" + count .incrementAndGet ());
122147 try {
123148 dao .update (dao .where ()
124149 .expirationTimeBefore (new Date ())
@@ -145,11 +170,11 @@ public void run() {
145170 */
146171 private class ResetFailedUnpinsTask implements Runnable {
147172
148- private AtomicInteger count = new AtomicInteger ();
173+ private final AtomicInteger count = new AtomicInteger ();
149174
150175 @ Override
151176 public void run () {
152- NDC .push ("BackgroundResetFailedUnpins -" + count .incrementAndGet ());
177+ NDC .push ("UnpinExpiration -" + count .incrementAndGet ());
153178 try {
154179 dao .update (dao .where ()
155180 .state (FAILED_TO_UNPIN ),
@@ -168,30 +193,40 @@ public void run() {
168193 }
169194 }
170195
196+ // TODO: decide whether to create PinProcessor here or inject it; it needs dao, which must be initialized first
197+ private PinProcessor pinTask ;
171198 private UnpinProcessor unpinTask ;
172199 private final ExpirationTask expirationTask = new ExpirationTask ();
173200 private final ResetFailedUnpinsTask resetFailedUnpinsTask = new ResetFailedUnpinsTask ();
174201
202+ private ScheduledFuture <?> pinFuture ;
175203 private ScheduledFuture <?> unpinFuture ;
176204 private ScheduledFuture <?> expirationFuture ;
177205 private ScheduledFuture <?> resetFailedUnpinsFuture ;
178206
179207 public void init () {
180208 // Needs to be assigned after dao has been initialized, because both processors receive dao in their constructors
209+ pinTask = new PinProcessor (dao , poolStub , poolMonitor , maxUnpinsPerRun ); // NOTE(review): the pin processor is handed maxUnpinsPerRun (the unpin setting) — confirm this is intended
181210 unpinTask = new UnpinProcessor (dao , poolStub , poolMonitor , maxUnpinsPerRun );
182211 }
183212
184213 @ Override
185214 public void isLeader () {
186215 LOGGER .info ("Resetting existing intermediate pin states." );
187- markAllExpiredPinsReadyToUnpin ();
216+ resetTransitivePinningStates ();
217+ resetTransitiveUnpinningStates ();
188218
189219 LOGGER .info ("Scheduling Expiration and Unpin tasks." );
190220 expirationFuture = executor .scheduleWithFixedDelay (
191221 new FireAndForgetTask (expirationTask ),
192222 INITIAL_EXPIRATION_DELAY ,
193223 expirationPeriodUnit .toMillis (expirationPeriod ),
194224 MILLISECONDS );
225+ pinFuture = executor .scheduleWithFixedDelay (
226+ new FireAndForgetTask (pinTask ),
227+ INITIAL_PIN_DELAY ,
228+ expirationPeriodUnit .toMillis (expirationPeriod ),
229+ MILLISECONDS );
195230 unpinFuture = executor .scheduleWithFixedDelay (
196231 new FireAndForgetTask (unpinTask ),
197232 INITIAL_UNPIN_DELAY ,
@@ -208,16 +243,18 @@ public void isLeader() {
208243 public void notLeader () { // NOTE(review): expirationFuture, pinFuture and unpinFuture are only assigned in isLeader(); they are null if this runs first — confirm the call order
209244 LOGGER .info ("Cancelling Expiration, ResetFailedUnpins and Unpin tasks." );
210245 expirationFuture .cancel (false ); // false: a run already in progress is allowed to finish
246+ pinFuture .cancel (true ); // true: a run in progress may be interrupted
211247 unpinFuture .cancel (true ); // true: a run in progress may be interrupted
212248 resetFailedUnpinsFuture .cancel (true ); // true: a run in progress may be interrupted
213249 }
214250
215251 @ Override
216252 public void getInfo (PrintWriter pw ) { // writes the configured periods and the per-run unpin limit to pw
217- pw .printf ("Expiration and unpin period: %s %s\n " , expirationPeriod ,
253+ pw .printf ("Period for expiration and unpinning: %s %s\n " , expirationPeriod ,
218254 expirationPeriodUnit );
219- pw .printf ("Reset pins that failed to unpin period: %s\n " ,
255+ pw .printf ("Period for pinning: %s %s\n " , expirationPeriod , expirationPeriodUnit ); // the pin task is scheduled with the same period as the expiration task
256+ pw .printf ("Max. unpin operations per run: %s\n " , maxUnpinsPerRun );
257+ pw .printf ("Period for resetting pins that failed to unpin: %s\n " ,
220258 TimeUtils .describe (resetFailedUnpinsPeriod ).orElse ("-" )); // prints "-" when describe() yields no value
221- pw .printf ("Max unpin operations per run: %s\n " , maxUnpinsPerRun );
222259 }
223260}
0 commit comments