Message7469
I recently upgraded from jython 2.5.2 to jython 2.5.3 standalone jars.
After this upgrade, we found that threading is broken in 2.5.3, when a
process tries to join an existing thread.
Logs from 2.5.2:
Oct 1 15:18:27 zre-ldap002 zmconfigd[28096]: Found 3 threads
Oct 1 15:18:27 zre-ldap002 zmconfigd[28096]: Active Thread listener found
Oct 1 15:18:27 zre-ldap002 zmconfigd[28096]: Active Thread Thread found
Oct 1 15:18:27 zre-ldap002 zmconfigd[28096]: Active Thread MainThread found
Oct 1 15:18:27 zre-ldap002 zmconfigd[28096]: Fetching All configs
Logs from 2.5.3:
Oct 1 15:04:06 zre-ldap002 zmconfigd[23938]: Found 3 threads
Oct 1 15:04:06 zre-ldap002 zmconfigd[23938]: Active Thread listener found
Oct 1 15:04:06 zre-ldap002 zmconfigd[23938]: Active Thread Thread-8329
found
Oct 1 15:04:06 zre-ldap002 zmconfigd[23938]: Attempting to join()
Thread-8329
Oct 1 15:04:11 zre-ldap002 zmconfigd[23938]: join() Thread-8329 FAILED
Oct 1 15:04:11 zre-ldap002 zmconfigd[23938]: Hung threads detected (3
total), exiting
This is infinitely repeatable.
Our loop has:
while True:
Log.logMsg (4, "Found %d threads" % threading.activeCount())
for th in threading.enumerate():
Log.logMsg(4, "Active Thread %s found" % th.getName())
if (th.getName() != "listener" and th.getName() != "MainThread" and th.getName() != "Thread" and th.getName != "SIGUSR2 handler"):
Log.logMsg(4, "Attempting to join() %s" % th.getName())
th.join(5);
if (th.isAlive()):
Log.logMsg(1, "join() %s FAILED" % th.getName())
Log.logMsg(1, "Hung threads detected (%d total), exiting" % threading.activeCount());
sys.exit(1)
t1 = time.clock()
try:
# read all the configs
myState.getAllConfigs(myConfig)
except Exception, e:
[Log.logMsg(1,t) for t in traceback.format_tb(sys.exc_info()[2])]
if myState.forced:
Log.logMsg(0, "Key lookup failed.")
Log.logMsg(1, "Sleeping...Key lookup failed (%s)" % (e,))
time.sleep(60)
continue
try:
# read zmconfigd config
myState.getMtaConfig(myConfig.configFile)
# watchdog restarts apps if they are not running
watchdog()
# check for config changes
myState.compareKeys()
Log.logMsg (5, "LOCK myState.lAction requested")
myState.lAction.acquire()
Log.logMsg (5, "LOCK myState.lAction acquired")
myState.compileActions()
myState.requestedconfig = {}
myState.doConfigRewrites()
myState.lAction.notifyAll()
myState.lAction.release()
Log.logMsg (5, "LOCK myState.lAction released")
# executes rewrites/postconf/restarts
if myConfig.restartconfig:
myState.doRestarts()
except Exception, e:
[Log.logMsg(1,t) for t in traceback.format_tb(sys.exc_info()[2])]
if myState.forced and myState.forced < 100:
Log.logMsg(0, "Configuration inconsistency detected (%s)" % (e,))
Log.logMsg(1, "Sleeping...Configuration inconsistency detected (%s)" % (e,));
time.sleep(60)
continue
if myState.forced:
break
# start the listener after we have the lock, or an early request can cause problems
# start the listener after the rewrites, so the start script doesn't return before they're complete
if myState.firstRun and not myState.forced:
requestListener()
Log.logMsg (5, "LOCK myState.lAction released")
myState.firstRun = False
lt = time.clock()-t1
Log.logMsg(4, "Loop completed in %.2f seconds" % (lt,));
Log.logMsg(4, "Sleeping for %d." % (myConfig.interval,));
# Jython won't wake up from time.sleep() when a signal is received and caught.
# Uncaught signals seem to cause the JVM to exit
myState.sleepTimer = myConfig.interval
Log.logMsg (5, "Sleeping for %d" % myState.sleepTimer)
while myState.sleepTimer > 0:
time.sleep(sleepinterval)
myState.sleepTimer -= sleepinterval
Log.logMsg (5, "Waking up")
dt = time.clock()-t0
Log.logMsg(5, "%s completed in %.2f seconds" % (myConfig.progname,dt));
sys.exit(0) |
|
Date |
User |
Action |
Args |
2012-10-02 17:09:23 | mishikal@yahoo.com | set | recipients:
+ mishikal@yahoo.com |
2012-10-02 17:09:22 | mishikal@yahoo.com | set | messageid: <1349197762.96.0.0476433904521.issue1974@psf.upfronthosting.co.za> |
2012-10-02 17:09:22 | mishikal@yahoo.com | link | issue1974 messages |
2012-10-02 17:09:21 | mishikal@yahoo.com | create | |
|