blob: 5c3130095a779ae18bed28d71fe68ca058951501 [file] [log] [blame]
#!/usr/bin/python
"""
Poll server-status on cautotest to watch for RPCs taking longer than 10s. Then
we go and ssh around to figure out what the command line of the process that
caused the RPC was so that one can track down what is generating the expensive
RPC load.
"""
try:
from bs4 import BeautifulSoup
except ImportError:
print 'Run `apt-get install python-bs4`'
raise
import time
import subprocess
import multiprocessing
import common
import requests
def check_cautotest():
    """Find workers on cautotest that have been busy for more than 10 seconds.

    Fetches http://cautotest/server-status and walks the rows of the first
    table on the page. A row is reported when its fourth cell is 'W', its
    sixth cell is an integer greater than 10 and its second cell is not '-'.
    NOTE(review): on a stock Apache mod_status page those cells are the M
    (mode), SS (seconds since the request began) and PID columns -- confirm
    that cautotest serves the stock layout.

    Rows that have fewer than six cells, or whose sixth cell is not an
    integer, are skipped so that one odd row cannot abort the whole poll.

    Returns:
        A list of (pid, mode, seconds) tuples, one per matching row. All
        three items are the stripped cell text.
    """
    page = requests.get('http://cautotest/server-status').text
    soup = BeautifulSoup(page)
    pids = []
    for row in soup.table.findAll('tr'):
        cols = [x.text.strip() for x in row.findAll('td')]
        # Rows without enough <td> cells (e.g. header rows, which have none)
        # cannot be indexed below.
        if len(cols) < 6:
            continue
        if cols[3] != 'W' or cols[1] == '-':
            continue
        try:
            seconds = int(cols[5])
        except ValueError:
            # A non-numeric duration cannot be compared; ignore the row.
            continue
        if seconds > 10:
            pids.append((cols[1], cols[3], cols[5]))
    return pids
def pull_cautotest_info(proc_id):
    """Find the remote end of an established connection held by proc_id.

    Runs `sudo lsof -i` on cautotest (via `become chromeos-test@cautotest`),
    keeping only lines that match proc_id and ESTABLISHED. The ninth
    whitespace-separated token of the combined output is taken, and the
    part of it after '->' is split on ':'.
    NOTE(review): that token is presumably lsof's NAME column
    ("local->remote"), making the result [host, port] -- confirm.

    Args:
        proc_id: Value substituted into the grep pattern.

    Returns:
        The list of ':'-separated pieces of the remote end, or None if
        anything at all goes wrong (command failure, unexpected output, ...).
    """
    try:
        command = ('become chromeos-test@cautotest -- '
                   '"sudo lsof -i | grep -e %s | grep -e ESTABLISHED"'
                   % proc_id)
        tokens = subprocess.check_output(command, shell=True).split()
        connection = tokens[8]
        remote_end = connection.split('->')[1]
        return remote_end.split(':')
    except Exception:
        # Best effort: any failure just means "no remote info available".
        return None
def strace_cautotest(proc_id):
    """Capture the start of an strace of proc_id on cautotest.

    Runs `sudo strace -s 500 -p <proc_id>` on cautotest (via
    `become chromeos-test@cautotest`) with stderr merged into stdout and
    piped through `head -n 20`.

    Args:
        proc_id: Value substituted after `-p` in the strace command.

    Returns:
        The command's output, or "" if the command exits non-zero. Other
        errors are not caught here and propagate to the caller.
    """
    command = ('become chromeos-test@cautotest -- '
               '"sudo strace -s 500 -p %s 2>&1 | head -n 20"' % proc_id)
    try:
        return subprocess.check_output(command, shell=True)
    except subprocess.CalledProcessError:
        return ""
def pull_drone_info(host, port):
    """Fetch the command line of the process on host that uses port.

    First runs `sudo lsof -i` on host (via `become chromeos-test@<host>`),
    keeping lines that match ":<port>" and ESTABLISHED, and takes the second
    whitespace-separated token of the output as a process id. Then reads
    /proc/<that id>/cmdline on the same host.
    NOTE(review): the grep pattern is not anchored, so ":80" would also
    match ":8080" -- confirm this is acceptable.

    Args:
        host: Machine to inspect; substituted into the `become` target.
        port: Port substituted into the grep pattern.

    Returns:
        The raw contents of the cmdline file as printed by `cat`, or '' if
        any step fails.
    """
    try:
        find_owner = ('become chromeos-test@%s -- '
                      '"sudo lsof -i | grep -e :%s | grep -e ESTABLISHED"'
                      % (host, port))
        owner_pid = subprocess.check_output(find_owner, shell=True).split()[1]
        read_cmdline = ('become chromeos-test@%s -- '
                        '"cat /proc/%s/cmdline"' % (host, owner_pid))
        return subprocess.check_output(read_cmdline, shell=True)
    except Exception:
        # Best effort: report an empty command line when lookup fails.
        return ''
def pull_all_data(pid, queue):
    """Gather everything known about one slow worker and put it on queue.

    Uses pid[0] to look up the remote end of the worker's connection, then
    (if one was found) the command line of the process at that remote end,
    and finally an strace excerpt of the worker itself.

    Args:
        pid: Indexable whose first item identifies the process; passed
             through unchanged as the first item of the result.
        queue: Object with a put() method that receives the result.

    Puts (pid, remote_info, drone_info, straced) on the queue, where
    drone_info is None when no remote info was found. If anything raises,
    None is put on the queue instead, so one item is produced per call.
    """
    try:
        worker_pid = pid[0]
        remote_info = pull_cautotest_info(worker_pid)
        drone_info = pull_drone_info(*remote_info) if remote_info else None
        result = (pid, remote_info, drone_info, strace_cautotest(worker_pid))
        queue.put(result)
    except Exception:
        queue.put(None)
def print_data(x):
    """Print one report produced by pull_all_data.

    Args:
        x: A (pid, remote_info, drone_info, straced) tuple. pid must hold
           exactly three items, which fill the "stuck in" headline.

    Prints the headline, then remote_info, drone_info and straced on
    separate lines, then a line containing the '\\a' (bell) character.
    """
    pid, remote_info, drone_info, straced = x
    # Single-argument parenthesized prints behave the same as the print
    # statement under Python 2.
    print("*** %s stuck in %s for %s secs" % pid)
    for detail in (remote_info, drone_info, straced):
        print(detail)
    print('\a')
# Poll forever: every pass finds the slow workers, investigates each one in
# its own process, prints whatever came back, then waits 5 seconds.
while True:
    results = multiprocessing.Queue()
    workers = []
    for slow_worker in check_cautotest():
        worker = multiprocessing.Process(target=pull_all_data,
                                         args=(slow_worker, results))
        worker.start()
        workers.append(worker)
    # pull_all_data puts one item per call (None on failure), so read one
    # item per started worker.
    # NOTE(review): get() has no timeout, so a worker that never reports
    # would block this loop indefinitely -- confirm that is acceptable.
    for _ in workers:
        report = results.get()
        if report:
            print_data(report)
    for worker in workers:
        worker.terminate()
    time.sleep(5)