log_system_usage.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. #!/usr/bin/python3
  2. #
  3. import time
  4. import threading
  5. import subprocess
  6. import re
  7. import sys
  8. import os
  9. import pickle
  10. import gzip
  11. #
  12. PROC_STAT_HEADERS = ('user', 'nice', 'system', 'idle', 'iowait', 'irq', 'softirq', 'steal', 'guest', 'guest_nice')
  13. PROC_PID_STAT_HEADERS = ('pid', 'comm', 'state', 'ppid', 'pgrp', 'session', 'tty_nr', 'tpgid', 'flags', 'minflt',
  14. 'cminflt', 'majflt', 'cmajflt', 'utime', 'stime', 'cutime', 'cstime', 'priority', 'nice',
  15. 'num_threads', 'itrealvalue', 'starttime', 'vsize', 'rss', 'rsslim', 'startcode',
  16. 'endcode', 'startstack', 'kstkesp', 'kstkeip', 'signal', 'blocked', 'sigignore',
  17. 'sigcatch', 'wchan', 'nswap', 'cnswap', 'exit_signal', 'processor', 'rt_priority',
  18. 'policy', 'delayacct_blkio_ticks', 'guest_time', 'cguest_time', 'start_data', 'end_data',
  19. 'start_brk', 'arg_start', 'arg_end', 'env_start', 'env_end', 'exit_code')
  20. #
  21. def get_cpu_stats(path='/proc/stat'):
  22. """
  23. Get CPU statistics from /proc/stat. Output is returned for the system and each CPU.
  24. Ex:
  25. {'system': {'user': 8696397, 'nice': 22431, ...},
  26. 'cpus': {
  27. 0: {'user': 4199206, 'nice': 11615, ...},
  28. 1: {'user': 4199308, 'nice': 10642, ...}
  29. }
  30. }
  31. """
  32. #
  33. with open(path, 'r') as f:
  34. lines = f.readlines()
  35. #
  36. cpu_lines = [l for l in lines if l.startswith('cpu')]
  37. stats = {'system':None, 'cpus':{}}
  38. #
  39. for l in cpu_lines:
  40. l_split = l.split()
  41. cpu_index = l_split[0][3:]
  42. cpu_stats = {x[0]: int(x[1]) for x in zip(PROC_STAT_HEADERS, l_split[1:])}
  43. #
  44. if cpu_index == '':
  45. stats['system'] = cpu_stats
  46. else:
  47. stats['cpus'][int(cpu_index)] = cpu_stats
  48. #
  49. #
  50. return stats
  51. #
  52. def parse_stat_file(path):
  53. with open(path, 'r') as f:
  54. contents = f.read()
  55. #
  56. raw_stats = re.findall("(\(.*\)|\S+)", contents, flags=re.DOTALL)
  57. proc_stats = {x[0]: x[1] for x in zip(PROC_PID_STAT_HEADERS, raw_stats)}
  58. for k in proc_stats:
  59. if k != 'comm' and k != 'state':
  60. proc_stats[k] = int(proc_stats[k])
  61. #
  62. #
  63. return proc_stats
  64. #
  65. def get_proc_stats(pid):
  66. pid = int(pid)
  67. path = os.path.join('/proc', str(pid), 'stat')
  68. #
  69. return parse_stat_file(path)
  70. #
  71. def get_thread_stats(tid):
  72. tid = int(tid)
  73. path = os.path.join('/proc', str(tid), 'task', str(tid), 'stat')
  74. #
  75. return parse_stat_file(path)
  76. #
  77. def calculate_cpu_stats(stats):
  78. idle = stats['idle'] + stats['iowait']
  79. non_idle = stats['user'] + stats['nice'] + stats['system'] + stats['irq'] + stats['softirq'] + stats['steal']
  80. #
  81. return (idle, non_idle)
  82. #
  83. def calculate_cpu_usage(initial, current):
  84. """
  85. Calculation adapted from: https://stackoverflow.com/questions/23367857/accurate-calculation-of-cpu-usage-given-in-percentage-in-linux/
  86. """
  87. #
  88. (initial_idle, initial_non_idle) = calculate_cpu_stats(initial)
  89. initial_total = initial_idle + initial_non_idle
  90. #
  91. (current_idle, current_non_idle) = calculate_cpu_stats(current)
  92. current_total = current_idle + current_non_idle
  93. #
  94. return (current_non_idle-initial_non_idle)/(current_total-initial_total)
  95. #
  96. def calculate_process_cpu_usage(process_initial, process_current, cpu_initial, cpu_current):
  97. (initial_idle, initial_non_idle) = calculate_cpu_stats(cpu_initial)
  98. initial_total = initial_idle + initial_non_idle
  99. #
  100. (current_idle, current_non_idle) = calculate_cpu_stats(cpu_current)
  101. current_total = current_idle + current_non_idle
  102. #
  103. process_initial_non_idle = process_initial['utime'] + process_initial['stime']
  104. process_current_non_idle = process_current['utime'] + process_current['stime']
  105. #
  106. return (process_current_non_idle-process_initial_non_idle)/(current_total-initial_total)
  107. #
  108. def calculate_cpu_usage_continuous(stats):
  109. cpu_usages = []
  110. for i in range(len(stats)-1):
  111. cpu_usages.append(calculate_cpu_usage(stats[i], stats[i+1]))
  112. #
  113. return cpu_usages
  114. #
  115. def calculate_process_cpu_usage_continuous(process_stats, cpu_stats):
  116. process_usages = []
  117. assert all([len(process_stats) == len(cpu_stats[i]) for i in cpu_stats])
  118. for i in range(len(process_stats)-1):
  119. using_core_0 = process_stats[i]['processor']
  120. using_core_1 = process_stats[i+1]['processor']
  121. usage_0 = calculate_process_cpu_usage(process_stats[i], process_stats[i+1], cpu_stats[using_core_0][i], cpu_stats[using_core_0][i+1])
  122. usage_1 = calculate_process_cpu_usage(process_stats[i], process_stats[i+1], cpu_stats[using_core_1][i], cpu_stats[using_core_1][i+1])
  123. process_usages.append((usage_0+usage_1)/2)
  124. #
  125. return process_usages
  126. #
  127. #def get_running_processes():
  128. # lines = subprocess.check_output(['ps', '-a', '-x', '-o', 'pid,state,args', '--no-headers']).decode('utf-8').split('\n')
  129. # lines = [line.strip() for line in lines]
  130. # lines = [line.split(' ', 2) for line in lines if len(line) != 0]
  131. # #
  132. # data = []
  133. # for line in lines:
  134. # data.append({'pid':int(line[0]), 'state':line[1], 'args':line[2]})
  135. # #
  136. # return data
  137. #
  138. def log_cpu_stats(path, interval, pids, stop_event):
  139. """
  140. Log the cpu stats to a gz compressed JSON file. Storing JSON
  141. seems to only use about 10% more disk space than storing
  142. bytes directly (4 bytes per value), so JSON is used for
  143. simplicity.
  144. path: file to save to
  145. interval: how many seconds to wait before getting more data
  146. stop_event: a threading.Event which stops the function
  147. """
  148. #
  149. pids = [int(pid) for pid in pids]
  150. threads = {pid: [int(tid) for tid in os.listdir('/proc/{}/task'.format(pid))] for pid in pids}
  151. stats = {'timestamps':[],
  152. 'cpu':{'system':[],
  153. 'id':{x: [] for x in get_cpu_stats()['cpus'].keys()}},
  154. 'process':{x: {'pid': [],
  155. 'tid': {y: [] for y in threads[x]}} for x in pids}}
  156. #
  157. while not stop_event.is_set():
  158. current_time = time.time()
  159. stats['timestamps'].append(current_time)
  160. #
  161. current_stats = get_cpu_stats()
  162. stats['cpu']['system'].append(current_stats['system'])
  163. for cpu in current_stats['cpus']:
  164. stats['cpu']['id'][cpu].append(current_stats['cpus'][cpu])
  165. #
  166. for pid in pids:
  167. stats['process'][pid]['pid'].append(get_proc_stats(pid))
  168. for tid in threads[pid]:
  169. stats['process'][pid]['tid'][tid].append(get_thread_stats(tid))
  170. #
  171. #
  172. wait_time = max(0, interval-(time.time()-current_time))
  173. stop_event.wait(wait_time)
  174. #
  175. with gzip.GzipFile(path, 'wb') as f:
  176. pickle.dump(stats, f, protocol=4)
  177. #
  178. #
  179. def load_cpu_stats(path):
  180. with gzip.GzipFile(path, 'rb') as f:
  181. return pickle.load(f)
  182. #
  183. #
  184. '''
  185. def log_cpu_stats(path, interval, stop_event):
  186. path: file to save to
  187. interval: how many seconds to wait before getting more data
  188. stop_event: a threading.Event which stops the function
  189. #
  190. with gzip.GzipFile(path+'.2.gz', 'w') as f:
  191. f.write(' '.join(PROC_STAT_HEADERS).encode('utf-8'))
  192. f.write('\n\n'.encode('utf-8'))
  193. #
  194. while not stop_event.is_set():
  195. f.write(str(time.time()).encode('utf-8'))
  196. f.write('\n'.encode('utf-8'))
  197. stats = get_cpu_stats()
  198. f.write('cpu '.encode('utf-8'))
  199. #f.write(' '.join([str(stats['system'][x]) for x in PROC_STAT_HEADERS]).encode('utf-8'))
  200. f.write(b''.join([stats['system'][x].to_bytes(4, byteorder='big') for x in PROC_STAT_HEADERS]))
  201. f.write('\n'.encode('utf-8'))
  202. for cpu in stats['cpus']:
  203. f.write('cpu{} '.format(cpu).encode('utf-8'))
  204. #f.write(' '.join([str(stats['cpus'][cpu][x]) for x in PROC_STAT_HEADERS]).encode('utf-8'))
  205. f.write(b''.join([stats['cpus'][cpu][x].to_bytes(4, byteorder='big') for x in PROC_STAT_HEADERS]))
  206. f.write('\n'.encode('utf-8'))
  207. #
  208. f.write('\n'.encode('utf-8'))
  209. time.sleep(interval)
  210. #
  211. #
  212. #
  213. '''
  214. if __name__ == '__main__':
  215. stop_event = threading.Event()
  216. #
  217. assert len(sys.argv) >= 3
  218. interval = float(sys.argv[1])
  219. file_name = sys.argv[2]
  220. if len(sys.argv) > 3:
  221. pids = sys.argv[3].split(',')
  222. else:
  223. pids = []
  224. #
  225. t = threading.Thread(target=log_cpu_stats, args=(file_name, interval, pids, stop_event))
  226. t.start()
  227. #
  228. try:
  229. while True:
  230. time.sleep(100)
  231. #
  232. except KeyboardInterrupt:
  233. stop_event.set()
  234. print()
  235. #
  236. t.join()
  237. #