log_system_usage.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. #!/usr/bin/python3
  2. import time
  3. import threading
  4. import subprocess
  5. import re
  6. import sys
  7. import os
  8. import pickle
  9. import gzip
  10. import argparse
  11. PROC_STAT_HEADERS = ('user', 'nice', 'system', 'idle', 'iowait', 'irq', 'softirq', 'steal', 'guest', 'guest_nice')
  12. PROC_PID_STAT_HEADERS = ('pid', 'comm', 'state', 'ppid', 'pgrp', 'session', 'tty_nr', 'tpgid', 'flags', 'minflt',
  13. 'cminflt', 'majflt', 'cmajflt', 'utime', 'stime', 'cutime', 'cstime', 'priority', 'nice',
  14. 'num_threads', 'itrealvalue', 'starttime', 'vsize', 'rss', 'rsslim', 'startcode',
  15. 'endcode', 'startstack', 'kstkesp', 'kstkeip', 'signal', 'blocked', 'sigignore',
  16. 'sigcatch', 'wchan', 'nswap', 'cnswap', 'exit_signal', 'processor', 'rt_priority',
  17. 'policy', 'delayacct_blkio_ticks', 'guest_time', 'cguest_time', 'start_data', 'end_data',
  18. 'start_brk', 'arg_start', 'arg_end', 'env_start', 'env_end', 'exit_code')
  19. pid_stat_regex = re.compile('(\(.*\)|\S+)', flags=re.DOTALL)
  20. def read_proc_stat_file(f):
  21. f.seek(0)
  22. cpu_lines = []
  23. found_cpu_line = False
  24. for line in f:
  25. # only read as much of the file as we need
  26. if len(line) == 0:
  27. continue
  28. #
  29. if line[0:3] == 'cpu':
  30. cpu_lines.append(line)
  31. found_cpu_line = True
  32. elif found_cpu_line:
  33. break
  34. #
  35. #
  36. return cpu_lines
  37. #
  38. def parse_proc_stat_file(cpu_lines):
  39. stats = {'system': None, 'cpus': {}}
  40. for l in cpu_lines:
  41. l_split = l.split()
  42. cpu_index = int(l_split[0][3:]) if len(l_split[0][3:]) != 0 else None
  43. cpu_stats = {x[0]: int(x[1]) for x in zip(PROC_STAT_HEADERS, l_split[1:])}
  44. if cpu_index == None:
  45. stats['system'] = cpu_stats
  46. else:
  47. stats['cpus'][cpu_index] = cpu_stats
  48. #
  49. #
  50. return stats
  51. #
  52. def read_proc_pid_stat_file(f):
  53. f.seek(0)
  54. return f.read()
  55. #
  56. def parse_proc_pid_stat_file(contents):
  57. raw_stats = pid_stat_regex.findall(contents)
  58. proc_stats = {x[0]: x[1] for x in zip(PROC_PID_STAT_HEADERS, raw_stats)}
  59. for k in proc_stats:
  60. if k != 'comm' and k != 'state':
  61. proc_stats[k] = int(proc_stats[k])
  62. #
  63. #
  64. return proc_stats
  65. #
  66. def calculate_cpu_idle_stats(stats):
  67. idle = stats['idle'] + stats['iowait']
  68. non_idle = stats['user'] + stats['nice'] + stats['system'] + stats['irq'] + stats['softirq'] + stats['steal']
  69. return (idle, non_idle)
  70. #
  71. def calculate_core_cpu_usage(initial, current):
  72. """
  73. Calculation adapted from: https://stackoverflow.com/questions/23367857/accurate-calculation-of-cpu-usage-given-in-percentage-in-linux/
  74. """
  75. (initial_idle, initial_non_idle) = calculate_cpu_idle_stats(initial)
  76. initial_total = initial_idle + initial_non_idle
  77. (current_idle, current_non_idle) = calculate_cpu_idle_stats(current)
  78. current_total = current_idle + current_non_idle
  79. clock_ticks = current_non_idle-initial_non_idle
  80. fraction = clock_ticks/(current_total-initial_total)
  81. return (clock_ticks, fraction)
  82. #
  83. def calculate_process_cpu_usage(process_initial, process_current, cpu_initial, cpu_current):
  84. process_initial_non_idle = process_initial['utime'] + process_initial['stime']
  85. process_current_non_idle = process_current['utime'] + process_current['stime']
  86. used_cores = (process_initial['processor'], process_current['processor'])
  87. core_totals = []
  88. for core in used_cores:
  89. (initial_idle, initial_non_idle) = calculate_cpu_idle_stats(cpu_initial[core])
  90. initial_total = initial_idle + initial_non_idle
  91. (current_idle, current_non_idle) = calculate_cpu_idle_stats(cpu_current[core])
  92. current_total = current_idle + current_non_idle
  93. core_totals.append(current_total-initial_total)
  94. #
  95. clock_ticks = process_current_non_idle-process_initial_non_idle
  96. fraction = clock_ticks/(sum(x**2 for x in core_totals)/sum(core_totals))
  97. return (clock_ticks, fraction)
  98. #
  99. def calculate_core_cpu_usage_continuous(stats):
  100. ticks = []
  101. fractions = []
  102. for i in range(len(stats)-1):
  103. (clock_ticks, fraction) = calculate_core_cpu_usage(stats[i], stats[i+1])
  104. ticks.append(clock_ticks)
  105. fractions.append(fraction)
  106. #
  107. return {'ticks': ticks, 'fractions': fractions}
  108. #
  109. def calculate_process_cpu_usage_continuous(process_stats, cpu_stats):
  110. ticks = []
  111. fractions = []
  112. assert all([len(process_stats) == len(cpu_stats[i]) for i in cpu_stats])
  113. for i in range(len(process_stats)-1):
  114. (clock_ticks, fraction) = calculate_process_cpu_usage(process_stats[i], process_stats[i+1], {core: cpu_stats[core][i] for core in cpu_stats}, {core: cpu_stats[core][i+1] for core in cpu_stats})
  115. ticks.append(clock_ticks)
  116. fractions.append(fraction)
  117. #
  118. return {'ticks': ticks, 'fractions': fractions}
  119. #
  120. def log_cpu_stats(path, interval, pids, stop_event):
  121. pids = sorted([int(pid) for pid in pids])
  122. tids = {pid: sorted([int(tid) for tid in os.listdir('/proc/{}/task'.format(pid))]) for pid in pids}
  123. stat_file = open('/proc/stat', 'r')
  124. pid_files = {pid: open('/proc/{}/stat'.format(pid), 'r') for pid in pids}
  125. tid_files = {pid: {tid: open('/proc/{}/task/{}/stat'.format(pid, tid), 'r') for tid in tids[pid]} for pid in pids}
  126. raw_stats = {'timestamps': [],
  127. 'timestamps_finished': [],
  128. 'system': [],
  129. 'process': {x: {'pid': [],
  130. 'tid': {y: [] for y in tid_files[x]}} for x in pid_files}}
  131. # begin collecting data
  132. while not stop_event.is_set():
  133. start_time = time.time()
  134. raw_stats['timestamps'].append(start_time)
  135. t_0 = time.time()
  136. contents = read_proc_stat_file(stat_file)
  137. t_1 = time.time()
  138. raw_stats['system'].append((contents, t_1, t_1-t_0))
  139. for pid in pids:
  140. t_0 = time.time()
  141. contents = read_proc_pid_stat_file(pid_files[pid])
  142. t_1 = time.time()
  143. raw_stats['process'][pid]['pid'].append((contents, t_1, t_1-t_0))
  144. for tid in tids[pid]:
  145. t_0 = time.time()
  146. contents = read_proc_pid_stat_file(tid_files[pid][tid])
  147. t_1 = time.time()
  148. raw_stats['process'][pid]['tid'][tid].append((contents, t_1, t_1-t_0))
  149. #
  150. #
  151. finished_time = time.time()
  152. raw_stats['timestamps_finished'].append(finished_time)
  153. wait_time = max(0, interval-(time.time()-finished_time))
  154. stop_event.wait(wait_time)
  155. #
  156. # begin formatting data
  157. stats = {'timestamps': raw_stats['timestamps'],
  158. 'timestamps_finished': raw_stats['timestamps_finished'],
  159. 'cpu':{'system': [],
  160. 'id': {cpu: [] for cpu in parse_proc_stat_file(raw_stats['system'][0][0])['cpus'].keys()}},
  161. 'process': {pid: {'pid': [],
  162. 'tid': {tid: [] for tid in tids[pid]}} for pid in pids}}
  163. for x in range(len(raw_stats['timestamps'])):
  164. current_stats = parse_proc_stat_file(raw_stats['system'][x][0])
  165. system_stats = current_stats['system']
  166. system_stats['read_time'] = raw_stats['system'][x][1]
  167. system_stats['read_duration'] = raw_stats['system'][x][2]
  168. stats['cpu']['system'].append(system_stats)
  169. for cpu in current_stats['cpus']:
  170. stats['cpu']['id'][cpu].append(current_stats['cpus'][cpu])
  171. #
  172. for pid in pids:
  173. pid_stats = parse_proc_pid_stat_file(raw_stats['process'][pid]['pid'][x][0])
  174. pid_stats['read_time'] = raw_stats['process'][pid]['pid'][x][1]
  175. pid_stats['read_duration'] = raw_stats['process'][pid]['pid'][x][2]
  176. stats['process'][pid]['pid'].append(pid_stats)
  177. for tid in tids[pid]:
  178. tid_stats = parse_proc_pid_stat_file(raw_stats['process'][pid]['tid'][tid][x][0])
  179. tid_stats['read_time'] = raw_stats['process'][pid]['tid'][tid][x][1]
  180. tid_stats['read_duration'] = raw_stats['process'][pid]['tid'][tid][x][2]
  181. stats['process'][pid]['tid'][tid].append(tid_stats)
  182. #
  183. #
  184. #
  185. with gzip.GzipFile(path, 'wb') as f:
  186. pickle.dump(stats, f, protocol=4)
  187. #
  188. #
  189. def load_cpu_stats(path):
  190. with gzip.GzipFile(path, 'rb') as f:
  191. return pickle.load(f)
  192. #
  193. #
  194. if __name__ == '__main__':
  195. stop_event = threading.Event()
  196. parser = argparse.ArgumentParser(description='Log CPU usage data and save as a gzipped pickle file.',
  197. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  198. parser.add_argument('--interval', type=float, default=1, help='interval for data collection')
  199. parser.add_argument('--pids', type=str, help='comma-separated list of processes to log')
  200. parser.add_argument('file_out', metavar='file-out', type=str, help='where to save the data')
  201. args = parser.parse_args()
  202. if args.pids != None:
  203. pids = [int(pid) for pid in args.pids.split(',')]
  204. else:
  205. pids = []
  206. #
  207. t = threading.Thread(target=log_cpu_stats, args=(args.file_out, args.interval, pids, stop_event))
  208. t.start()
  209. try:
  210. while t.is_alive():
  211. t.join(timeout=100)
  212. #
  213. except KeyboardInterrupt:
  214. stop_event.set()
  215. print()
  216. #
  217. t.join()
  218. #