practracker.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. #!/usr/bin/python
  2. """
  3. Best-practices tracker for Tor source code.
  4. Go through the various .c files and collect metrics about them. If the metrics
  5. violate some of our best practices and they are not found in the optional
  6. exceptions file, then log a problem about them.
  7. We currently do metrics about file size, function size and number of includes,
  8. for C source files and headers.
  9. practracker.py should be run with its positional argument pointing to the Tor
  10. top-level source directory like this:
  11. $ python3 ./scripts/maint/practracker/practracker.py .
  12. To regenerate the exceptions file so that it allows all current
  13. problems in the Tor source, use the --regen flag:
  14. $ python3 ./scripts/maint/practracker/practracker.py --regen .
  15. """
  16. from __future__ import print_function
  17. import os, sys
  18. import metrics
  19. import util
  20. import problem
  21. import includes
  22. # The filename of the exceptions file (it should be placed in the practracker directory)
  23. EXCEPTIONS_FNAME = "./exceptions.txt"
  24. # Recommended file size
  25. MAX_FILE_SIZE = 3000 # lines
  26. # Recommended function size
  27. MAX_FUNCTION_SIZE = 100 # lines
  28. # Recommended number of #includes
  29. MAX_INCLUDE_COUNT = 50
  30. # Recommended file size for headers
  31. MAX_H_FILE_SIZE = 500
  32. # Recommended include count for headers
  33. MAX_H_INCLUDE_COUNT = 15
  34. # Recommended number of dependency violations
  35. MAX_DEP_VIOLATIONS = 0
  36. # Map from problem type to functions that adjust for tolerance
  37. TOLERANCE_FNS = {
  38. 'include-count': lambda n: int(n*1.1),
  39. 'function-size': lambda n: int(n*1.1),
  40. 'file-size': lambda n: int(n*1.02),
  41. 'dependency-violation': lambda n: (n+2)
  42. }
  43. #######################################################
  44. # The Tor source code topdir
  45. TOR_TOPDIR = None
  46. #######################################################
  47. if sys.version_info[0] <= 2:
  48. def open_file(fname):
  49. return open(fname, 'r')
  50. else:
  51. def open_file(fname):
  52. return open(fname, 'r', encoding='utf-8')
  53. def consider_file_size(fname, f):
  54. """Consider the size of 'f' and yield an FileSizeItem for it.
  55. """
  56. file_size = metrics.get_file_len(f)
  57. yield problem.FileSizeItem(fname, file_size)
  58. def consider_includes(fname, f):
  59. """Consider the #include count in for 'f' and yield an IncludeCountItem
  60. for it.
  61. """
  62. include_count = metrics.get_include_count(f)
  63. yield problem.IncludeCountItem(fname, include_count)
  64. def consider_function_size(fname, f):
  65. """yield a FunctionSizeItem for every function in f.
  66. """
  67. for name, lines in metrics.get_function_lines(f):
  68. canonical_function_name = "%s:%s()" % (fname, name)
  69. yield problem.FunctionSizeItem(canonical_function_name, lines)
  70. def consider_include_violations(fname, real_fname, f):
  71. n = 0
  72. for item in includes.consider_include_rules(real_fname, f):
  73. n += 1
  74. if n:
  75. yield problem.DependencyViolationItem(fname, n)
  76. #######################################################
  77. def consider_all_metrics(files_list):
  78. """Consider metrics for all files, and yield a sequence of problem.Item
  79. object for those issues."""
  80. for fname in files_list:
  81. with open_file(fname) as f:
  82. for item in consider_metrics_for_file(fname, f):
  83. yield item
  84. def consider_metrics_for_file(fname, f):
  85. """
  86. Yield a sequence of problem.Item objects for all of the metrics in
  87. 'f'.
  88. """
  89. real_fname = fname
  90. # Strip the useless part of the path
  91. if fname.startswith(TOR_TOPDIR):
  92. fname = fname[len(TOR_TOPDIR):]
  93. # Get file length
  94. for item in consider_file_size(fname, f):
  95. yield item
  96. # Consider number of #includes
  97. f.seek(0)
  98. for item in consider_includes(fname, f):
  99. yield item
  100. # Get function length
  101. f.seek(0)
  102. for item in consider_function_size(fname, f):
  103. yield item
  104. # Check for "upward" includes
  105. f.seek(0)
  106. for item in consider_include_violations(fname, real_fname, f):
  107. yield item
  108. HEADER="""\
  109. # Welcome to the exceptions file for Tor's best-practices tracker!
  110. #
  111. # Each line of this file represents a single violation of Tor's best
  112. # practices -- typically, a violation that we had before practracker.py
  113. # first existed.
  114. #
  115. # There are three kinds of problems that we recognize right now:
  116. # function-size -- a function of more than {MAX_FUNCTION_SIZE} lines.
  117. # file-size -- a .c file of more than {MAX_FILE_SIZE} lines, or a .h
  118. # file with more than {MAX_H_FILE_SIZE} lines.
  119. # include-count -- a .c file with more than {MAX_INCLUDE_COUNT} #includes,
  120. or a .h file with more than {MAX_H_INCLUDE_COUNT} #includes.
  121. # dependency-violation -- a file includes a header that it should
  122. # not, according to an advisory .may_include file.
  123. #
  124. # Each line below represents a single exception that practracker should
  125. # _ignore_. Each line has four parts:
  126. # 1. The word "problem".
  127. # 2. The kind of problem.
  128. # 3. The location of the problem: either a filename, or a
  129. # filename:functionname pair.
  130. # 4. The magnitude of the problem to ignore.
  131. #
  132. # So for example, consider this line:
  133. # problem file-size /src/core/or/connection_or.c 3200
  134. #
  135. # It tells practracker to allow the mentioned file to be up to 3200 lines
  136. # long, even though ordinarily it would warn about any file with more than
  137. # {MAX_FILE_SIZE} lines.
  138. #
  139. # You can either edit this file by hand, or regenerate it completely by
  140. # running `make practracker-regen`.
  141. #
  142. # Remember: It is better to fix the problem than to add a new exception!
  143. """.format(**globals())
  144. def main(argv):
  145. import argparse
  146. progname = argv[0]
  147. parser = argparse.ArgumentParser(prog=progname)
  148. parser.add_argument("--regen", action="store_true",
  149. help="Regenerate the exceptions file")
  150. parser.add_argument("--list-overbroad", action="store_true",
  151. help="List over-broad exceptions")
  152. parser.add_argument("--exceptions",
  153. help="Override the location for the exceptions file")
  154. parser.add_argument("--strict", action="store_true",
  155. help="Make all warnings into errors")
  156. parser.add_argument("--terse", action="store_true",
  157. help="Do not emit helpful instructions.")
  158. parser.add_argument("--max-h-file-size", default=MAX_H_FILE_SIZE,
  159. help="Maximum lines per .h file")
  160. parser.add_argument("--max-h-include-count", default=MAX_H_INCLUDE_COUNT,
  161. help="Maximum includes per .h file")
  162. parser.add_argument("--max-file-size", default=MAX_FILE_SIZE,
  163. help="Maximum lines per .c file")
  164. parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT,
  165. help="Maximum includes per .c file")
  166. parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE,
  167. help="Maximum lines per function")
  168. parser.add_argument("--max-dependency-violations", default=MAX_DEP_VIOLATIONS,
  169. help="Maximum number of dependency violations to allow")
  170. parser.add_argument("--include-dir", action="append",
  171. default=["src"],
  172. help="A directory (under topdir) to search for source")
  173. parser.add_argument("topdir", default=".", nargs="?",
  174. help="Top-level directory for the tor source")
  175. args = parser.parse_args(argv[1:])
  176. global TOR_TOPDIR
  177. TOR_TOPDIR = args.topdir
  178. if args.exceptions:
  179. exceptions_file = args.exceptions
  180. else:
  181. exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME)
  182. # 0) Configure our thresholds of "what is a problem actually"
  183. filt = problem.ProblemFilter()
  184. filt.addThreshold(problem.FileSizeItem("*.c", int(args.max_file_size)))
  185. filt.addThreshold(problem.IncludeCountItem("*.c", int(args.max_include_count)))
  186. filt.addThreshold(problem.FileSizeItem("*.h", int(args.max_h_file_size)))
  187. filt.addThreshold(problem.IncludeCountItem("*.h", int(args.max_h_include_count)))
  188. filt.addThreshold(problem.FunctionSizeItem("*.c", int(args.max_function_size)))
  189. filt.addThreshold(problem.DependencyViolationItem("*.c", int(args.max_dependency_violations)))
  190. filt.addThreshold(problem.DependencyViolationItem("*.h", int(args.max_dependency_violations)))
  191. # 1) Get all the .c files we care about
  192. files_list = util.get_tor_c_files(TOR_TOPDIR, args.include_dir)
  193. # 2) Initialize problem vault and load an optional exceptions file so that
  194. # we don't warn about the past
  195. if args.regen:
  196. tmpname = exceptions_file + ".tmp"
  197. tmpfile = open(tmpname, "w")
  198. problem_file = tmpfile
  199. problem_file.write(HEADER)
  200. ProblemVault = problem.ProblemVault()
  201. else:
  202. ProblemVault = problem.ProblemVault(exceptions_file)
  203. problem_file = sys.stdout
  204. # 2.1) Adjust the exceptions so that we warn only about small problems,
  205. # and produce errors on big ones.
  206. if not (args.regen or args.list_overbroad or args.strict):
  207. ProblemVault.set_tolerances(TOLERANCE_FNS)
  208. # 3) Go through all the files and report problems if they are not exceptions
  209. found_new_issues = 0
  210. for item in filt.filter(consider_all_metrics(files_list)):
  211. status = ProblemVault.register_problem(item)
  212. if status == problem.STATUS_ERR:
  213. print(item, file=problem_file)
  214. found_new_issues += 1
  215. elif status == problem.STATUS_WARN:
  216. # warnings always go to stdout.
  217. print("(warning) {}".format(item))
  218. if args.regen:
  219. tmpfile.close()
  220. os.rename(tmpname, exceptions_file)
  221. sys.exit(0)
  222. # If new issues were found, try to give out some advice to the developer on how to resolve it.
  223. if found_new_issues and not args.regen and not args.terse:
  224. new_issues_str = """\
  225. FAILURE: practracker found {} new problem(s) in the code: see warnings above.
  226. Please fix the problems if you can, and update the exceptions file
  227. ({}) if you can't.
  228. See doc/HACKING/HelpfulTools.md for more information on using practracker.\
  229. You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment
  230. variable.
  231. """.format(found_new_issues, exceptions_file)
  232. print(new_issues_str)
  233. if args.list_overbroad:
  234. def k_fn(tup):
  235. return tup[0].key()
  236. for (ex,p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn):
  237. if p is None:
  238. print(ex, "->", 0)
  239. else:
  240. print(ex, "->", p.metric_value)
  241. sys.exit(found_new_issues)
  242. if __name__ == '__main__':
  243. if os.environ.get("TOR_DISABLE_PRACTRACKER"):
  244. sys.exit(0)
  245. main(sys.argv)