#!/usr/bin/python
# Copyright 2018 The Tor Project, Inc. See LICENSE file for licensing info.

"""This script looks through all the directories for files matching *.c or
*.h, and checks their #include directives to make sure that only "permitted"
headers are included.

Any #include directives with angle brackets (like #include <stdio.h>) are
ignored -- only directives with quotes (like #include "foo.h") are
considered.

To decide what includes are permitted, this script looks at a .may_include
file in each directory.  This file contains empty lines, #-prefixed
comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
for files that are permitted.
"""

from __future__ import print_function

import fnmatch
import os
import re
import sys

if sys.version_info[0] <= 2:
    def open_file(fname):
        return open(fname, 'r')
else:
    def open_file(fname):
        return open(fname, 'r', encoding='utf-8')

def warn(msg):
    print(msg, file=sys.stderr)

def fname_is_c(fname):
    """ Return true iff 'fname' is the name of a file that we should
        search for possibly disallowed #include directives. """
    return fname.endswith(".h") or fname.endswith(".c")

INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')
RULES_FNAME = ".may_include"

ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    re.compile(r'^orconfig.h$'),
    re.compile(r'^micro-revision.i$'),
]

def pattern_is_normal(s):
    for p in ALLOWED_PATTERNS:
        if p.match(s):
            return True
    return False

class Error(object):
    def __init__(self, location, msg, is_advisory=False):
        self.location = location
        self.msg = msg
        self.is_advisory = is_advisory

    def __str__(self):
        return "{} at {}".format(self.msg, self.location)

class Rules(object):
    """ A 'Rules' object is the parsed version of a .may_include file. """
    def __init__(self, dirpath):
        self.dirpath = dirpath
        if dirpath.startswith("src/"):
            self.incpath = dirpath[4:]
        else:
            self.incpath = dirpath
        self.patterns = []
        self.usedPatterns = set()
        self.is_advisory = False

    def addPattern(self, pattern):
        if pattern == "!advisory":
            self.is_advisory = True
            return
        if not pattern_is_normal(pattern):
            warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
        self.patterns.append(pattern)

    def includeOk(self, path):
        for pattern in self.patterns:
            if fnmatch.fnmatchcase(path, pattern):
                self.usedPatterns.add(pattern)
                return True
        return False

    def applyToLines(self, lines, loc_prefix=""):
        lineno = 0
        for line in lines:
            lineno += 1
            m = INCLUDE_PATTERN.match(line)
            if m:
                include = m.group(1)
                if not self.includeOk(include):
                    yield Error("{}{}".format(loc_prefix, str(lineno)),
                                "Forbidden include of {}".format(include),
                                is_advisory=self.is_advisory)

    def applyToFile(self, fname):
        with open_file(fname) as f:
            #print(fname)
            for error in self.applyToLines(iter(f), "{}:".format(fname)):
                yield error

    def noteUnusedRules(self):
        for p in self.patterns:
            if p not in self.usedPatterns:
                warn("Pattern {} in {} was never used.".format(p, self.dirpath))

    def getAllowedDirectories(self):
        allowed = []
        for p in self.patterns:
            m = re.match(r'^(.*)/\*\.(h|inc)$', p)
            if m:
                allowed.append(m.group(1))
                continue
            m = re.match(r'^(.*)/[^/]*$', p)
            if m:
                allowed.append(m.group(1))
                continue
        return allowed
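
# Illustrative sketch of how a Rules object behaves (the directory and header
# names here are hypothetical):
#
#     r = Rules("src/lib/example")
#     r.addPattern("lib/string/*.h")
#     r.includeOk("lib/string/compat_string.h")  # True; pattern marked as used
#     r.includeOk("lib/net/address.h")           # False; a forbidden include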

include_rules_cache = {}

def load_include_rules(fname):
    """ Read a rules file from 'fname', and return it as a Rules object.
        Return 'None' if fname does not exist.
    """
    if fname in include_rules_cache:
        return include_rules_cache[fname]
    if not os.path.exists(fname):
        include_rules_cache[fname] = None
        return None

    result = Rules(os.path.split(fname)[0])
    with open_file(fname) as f:
        for line in f:
            line = line.strip()
            if line.startswith("#") or not line:
                continue
            result.addPattern(line)

    include_rules_cache[fname] = result
    return result

def get_all_include_rules():
    """Return a list of all the Rules objects we have loaded so far,
       sorted by their directory names."""
    return [ rules for (fname, rules) in
             sorted(include_rules_cache.items())
             if rules is not None ]

def remove_self_edges(graph):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).

       Remove all edges from a node to itself."""
    for k in list(graph):
        graph[k] = [ d for d in graph[k] if d != k ]
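
# For example, remove_self_edges({"a": ["a", "b"]}) leaves the graph as
# {"a": ["b"]} (node names are hypothetical).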

def toposort(graph, limit=100):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).  Tries to
       perform a topological sort on the graph, arranging the nodes into
       "levels", such that every member of each level is only reachable
       by members of later levels.

       Returns a list of the members of each level.

       Modifies the input graph, removing every member that could be
       sorted.  If the graph does not become empty, then it contains a
       cycle.

       "limit" is the max depth of the graph after which we give up trying
       to sort it and conclude we have a cycle.
    """
    all_levels = []
    n = 0
    while graph:
        cur_level = []
        all_levels.append(cur_level)
        for k in list(graph):
            graph[k] = [ d for d in graph[k] if d in graph ]
            if graph[k] == []:
                cur_level.append(k)
        for k in cur_level:
            del graph[k]
        n += 1
        if n > limit:
            break

    return all_levels
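
# Illustrative sketch of toposort() on a tiny hypothetical dependency graph:
#
#     >>> toposort({"a": ["b"], "b": ["c"], "c": []})
#     [['c'], ['b'], ['a']]
#
# Nodes with no remaining dependencies make up each level; if the graph does
# not empty out, it contains a circular dependency.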

def consider_include_rules(fname):
    dirpath = os.path.split(fname)[0]
    rules_fname = os.path.join(dirpath, RULES_FNAME)
    rules = load_include_rules(rules_fname)
    if rules is None:
        return

    for err in rules.applyToFile(fname):
        yield err

list_unused = False
log_sorted_levels = False

def walk_c_files(topdir="src"):
    """Run through all c and h files under topdir, looking for
       include-rule violations. Yield those violations."""

    for dirpath, dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if fname_is_c(fname):
                fullpath = os.path.join(dirpath, fname)
                for err in consider_include_rules(fullpath):
                    yield err

def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                       list_advisories=False):
    trouble = False

    for err in walk_c_files(topdir):
        if err.is_advisory and not list_advisories:
            continue
        print(err, file=sys.stderr)
        if not err.is_advisory:
            trouble = True

    if trouble:
        warn(
"""To change which includes are allowed in a C file, edit the {}
files in its enclosing directory.""".format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    uses_dirs = { }
    for rules in get_all_include_rules():
        uses_dirs[rules.incpath] = rules.getAllowedDirectories()

    remove_self_edges(uses_dirs)
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)

def main(argv):
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--toposort", action="store_true",
                        help="Print a topologically sorted list of modules")
    parser.add_argument("--list-unused", action="store_true",
                        help="List unused lines in .may_include files.")
    parser.add_argument("--list-advisories", action="store_true",
                        help="List advisories as well as forbidden includes")
    parser.add_argument("topdir", default="src", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    run_check_includes(topdir=args.topdir,
                       log_sorted_levels=args.toposort,
                       list_unused=args.list_unused,
                       list_advisories=args.list_advisories)

if __name__ == '__main__':
    main(sys.argv)
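
# Illustrative invocations, assuming this script is run as includes.py from
# the top of a tor checkout:
#
#     python includes.py                  # check src/ against the .may_include rules
#     python includes.py --list-unused    # also report never-used patterns
#     python includes.py --toposort src   # print topologically sorted module levels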