#!/usr/bin/python
# Copyright 2018 The Tor Project, Inc. See LICENSE file for licensing info.

"""This script looks through all the directories for files matching *.c or
*.h, and checks their #include directives to make sure that only "permitted"
headers are included.

Any #include directives with angle brackets (like #include <stdio.h>) are
ignored -- only directives with quotes (like #include "foo.h") are
considered.

To decide what includes are permitted, this script looks at a .may_include
file in each directory. This file contains empty lines, #-prefixed
comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
for files that are permitted.
"""

from __future__ import print_function

import fnmatch
import os
import re
import sys

if sys.version_info[0] <= 2:
    def open_file(fname):
        return open(fname, 'r')
else:
    def open_file(fname):
        return open(fname, 'r', encoding='utf-8')

def warn(msg):
    print(msg, file=sys.stderr)

def fname_is_c(fname):
    """ Return true iff 'fname' is the name of a file that we should
        search for possibly disallowed #include directives. """
    return fname.endswith(".h") or fname.endswith(".c")

INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')
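# Example of what INCLUDE_PATTERN accepts (illustration only):
#     INCLUDE_PATTERN.match('#include "lib/foo/bar.h"').group(1) == 'lib/foo/bar.h'
# Angle-bracket includes such as '#include <stdio.h>' do not match, so they
# are ignored, as described in the module docstring.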
RULES_FNAME = ".may_include"

ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    re.compile(r'^orconfig.h$'),
    re.compile(r'^micro-revision.i$'),
]
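
# Rough reading of ALLOWED_PATTERNS, with illustrative (not original) examples:
#   - a glob ending in *.h or *.inc, e.g. "lib/foo/*.h"
#   - an explicit header path containing a slash, e.g. "lib/foo/bar.h"
#   - a .c file under ext/, e.g. "ext/foo.c"
#   - the exact names "orconfig.h" and "micro-revision.i"
# Any other pattern (except the special "!advisory" marker) triggers the
# "Unusual pattern" warning in Rules.addPattern.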

def pattern_is_normal(s):
    for p in ALLOWED_PATTERNS:
        if p.match(s):
            return True
    return False

class Error(object):
    def __init__(self, location, msg, is_advisory=False):
        self.location = location
        self.msg = msg
        self.is_advisory = is_advisory

    def __str__(self):
        return "{} at {}".format(self.msg, self.location)

class Rules(object):
    """ A 'Rules' object is the parsed version of a .may_include file. """
    def __init__(self, dirpath):
        self.dirpath = dirpath
        if dirpath.startswith("src/"):
            self.incpath = dirpath[4:]
        else:
            self.incpath = dirpath
        self.patterns = []
        self.usedPatterns = set()
        self.is_advisory = False

    def addPattern(self, pattern):
        if pattern == "!advisory":
            self.is_advisory = True
            return
        if not pattern_is_normal(pattern):
            warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
        self.patterns.append(pattern)

    def includeOk(self, path):
        for pattern in self.patterns:
            if fnmatch.fnmatchcase(path, pattern):
                self.usedPatterns.add(pattern)
                return True
        return False
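
    # Note on glob semantics (illustration, not original text): fnmatchcase is
    # case-sensitive, and its "*" also matches "/", so for example
    #     fnmatch.fnmatchcase("lib/foo/bar.h", "lib/foo/*.h")   # True
    #     fnmatch.fnmatchcase("lib/foo/sub/baz.h", "lib/*.h")   # also True
    # i.e. a single "*" can span several path components.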

    def applyToLines(self, lines, loc_prefix=""):
        lineno = 0
        for line in lines:
            lineno += 1
            m = INCLUDE_PATTERN.match(line)
            if m:
                include = m.group(1)
                if not self.includeOk(include):
                    yield Error("{}{}".format(loc_prefix, str(lineno)),
                                "Forbidden include of {}".format(include),
                                is_advisory=self.is_advisory)

    def applyToFile(self, fname, f):
        for error in self.applyToLines(iter(f), "{}:".format(fname)):
            yield error

    def noteUnusedRules(self):
        for p in self.patterns:
            if p not in self.usedPatterns:
                warn("Pattern {} in {} was never used.".format(p, self.dirpath))

    def getAllowedDirectories(self):
        allowed = []
        for p in self.patterns:
            m = re.match(r'^(.*)/\*\.(h|inc)$', p)
            if m:
                allowed.append(m.group(1))
                continue
            m = re.match(r'^(.*)/[^/]*$', p)
            if m:
                allowed.append(m.group(1))
                continue
        return allowed
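
# Worked example for getAllowedDirectories (hypothetical patterns):
#     r = Rules("src/lib/foo")
#     r.addPattern("lib/log/*.h")
#     r.addPattern("lib/cc/compat.h")
#     r.getAllowedDirectories()   # -> ["lib/log", "lib/cc"]
# This is what run_check_includes() later uses to build the directory-level
# dependency graph that gets topologically sorted.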

include_rules_cache = {}

def load_include_rules(fname):
    """ Read a rules file from 'fname', and return it as a Rules object.
        Return 'None' if fname does not exist.
    """
    if fname in include_rules_cache:
        return include_rules_cache[fname]

    if not os.path.exists(fname):
        include_rules_cache[fname] = None
        return None

    result = Rules(os.path.split(fname)[0])
    with open_file(fname) as f:
        for line in f:
            line = line.strip()
            if line.startswith("#") or not line:
                continue
            result.addPattern(line)

    include_rules_cache[fname] = result
    return result

def get_all_include_rules():
    """Return a list of all the Rules objects we have loaded so far,
       sorted by their directory names."""
    return [ rules for (fname, rules) in
             sorted(include_rules_cache.items())
             if rules is not None ]

def remove_self_edges(graph):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).

       Remove all edges from a node to itself."""
    for k in list(graph):
        graph[k] = [ d for d in graph[k] if d != k ]

def toposort(graph, limit=100):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects). Tries to
       perform a topological sort on the graph, arranging the nodes into
       "levels", such that every member of each level is only reachable
       by members of later levels.

       Returns a list of the members of each level.

       Modifies the input graph, removing every member that could be
       sorted. If the graph does not become empty, then it contains a
       cycle.

       "limit" is the maximum number of levels to compute before we give
       up trying to sort the graph and conclude that it contains a cycle.
    """
    all_levels = []

    n = 0
    while graph:
        cur_level = []
        all_levels.append(cur_level)
        # Every node whose remaining dependencies all lie outside the graph
        # belongs in the current level.
        for k in list(graph):
            graph[k] = [ d for d in graph[k] if d in graph ]
            if graph[k] == []:
                cur_level.append(k)
        for k in cur_level:
            del graph[k]
        n += 1
        if n > limit:
            break

    return all_levels
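
# Worked example (hypothetical graph): toposort consumes its argument, so
#     g = {"a": ["b"], "b": ["c"], "c": []}
#     toposort(g)   # -> [["c"], ["b"], ["a"]]; g is now empty
# A graph with a cycle, e.g. {"a": ["b"], "b": ["a"]}, never empties, so the
# leftover entries are reported as circular dependencies by run_check_includes.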

def consider_include_rules(fname, f):
    dirpath = os.path.split(fname)[0]
    rules_fname = os.path.join(dirpath, RULES_FNAME)
    rules = load_include_rules(rules_fname)
    if rules is None:
        return

    for err in rules.applyToFile(fname, f):
        yield err

list_unused = False
log_sorted_levels = False

def walk_c_files(topdir="src"):
    """Run through all c and h files under topdir, looking for
       include-rule violations. Yield those violations."""
    for dirpath, dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if fname_is_c(fname):
                fullpath = os.path.join(dirpath, fname)
                with open_file(fullpath) as f:
                    for err in consider_include_rules(fullpath, f):
                        yield err

def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                       list_advisories=False):
    trouble = False

    for err in walk_c_files(topdir):
        if err.is_advisory and not list_advisories:
            continue
        print(err, file=sys.stderr)
        if not err.is_advisory:
            trouble = True

    if trouble:
        warn(
"""To change which includes are allowed in a C file, edit the {}
file in its enclosing directory.""".format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    uses_dirs = {}
    for rules in get_all_include_rules():
        uses_dirs[rules.incpath] = rules.getAllowedDirectories()

    remove_self_edges(uses_dirs)
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)

def main(argv):
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--toposort", action="store_true",
                        help="Print a topologically sorted list of modules")
    parser.add_argument("--list-unused", action="store_true",
                        help="List unused lines in .may_include files.")
    parser.add_argument("--list-advisories", action="store_true",
                        help="List advisories as well as forbidden includes")
    parser.add_argument("topdir", default="src", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    run_check_includes(topdir=args.topdir,
                       log_sorted_levels=args.toposort,
                       list_unused=args.list_unused,
                       list_advisories=args.list_advisories)

if __name__ == '__main__':
    main(sys.argv)
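
# Typical invocations (paths are examples, not prescriptive):
#     python includes.py                   # check #includes under ./src
#     python includes.py --toposort src    # also print module levels
#     python includes.py --list-unused     # report .may_include patterns never used
# Any forbidden (non-advisory) include makes the script exit with status 1.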