includes.py

#!/usr/bin/python
# Copyright 2018 The Tor Project, Inc. See LICENSE file for licensing info.

"""This script looks through all the directories for files matching *.c or
   *.h, and checks their #include directives to make sure that only "permitted"
   headers are included.

   Any #include directives with angle brackets (like #include <stdio.h>) are
   ignored -- only directives with quotes (like #include "foo.h") are
   considered.

   To decide what includes are permitted, this script looks at a .may_include
   file in each directory. This file contains empty lines, #-prefixed
   comments, filenames (like "lib/foo/bar.h"), and file globs (like
   "lib/*/*.h") for files that are permitted.
"""
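
# A minimal, illustrative .may_include file for a hypothetical directory
# src/lib/example (the directory and its entries are made up, not taken from
# the Tor tree). It shows the three kinds of line the docstring describes:
# comments, exact filenames, and globs.
#
#     # Headers that lib/example may include:
#     orconfig.h
#     lib/example/*.h
#     lib/log/log.h
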
from __future__ import print_function

import fnmatch
import os
import re
import sys
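
# open_file() papers over a Python 2/3 difference: the Python 2 built-in
# open() does not accept an 'encoding' argument, so we only pass
# encoding='utf-8' when running under Python 3.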
if sys.version_info[0] <= 2:
    def open_file(fname):
        return open(fname, 'r')
else:
    def open_file(fname):
        return open(fname, 'r', encoding='utf-8')

def warn(msg):
    print(msg, file=sys.stderr)

def fname_is_c(fname):
    """ Return true iff 'fname' is the name of a file that we should
        search for possibly disallowed #include directives. """
    return fname.endswith(".h") or fname.endswith(".c")
INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')

RULES_FNAME = ".may_include"

ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    re.compile(r'^orconfig.h$'),
    re.compile(r'^micro-revision.i$'),
]
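
# pattern_is_normal() checks a candidate .may_include line against
# ALLOWED_PATTERNS above; Rules.addPattern() warns about (but still accepts)
# any pattern that does not match one of them.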
def pattern_is_normal(s):
    for p in ALLOWED_PATTERNS:
        if p.match(s):
            return True
    return False

class Error(object):
    def __init__(self, location, msg):
        self.location = location
        self.msg = msg

    def __str__(self):
        return "{} at {}".format(self.msg, self.location)
class Rules(object):
    """ A 'Rules' object is the parsed version of a .may_include file. """
    def __init__(self, dirpath):
        self.dirpath = dirpath
        if dirpath.startswith("src/"):
            self.incpath = dirpath[4:]
        else:
            self.incpath = dirpath
        self.patterns = []
        self.usedPatterns = set()

    def addPattern(self, pattern):
        if not pattern_is_normal(pattern):
            warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
        self.patterns.append(pattern)

    def includeOk(self, path):
        for pattern in self.patterns:
            if fnmatch.fnmatchcase(path, pattern):
                self.usedPatterns.add(pattern)
                return True
        return False

    def applyToLines(self, lines, loc_prefix=""):
        lineno = 0
        for line in lines:
            lineno += 1
            m = INCLUDE_PATTERN.match(line)
            if m:
                include = m.group(1)
                if not self.includeOk(include):
                    yield Error("{}{}".format(loc_prefix, str(lineno)),
                                "Forbidden include of {}".format(include))

    def applyToFile(self, fname):
        with open_file(fname) as f:
            #print(fname)
            for error in self.applyToLines(iter(f), "{}:".format(fname)):
                yield error

    def noteUnusedRules(self):
        for p in self.patterns:
            if p not in self.usedPatterns:
                warn("Pattern {} in {} was never used.".format(p, self.dirpath))

    def getAllowedDirectories(self):
        allowed = []
        for p in self.patterns:
            m = re.match(r'^(.*)/\*\.(h|inc)$', p)
            if m:
                allowed.append(m.group(1))
                continue
            m = re.match(r'^(.*)/[^/]*$', p)
            if m:
                allowed.append(m.group(1))
                continue
        return allowed
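
# A small usage sketch (not part of the script's normal flow; the directory
# and patterns below are made up): a Rules object can be applied to
# in-memory lines as well as files.
#
#     r = Rules("src/lib/example")
#     r.addPattern("lib/example/*.h")
#     for e in r.applyToLines(['#include "lib/other/other.h"'], "demo:"):
#         print(e)   # -> Forbidden include of lib/other/other.h at demo:1
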
include_rules_cache = {}

def load_include_rules(fname):
    """ Read a rules file from 'fname', and return it as a Rules object.
        Return 'None' if fname does not exist.
    """
    if fname in include_rules_cache:
        return include_rules_cache[fname]
    if not os.path.exists(fname):
        include_rules_cache[fname] = None
        return None
    result = Rules(os.path.split(fname)[0])
    with open_file(fname) as f:
        for line in f:
            line = line.strip()
            if line.startswith("#") or not line:
                continue
            result.addPattern(line)
    include_rules_cache[fname] = result
    return result

def get_all_include_rules():
    """Return a list of all the Rules objects we have loaded so far,
       sorted by their directory names."""
    return [ rules for (fname, rules) in
             sorted(include_rules_cache.items())
             if rules is not None ]
def remove_self_edges(graph):
    """Takes a directed graph as an adjacency mapping (a mapping from
       each node to a list of the nodes to which it connects).

       Remove all edges from a node to itself."""
    for k in list(graph):
        graph[k] = [ d for d in graph[k] if d != k ]
def toposort(graph, limit=100):
    """Takes a directed graph as an adjacency mapping (a mapping from
       each node to a list of the nodes to which it connects). Tries to
       perform a topological sort on the graph, arranging the nodes into
       "levels", such that every member of each level is only reachable
       by members of later levels.

       Returns a list of the members of each level.

       Modifies the input graph, removing every member that could be
       sorted. If the graph does not become empty, then it contains a
       cycle.

       "limit" is the maximum number of levels to extract before we give
       up trying to sort the graph and conclude that it has a cycle.
    """
    all_levels = []
    n = 0
    while graph:
        cur_level = []
        all_levels.append(cur_level)
        for k in list(graph):
            # Drop edges to nodes that have already been sorted out of the
            # graph; a node with no remaining edges belongs in this level.
            graph[k] = [ d for d in graph[k] if d in graph ]
            if graph[k] == []:
                cur_level.append(k)
        for k in cur_level:
            del graph[k]
        n += 1
        if n > limit:
            break
    return all_levels
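
# A small worked example (the module names are made up): for
#
#     g = {"log": [], "err": ["log"], "net": ["err", "log"]}
#
# toposort(g) empties g and returns [['log'], ['err'], ['net']]: "log"
# depends on nothing, "err" only on "log", and "net" only on earlier levels.
# If g still has entries afterwards, run_check_includes() reports a cycle.
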
def consider_include_rules(fname):
    """Yield any include-rule violations in the C/header file 'fname',
       according to the .may_include file (if any) in its directory."""
    dirpath = os.path.split(fname)[0]
    rules_fname = os.path.join(dirpath, RULES_FNAME)
    rules = load_include_rules(rules_fname)
    if rules is None:
        return

    for err in rules.applyToFile(fname):
        yield err

list_unused = False
log_sorted_levels = False
def walk_c_files(topdir="src"):
    """Run through all c and h files under topdir, looking for
       include-rule violations. Yield those violations."""
    for dirpath, dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if fname_is_c(fname):
                fullpath = os.path.join(dirpath, fname)
                for err in consider_include_rules(fullpath):
                    yield err
def run_check_includes(topdir, list_unused=False, log_sorted_levels=False):
    """Check every C file under 'topdir' against its .may_include rules,
       then check the resulting directory-dependency graph for cycles."""
    trouble = False

    for err in walk_c_files(topdir):
        print(err, file=sys.stderr)
        trouble = True

    if trouble:
        warn(
"""To change which includes are allowed in a C file, edit the {}
files in its enclosing directory.""".format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    uses_dirs = { }
    for rules in get_all_include_rules():
        uses_dirs[rules.incpath] = rules.getAllowedDirectories()

    remove_self_edges(uses_dirs)
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)
def main(argv):
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--toposort", action="store_true",
                        help="Print a topologically sorted list of modules")
    parser.add_argument("--list-unused", action="store_true",
                        help="List unused lines in .may_include files.")
    parser.add_argument("topdir", default="src", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    run_check_includes(topdir=args.topdir,
                       log_sorted_levels=args.toposort,
                       list_unused=args.list_unused)

if __name__ == '__main__':
    main(sys.argv)
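
# Typical invocations, based on the arguments defined in main() above (run
# from the top of a Tor checkout, where the default topdir "src" exists):
#
#     python includes.py                  # report forbidden #include lines
#     python includes.py --list-unused    # also list never-matched patterns
#     python includes.py --toposort       # also print sorted module levels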