annotate_ifdef_directives 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. #!/usr/bin/python
  2. # Copyright (c) 2017-2019, The Tor Project, Inc.
  3. # See LICENSE for licensing information
  4. # This script iterates over a list of C files. For each file, it looks at the
  5. # #if/#else C macros, and annotates them with comments explaining what they
  6. # match.
  7. #
  8. # For example, it replaces this:
  9. #
  10. # #ifdef HAVE_OCELOT
  11. # // 500 lines of ocelot code
  12. # #endif
  13. #
  14. # with this:
  15. #
  16. # #ifdef HAVE_OCELOT
  17. # // 500 lines of ocelot code
  18. # #endif /* defined(HAVE_OCELOT) */
  19. #
  20. # Note that only #else and #endif lines are annotated. Existing comments
  21. # on those lines are removed.
  22. import re
# Any block with fewer than this many lines does not need annotations.
LINE_OBVIOUSNESS_LIMIT = 4
# Maximum line width. This includes a terminating newline character.
#
# (This is the maximum before encoding, so that if the operating system
# uses multiple characters to encode newline, that's still okay.)
LINE_WIDTH=80
  30. class Problem(Exception):
  31. pass
  32. def close_parens_needed(expr):
  33. """Return the number of left-parentheses needed to make 'expr'
  34. balanced.
  35. """
  36. return expr.count("(") - expr.count(")")
  37. def truncate_expression(expr, new_width):
  38. """Given a parenthesized C expression in 'expr', try to return a new
  39. expression that is similar to 'expr', but no more than 'new_width'
  40. characters long.
  41. Try to return an expression with balanced parentheses.
  42. """
  43. if len(expr) <= new_width:
  44. # The expression is already short enough.
  45. return expr
  46. ellipsis = "..."
  47. # Start this at the minimum that we might truncate.
  48. n_to_remove = len(expr) + len(ellipsis) - new_width
  49. # Try removing characters, one by one, until we get something where
  50. # re-balancing the parentheses still fits within the limit.
  51. while n_to_remove < len(expr):
  52. truncated = expr[:-n_to_remove] + ellipsis
  53. truncated += ")" * close_parens_needed(truncated)
  54. if len(truncated) <= new_width:
  55. return truncated
  56. n_to_remove += 1
  57. return ellipsis
  58. def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
  59. """
  60. Return fmt%argument, for use as a commented line. If the line would
  61. be longer than maxwidth, truncate argument.
  62. Requires that fmt%"..." will fit into maxwidth characters.
  63. Requires that fmt ends with a newline.
  64. """
  65. assert fmt.endswith("\n")
  66. result = fmt % argument
  67. if len(result) <= maxwidth:
  68. return result
  69. else:
  70. # How long can we let the argument be? Try filling in the
  71. # format with an empty argument to find out.
  72. max_arg_width = maxwidth - len(fmt % "")
  73. result = fmt % truncate_expression(argument, max_arg_width)
  74. assert len(result) <= maxwidth
  75. return result
  76. def negate(expr):
  77. """Return a negated version of expr; try to avoid double-negation.
  78. We usually wrap expressions in parentheses and add a "!".
  79. >>> negate("A && B")
  80. '!(A && B)'
  81. But if we recognize the expression as negated, we can restore it.
  82. >>> negate(negate("A && B"))
  83. 'A && B'
  84. The same applies for defined(FOO).
  85. >>> negate("defined(FOO)")
  86. '!defined(FOO)'
  87. >>> negate(negate("defined(FOO)"))
  88. 'defined(FOO)'
  89. Internal parentheses don't confuse us:
  90. >>> negate("!(FOO) && !(BAR)")
  91. '!(!(FOO) && !(BAR))'
  92. """
  93. expr = expr.strip()
  94. # See whether we match !(...), with no intervening close-parens.
  95. m = re.match(r'^!\s*\(([^\)]*)\)$', expr)
  96. if m:
  97. return m.group(1)
  98. # See whether we match !?defined(...), with no intervening close-parens.
  99. m = re.match(r'^(!?)\s*(defined\([^\)]*\))$', expr)
  100. if m:
  101. if m.group(1) == "!":
  102. prefix = ""
  103. else:
  104. prefix = "!"
  105. return prefix + m.group(2)
  106. return "!(%s)" % expr
  107. def uncomment(s):
  108. """
  109. Remove existing trailing comments from an #else or #endif line.
  110. """
  111. s = re.sub(r'//.*','',s)
  112. s = re.sub(r'/\*.*','',s)
  113. return s.strip()
  114. def translate(f_in, f_out):
  115. """
  116. Read a file from f_in, and write its annotated version to f_out.
  117. """
  118. # A stack listing our current if/else state. Each member of the stack
  119. # is a list of directives. Each directive is a 3-tuple of
  120. # (command, rest, lineno)
  121. # where "command" is one of if/ifdef/ifndef/else/elif, and where
  122. # "rest" is an expression in a format suitable for use with #if, and where
  123. # lineno is the line number where the directive occurred.
  124. stack = []
  125. # the stack element corresponding to the top level of the file.
  126. whole_file = []
  127. cur_level = whole_file
  128. lineno = 0
  129. for line in f_in:
  130. lineno += 1
  131. m = re.match(r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)',
  132. line)
  133. if not m:
  134. # no directive, so we can just write it out.
  135. f_out.write(line)
  136. continue
  137. command,rest = m.groups()
  138. if command in ("if", "ifdef", "ifndef"):
  139. # The #if directive pushes us one level lower on the stack.
  140. if command == 'ifdef':
  141. rest = "defined(%s)"%uncomment(rest)
  142. elif command == 'ifndef':
  143. rest = "!defined(%s)"%uncomment(rest)
  144. elif rest.endswith("\\"):
  145. rest = rest[:-1]+"..."
  146. rest = uncomment(rest)
  147. new_level = [ (command, rest, lineno) ]
  148. stack.append(cur_level)
  149. cur_level = new_level
  150. f_out.write(line)
  151. elif command in ("else", "elif"):
  152. # We stay at the same level on the stack. If we have an #else,
  153. # we comment it.
  154. if len(cur_level) == 0 or cur_level[-1][0] == 'else':
  155. raise Problem("Unexpected #%s on %d"% (command,lineno))
  156. if (len(cur_level) == 1 and command == 'else' and
  157. lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):
  158. f_out.write(commented_line("#else /* %s */\n",
  159. negate(cur_level[0][1])))
  160. else:
  161. f_out.write(line)
  162. cur_level.append((command, rest, lineno))
  163. else:
  164. # We pop one element on the stack, and comment an endif.
  165. assert command == 'endif'
  166. if len(stack) == 0:
  167. raise Problem("Unmatched #%s on %s"% (command,lineno))
  168. if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:
  169. f_out.write(line)
  170. elif len(cur_level) == 1 or (
  171. len(cur_level) == 2 and cur_level[1][0] == 'else'):
  172. f_out.write(commented_line("#endif /* %s */\n",
  173. cur_level[0][1]))
  174. else:
  175. f_out.write(commented_line("#endif /* %s || ... */\n",
  176. cur_level[0][1]))
  177. cur_level = stack.pop()
  178. if len(stack) or cur_level != whole_file:
  179. raise Problem("Missing #endif")
  180. import sys,os
  181. for fn in sys.argv[1:]:
  182. with open(fn+"_OUT", 'w') as output_file:
  183. translate(open(fn, 'r'), output_file)
  184. os.rename(fn+"_OUT", fn)