Browse Source

Merge branch 'tor-github/pr/1177'

David Goulet 4 years ago
parent
commit
c4864de573

+ 1 - 1
Makefile.am

@@ -371,7 +371,7 @@ endif
 
 check-best-practices:
 if USEPYTHON
-	$(PYTHON) $(top_srcdir)/scripts/maint/practracker/practracker.py $(top_srcdir)
+	@$(PYTHON) $(top_srcdir)/scripts/maint/practracker/practracker.py $(top_srcdir)
 endif
 
 practracker-regen:

+ 4 - 0
changes/ticket29746

@@ -0,0 +1,4 @@
+  o Minor bugfixes (best practices tracker):
+    - Fix a few issues in the best-practices script, including tests, tab
+      tolerance, error reporting, and directory-exclusion logic. Fixes bug
+      29746; bugfix on 0.4.1.1-alpha.

+ 6 - 0
changes/ticket30752

@@ -0,0 +1,6 @@
+  o Minor features (best practices tracker):
+    - Give a warning rather than an error when a practracker exception is
+      violated by a small amount; add a --list-overbroad option to
+      practracker that lists exceptions that are stricter than they need to
+      be, and provide an environment variable for disabling
+      practracker. Closes ticekt 30752.

+ 7 - 3
scripts/maint/practracker/metrics.py

@@ -16,7 +16,7 @@ def get_include_count(f):
     """Get number of #include statements in the file"""
     include_count = 0
     for line in f:
-        if re.match(r' *# *include', line):
+        if re.match(r'\s*#\s*include', line):
             include_count += 1
     return include_count
 
@@ -31,6 +31,7 @@ def get_function_lines(f):
                             "DISABLE_GCC_WARNING", "DISABLE_GCC_WARNINGS"}
 
     in_function = False
+    found_openbrace = False
     for lineno, line in enumerate(f):
         if not in_function:
             # find the start of a function
@@ -41,10 +42,13 @@ def get_function_lines(f):
                     continue
                 func_start = lineno
                 in_function = True
-
+        elif not found_openbrace and line.startswith("{"):
+            found_openbrace = True
+            func_start = lineno
         else:
             # Find the end of a function
             if line.startswith("}"):
-                n_lines = lineno - func_start
+                n_lines = lineno - func_start + 1
                 in_function = False
+                found_openbrace = False
                 yield (func_name, n_lines)

+ 80 - 45
scripts/maint/practracker/practracker.py

@@ -36,10 +36,14 @@ MAX_FUNCTION_SIZE = 100 # lines
 # Recommended number of #includes
 MAX_INCLUDE_COUNT = 50
 
-#######################################################
+# Map from problem type to functions that adjust for tolerance
+TOLERANCE_FNS = {
+    'include-count': lambda n: int(n*1.1),
+    'function-size': lambda n: int(n*1.1),
+    'file-size': lambda n: int(n*1.02)
+}
 
-# ProblemVault singleton
-ProblemVault = None
+#######################################################
 
 # The Tor source code topdir
 TOR_TOPDIR = None
@@ -54,71 +58,59 @@ else:
         return open(fname, 'r', encoding='utf-8')
 
 def consider_file_size(fname, f):
-    """Consider file size issues for 'f' and return True if a new issue was found"""
+    """Consider the size of 'f' and yield an FileSizeItem for it.
+    """
     file_size = metrics.get_file_len(f)
-    if file_size > MAX_FILE_SIZE:
-        p = problem.FileSizeProblem(fname, file_size)
-        return ProblemVault.register_problem(p)
-    return False
+    yield problem.FileSizeItem(fname, file_size)
 
 def consider_includes(fname, f):
-    """Consider #include issues for 'f' and return True if a new issue was found"""
+    """Consider the #include count in for 'f' and yield an IncludeCountItem
+        for it.
+    """
     include_count = metrics.get_include_count(f)
 
-    if include_count > MAX_INCLUDE_COUNT:
-        p = problem.IncludeCountProblem(fname, include_count)
-        return ProblemVault.register_problem(p)
-    return False
+    yield problem.IncludeCountItem(fname, include_count)
 
 def consider_function_size(fname, f):
-    """Consider the function sizes for 'f' and return True if a new issue was found"""
-    found_new_issues = False
+    """yield a FunctionSizeItem for every function in f.
+    """
 
     for name, lines in metrics.get_function_lines(f):
-        # Don't worry about functions within our limits
-        if lines <= MAX_FUNCTION_SIZE:
-            continue
-
-        # That's a big function! Issue a problem!
         canonical_function_name = "%s:%s()" % (fname, name)
-        p = problem.FunctionSizeProblem(canonical_function_name, lines)
-        found_new_issues |= ProblemVault.register_problem(p)
-
-    return found_new_issues
+        yield problem.FunctionSizeItem(canonical_function_name, lines)
 
 #######################################################
 
 def consider_all_metrics(files_list):
-    """Consider metrics for all files, and return True if new issues were found"""
-    found_new_issues = False
+    """Consider metrics for all files, and yield a sequence of problem.Item
+       object for those issues."""
     for fname in files_list:
         with open_file(fname) as f:
-            found_new_issues |= consider_metrics_for_file(fname, f)
-    return found_new_issues
+            for item in consider_metrics_for_file(fname, f):
+                yield item
 
 def consider_metrics_for_file(fname, f):
     """
-    Consider the various metrics for file with filename 'fname' and file descriptor 'f'.
-    Return True if we found new issues.
+       Yield a sequence of problem.Item objects for all of the metrics in
+       'f'.
     """
     # Strip the useless part of the path
     if fname.startswith(TOR_TOPDIR):
         fname = fname[len(TOR_TOPDIR):]
 
-    found_new_issues = False
-
     # Get file length
-    found_new_issues |= consider_file_size(fname, f)
+    for item in consider_file_size(fname, f):
+        yield item
 
     # Consider number of #includes
     f.seek(0)
-    found_new_issues |= consider_includes(fname, f)
+    for item in consider_includes(fname, f):
+        yield item
 
     # Get function length
     f.seek(0)
-    found_new_issues |= consider_function_size(fname, f)
-
-    return found_new_issues
+    for item in consider_function_size(fname, f):
+        yield item
 
 HEADER="""\
 # Welcome to the exceptions file for Tor's best-practices tracker!
@@ -161,8 +153,20 @@ def main(argv):
     parser = argparse.ArgumentParser(prog=progname)
     parser.add_argument("--regen", action="store_true",
                         help="Regenerate the exceptions file")
+    parser.add_argument("--list-overbroad", action="store_true",
+                        help="List over-strict exceptions")
     parser.add_argument("--exceptions",
                         help="Override the location for the exceptions file")
+    parser.add_argument("--strict", action="store_true",
+                        help="Make all warnings into errors")
+    parser.add_argument("--terse", action="store_true",
+                        help="Do not emit helpful instructions.")
+    parser.add_argument("--max-file-size", default=MAX_FILE_SIZE,
+                        help="Maximum lines per C file size")
+    parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT,
+                        help="Maximum includes per C file")
+    parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE,
+                        help="Maximum lines per function")
     parser.add_argument("topdir", default=".", nargs="?",
                         help="Top-level directory for the tor source")
     args = parser.parse_args(argv[1:])
@@ -174,24 +178,41 @@ def main(argv):
     else:
         exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME)
 
+    # 0) Configure our thresholds of "what is a problem actually"
+    filt = problem.ProblemFilter()
+    filt.addThreshold(problem.FileSizeItem("*", int(args.max_file_size)))
+    filt.addThreshold(problem.IncludeCountItem("*", int(args.max_include_count)))
+    filt.addThreshold(problem.FunctionSizeItem("*", int(args.max_function_size)))
+
     # 1) Get all the .c files we care about
     files_list = util.get_tor_c_files(TOR_TOPDIR)
 
     # 2) Initialize problem vault and load an optional exceptions file so that
     # we don't warn about the past
-    global ProblemVault
-
     if args.regen:
         tmpname = exceptions_file + ".tmp"
         tmpfile = open(tmpname, "w")
-        sys.stdout = tmpfile
-        sys.stdout.write(HEADER)
+        problem_file = tmpfile
         ProblemVault = problem.ProblemVault()
     else:
         ProblemVault = problem.ProblemVault(exceptions_file)
+        problem_file = sys.stdout
+
+    # 2.1) Adjust the exceptions so that we warn only about small problems,
+    # and produce errors on big ones.
+    if not (args.regen or args.list_overbroad or args.strict):
+        ProblemVault.set_tolerances(TOLERANCE_FNS)
 
     # 3) Go through all the files and report problems if they are not exceptions
-    found_new_issues = consider_all_metrics(files_list)
+    found_new_issues = 0
+    for item in filt.filter(consider_all_metrics(files_list)):
+        status = ProblemVault.register_problem(item)
+        if status == problem.STATUS_ERR:
+            print(item, file=problem_file)
+            found_new_issues += 1
+        elif status == problem.STATUS_WARN:
+            # warnings always go to stdout.
+            print("(warning) {}".format(item))
 
     if args.regen:
         tmpfile.close()
@@ -199,18 +220,32 @@ def main(argv):
         sys.exit(0)
 
     # If new issues were found, try to give out some advice to the developer on how to resolve it.
-    if found_new_issues and not args.regen:
+    if found_new_issues and not args.regen and not args.terse:
         new_issues_str = """\
-FAILURE: practracker found new problems in the code: see warnings above.
+FAILURE: practracker found {} new problem(s) in the code: see warnings above.
 
 Please fix the problems if you can, and update the exceptions file
 ({}) if you can't.
 
 See doc/HACKING/HelpfulTools.md for more information on using practracker.\
-""".format(exceptions_file)
+
+You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment
+variable.
+""".format(found_new_issues, exceptions_file)
         print(new_issues_str)
 
+    if args.list_overbroad:
+        def k_fn(tup):
+            return tup[0].key()
+        for (ex,p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn):
+            if p is None:
+                print(ex, "->", 0)
+            else:
+                print(ex, "->", p.metric_value)
+
     sys.exit(found_new_issues)
 
 if __name__ == '__main__':
+    if os.environ.get("TOR_DISABLE_PRACTRACKER"):
+        sys.exit(0)
     main(sys.argv)

+ 15 - 3
scripts/maint/practracker/practracker_tests.py

@@ -1,3 +1,5 @@
+#!/usr/bin/python
+
 """Some simple tests for practracker metrics"""
 
 import unittest
@@ -38,13 +40,23 @@ class TestFunctionLength(unittest.TestCase):
     def test_function_length(self):
         funcs = StringIO.StringIO(function_file)
         # All functions should have length 2
-        for name, lines in metrics.function_lines(funcs):
+        for name, lines in metrics.get_function_lines(funcs):
             self.assertEqual(name, "fun")
 
         funcs.seek(0)
 
-        for name, lines in metrics.function_lines(funcs):
-            self.assertEqual(lines, 2)
+        for name, lines in metrics.get_function_lines(funcs):
+            self.assertEqual(lines, 4)
+
+class TestIncludeCount(unittest.TestCase):
+    def test_include_count(self):
+        f = StringIO.StringIO("""
+  #   include <abc.h>
+  #   include "def.h"
+#include "ghi.h"
+\t#\t include "jkl.h"
+""")
+        self.assertEqual(metrics.get_include_count(f),4)
 
 if __name__ == '__main__':
     unittest.main()

+ 82 - 24
scripts/maint/practracker/problem.py

@@ -13,6 +13,10 @@ import os.path
 import re
 import sys
 
+STATUS_ERR = 2
+STATUS_WARN = 1
+STATUS_OK = 0
+
 class ProblemVault(object):
     """
     Singleton where we store the various new problems we
@@ -22,6 +26,9 @@ class ProblemVault(object):
     def __init__(self, exception_fname=None):
         # Exception dictionary: { problem.key() : Problem object }
         self.exceptions = {}
+        # Exception dictionary: maps key to the problem it was used to
+        # suppress.
+        self.used_exception_for = {}
 
         if exception_fname == None:
             return
@@ -57,42 +64,90 @@ class ProblemVault(object):
 
     def register_problem(self, problem):
         """
-        Register this problem to the problem value. Return True if it was a new
-        problem or it worsens an already existing problem.
+        Register this problem to the problem value. Return true if it was a new
+        problem or it worsens an already existing problem.  A true
+        value may be STATUS_ERR to indicate a hard violation, or STATUS_WARN
+        to indicate a warning.
         """
         # This is a new problem, print it
         if problem.key() not in self.exceptions:
-            print(problem)
-            return True
+            return STATUS_ERR
 
         # If it's an old problem, we don't warn if the situation got better
         # (e.g. we went from 4k LoC to 3k LoC), but we do warn if the
         # situation worsened (e.g. we went from 60 includes to 80).
-        if problem.is_worse_than(self.exceptions[problem.key()]):
-            print(problem)
-            return True
+        status = problem.is_worse_than(self.exceptions[problem.key()])
+        if status == STATUS_OK:
+            self.used_exception_for[problem.key()] = problem
 
-        return False
+        return status
 
-class Problem(object):
+    def list_overbroad_exceptions(self):
+        """Return an iterator of tuples containing (ex,prob) where ex is an
+           exceptions in this vault that are stricter than it needs to be, and
+           prob is the worst problem (if any) that it covered.
+        """
+        for k in self.exceptions:
+            e = self.exceptions[k]
+            p = self.used_exception_for.get(k)
+            if p is None or e.is_worse_than(p):
+                yield (e, p)
+
+    def set_tolerances(self, fns):
+        """Adjust the tolerances for the exceptions in this vault.  Takes
+           a map of problem type to a function that adjusts the permitted
+           function to its new maximum value."""
+        for k in self.exceptions:
+            ex = self.exceptions[k]
+            fn = fns.get(ex.problem_type)
+            if fn is not None:
+                ex.metric_value = fn(ex.metric_value)
+
+class ProblemFilter(object):
+    def __init__(self):
+        self.thresholds = dict()
+
+    def addThreshold(self, item):
+        self.thresholds[item.get_type()] = item
+
+    def matches(self, item):
+        filt = self.thresholds.get(item.get_type(), None)
+        if filt is None:
+            return False
+        return item.is_worse_than(filt)
+
+    def filter(self, sequence):
+        for item in iter(sequence):
+            if self.matches(item):
+                yield item
+
+class Item(object):
     """
-    A generic problem in our source code. See the subclasses below for the
-    specific problems we are trying to tackle.
+    A generic measurement about some aspect of our source code. See
+    the subclasses below for the specific problems we are trying to tackle.
     """
     def __init__(self, problem_type, problem_location, metric_value):
         self.problem_location = problem_location
         self.metric_value = int(metric_value)
+        self.warning_threshold = self.metric_value
         self.problem_type = problem_type
 
     def is_worse_than(self, other_problem):
-        """Return True if this is a worse problem than other_problem"""
+        """Return STATUS_ERR if this is a worse problem than other_problem.
+           Return STATUS_WARN if it is a little worse, but falls within the
+           warning threshold.  Return STATUS_OK if this problem is not
+           at all worse than other_problem.
+        """
         if self.metric_value > other_problem.metric_value:
-            return True
-        return False
+            return STATUS_ERR
+        elif self.metric_value > other_problem.warning_threshold:
+            return STATUS_WARN
+        else:
+            return STATUS_OK
 
     def key(self):
         """Generate a unique key that describes this problem that can be used as a dictionary key"""
-        # Problem location is a filesystem path, so we need to normalize this
+        # Item location is a filesystem path, so we need to normalize this
         # across platforms otherwise same paths are not gonna match.
         canonical_location = os.path.normcase(self.problem_location)
         return "%s:%s" % (canonical_location, self.problem_type)
@@ -100,7 +155,10 @@ class Problem(object):
     def __str__(self):
         return "problem %s %s %s" % (self.problem_type, self.problem_location, self.metric_value)
 
-class FileSizeProblem(Problem):
+    def get_type(self):
+        return self.problem_type
+
+class FileSizeItem(Item):
     """
     Denotes a problem with the size of a .c file.
 
@@ -108,9 +166,9 @@ class FileSizeProblem(Problem):
     'metric_value' is the number of lines in the .c file.
     """
     def __init__(self, problem_location, metric_value):
-        super(FileSizeProblem, self).__init__("file-size", problem_location, metric_value)
+        super(FileSizeItem, self).__init__("file-size", problem_location, metric_value)
 
-class IncludeCountProblem(Problem):
+class IncludeCountItem(Item):
     """
     Denotes a problem with the number of #includes in a .c file.
 
@@ -118,9 +176,9 @@ class IncludeCountProblem(Problem):
     'metric_value' is the number of #includes in the .c file.
     """
     def __init__(self, problem_location, metric_value):
-        super(IncludeCountProblem, self).__init__("include-count", problem_location, metric_value)
+        super(IncludeCountItem, self).__init__("include-count", problem_location, metric_value)
 
-class FunctionSizeProblem(Problem):
+class FunctionSizeItem(Item):
     """
     Denotes a problem with a size of a function in a .c file.
 
@@ -131,7 +189,7 @@ class FunctionSizeProblem(Problem):
     The 'metric_value' is the size of the offending function in lines.
     """
     def __init__(self, problem_location, metric_value):
-        super(FunctionSizeProblem, self).__init__("function-size", problem_location, metric_value)
+        super(FunctionSizeItem, self).__init__("function-size", problem_location, metric_value)
 
 comment_re = re.compile(r'#.*$')
 
@@ -149,10 +207,10 @@ def get_old_problem_from_exception_str(exception_str):
         raise ValueError("Misformatted line {!r}".format(orig_str))
 
     if problem_type == "file-size":
-        return FileSizeProblem(problem_location, metric_value)
+        return FileSizeItem(problem_location, metric_value)
     elif problem_type == "include-count":
-        return IncludeCountProblem(problem_location, metric_value)
+        return IncludeCountItem(problem_location, metric_value)
     elif problem_type == "function-size":
-        return FunctionSizeProblem(problem_location, metric_value)
+        return FunctionSizeItem(problem_location, metric_value)
     else:
         raise ValueError("Unknown exception type {!r}".format(orig_str))

+ 50 - 0
scripts/maint/practracker/test_practracker.sh

@@ -0,0 +1,50 @@
+#!/bin/sh
+
+umask 077
+
+TMPDIR=""
+clean () {
+  if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then
+    rm -rf "$TMPDIR"
+  fi
+}
+trap clean EXIT HUP INT TERM
+
+if test "${PRACTRACKER_DIR}" = "" ||
+        test ! -e "${PRACTRACKER_DIR}/practracker.py" ; then
+    PRACTRACKER_DIR=$(dirname "$0")
+fi
+
+TMPDIR="$(mktemp -d -t pracktracker.test.XXXXXX)"
+if test -z "${TMPDIR}" || test ! -d "${TMPDIR}" ; then
+    echo >&2 "mktemp failed."
+    exit 1;
+fi
+
+DATA="${PRACTRACKER_DIR}/testdata"
+
+run_practracker() {
+    "${PYTHON:-python}" "${PRACTRACKER_DIR}/practracker.py" \
+        --max-include-count=0 --max-file-size=0 --max-function-size=0 --terse \
+        "${DATA}/" "$@";
+}
+
+echo "ex0:"
+
+run_practracker --exceptions "${DATA}/ex0.txt" > "${TMPDIR}/ex0-received.txt"
+
+if cmp "${TMPDIR}/ex0-received.txt" "${DATA}/ex0-expected.txt" ; then
+    echo "  OK"
+else
+    exit 1
+fi
+
+echo "ex1:"
+
+run_practracker --exceptions "${DATA}/ex1.txt" > "${TMPDIR}/ex1-received.txt"
+
+if cmp "${TMPDIR}/ex1-received.txt" "${DATA}/ex1-expected.txt" ;then
+    echo "  OK"
+else
+    exit 1
+fi

+ 38 - 0
scripts/maint/practracker/testdata/a.c

@@ -0,0 +1,38 @@
+
+#include "one.h"
+#include "two.h"
+#incldue "three.h"
+
+# include "four.h"
+
+int
+i_am_a_function(void)
+{
+  call();
+  call();
+  /* comment
+
+     another */
+
+  return 3;
+}
+
+#	include  "five.h"
+
+long
+another_function(long x,
+                 long y)
+{
+  int abcd;
+
+  abcd = x+y;
+  abcd *= abcd;
+
+  /* comment here */
+
+  return abcd +
+    abcd +
+    abcd;
+}
+
+/* And a comment to grow! */

+ 15 - 0
scripts/maint/practracker/testdata/b.c

@@ -0,0 +1,15 @@
+
+MOCK_IMPL(int,
+foo,(void))
+{
+  // blah1
+  return 0;
+}
+
+MOCK_IMPL(int,
+bar,( long z))
+{
+  // blah2
+
+  return (int)(z+2);
+}

+ 0 - 0
scripts/maint/practracker/testdata/ex.txt


+ 7 - 0
scripts/maint/practracker/testdata/ex0-expected.txt

@@ -0,0 +1,7 @@
+problem file-size a.c 38
+problem include-count a.c 4
+problem function-size a.c:i_am_a_function() 9
+problem function-size a.c:another_function() 12
+problem file-size b.c 15
+problem function-size b.c:foo() 4
+problem function-size b.c:bar() 5

+ 0 - 0
scripts/maint/practracker/testdata/ex0.txt


+ 3 - 0
scripts/maint/practracker/testdata/ex1-expected.txt

@@ -0,0 +1,3 @@
+problem function-size a.c:i_am_a_function() 9
+(warning) problem function-size a.c:another_function() 12
+problem function-size b.c:foo() 4

+ 11 - 0
scripts/maint/practracker/testdata/ex1.txt

@@ -0,0 +1,11 @@
+
+problem file-size a.c 38
+problem include-count a.c 4
+# this problem will produce an error
+problem function-size a.c:i_am_a_function() 8
+# this problem will produce a warning
+problem function-size a.c:another_function() 11
+problem file-size b.c 15
+# This is removed, and so will produce an error.
+# problem function-size b.c:foo() 4
+problem function-size b.c:bar() 5

+ 2 - 0
scripts/maint/practracker/testdata/not_c_file

@@ -0,0 +1,2 @@
+
+This isn't a C file, so practracker shouldn't care about it.

+ 10 - 4
scripts/maint/practracker/util.py

@@ -2,15 +2,24 @@ import os
 
 # We don't want to run metrics for unittests, automatically-generated C files,
 # external libraries or git leftovers.
-EXCLUDE_SOURCE_DIRS = {"/src/test/", "/src/trunnel/", "/src/ext/", "/.git/"}
+EXCLUDE_SOURCE_DIRS = {"src/test/", "src/trunnel/", "src/rust/",
+                       "src/ext/", ".git/"}
+
+def _norm(p):
+    return os.path.normcase(os.path.normpath(p))
 
 def get_tor_c_files(tor_topdir):
     """
     Return a list with the .c filenames we want to get metrics of.
     """
     files_list = []
+    exclude_dirs = { _norm(os.path.join(tor_topdir, p)) for p in EXCLUDE_SOURCE_DIRS }
+
 
     for root, directories, filenames in os.walk(tor_topdir):
+        # Remove all the directories that are excluded.
+        directories[:] = [ d for d in directories
+                           if _norm(os.path.join(root,d)) not in exclude_dirs ]
         directories.sort()
         filenames.sort()
         for filename in filenames:
@@ -18,10 +27,7 @@ def get_tor_c_files(tor_topdir):
             if not filename.endswith(".c"):
                 continue
 
-            # Exclude the excluded paths
             full_path = os.path.join(root,filename)
-            if any(os.path.normcase(exclude_dir) in full_path for exclude_dir in EXCLUDE_SOURCE_DIRS):
-                continue
 
             files_list.append(full_path)