소스 검색

refactoring on extensions filter and DiffJ integration

Welder Luz 4 년 전
부모
커밋
3a6a23b87a

+ 1 - 0
code/szz/build.gradle

@@ -52,6 +52,7 @@ dependencies {
         compile group: 'commons-cli', name: 'commons-cli', version: '1.4'
         compile group: 'info.debatty', name: 'java-string-similarity', version: '1.0.1'
         compile group: 'com.googlecode.json-simple', name: 'json-simple', version: '1.1'
+        compile group: 'org.incava', name: 'diffj', version: '1.6.4'
 }
 
 task runJar(type: JavaExec) {

+ 36 - 0
code/szz/src/main/java/diff/CompilationUnitExtension.java

@@ -0,0 +1,36 @@
+package diff;
+
+import net.sourceforge.pmd.lang.java.ast.ASTCompilationUnit;
+import org.incava.diffj.compunit.CompilationUnit;
+import org.incava.diffj.compunit.Imports;
+import org.incava.diffj.compunit.Package;
+import org.incava.diffj.element.Differences;
+import org.incava.diffj.type.Types;
+
+/**
+ * A DiffJ {@link CompilationUnit} whose {@code diff} returns the collected
+ * {@link Differences} to the caller.
+ */
+public class CompilationUnitExtension extends CompilationUnit {
+    /**
+     * Wraps an already parsed compilation unit.
+     *
+     * @param compUnit the PMD AST node handed straight to the superclass constructor
+     */
+    public CompilationUnitExtension(ASTCompilationUnit compUnit) {
+        super(compUnit);
+    }
+
+    /**
+     * Compares this unit with {@code toCompUnit}: package first, then imports,
+     * then types, all accumulated into one {@link Differences} instance.
+     *
+     * @param toCompUnit the unit to compare against
+     * @return the accumulated differences, or {@code null} when {@code toCompUnit} is {@code null}
+     */
+    public Differences diff(CompilationUnit toCompUnit) {
+        if (toCompUnit != null) {
+            Differences collected = new Differences();
+            getPackage().diff(toCompUnit.getPackage(), collected);
+            getImports().diff(toCompUnit.getImports(), collected);
+            getTypes().diff(toCompUnit.getTypes(), collected);
+            return collected;
+        }
+        return null;
+    }
+}

+ 32 - 0
code/szz/src/main/java/diff/JavaFileExtension.java

@@ -0,0 +1,32 @@
+package diff;
+
+import org.incava.diffj.element.Differences;
+import org.incava.diffj.io.JavaFile;
+import org.incava.diffj.lang.DiffJException;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public class JavaFileExtension extends JavaFile {
+    public JavaFileExtension(String contents) throws DiffJException {
+        super(null, "label", contents, "1.8");
+    }
+
+    public CompilationUnitExtension compile() throws DiffJException {
+        return new CompilationUnitExtension(super.compile().getAstCompUnit());
+    }
+
+    public Set<Integer> affectedLineNumbers(JavaFileExtension fileToCompare) throws DiffJException {
+        CompilationUnitExtension fromCompUnit = compile();
+        CompilationUnitExtension toCompUnit = fileToCompare.compile();
+        Differences differences = fromCompUnit.diff(toCompUnit);
+        Set<Integer> affectedLines = new HashSet<>();
+        differences.getFileDiffs().forEach(it -> {
+                affectedLines.add(it.getSecondLocation().getStart().getLine());
+                affectedLines.add(it.getSecondLocation().getEnd().getLine());
+            }
+        );
+
+        return affectedLines;
+    }
+}

+ 70 - 17
code/szz/src/main/java/parser/GitParser.java

@@ -25,6 +25,7 @@
 package parser;
 
 import data.Issues;
+import diff.JavaFileExtension;
 import graph.AnnotationMap;
 import graph.FileAnnotationGraph;
 import org.eclipse.jgit.api.BlameCommand;
@@ -32,9 +33,15 @@ import org.eclipse.jgit.api.errors.GitAPIException;
 import org.eclipse.jgit.blame.BlameResult;
 import org.eclipse.jgit.diff.DiffEntry;
 import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.ObjectReader;
 import org.eclipse.jgit.lib.Repository;
 import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.revwalk.RevTree;
+import org.eclipse.jgit.revwalk.RevWalk;
 import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
+import org.eclipse.jgit.treewalk.TreeWalk;
+import org.incava.diffj.lang.DiffJException;
 import org.json.simple.JSONObject;
 import org.json.simple.parser.JSONParser;
 import org.json.simple.parser.ParseException;
@@ -46,7 +53,9 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.*;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Collectors;
 
 /**
@@ -57,8 +66,11 @@ import java.util.stream.Collectors;
  */
 public class GitParser {
 
+  private static final List<String> USELESS_FILE_EXTENSIONS = Arrays.asList(".md", ".txt", ".markdown");
+
   private CommitUtil util;
   private Repository repo;
+  private RevWalk revWalk;
   private Issues issues;
 
   private String resultPath;
@@ -84,6 +96,7 @@ public class GitParser {
     builder.addCeilingDirectory(new File(path));
     builder.findGitDir(new File(path));
     this.repo = builder.build();
+    this.revWalk = new RevWalk(repo);
     this.blameCommand = new BlameCommand(this.repo);
 
     this.resultPath = resultPath;
@@ -138,24 +151,14 @@ public class GitParser {
    * @param source the source commit from which the trace should start at.
    */
   private FileAnnotationGraph traceFileChanges(String filePath, Commit source, int step)
-      throws IOException, GitAPIException {
+          throws IOException, GitAPIException {
 
-    if (step == 0) return null;
+    if (step == 0 || !source.diffWithParent.containsKey(filePath)) return null;
 
     /*
      * Save all line numbers for the source commits deletions.
      */
-    List<Integer> delIndexes = null;
-    if (source.diffWithParent.containsKey(filePath))
-      delIndexes =
-          source
-              .diffWithParent
-              .get(filePath)
-              .deletions
-              .stream()
-              .map(s -> parseInt(s[0]))
-              .collect(Collectors.toList());
-    else return null;
+    List<Integer> delIndexes = buildDelIndexes(filePath, source);
 
     FileAnnotationGraph graph = createEmptyGraph(filePath);
     graph.revisions.add(ObjectId.toString(source.commit.toObjectId()));
@@ -170,6 +173,48 @@ public class GitParser {
     return graph;
   }
 
+  /**
+   * Collects the indexes of the lines deleted from {@code filePath} by
+   * {@code source}. For {@code .java} files, only indexes that are also
+   * returned by {@code changesFromDiffJ} are kept.
+   *
+   * @param filePath the file to inspect; must be a key of {@code source.diffWithParent}
+   * @param source the commit whose deletions are read
+   * @return the deletion indexes, in the order they appear in the diff
+   */
+  private List<Integer> buildDelIndexes(String filePath, Commit source) {
+    List<Integer> allDeleted = source
+            .diffWithParent
+            .get(filePath)
+            .deletions
+            .stream()
+            .map(entry -> parseInt(entry[0]))
+            .collect(Collectors.toList());
+
+    if (!filePath.endsWith(".java")) {
+      return allDeleted;
+    }
+
+    Set<Integer> diffJIndexes = changesFromDiffJ(filePath, source);
+    List<Integer> kept = new ArrayList<>();
+    for (Integer candidate : allDeleted) {
+      if (diffJIndexes.contains(candidate)) {
+        kept.add(candidate);
+      }
+    }
+    return kept;
+  }
+
+  /**
+   * Runs DiffJ between {@code filePath} at {@code source} and at its first
+   * parent, and returns the affected line numbers shifted down by one.
+   *
+   * @param filePath the file to compare
+   * @param source the commit whose version is compared with its first parent's
+   * @return the shifted line numbers, or an empty set when anything fails
+   */
+  private Set<Integer> changesFromDiffJ(String filePath, Commit source) {
+    try {
+      JavaFileExtension current = getFileContentAtRevision(filePath, source.commit);
+      JavaFileExtension parent = getFileContentAtRevision(filePath, source.commit.getParent(0));
+      Set<Integer> indexes = new HashSet<>();
+      // Line numbers become indexes by subtracting one.
+      for (Integer lineNumber : current.affectedLineNumbers(parent)) {
+        indexes.add(lineNumber - 1);
+      }
+      return indexes;
+    } catch (Exception e) {
+      e.printStackTrace();
+      return Collections.emptySet();
+    }
+  }
+
+  /**
+   * Loads the blob stored at {@code filePath} in the tree of {@code revision}
+   * and wraps its UTF-8 decoded text in a {@link JavaFileExtension}.
+   *
+   * @param filePath repository-relative path of the file
+   * @param revision the commit whose tree is searched
+   * @return the file contents at that revision
+   * @throws FileNotFoundException if the path does not exist in that revision
+   * @throws IOException if the repository objects cannot be read
+   * @throws DiffJException propagated from the {@link JavaFileExtension} constructor
+   */
+  private JavaFileExtension getFileContentAtRevision(String filePath, RevCommit revision) throws IOException, DiffJException {
+    RevTree tree = revWalk.parseCommit(revision.getId()).getTree();
+    // Both the tree walk and the object reader are closed once the blob has been read.
+    try (TreeWalk treeWalk = TreeWalk.forPath(repo, filePath, tree)) {
+      // forPath yields null when the path is absent from the tree (e.g. the file was added later).
+      if (treeWalk == null) {
+        throw new FileNotFoundException(filePath + " not found in revision " + revision.getName());
+      }
+      ObjectId blobId = treeWalk.getObjectId(0);
+      try (ObjectReader objectReader = repo.newObjectReader()) {
+        ObjectLoader objectLoader = objectReader.open(blobId);
+        byte[] bytes = objectLoader.getBytes();
+        return new JavaFileExtension(new String(bytes, StandardCharsets.UTF_8));
+      }
+    }
+  }
+
   /*
    * Start building subgraphs.
    */
@@ -212,8 +257,8 @@ public class GitParser {
     int index;
     Map<RevCommit, Map<Integer, Integer>> foundRevisions = new HashMap<>();
 
-    for (int i = 0; i < delIndexes.size(); i++) {
-      index = delIndexes.get(i);
+    for (Integer delIndex : delIndexes) {
+      index = delIndex;
       if (index == -1) continue;
       try {
         RevCommit foundRev = found.getSourceCommit(index);
@@ -278,7 +323,15 @@ public class GitParser {
   }
 
   private boolean checkFileType(String filePath) {
-    return !filePath.contains("src/test/") && !filePath.endsWith(".md");
+    return !filePath.contains("/test/") && checkFileExtension(filePath);
+  }
+
+  /**
+   * Returns {@code true} only when {@code filePath} ends with none of USELESS_FILE_EXTENSIONS.
+   */
+  private boolean checkFileExtension(String filePath) {
+    // Every listed extension must be checked, so the result cannot depend on the last entry alone.
+    return USELESS_FILE_EXTENSIONS.stream().noneMatch(filePath::endsWith);
+  }
 
   /**
@@ -325,7 +378,7 @@ public class GitParser {
    *
    * @param path the path to the json file where the changes are stored.
    */
-  public Set<RevCommit> readBugFixCommits(String path) throws IOException, GitAPIException {
+  public Set<RevCommit> readBugFixCommits(String path) throws IOException {
     if (repo == null) return Collections.emptySet();
 
     this.issues = new Issues();