Browse Source

SZZ implementation, multi-threaded.

Initial source tree for the SZZ algorithm. Call Main to run an analysis
on a local repository.

The SZZ algorithm uses commits with bug words in the message. These
commits are then connected together with previous commits that also
have induced changes on the same files.

The result from the program is now a file with commits and their
annotation graphs. It also produces the potential commits that
introduce bugs.

Build and run with gradle build && gradle runJar or
gradle build && gradle fatJar ; java -jar
build/libs/szz_find_bug_introducers-0.1.jar

Formatted with google java format.
Oscar Svensson 6 years ago
parent
commit
a141c55b65

+ 86 - 0
code/szz/build.gradle

@@ -0,0 +1,86 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+// Java build for the SZZ bug-introducer finder.
+apply plugin: 'java'
+
+version = '0.1'
+
+repositories {
+        jcenter()
+}
+
+// The plain jar starts the default-package Main class.
+jar {
+        manifest {
+                attributes('Main-Class': 'Main')
+        }
+}
+
+// Register 'src' as a Java source directory of the main source set.
+sourceSets.main.java.srcDirs('src')
+
+dependencies {
+        compile 'org.slf4j:slf4j-api:1.7.25'
+        compile 'org.slf4j:slf4j-simple:1.7.25'
+        compile 'org.eclipse.jgit:org.eclipse.jgit:4.10.0.201712302008-r'
+        compile 'commons-io:commons-io:2.6'
+        compile 'commons-cli:commons-cli:1.4'
+        compile 'info.debatty:java-string-similarity:1.0.1'
+        compile 'com.googlecode.json-simple:json-simple:1.1'
+}
+
+// Runs Main; the optional project properties 'issues' and 'repo' are forwarded
+// as the first and second program argument (empty strings when absent).
+task runJar(type: JavaExec) {
+        def issues_var = project.hasProperty("issues") ? issues : ""
+        def repository_var = project.hasProperty("repo") ? repo : ""
+
+        main = 'Main'
+        jvmArgs = ['-Xms1028m', '-Xmx2056m']
+        classpath = files('build/libs/read.jar') + sourceSets.main.runtimeClasspath
+        args issues_var, repository_var
+}
+
+// Bundles the project classes together with the unpacked compile dependencies,
+// leaving out the signature files found in the dependencies' META-INF folders.
+task fatJar(type: Jar) {
+        baseName = 'szz_find_bug_introducers'
+        manifest {
+                attributes(
+                        'Implementation-Title': 'SZZ Algorithm: Bug Introducing Commits Finder',
+                        'Implementation-Version': version,
+                        'Main-Class': 'Main'
+                )
+        }
+        from { configurations.compile.collect { dep -> dep.isDirectory() ? dep : zipTree(dep) } } {
+                exclude 'META-INF/*.RSA', 'META-INF/*.SF', 'META-INF/*.DSA'
+        }
+        with jar
+}

+ 42 - 0
code/szz/settings.gradle

@@ -0,0 +1,42 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This settings file was auto generated by the Gradle buildInit task
+ * by 'oskars' at '2/20/18 10:20 AM' with Gradle 3.2.1
+ *
+ * The settings file is used to specify which projects to include in your build.
+ * In a single project build this file can be empty or even removed.
+ *
+ * Detailed information about configuring a multi-project build in Gradle can be found
+ * in the user guide at https://docs.gradle.org/3.2.1/userguide/multi_project_builds.html
+ */
+
+/*
+// To declare projects as part of a multi-project build use the 'include' method
+include 'shared'
+include 'api'
+include 'services:webservice'
+*/
+
+rootProject.name = 'read' // name given to the root project of this build

+ 67 - 0
code/szz/src/main/java/Main.java

@@ -0,0 +1,67 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+import diff.SimplePartition;
+import java.util.*;
+import org.apache.commons.cli.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import parser.GitParserThread;
+import util.Configuration;
+
+/**
+ * Entry point of the SZZ analysis.
+ *
+ * <p>Splits the issue file into one part per configured CPU, runs one {@link GitParserThread} on
+ * each part, waits for all of them and finally merges the per-thread results.
+ *
+ * @author Oscar Svensson
+ */
+public class Main {
+
+  private static final Logger logger = LoggerFactory.getLogger(Main.class);
+
+  /**
+   * Runs the analysis.
+   *
+   * @param args the command line arguments, handed to {@link Configuration#init}.
+   */
+  public static void main(String... args) {
+    Configuration conf = Configuration.init(logger, args);
+
+    List<String> issue_paths =
+        SimplePartition.splitFile(conf.getNumberOfCPUS(), conf.getIssuePath(), "./issues");
+    if (issue_paths.isEmpty()) {
+      // Nothing could be partitioned, so there is nothing to analyse.
+      return;
+    }
+
+    List<GitParserThread> parsers = new LinkedList<>();
+    List<String> resPaths = new LinkedList<>();
+
+    // One worker and one result directory name per partition.
+    for (int cpu = 0; cpu < conf.getNumberOfCPUS(); cpu++) {
+      resPaths.add(String.format("%s/result%d", conf.getResultPath(), cpu));
+
+      GitParserThread worker = new GitParserThread(cpu, issue_paths.get(cpu));
+      parsers.add(worker);
+      worker.start();
+    }
+
+    // Wait for every worker; a failing join is only reported.
+    for (GitParserThread worker : parsers) {
+      try {
+        worker.join();
+      } catch (Exception e) {
+        logger.warn(e.getMessage());
+      }
+    }
+
+    SimplePartition.mergeFiles(resPaths, conf.getResultPath());
+  }
+}

+ 71 - 0
code/szz/src/main/java/data/Issues.java

@@ -0,0 +1,71 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package data;
+
+import java.util.*;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.revwalk.RevCommit;
+
+/**
+ * A container of changes that are considered as fixes for issues.
+ *
+ * <p>{@code dates} is keyed by the commit id in its string form; both lookup methods return an
+ * empty map, never {@code null}, when nothing is stored for the requested commit.
+ *
+ * @author Oscar Svensson
+ */
+public class Issues {
+
+  public Set<RevCommit> revisions;
+  public Map<String, Map<String, String>> dates;
+
+  /**
+   * Return the three dates for a change. The dates are the commit date, the resolution date and the
+   * creation date.
+   *
+   * @param commit the referencing commit.
+   * @return a map containing all dates, or an empty map if the commit is unknown.
+   */
+  public Map<String, String> get(RevCommit commit) {
+    // Delegate so that both overloads treat unknown commits the same way.
+    return get(ObjectId.toString(commit.toObjectId()));
+  }
+
+  /**
+   * Return the three dates for a change. The dates are the commit date, the resolution date and the
+   * creation date.
+   *
+   * @param commit the id of the referencing commit, as a string.
+   * @return a map containing all dates, or an empty map if the commit is unknown.
+   */
+  public Map<String, String> get(String commit) {
+    if (dates == null) {
+      return new HashMap<>();
+    }
+
+    Map<String, String> found = dates.get(commit);
+    if (found == null) {
+      // Covers both a missing key and a key that was stored with a null value.
+      return new HashMap<>();
+    }
+    return found;
+  }
+}

+ 215 - 0
code/szz/src/main/java/diff/DiffingLines.java

@@ -0,0 +1,215 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package diff;
+
+import java.io.*;
+import java.util.*;
+import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.diff.DiffEntry;
+import org.eclipse.jgit.diff.Edit;
+import org.eclipse.jgit.diff.EditList;
+import org.eclipse.jgit.diff.RawText;
+import org.eclipse.jgit.lib.AbbreviatedObjectId;
+import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.storage.pack.PackConfig;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+
+/**
+ * A diff class that gives the diffing lines between two revisions.
+ *
+ * <p>For a {@link DiffEntry} and its {@link EditList} the changed lines, together with up to
+ * {@code customContext} surrounding lines, are collected into a {@link DiffLines} container.
+ *
+ * @author Oscar Svensson
+ */
+public class DiffingLines {
+
+  private int customContext; // number of context lines taken around an edit; never negative
+
+  private Repository repo = null; // repository the file contents are loaded from
+
+  public class DiffLines { // result container; each entry is {line index as text, line content}
+    public List<String[]> insertions = new LinkedList<>();
+    public List<String[]> deletions = new LinkedList<>();
+
+    /*
+     * Get the lines as a JSON.
+     *
+     * The object has the keys "add" and "delete". Each of them holds a flat array in which
+     * the two elements of every stored entry follow each other (first element, then second).
+     */
+    public JSONObject getJSON() {
+      JSONObject diffLines = new JSONObject();
+
+      JSONArray added = new JSONArray();
+      JSONArray deleted = new JSONArray();
+
+      for (String[] lineNNumber : insertions) {
+        added.add(lineNNumber[0]);
+        added.add(lineNNumber[1]);
+      }
+      for (String[] lineNNumber : deletions) {
+        deleted.add(lineNNumber[0]);
+        deleted.add(lineNNumber[1]);
+      }
+
+      diffLines.put("add", added);
+      diffLines.put("delete", deleted);
+      return diffLines;
+    }
+
+    public String toString() { // always the empty string, whatever the container holds
+      return "";
+    }
+  }
+
+  public DiffingLines(Repository repo, int customContext) { // throws IllegalStateException on a negative context
+    this.repo = repo;
+
+    if (customContext < 0) {
+      throw new IllegalStateException("Custom Context can't be lower than 0!!");
+    }
+
+    this.customContext = customContext;
+  }
+
+  /**
+   * Extract the RawText object from a id.
+   *
+   * <p>Any exception raised while opening or loading the object is swallowed and reported as
+   * {@code null}.
+   *
+   * <p>NOTE(review): the callers pass {@code DiffEntry.getOldId()} / {@code getNewId()}, which
+   * presumably identify file contents rather than a commit - confirm and adjust the wording below.
+   *
+   * @param id the id on the object, which in this case is a commit.
+   * @return either null or a RawText object.
+   */
+  private RawText toRaw(AbbreviatedObjectId id) {
+    try {
+      ObjectLoader loader = this.repo.open(id.toObjectId());
+      return RawText.load(loader, PackConfig.DEFAULT_BIG_FILE_THRESHOLD);
+    } catch (Exception e) {
+      return null;
+    }
+  }
+
+  /**
+   * Get the formatted diff between two commits.
+   *
+   * <p>Edits lying close to each other (see {@code last}) are handled as one group, and the
+   * group is widened by {@code customContext} lines on both sides, clamped to the file limits.
+   *
+   * <p>NOTE(review): unchanged context lines are appended to {@code insertions}, using the index
+   * and text of the old file - confirm that this is intended.
+   *
+   * @param entry the DiffEntry object containing the real diff.
+   * @param edits the edited chunks.
+   * @return the Diffing lines with line numbers; empty if either side could not be loaded.
+   */
+  public DiffLines getDiffingLines(DiffEntry entry, EditList edits) throws IOException, GitAPIException {
+    /*
+     * Access the RawText objects for the old and the new entry.
+     */
+    RawText old = toRaw(entry.getOldId());
+    RawText present = toRaw(entry.getNewId());
+
+    /*
+     * If either side could not be loaded (toRaw returned null), there is nothing to compare
+     * and an empty result is returned.
+     */
+    if (old == null || present == null) return new DiffLines();
+
+    DiffLines lines = new DiffLines();
+
+    int i = 0;
+    /*
+     * Loop through all edits. Note that i is also advanced inside the inner loop.
+     */
+    while (i < edits.size()) {
+        Edit first = edits.get(i);
+        int last = last(edits, i);
+        Edit second = edits.get(last);
+
+        /*
+         * Get the limits for the change in the old file.
+         */
+        int firstIndex = first.getBeginA() - customContext;
+        int firstEnd = second.getEndA() + customContext;
+
+        /*
+         * Get the limits for the change in the new file.
+         */
+        int secondIndex = first.getBeginB() - customContext;
+        int secondEnd = second.getEndB() + customContext;
+
+        /*
+         * Are they out of boundary? Clamp to [0, number of lines].
+         */
+        firstIndex = 0 > firstIndex ? 0 : firstIndex;
+        firstEnd = old.size() < firstEnd ? old.size() : firstEnd;
+
+        secondIndex = 0 > secondIndex ? 0 : secondIndex;
+        secondEnd = present.size() < secondEnd ? present.size() : secondEnd;
+
+        /*
+         * Loop through both revisions parallel.
+         *
+         * Per iteration at most one line is recorded: a context line (both indices advance),
+         * a line removed from the old file, or a line added in the new file.
+         */
+        while (firstIndex < firstEnd || secondIndex < secondEnd) {
+            String[] info = null;
+
+            if (firstIndex < first.getBeginA() || last + 1 < i) { // before the current edit, or i already ran past the group
+                info = new String[]{Integer.toString(firstIndex), old.getString(firstIndex)};
+                lines.insertions.add(info);
+                firstIndex+=1;
+                secondIndex+=1;
+            } else if (firstIndex < first.getEndA()) { // inside the old-side range of the edit
+                info = new String[]{Integer.toString(firstIndex), old.getString(firstIndex)};
+                lines.deletions.add(info);
+                firstIndex+=1;
+            } else if (secondIndex < first.getEndB()) { // inside the new-side range of the edit
+                info = new String[]{Integer.toString(secondIndex), present.getString(secondIndex)};
+                lines.insertions.add(info);
+                secondIndex+=1;
+            }
+
+            /*
+             * Check if there is a gap between the next diff.
+             *
+             * Once both sides of the current edit are consumed, i is incremented (even when
+             * no further edit exists) and, if one exists, it becomes the current edit.
+             */
+            if (firstIndex >= first.getEndA() &&
+                secondIndex >= first.getEndB() &&
+                ++i < edits.size()){
+                first = edits.get(i);
+            }
+        }
+    }
+    return lines;
+  }
+
+  /**
+   * Get the last index that is common for two edits.
+   *
+   * <p>Starting at {@code i}, following edits belong to the same group as long as the distance
+   * to their predecessor is at most {@code 2*customContext} on both the A and the B side. The
+   * index of the final edit of that group is returned; it is {@code i} itself when the next
+   * edit is further away or when there is no next edit.
+   */
+  private int last(List<Edit> edits, int i) {
+     Edit first = null;
+     Edit second = null;
+     int last = i + 1;
+
+     while (true) {
+        if (last >= edits.size()) return last - 1;
+
+        first = edits.get(last);
+        second = edits.get(last-1);
+
+        if (first.getBeginA() - second.getEndA() > 2*customContext ||
+            first.getBeginB() - second.getEndB() > 2*customContext)
+            return last - 1;
+
+        last++;
+     }
+  }
+}

+ 202 - 0
code/szz/src/main/java/diff/SimplePartition.java

@@ -0,0 +1,202 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package diff;
+
+import java.io.*;
+import java.util.*;
+import org.apache.commons.io.FilenameUtils;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+/**
+ * A class which partitions the commits into evenly sized partitions and merges the results that
+ * were produced for the partitions.
+ *
+ * <p>All readers and writers are opened with try-with-resources so that no file handle is left
+ * open, also when parsing or writing fails.
+ *
+ * @author Oscar Svensson
+ */
+public class SimplePartition {
+
+  private static final String ANNOTATIONPATH = "annotations.json",
+      COMMITSPATH = "commits.json",
+      FIXINTRODUCERSPATH = "fix_and_introducers_pairs.json",
+      SUBFIXINTRODUCERSPATH = "fix_and_introducers_pairs_%d.json";
+
+  /**
+   * Parse a single JSON file and close it again.
+   *
+   * @param parser the parser to use.
+   * @param path the file to read.
+   * @return the parsed JSON value.
+   */
+  private static Object parseFile(JSONParser parser, String path)
+      throws IOException, ParseException {
+    try (FileReader reader = new FileReader(path)) {
+      return parser.parse(reader);
+    }
+  }
+
+  /**
+   * Write a string to a file, replacing its content. Failures are only printed.
+   *
+   * @param path the file to write.
+   * @param content the text to store.
+   */
+  private static void writeFile(String path, String content) {
+    try (FileWriter writer = new FileWriter(path)) {
+      writer.write(content);
+      writer.flush();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Split the top level entries of a JSON object into {@code partitions} files.
+   *
+   * <p>The first {@code size % partitions} files get one entry more than the others.
+   *
+   * @param partitions the number of files to produce.
+   * @param path the JSON file holding the object to split.
+   * @param resPath the existing directory the part files are written to.
+   * @return the paths of the written files, or an empty list if reading or writing failed.
+   */
+  private static List<String> splitJSON(int partitions, String path, String resPath) {
+
+    List<String> paths = new ArrayList<>();
+    JSONParser parser = new JSONParser();
+    try {
+      JSONObject issues = (JSONObject) parseFile(parser, path);
+
+      int size = issues.keySet().size();
+
+      int div = size / partitions;
+      int mod = size % partitions;
+
+      List<String> keys = new ArrayList<>(issues.keySet());
+
+      for (int i = 0; i < partitions; i++) {
+        /*
+         * Partition i covers the keys [start, end).
+         */
+        int start = i * div + Math.min(i, mod);
+        int end = (i + 1) * div + Math.min(i + 1, mod);
+
+        JSONObject chunkObject = new JSONObject();
+        for (int k = start; k < end; k++) {
+          String key = keys.get(k);
+          chunkObject.put(key, issues.get(key));
+        }
+
+        String chunkPath = resPath + "/" + String.format(SUBFIXINTRODUCERSPATH, i);
+
+        try (FileWriter writer = new FileWriter(chunkPath)) {
+          writer.write(chunkObject.toJSONString());
+          writer.flush();
+        }
+
+        paths.add(chunkPath);
+      }
+
+    } catch (IOException | ParseException e) {
+      e.printStackTrace();
+      return new LinkedList<>();
+    }
+
+    return paths;
+  }
+
+  /**
+   * Split a file into {@code partitions} parts that are stored in a new directory.
+   *
+   * <p>Only files with the extension {@code json} are supported. An empty list is returned, and a
+   * message is printed, if {@code path} is not a file, if {@code resPath} already exists or if the
+   * file type is unknown.
+   *
+   * @param partitions the number of parts.
+   * @param path the file to split.
+   * @param resPath the directory to create for the parts.
+   * @return the paths of the parts, or an empty list on failure.
+   */
+  public static List<String> splitFile(int partitions, String path, String resPath) {
+    File f = new File(path);
+    if (!f.isFile()) {
+      System.err.println("Path doesn't exists...");
+      return new LinkedList<>();
+    }
+
+    File resDir = new File(resPath);
+    if (resDir.exists()) {
+      System.err.println(String.format("%s already exists!", resPath));
+      return new LinkedList<>();
+    }
+
+    resDir.mkdirs();
+
+    String extension = FilenameUtils.getExtension(path);
+    if (!extension.equals("json")) {
+      System.err.println("Unknown filetype...");
+      return new LinkedList<>();
+    }
+
+    return SimplePartition.splitJSON(partitions, path, resPath);
+  }
+
+  /**
+   * Merge the result files of several result directories into one set of files.
+   *
+   * <p>From every directory the commits, the annotations and the fix/introducer pairs are read.
+   * Directories that don't exist are skipped with a message; a directory whose files can't be read
+   * or parsed contributes what was read before the failure.
+   *
+   * @param resPaths the result directories to merge.
+   * @param resPath the directory the merged files are written to.
+   */
+  public static void mergeFiles(List<String> resPaths, String resPath) {
+    List<JSONObject> commits = new LinkedList<>();
+    List<JSONObject> annotations = new LinkedList<>();
+    List<JSONArray> fix_and_introducers_pairs = new LinkedList<>();
+
+    JSONParser parser = new JSONParser();
+    for (String path : resPaths) {
+      File dir = new File(path);
+      if (dir.exists() && dir.isDirectory()) {
+        try {
+          commits.add((JSONObject) parseFile(parser, path + "/" + COMMITSPATH));
+          annotations.add((JSONObject) parseFile(parser, path + "/" + ANNOTATIONPATH));
+          fix_and_introducers_pairs.add(
+              (JSONArray) parseFile(parser, path + "/" + FIXINTRODUCERSPATH));
+        } catch (IOException | ParseException e) {
+          e.printStackTrace();
+        }
+      } else {
+        System.err.println(path + " doesn't exist! Omitting..");
+      }
+    }
+
+    /*
+     * Merge all commits into a single object and write it to a single file.
+     */
+    JSONObject commitObject = new JSONObject();
+    for (JSONObject commit : commits) {
+      commitObject.putAll(commit);
+    }
+    writeFile(resPath + "/" + COMMITSPATH, commitObject.toJSONString());
+
+    /*
+     * The same for the annotations.
+     */
+    JSONObject annotationsObject = new JSONObject();
+    for (JSONObject annotation : annotations) {
+      annotationsObject.putAll(annotation);
+    }
+    writeFile(resPath + "/" + ANNOTATIONPATH, annotationsObject.toJSONString());
+
+    /*
+     * The pairs are arrays and are concatenated in the order of the directories.
+     */
+    JSONArray pairObject = new JSONArray();
+    for (JSONArray pair : fix_and_introducers_pairs) {
+      pairObject.addAll(pair);
+    }
+    writeFile(resPath + "/" + FIXINTRODUCERSPATH, pairObject.toJSONString());
+  }
+}

+ 76 - 0
code/szz/src/main/java/graph/AnnotationMap.java

@@ -0,0 +1,76 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package graph;
+
+import java.io.*;
+import java.io.IOException;
+import java.util.*;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+
+/**
+ * A map holding the annotations between commits, which can be stored as JSON.
+ *
+ * @author Oscar Svensson
+ */
+public class AnnotationMap<K, V> extends HashMap<K, V> {
+
+  /** Creates an empty AnnotationMap. */
+  public AnnotationMap() {
+    super();
+  }
+
+  /**
+   * Stores this map as the file {@code annotations.json} inside a directory.
+   *
+   * <p>Every key is cast to a String and every value to a list of {@link FileAnnotationGraph};
+   * the file maps each key to the array of its graphs. A failing write is only printed.
+   *
+   * @param path the path to the directory where the JSON file will be written.
+   * @throws IllegalArgumentException if {@code path} is null.
+   */
+  public void saveToJSON(String path) {
+    JSONObject tree = new JSONObject();
+
+    for (Map.Entry<K, V> mapping : entrySet()) {
+      String commit = (String) mapping.getKey();
+      List<FileAnnotationGraph> fileGraphs = (List<FileAnnotationGraph>) mapping.getValue();
+
+      JSONArray graphArray = new JSONArray();
+      for (FileAnnotationGraph fileGraph : fileGraphs) {
+        graphArray.add(fileGraph.getGraphJSON());
+      }
+
+      tree.put(commit, graphArray);
+    }
+
+    if (path == null) {
+      throw new IllegalArgumentException("Can't save AnnotationMap to a null path...");
+    }
+
+    String target = path + "/" + "annotations.json";
+    try (FileWriter out = new FileWriter(target)) {
+      out.write(tree.toJSONString());
+      out.flush();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+}

+ 117 - 0
code/szz/src/main/java/graph/FileAnnotationGraph.java

@@ -0,0 +1,117 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package graph;
+
+import java.util.*;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+
+/**
+ * A class that represents an Annotation graph.
+ *
+ * <p>For a source revision and given a filepath, it stores all revisions that have made a change on
+ * that file before the source revision.
+ *
+ * @author Oscar Svensson
+ */
+public class FileAnnotationGraph {
+  public String filePath;
+
+  public LinkedList<String> revisions;
+  public Map<String, Map<Integer, Integer>> mappings;
+  public Map<String, FileAnnotationGraph> sub_graphs;
+
+  /**
+   * Get line mapping for a specific revision to its successor. Returns an empty map if the given
+   * revision is the first aka the one that was used to create this graph.
+   *
+   * @param revision the revision which should be mapped to its successor.
+   * @return a map containing all line numbers that corresponds to the lines in the successor.
+   */
+  public Map<Integer, Integer> getLineMapping(String revision) {
+    // Compare by content: equal revision strings are usually distinct String objects.
+    if (Objects.equals(revision, revisions.getFirst())) {
+      System.err.println("Revision must have a successor! Return empty map..");
+      return new LinkedHashMap<>();
+    }
+    return this.mappings.get(revision);
+  }
+
+  /**
+   * Return the revisions that have made changes to this file.
+   *
+   * @return a LinkedList containing the revisions in order.
+   */
+  public LinkedList<String> getRevisions() {
+    return this.revisions;
+  }
+
+  /**
+   * Get the associated file path. This symbolises the file that all revisions in this graph have in
+   * common.
+   *
+   * @return a String containing the file path to the annotated file.
+   */
+  public String getFilePath() {
+    return this.filePath;
+  }
+
+  /**
+   * Build the JSON representation of this graph.
+   *
+   * <p>The object has the keys {@code filePath}, {@code revisions} (an array in list order),
+   * {@code mappings} (one object of line mappings per revision) and {@code subgraphs} (the JSON of
+   * every sub graph, built recursively).
+   *
+   * @return the graph as a JSONObject.
+   */
+  public JSONObject getGraphJSON() {
+    JSONObject tree = new JSONObject();
+
+    tree.put("filePath", this.filePath);
+
+    JSONArray revisionArray = new JSONArray();
+    for (String rev : this.revisions) {
+      revisionArray.add(rev);
+    }
+
+    tree.put("revisions", revisionArray);
+
+    JSONObject jsonLineMappings = new JSONObject();
+    for (Map.Entry<String, Map<Integer, Integer>> revMapping : this.mappings.entrySet()) {
+      JSONObject lineMappingsObject = new JSONObject();
+
+      for (Map.Entry<Integer, Integer> lineMapping : revMapping.getValue().entrySet()) {
+        lineMappingsObject.put(lineMapping.getKey(), lineMapping.getValue());
+      }
+      jsonLineMappings.put(revMapping.getKey(), lineMappingsObject);
+    }
+
+    tree.put("mappings", jsonLineMappings);
+
+    JSONObject subGraphs = new JSONObject();
+    for (Map.Entry<String, FileAnnotationGraph> entry : this.sub_graphs.entrySet()) {
+      subGraphs.put(entry.getKey(), entry.getValue().getGraphJSON());
+    }
+
+    tree.put("subgraphs", subGraphs);
+
+    return tree;
+  }
+}

+ 53 - 0
code/szz/src/main/java/heuristics/BugFinderFactory.java

@@ -0,0 +1,53 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package heuristics;
+
+import data.Issues;
+import org.eclipse.jgit.lib.Repository;
+import util.Configuration;
+
+/**
+ * Factory class to get different bugfinders.
+ *
+ * @author Oscar Svensson
+ */
+public class BugFinderFactory {
+
+  public static final int SIMPLE = 1;
+  public static final int DISTANCE = 2;
+
+  public static BugIntroducerFinder getFinder(Repository repo, Issues issues) {
+    Configuration conf = Configuration.getInstance();
+    switch (conf.getBugFinder()) {
+      case SIMPLE:
+        return new SimpleBugIntroducerFinder(
+            issues, repo, conf.getDepth(), conf.getPartialFixPattern());
+      case DISTANCE:
+        return new DistanceIntroducerFinder(repo, conf.getDiffCustomContext());
+    }
+
+    return null;
+  }
+}

+ 41 - 0
code/szz/src/main/java/heuristics/BugIntroducerFinder.java

@@ -0,0 +1,41 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package heuristics;
+
+import graph.AnnotationMap;
+import graph.FileAnnotationGraph;
+import java.io.*;
+import java.util.*;
+import org.eclipse.jgit.api.errors.GitAPIException;
+
+/**
+ * An interface for all bug introduce finders.
+ *
+ * @author Oscar Svensson
+ */
+public interface BugIntroducerFinder {
+  public List<String[]> findBugIntroducingCommits(
+      AnnotationMap<String, List<FileAnnotationGraph>> graphs) throws IOException, GitAPIException;
+}

+ 220 - 0
code/szz/src/main/java/heuristics/DistanceIntroducerFinder.java

@@ -0,0 +1,220 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package heuristics;
+
+import data.Issues;
+import diff.DiffingLines.DiffLines;
+import graph.AnnotationMap;
+import graph.FileAnnotationGraph;
+import info.debatty.java.stringsimilarity.Jaccard;
+import java.io.*;
+import java.util.*;
+import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import parser.Commit;
+import util.CommitUtil;
+
+/**
+ * A bug introduce finder which tries to check if a change is only cosmetic. It compares the lines
+ * deleted by a fix commit with the corresponding lines of earlier revisions and reports the
+ * revision with the smallest average Jaccard distance.
+ *
+ * @author Oscar Svensson
+ */
+public class DistanceIntroducerFinder implements BugIntroducerFinder {
+
+  private final Repository repo;
+  private final CommitUtil util;
+
+  /** An abstraction of a distance between two revisions. */
+  private static final class RevisionDistance {
+    public double distance;
+    public List<String[]> updatedDiffLines;
+  }
+
+  /**
+   * Creates a finder for the given repository.
+   *
+   * @param repo the repository used to resolve revision strings into commits.
+   * @param customContext forwarded unchanged to the CommitUtil constructor.
+   */
+  public DistanceIntroducerFinder(Repository repo, int customContext) {
+    // stringToRev needs the repository. Without this assignment every lookup fails and each
+    // fix commit is silently skipped.
+    this.repo = repo;
+    this.util = new CommitUtil(repo, customContext);
+  }
+
+  /**
+   * Wrapper around Integer.parseInt that never throws.
+   *
+   * @param value the string to convert to an int.
+   * @return the parsed value, or -1 when the string is not a valid int.
+   */
+  private int parseInt(String value) {
+    try {
+      return Integer.parseInt(value);
+    } catch (NumberFormatException e) {
+      return -1;
+    }
+  }
+
+  /**
+   * Resolves a revision string into a commit.
+   *
+   * @param rev the revision string to resolve.
+   * @return the parsed commit, or null when the revision cannot be resolved or parsed.
+   */
+  private RevCommit stringToRev(String rev) {
+    try {
+      return this.repo.parseCommit(this.repo.resolve(rev));
+    } catch (Exception e) {
+      // Best effort: callers skip revisions that cannot be resolved.
+      return null;
+    }
+  }
+
+  /**
+   * Compute the Jaccard distance between two revisions.
+   *
+   * <p>An entry of {@code current} is skipped when its line index cannot be parsed, its line text
+   * is missing, the line has no mapping, or the mapped index is outside of {@code other}. Skipped
+   * entries add nothing to the sum but still count in the divisor of the average.
+   *
+   * @param current a list containing the deletions made by a bugfixing commit. On the format
+   *     [{lineIndex, Line},...]
+   * @param other the lines from the revision to compare with.
+   * @param lineMapping the linemapping between the two revisions.
+   * @return the average distance between the two revisions and a list with the updated indexes.
+   */
+  private RevisionDistance compareTwoSections(
+      List<String[]> current, List<String> other, Map<Integer, Integer> lineMapping) {
+
+    RevisionDistance dist = new RevisionDistance();
+    dist.updatedDiffLines = new LinkedList<>();
+    double distance = 0.0;
+
+    Jaccard j2 = new Jaccard(2);
+
+    for (String[] entry : current) {
+      int lineId = parseInt(entry[0]);
+      if (lineId < 0 || entry.length < 2 || entry[1] == null) continue;
+
+      Integer otherId = lineMapping == null ? null : lineMapping.get(lineId);
+      if (otherId == null || other == null || otherId < 0 || otherId >= other.size()) continue;
+
+      String otherLine = other.get(otherId);
+      if (otherLine == null) continue;
+
+      distance += j2.distance(entry[1], otherLine);
+
+      // Carry the line forward under the index it has in the other revision.
+      dist.updatedDiffLines.add(new String[] {Integer.toString(otherId), entry[1]});
+    }
+
+    dist.distance = current.size() > 0 ? (distance / current.size()) : distance;
+
+    return dist;
+  }
+
+  /**
+   * Provided with a map with changes, the method finds potential bugintroducing commits by tracing
+   * the lines deleted by each fix commit backwards through the revisions of the file graph.
+   *
+   * <p>For every file graph the first revision (the fix itself) is skipped. The second revision is
+   * the default answer and every later revision replaces it when its distance to the deleted
+   * lines is smaller than the smallest distance seen so far (starting at 1.0).
+   *
+   * @param graphs the annotation graphs keyed by the hash of the fix commit.
+   * @return a list of pairs on the format {fix commit, suspected bug introducing commit}.
+   */
+  @Override
+  public List<String[]> findBugIntroducingCommits(
+      AnnotationMap<String, List<FileAnnotationGraph>> graphs) throws IOException, GitAPIException {
+
+    List<String[]> bugIntroducers = new LinkedList<>();
+    for (Map.Entry<String, List<FileAnnotationGraph>> entry : graphs.entrySet()) {
+
+      RevCommit sCommit = stringToRev(entry.getKey());
+      if (sCommit == null) continue;
+
+      List<FileAnnotationGraph> files = entry.getValue();
+      Commit source = this.util.getCommitDiffingLines(sCommit);
+
+      /*
+       * Grab the first commit and check what has changed. Take the deletions and trace back to
+       * when they were first introduced.
+       *
+       * The commit that introduced them the first time will be considered as the bug
+       * introducing commit.
+       */
+      for (FileAnnotationGraph graph : files) {
+        // A file may come without a graph; there is nothing to trace in that case.
+        if (graph == null) continue;
+
+        /*
+         * Only check the lines that were removed.
+         */
+        DiffLines diffLines = source.diffWithParent.get(graph.filePath);
+        if (diffLines == null) continue;
+        List<String[]> deletions = diffLines.deletions;
+
+        /*
+         * Throw away the first revision aka the source commit.
+         */
+        Iterator<String> revisions = graph.revisions.iterator();
+        if (!revisions.hasNext()) continue;
+        revisions.next();
+        if (!revisions.hasNext()) continue;
+
+        /*
+         * Extract the revision after the source. This revision should include the deleted lines.
+         */
+        String prevRevision = revisions.next();
+
+        /*
+         * Now check when the deleted lines were added aka when the lines change between the
+         * revisions.
+         */
+        double smallest = 1.0;
+        String smallestDistCommit = prevRevision;
+
+        while (revisions.hasNext()) {
+          String revision = revisions.next();
+          RevCommit next = stringToRev(revision);
+          if (next == null) continue;
+
+          List<String> nextLines = this.util.getFileLines(next.getTree(), graph.filePath);
+
+          Map<Integer, Integer> lineMapping = graph.getLineMapping(revision);
+
+          /*
+           * Pick the corresponding lines from the previous revisions.
+           *
+           * TODO: Make a better measurement and also check subgraphs, this measurement isn't
+           * really valid.
+           */
+          RevisionDistance distance = compareTwoSections(deletions, nextLines, lineMapping);
+
+          /*
+           * The commit with the lowest jaccard distance will be considered as the one that
+           * introduced the deleted lines.
+           */
+          if (distance.distance < smallest) {
+            smallest = distance.distance;
+            smallestDistCommit = revision;
+          }
+
+          // Continue with the lines as they are indexed in the revision just compared.
+          deletions = distance.updatedDiffLines;
+        }
+
+        bugIntroducers.add(new String[] {entry.getKey(), smallestDistCommit});
+      }
+
+      /*
+       * TODO: Misses if semantics changes. Only checks as long as the line hasn't changed at
+       * all. Check if the file has been modified between revisions.
+       */
+    }
+
+    return bugIntroducers;
+  }
+}

+ 227 - 0
code/szz/src/main/java/heuristics/SimpleBugIntroducerFinder.java

@@ -0,0 +1,227 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package heuristics;
+
+import data.Issues;
+import graph.AnnotationMap;
+import graph.FileAnnotationGraph;
+import java.io.*;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import util.RevisionCombinationGenerator;
+
+/**
+ * A simple bug introduce finder as described by Zeller et al.
+ *
+ * @author Oscar Svensson
+ */
+public class SimpleBugIntroducerFinder implements BugIntroducerFinder {
+
+  private final Issues issues;
+  private final Repository repo;
+  private final int depth;
+  private final Pattern partialFixPattern;
+
+  /**
+   * Creates a finder.
+   *
+   * @param issues the issues, looked up by fix commit to obtain the "creationdate" entry.
+   * @param repo the repository used to parse commits.
+   * @param depth how many levels of sub graphs getSubGraphs should collect.
+   * @param partialFixPattern a regular expression matched against commit messages to recognise
+   *     partial fixes.
+   */
+  public SimpleBugIntroducerFinder(
+      Issues issues, Repository repo, int depth, String partialFixPattern) {
+    this.issues = issues;
+    this.repo = repo;
+    this.depth = depth;
+
+    this.partialFixPattern = Pattern.compile(partialFixPattern);
+  }
+
+  /**
+   * Check if a commit is within a fix timeframe, i.e. if it was committed before the issue of the
+   * fix was created.
+   *
+   * @param fix the commit containing the fix.
+   * @param commit the potential bug introducing commit.
+   * @return true if the committer date of the commit lies before the "creationdate" of the issue;
+   *     false otherwise, and also when the issue or its date is missing or cannot be parsed.
+   */
+  private boolean isWithinTimeframe(String fix, String commit) throws IOException, GitAPIException {
+    Map<String, String> dates = this.issues.get(fix);
+    if (dates == null) {
+      // No issue data for this fix: treat it like an unparsable date instead of crashing.
+      return false;
+    }
+
+    RevCommit rCommit = this.repo.parseCommit(this.repo.resolve(commit));
+    Date revisionDate = rCommit.getCommitterIdent().getWhen();
+
+    String creationDateString = dates.get("creationdate");
+    if (creationDateString == null) {
+      System.err.println("No creationdate found for fix " + fix);
+      return false;
+    }
+
+    // SimpleDateFormat is not thread safe, so a fresh instance is used for every call.
+    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
+    Date issueCreationDate;
+    try {
+      issueCreationDate = format.parse(creationDateString);
+    } catch (java.text.ParseException e) {
+      System.err.println("Unparsable creationdate '" + creationDateString + "': " + e);
+      return false;
+    }
+
+    return revisionDate.before(issueCreationDate);
+  }
+
+  /**
+   * Check if a commit is a partial fix, i.e. if its commit message matches the partial fix
+   * pattern.
+   *
+   * @param commit the revision string of the commit to check.
+   * @return true if the pattern is found in the full commit message.
+   */
+  private boolean isPartialFix(String commit) throws IOException, GitAPIException {
+    RevCommit rCommit = this.repo.parseCommit(this.repo.resolve(commit));
+    String message = rCommit.getFullMessage();
+
+    // Match against the message. Matching against the revision string itself would only ever
+    // test the hash.
+    Matcher fixMatch = this.partialFixPattern.matcher(message);
+
+    return fixMatch.find();
+  }
+
+  /**
+   * Collects the given graphs together with their sub graphs.
+   *
+   * <p>A depth of 1 gives an empty collection and a depth of 2 gives the root graphs themselves.
+   * For larger depths the sub graphs are collected recursively as well.
+   *
+   * <p>NOTE(review): for depths above 2 the direct sub graphs are added both by the recursive call
+   * and by the explicit addAll, so the result can contain duplicates. This is kept as it is.
+   *
+   * @param root the graphs to start from.
+   * @param depth the number of levels to collect.
+   * @return a new collection with the collected graphs.
+   */
+  private Collection<FileAnnotationGraph> getSubGraphs(
+      Collection<FileAnnotationGraph> root, int depth) {
+    Collection<FileAnnotationGraph> sub = new LinkedList<>();
+    if (depth == 1) return sub;
+
+    for (FileAnnotationGraph subGraph : root) {
+      // Files without a graph have nothing to contribute.
+      if (subGraph == null) continue;
+
+      if (depth > 2) {
+        sub.addAll(getSubGraphs(subGraph.sub_graphs.values(), depth - 1));
+        sub.addAll(subGraph.sub_graphs.values());
+      }
+      sub.add(subGraph);
+    }
+    return sub;
+  }
+
+  /** Appends a value to the list stored under the key, creating the list when needed. */
+  private static void addToBucket(Map<String, List<String>> buckets, String key, String value) {
+    buckets.computeIfAbsent(key, k -> new ArrayList<>()).add(value);
+  }
+
+  /**
+   * Simple heuristics of the SZZ algorithm. Pick all commits that have made changes to a line but
+   * take into consideration if they have been made before or after the bug was reported.
+   *
+   * <p>NOTE(review): the pairs added in the first step are on the format {fix, revision}. The
+   * pairs of the later steps come from RevisionCombinationGenerator and are used here as
+   * {revision, fix}. Verify that consumers expect both orders.
+   *
+   * @param graphs a graph containing all reported bugfixes.
+   * @return the pairs of fix commits and suspected bug introducing commits.
+   */
+  @Override
+  public List<String[]> findBugIntroducingCommits(
+      AnnotationMap<String, List<FileAnnotationGraph>> graphs) throws IOException, GitAPIException {
+
+    List<String[]> bugIntroducers = new LinkedList<>();
+
+    Map<String, List<String>> bucketIntroducers = new HashMap<String, List<String>>();
+    Map<String, List<String>> bucketIssues = new HashMap<String, List<String>>();
+
+    for (Map.Entry<String, List<FileAnnotationGraph>> entry : graphs.entrySet()) {
+
+      String sCommitString = entry.getKey();
+      List<FileAnnotationGraph> files = entry.getValue();
+
+      /*
+       * Grab all commits that are seen as fixes or that have changed anything.
+       * How many layers are included is decided by the configured depth.
+       */
+      Collection<FileAnnotationGraph> subGraphs = getSubGraphs(files, this.depth);
+      subGraphs.addAll(files);
+
+      for (FileAnnotationGraph fileGraph : subGraphs) {
+        if (fileGraph == null) continue;
+
+        // The first revision is the fix itself, so it is skipped.
+        Iterator<String> revisions = fileGraph.revisions.iterator();
+        if (!revisions.hasNext()) continue;
+        revisions.next();
+
+        while (revisions.hasNext()) {
+          String rev = revisions.next();
+
+          /*
+           * Check if the timestamp is within the timeframe or not.
+           */
+          if (isWithinTimeframe(sCommitString, rev)) {
+            bugIntroducers.add(new String[] {sCommitString, rev});
+          } else {
+            addToBucket(bucketIntroducers, fileGraph.filePath, rev);
+            addToBucket(bucketIssues, fileGraph.filePath, sCommitString);
+          }
+        }
+      }
+    }
+
+    Map<String, List<String>> partialIntroducers = new HashMap<String, List<String>>();
+    Map<String, List<String>> partialIssues = new HashMap<String, List<String>>();
+    /*
+     * Now check if any of the potential bugintroducing commits are bugintroducers for any other
+     * fix commit, aka weak suspects.
+     * This check should be made smarter...
+     */
+    for (Map.Entry<String, List<String>> entry : bucketIntroducers.entrySet()) {
+      List<String> introducers = entry.getValue();
+      List<String> fixes = bucketIssues.get(entry.getKey());
+
+      RevisionCombinationGenerator gen = new RevisionCombinationGenerator(introducers, fixes, 2);
+      List<String[]> revisions = gen.generateRevIssuePairs();
+      for (String[] pair : revisions) {
+        if (isWithinTimeframe(pair[1], pair[0])) {
+          bugIntroducers.add(pair);
+        } else {
+          addToBucket(partialIntroducers, entry.getKey(), pair[0]);
+          addToBucket(partialIssues, entry.getKey(), pair[1]);
+        }
+      }
+    }
+
+    /*
+     * Now check for partial fixes. If a commit is flagged as a fix, it is a candidate to be a
+     * partial fix.
+     */
+    for (Map.Entry<String, List<String>> suspects : partialIntroducers.entrySet()) {
+      List<String> introducers = suspects.getValue();
+      List<String> fixes = partialIssues.get(suspects.getKey());
+
+      RevisionCombinationGenerator gen = new RevisionCombinationGenerator(introducers, fixes, 2);
+      List<String[]> revisions = gen.generateRevIssuePairs();
+
+      for (String[] pair : revisions) {
+        if (isPartialFix(pair[0])) {
+          bugIntroducers.add(pair);
+        }
+      }
+    }
+
+    /*
+     * All other pairs that haven't been flagged as bug introducers are said to be hard suspects.
+     */
+
+    return bugIntroducers;
+  }
+}

+ 134 - 0
code/szz/src/main/java/parser/Commit.java

@@ -0,0 +1,134 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package parser;
+
+import diff.DiffingLines.DiffLines;
+import java.util.*;
+import org.eclipse.jgit.diff.DiffEntry;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+
+/**
+ * Lightweight wrapper around a RevCommit. Besides the commit itself it carries the lines that
+ * differ from the parent commit per file, the change type per changed file and, optionally, a
+ * collection of annotating commits per file.
+ *
+ * @author Oscar Svensson
+ */
+public class Commit {
+  public RevCommit commit;
+  public Map<String, DiffLines> diffWithParent;
+  public Map<String, DiffEntry.ChangeType> changeTypes;
+  public Map<String, Collection<RevCommit>> fileAnnotations;
+
+  /**
+   * Wraps the given commit and starts with empty diff and change type maps. The file annotations
+   * are left unset.
+   *
+   * @param commit the reference to a commit.
+   */
+  public Commit(RevCommit commit) {
+    this.commit = commit;
+
+    this.diffWithParent = new HashMap<>();
+    this.changeTypes = new HashMap<>();
+  }
+
+  /**
+   * Gives the hash of the wrapped commit.
+   *
+   * @return the object id of the commit as a string.
+   */
+  public String getHashString() {
+    return ObjectId.toString(commit.toObjectId());
+  }
+
+  /**
+   * Builds a readable dump: the commit itself, followed by every changed file with its change type
+   * and diff, followed by the annotation graph when one is set.
+   */
+  @Override
+  public String toString() {
+    StringBuilder details = new StringBuilder();
+
+    if (diffWithParent != null) {
+      for (Map.Entry<String, DiffLines> fileDiff : diffWithParent.entrySet()) {
+        String path = fileDiff.getKey();
+        details.append(path).append("  ").append(changeTypes.get(path)).append("\n");
+
+        /*
+         * TODO: ADD CustomFormatter.DiffLines toString()
+         */
+
+        details.append(fileDiff.getValue().getJSON()).append("\n");
+      }
+    }
+
+    if (fileAnnotations != null) {
+      details.append("Annotation graph\n");
+      for (Map.Entry<String, Collection<RevCommit>> annotated : fileAnnotations.entrySet()) {
+        details.append(annotated.getKey()).append("\n").append("[");
+        for (RevCommit annotating : annotated.getValue()) {
+          details.append(annotating).append("->");
+        }
+        details.append("]\n");
+      }
+    }
+
+    return commit.toString() + "\n\n" + details;
+  }
+
+  /**
+   * Converts this commit into JSON. The result has a "diff" object mapping each file to an array
+   * that holds the JSON of its diff lines, and a "changes" object mapping each file to the name of
+   * its change type.
+   *
+   * @return a JSONObject containing the commit. Omits the RevCommit.
+   */
+  public JSONObject toJson() {
+    JSONObject diffByFile = new JSONObject();
+    for (Map.Entry<String, DiffLines> fileDiff : diffWithParent.entrySet()) {
+      JSONArray wrapped = new JSONArray();
+      wrapped.add(fileDiff.getValue().getJSON());
+      diffByFile.put(fileDiff.getKey(), wrapped);
+    }
+
+    JSONObject changeByFile = new JSONObject();
+    for (Map.Entry<String, DiffEntry.ChangeType> change : changeTypes.entrySet()) {
+      changeByFile.put(change.getKey(), change.getValue().toString());
+    }
+
+    JSONObject root = new JSONObject();
+    root.put("diff", diffByFile);
+    root.put("changes", changeByFile);
+    return root;
+  }
+}

+ 382 - 0
code/szz/src/main/java/parser/GitParser.java

@@ -0,0 +1,382 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package parser;
+
+import data.Issues;
+import graph.AnnotationMap;
+import graph.FileAnnotationGraph;
+import java.io.*;
+import java.util.*;
+import java.util.stream.*;
+import org.eclipse.jgit.api.BlameCommand;
+import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.blame.BlameResult;
+import org.eclipse.jgit.diff.DiffEntry;
+import org.eclipse.jgit.diff.RawText;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+import org.slf4j.Logger;
+import util.CommitUtil;
+import util.JSONUtil;
+
+/**
+ * A class which is capable to search and build line mapping graphs from a local repository. Uses
+ * JGit to parse the repository and the revision trees.
+ *
+ * @author Oscar Svensson
+ */
+public class GitParser {
+
+  // Helper created in the constructor from the repository and the custom context.
+  private CommitUtil util;
+  // The local repository built from the path given to the constructor.
+  private Repository repo;
+  // Returned by getIssues(); the constructor does not assign it.
+  private Issues issues;
+
+  // Directory for the results; the constructor falls back to DEFAULT_RES_PATH when given null.
+  private String resultPath;
+  private String DEFAULT_RES_PATH = "./results";
+
+  // Installed through useLogger(Logger).
+  private Logger logger;
+
+  // Passed as the step argument to traceFileChanges by buildLineMappingGraph.
+  private int depth;
+
+  /**
+   * The constructor for the GitParser class. It requires the repository to exist and will fail if
+   * it does not. The result directory is created if it does not exist; this also holds for the
+   * default directory that is used when no resultPath is given.
+   *
+   * @param path the path to where the local repository can be found.
+   * @param resultPath the path to where the JSON files will be written, or null to use the
+   *     default directory.
+   * @param depth stored and later passed to traceFileChanges as the number of steps to trace.
+   * @param customContext forwarded unchanged to the CommitUtil constructor.
+   * @throws IOException if the repository cannot be built from the given path.
+   */
+  public GitParser(String path, String resultPath, int depth, int customContext)
+      throws IOException, GitAPIException {
+    FileRepositoryBuilder builder = new FileRepositoryBuilder();
+    builder.setMustExist(true);
+
+    builder.addCeilingDirectory(new File(path));
+    builder.findGitDir(new File(path));
+    this.repo = builder.build();
+
+    /*
+     * Decide which result path to use before creating it, so that the default directory gets
+     * created as well.
+     */
+    if (resultPath == null) {
+      System.err.println("Resultpath not set! Using deafult directory instead.");
+      this.resultPath = this.DEFAULT_RES_PATH;
+    } else {
+      this.resultPath = resultPath;
+    }
+
+    File resDirectory = new File(this.resultPath);
+    if (!resDirectory.exists()) resDirectory.mkdirs();
+
+    this.util = new CommitUtil(this.repo, customContext);
+
+    this.depth = depth;
+  }
+
+  /** @return the path of the result directory chosen in the constructor. */
+  public String getResultPath() {
+    return resultPath;
+  }
+
+  /** @return the repository that was built in the constructor. */
+  public Repository getRepository() {
+    return repo;
+  }
+
+  /** @return the issues currently held by this parser. */
+  public Issues getIssues() {
+    return issues;
+  }
+
+  /**
+   * Stores the logger in this parser.
+   *
+   * @param logger the logger to store.
+   */
+  public void useLogger(Logger logger) {
+    this.logger = logger;
+  }
+
+  /**
+   * Map lines between one commit and another.
+   *
+   * @param foundCommit a blameresult containing information about a commit that has made changes
+   *     to a file.
+   * @param filePath the file that the commit has made changes to. Not read by this method.
+   * @return a list where position i holds the source line that the blame reports for line i of
+   *     the result contents.
+   */
+  private List<Integer> getLineMappings(BlameResult foundCommit, String filePath)
+      throws IOException, GitAPIException {
+    foundCommit.computeAll();
+    RawText blamedText = foundCommit.getResultContents();
+
+    /*
+     * Easiest solution, maybe better with a list and a pair class?
+     */
+    return IntStream.range(0, blamedText.size())
+        .map(foundCommit::getSourceLine)
+        .boxed()
+        .collect(Collectors.toCollection(LinkedList::new));
+  }
+
+  /**
+   * Looks up the source line that the blame result reports for a line.
+   *
+   * @param foundCommit the blame result to query; it is fully computed first.
+   * @param index the line to look up.
+   * @return the source line, or -1 when the index is outside of the blame result.
+   */
+  private int getSourceLine(BlameResult foundCommit, int index)
+      throws IOException, GitAPIException {
+    foundCommit.computeAll();
+
+    int sourceLine;
+    try {
+      sourceLine = foundCommit.getSourceLine(index);
+    } catch (ArrayIndexOutOfBoundsException outOfRange) {
+      sourceLine = -1;
+    }
+    return sourceLine;
+  }
+
+  /**
+   * Traces the file changes that have occurred before a given commit.
+   *
+   * <p>The lines deleted by the source commit are blamed in the first parent of the source. Every
+   * blamed commit becomes a revision of the returned graph together with a mapping from the
+   * deleted line to the source line reported by the blame. The blamed commits are then traced
+   * recursively with one step less and stored as sub graphs.
+   *
+   * @param filePath specifies which file to trace changes on.
+   * @param source the source commit from which the trace should start.
+   * @param step the number of steps left to trace; 0 stops the trace.
+   * @return the annotation graph of the file, or null when step is 0 or the source commit has no
+   *     diff for the file.
+   */
+  private FileAnnotationGraph traceFileChanges(String filePath, Commit source, int step)
+      throws IOException, GitAPIException {
+
+    if (step == 0) return null;
+    if (!source.diffWithParent.containsKey(filePath)) return null;
+
+    /*
+     * Save all line numbers for the source commits deletions. Unparsable numbers become -1.
+     */
+    List<Integer> delIndexes =
+        source
+            .diffWithParent
+            .get(filePath)
+            .deletions
+            .stream()
+            .map(s -> parseInt(s[0]))
+            .collect(Collectors.toList());
+
+    /*
+     * Create a graph to store line mappings in.
+     */
+    FileAnnotationGraph graph = new FileAnnotationGraph();
+    graph.filePath = filePath;
+    graph.revisions = new LinkedList<>();
+    graph.mappings = new HashMap<>();
+    graph.sub_graphs = new HashMap<>();
+
+    graph.revisions.add(ObjectId.toString(source.commit.toObjectId()));
+
+    // A commit without parents has nothing to blame against; getParent(0) would throw.
+    if (source.commit.getParentCount() == 0) return graph;
+
+    BlameCommand command = new BlameCommand(this.repo);
+    command.setStartCommit(source.commit.getParent(0));
+    command.setFilePath(filePath);
+
+    BlameResult found = command.call();
+    if (found == null) return graph;
+
+    Map<RevCommit, Map<Integer, Integer>> foundRevisions = new HashMap<>();
+
+    /*
+     * Grab the blamed commits and get the line numbers.
+     */
+    for (int index : delIndexes) {
+      if (index == -1) continue;
+      try {
+        // Blame the deleted line itself. The position of the line in the deletion list says
+        // nothing about which line of the file it is.
+        RevCommit foundRev = found.getSourceCommit(index);
+        int sourceLine = getSourceLine(found, index);
+
+        foundRevisions
+            .computeIfAbsent(foundRev, k -> new LinkedHashMap<>())
+            .put(index, sourceLine);
+      } catch (Exception ignored) {
+        // This means that a row didn't exist in a previous revision..
+      }
+    }
+
+    /*
+     * Save all mappings in the annotationgraph.
+     */
+    for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
+      String revSha = ObjectId.toString(rev.getKey().toObjectId());
+
+      Map<Integer, Integer> linemapping = graph.mappings.get(revSha);
+      if (linemapping == null) {
+        graph.revisions.add(revSha);
+        graph.mappings.put(revSha, rev.getValue());
+      } else {
+        // Add missing mappings.
+        for (Map.Entry<Integer, Integer> entry : rev.getValue().entrySet()) {
+          linemapping.putIfAbsent(entry.getKey(), entry.getValue());
+        }
+      }
+    }
+
+    /*
+     * Start building subgraphs. With a single step left every sub trace would return null, so
+     * the loop is skipped altogether.
+     */
+    if (step != 1) {
+      for (RevCommit blamed : foundRevisions.keySet()) {
+        Commit subCommit = this.util.getCommitDiffingLines(blamed);
+        FileAnnotationGraph subGraph = traceFileChanges(filePath, subCommit, step - 1);
+
+        // A blamed commit without a diff for this file must not stop the other commits from
+        // being traced.
+        if (subGraph == null) continue;
+        graph.sub_graphs.put(subCommit.getHashString(), subGraph);
+      }
+    }
+
+    return graph;
+  }
+
+  /**
+   * Builds the line mapping graphs for every file touched by each of the given commits.
+   *
+   * <p>For each commit, every path listed in its change types is traced with the configured depth
+   * and the resulting graphs are stored under the hash string of that commit.
+   *
+   * @param commits list of commits that should be traced.
+   * @return the map containing annotation graphs for each file change by a commit.
+   */
+  private AnnotationMap<String, List<FileAnnotationGraph>> buildLineMappingGraph(
+      List<Commit> commits) throws IOException, GitAPIException {
+    AnnotationMap<String, List<FileAnnotationGraph>> graphsByCommit = new AnnotationMap<>();
+
+    for (Commit current : commits) {
+      List<FileAnnotationGraph> perFile = new LinkedList<>();
+
+      // One graph per changed path; whatever the trace returns is stored as-is.
+      for (String changedPath : current.changeTypes.keySet()) {
+        perFile.add(traceFileChanges(changedPath, current, this.depth));
+      }
+
+      graphsByCommit.put(current.getHashString(), perFile);
+    }
+
+    return graphsByCommit;
+  }
+
+  /**
+   * Lenient integer conversion that never throws.
+   *
+   * @param value the string to convert to an int.
+   * @return the value of the string as an int, or -1 when the conversion fails.
+   */
+  private int parseInt(String value) {
+    int parsed = -1;
+    try {
+      parsed = Integer.parseInt(value);
+    } catch (Exception ignored) {
+      // Any failure falls back to the -1 sentinel.
+    }
+    return parsed;
+  }
+
+  /**
+   * Parses the diffing lines of the given commits and builds the line mapping graph for them.
+   *
+   * <p>The parsed commits are handed to {@code JSONUtil.saveFoundCommits} and the resulting mapping
+   * is saved with {@code saveToJSON}, both using the result path of this parser.
+   *
+   * @param commits a set containing references to commits.
+   * @return the line mapping graphs, keyed as produced by {@code buildLineMappingGraph}.
+   */
+  public AnnotationMap<String, List<FileAnnotationGraph>> annotateCommits(Set<RevCommit> commits)
+      throws IOException, GitAPIException {
+    // Step 1: parse the diffs and persist the parsed commits.
+    this.logger.info("Parsing difflines for all found commits.");
+    final List<Commit> commitsWithDiffs = this.util.getDiffingLines(commits);
+
+    this.logger.info("Saving parsed commits to file");
+    JSONUtil.saveFoundCommits(commitsWithDiffs, this.resultPath);
+
+    // Step 2: build the annotation graphs and persist them as well.
+    this.logger.info("Building line mapping graph.");
+    final AnnotationMap<String, List<FileAnnotationGraph>> lineMappings =
+        buildLineMappingGraph(commitsWithDiffs);
+
+    this.logger.info("Saving results to file");
+    lineMappings.saveToJSON(this.resultPath);
+
+    return lineMappings;
+  }
+
+  /**
+   * Use this method to use already found bug fixing changes.
+   *
+   * <p>The file is expected to hold a JSON object whose values are objects with the keys
+   * {@code hash}, {@code resolutiondate}, {@code commitdate} and {@code creationdate}. The parsed
+   * result replaces the issues currently held by this parser.
+   *
+   * @param path the path to the json file where the changes are stored.
+   * @return the commits referenced by the file, or an empty set when there is no repository, the
+   *     file is missing or malformed, or no commit was found.
+   */
+  public Set<RevCommit> readBugFixCommits(String path) throws IOException, GitAPIException {
+    if (repo == null) return Collections.emptySet();
+
+    this.issues = new Issues();
+
+    JSONParser commitParser = new JSONParser();
+    // try-with-resources: the reader used to be left open after parsing.
+    try (FileReader reader = new FileReader(path)) {
+      JSONObject object = (JSONObject) commitParser.parse(reader);
+
+      this.issues.revisions = new HashSet<>();
+      this.issues.dates = new HashMap<>();
+
+      for (Object issue : object.keySet()) {
+        // json-simple hands back raw maps; the cast cannot be checked at compile time.
+        @SuppressWarnings("unchecked")
+        Map<String, String> issueInfo = (Map<String, String>) object.get(issue);
+
+        String rev = issueInfo.get("hash");
+        ObjectId commitId = rev == null ? null : this.repo.resolve(rev);
+        if (commitId == null) {
+          // A missing or unknown hash would otherwise abort the whole load with an exception.
+          this.logger.warn(
+              String.format("Skipping issue %s: commit %s could not be resolved.", issue, rev));
+          continue;
+        }
+        RevCommit revCommit = this.repo.parseCommit(commitId);
+
+        Map<String, String> dates = new HashMap<>();
+
+        dates.put("resolutiondate", issueInfo.get("resolutiondate"));
+        dates.put("commitdate", issueInfo.get("commitdate"));
+        dates.put("creationdate", issueInfo.get("creationdate"));
+
+        this.issues.dates.put(rev, dates);
+        this.issues.revisions.add(revCommit);
+      }
+
+    } catch (FileNotFoundException | ParseException e) {
+      return Collections.emptySet();
+    }
+
+    this.logger.info(String.format("Found %d number of commits.", this.issues.revisions.size()));
+
+    if (this.issues.revisions.isEmpty()) return Collections.emptySet();
+    return this.issues.revisions;
+  }
+
+  /**
+   * Searches the repository for commits whose message matches the bug patterns of {@link
+   * SimpleCommitSearcher}.
+   *
+   * @return the matching commits, or an empty set when there is no repository or nothing matched.
+   */
+  public Set<RevCommit> searchForBugFixes() throws IOException, GitAPIException {
+    if (repo == null) return Collections.emptySet();
+
+    Set<RevCommit> matches = new SimpleCommitSearcher(this.repo).filterOnBugPatterns();
+    this.logger.info(String.format("Found %d number of commits", matches.size()));
+
+    return matches.isEmpty() ? Collections.<RevCommit>emptySet() : matches;
+  }
+}

+ 116 - 0
code/szz/src/main/java/parser/GitParserThread.java

@@ -0,0 +1,116 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package parser;
+
+import graph.AnnotationMap;
+import graph.FileAnnotationGraph;
+import heuristics.BugFinderFactory;
+import heuristics.BugIntroducerFinder;
+import java.io.*;
+import java.util.*;
+import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import util.Configuration;
+import util.JSONUtil;
+
+/**
+ * A thread that starts a parser instance. It also logs the activity of the parser
+ * and prints the current state.
+ *
+ * @author Oscar Svensson
+ */
+public class GitParserThread extends Thread {
+  public GitParser parser;
+  private String issues;
+  private int bugFinder;
+  private int depth;
+
+  private Logger logger = LoggerFactory.getLogger(GitParserThread.class);
+
+  public GitParserThread(
+      String repo, String issues, String results, int bugFinder, int depth, int customContext) {
+    try {
+      this.parser = new GitParser(repo, results, depth, customContext);
+      this.issues = issues;
+
+      this.bugFinder = bugFinder;
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (GitAPIException e) {
+      e.printStackTrace();
+    }
+
+    // this.contextMap = MDC.getCopyOfContextMap();
+    this.depth = depth;
+  }
+
+  public GitParserThread(int id, String issues) {
+    Configuration conf = Configuration.getInstance();
+    try {
+      this.parser =
+          new GitParser(
+              conf.getRepository(),
+              String.format("%s/result%d", conf.getResultPath(), id),
+              conf.getDepth(),
+              conf.getDiffCustomContext());
+      this.issues = issues;
+      this.bugFinder = conf.getBugFinder();
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (GitAPIException e) {
+      e.printStackTrace();
+    }
+  }
+
+  public void run() {
+    this.parser.useLogger(this.logger);
+
+    logger.info("Started process...");
+    try {
+      Set<RevCommit> commits = this.parser.readBugFixCommits(this.issues);
+      logger.info("Checking each commits diff...");
+
+      AnnotationMap<String, List<FileAnnotationGraph>> graphs =
+          this.parser.annotateCommits(commits);
+      logger.info("Trying to find potential bug introducing commits...");
+      List<String[]> bugIntroducers = Collections.emptyList();
+
+      BugIntroducerFinder finder =
+          BugFinderFactory.getFinder(this.parser.getRepository(), this.parser.getIssues());
+
+      bugIntroducers = finder.findBugIntroducingCommits(graphs);
+
+      logger.info("Saving found bug introducing commits...");
+      JSONUtil.saveBugIntroducingCommits(bugIntroducers, this.parser.getResultPath());
+
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (GitAPIException e) {
+      e.printStackTrace();
+    }
+  }
+}

+ 129 - 0
code/szz/src/main/java/parser/SimpleCommitSearcher.java

@@ -0,0 +1,129 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package parser;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
+import util.Configuration;
+
+/**
+ * Simple class which does a search in a git repository for potential bug fixing commits.
+ *
+ * <p>The matchers in {@link #bugpatterns} are reused between calls, so an instance must not be
+ * shared between threads without external synchronization.
+ *
+ * @author Oscar Svensson
+ */
+public class SimpleCommitSearcher {
+
+  /** Regex used for the default bug pattern of both constructors. */
+  private static final String DEFAULT_BUG_PATTERN = "JENKINS\\-[0-9]";
+
+  private Git git;
+  private Repository repo;
+
+  public List<Matcher> bugpatterns;
+
+  /**
+   * Constructor using a path to a local repository.
+   *
+   * @param repoPath path used both as ceiling directory and as starting point when locating the
+   *     git directory.
+   */
+  public SimpleCommitSearcher(String repoPath) throws IOException, GitAPIException {
+    FileRepositoryBuilder builder = new FileRepositoryBuilder();
+    builder.setMustExist(true);
+
+    builder.addCeilingDirectory(new File(repoPath));
+    builder.findGitDir(new File(repoPath));
+    this.repo = builder.build();
+    this.git = new Git(repo);
+
+    bugpatterns = Arrays.asList(compile(DEFAULT_BUG_PATTERN));
+  }
+
+  /**
+   * Constructor using a repository.
+   *
+   * @param repo the repository to search in.
+   */
+  public SimpleCommitSearcher(Repository repo) {
+    this.repo = repo;
+    this.git = new Git(repo);
+
+    bugpatterns = Arrays.asList(compile(DEFAULT_BUG_PATTERN));
+  }
+
+  /**
+   * Compile a regex pattern.
+   *
+   * @param pattern a regex like string.
+   * @return a matcher object that can be used to match bugs.
+   */
+  private Matcher compile(String pattern) {
+    Pattern p = Pattern.compile(pattern, Pattern.DOTALL);
+    return p.matcher("");
+  }
+
+  /**
+   * Method to count the number of commits in a repository. If the repository is null, this method
+   * returns -1.
+   *
+   * @return the number of commits returned by the log command, or -1 without a repository.
+   */
+  public int countCommits() throws IOException, GitAPIException {
+    if (repo == null || git == null) {
+      return -1;
+    }
+
+    int count = 0;
+    for (RevCommit ignored : git.log().call()) {
+      count++;
+    }
+
+    return count;
+  }
+
+  /**
+   * Iterates through all commits and finds all commits that match any of the bug patterns.
+   *
+   * @return a set containing commits that matches the bugpatterns.
+   */
+  public Set<RevCommit> filterOnBugPatterns() throws IOException, GitAPIException {
+    Set<RevCommit> foundCommits = new HashSet<>();
+
+    // The log result can only be walked once, so the commits form the outer loop and every
+    // pattern is tried against each commit. Iterating the log once per pattern would leave
+    // all patterns after the first without any commits to look at.
+    for (RevCommit commit : this.git.log().call()) {
+      String message = commit.getFullMessage();
+
+      for (Matcher bugMatcher : bugpatterns) {
+        if (bugMatcher.reset(message).find()) {
+          foundCommits.add(commit);
+          break;
+        }
+      }
+    }
+
+    return foundCommits;
+  }
+}

+ 222 - 0
code/szz/src/main/java/util/CommitUtil.java

@@ -0,0 +1,222 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package util;
+
+import data.Issues;
+import diff.DiffingLines;
+import diff.DiffingLines.DiffLines;
+import java.io.*;
+import java.util.*;
+import org.apache.commons.io.IOUtils;
+import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.diff.DiffEntry;
+import org.eclipse.jgit.diff.DiffFormatter;
+import org.eclipse.jgit.diff.EditList;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.ObjectReader;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.patch.FileHeader;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.revwalk.RevTree;
+import org.eclipse.jgit.revwalk.RevWalk;
+import org.eclipse.jgit.treewalk.AbstractTreeIterator;
+import org.eclipse.jgit.treewalk.CanonicalTreeParser;
+import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.treewalk.filter.PathFilter;
+import org.eclipse.jgit.util.io.DisabledOutputStream;
+import parser.Commit;
+
+/**
+ * Util to perform specific operations on commits.
+ *
+ * @author Oscar Svensson
+ */
+public class CommitUtil {
+
+  private Git git;
+  private Issues issues;
+  private Repository repo;
+
+  private int customContext;
+
+  public CommitUtil(Repository repo, int customContext) {
+    this.repo = repo;
+
+    this.git = new Git(repo);
+
+    this.customContext = customContext;
+  }
+
+  /**
+   * Method to read a file from a specific revision.
+   *
+   * @param tree the revision tree that contains the file.
+   * @param path the path that leads to the file in the tree.
+   * @return a list containing all lines in the file.
+   */
+  public List<String> getFileLines(RevTree tree, String path) throws IOException, GitAPIException {
+
+    try (TreeWalk walk = new TreeWalk(this.repo)) {
+      walk.addTree(tree);
+      walk.setRecursive(true);
+      walk.setFilter(PathFilter.create(path));
+
+      walk.next();
+      ObjectId oId = walk.getObjectId(0);
+
+      if (oId == ObjectId.zeroId()) {
+        return new LinkedList<>();
+      }
+
+      ObjectLoader loader = this.repo.open(oId);
+
+      ByteArrayOutputStream stream = new ByteArrayOutputStream();
+      loader.copyTo(stream);
+
+      return IOUtils.readLines(new ByteArrayInputStream(stream.toByteArray()), "UTF-8");
+    } catch (Exception e) {
+      return new LinkedList<>();
+    }
+  }
+
+  /**
+   * Find all lines that diffs with a commits parent commit.
+   *
+   * @param commits a set of unique commits.
+   */
+  public List<Commit> getDiffingLines(Set<RevCommit> commits) throws IOException, GitAPIException {
+    List<Commit> parsedCommits = new LinkedList<>();
+
+    for (RevCommit revc : commits) {
+      Commit commit = getCommitDiffingLines(revc);
+
+      if (commit == null) continue;
+
+      if (!commit.diffWithParent.isEmpty()) parsedCommits.add(commit);
+    }
+
+    return parsedCommits;
+  }
+
+  /**
+   * Extracts the differences between two revisions.
+   *
+   * @param a revision a.
+   * @param b revision b.
+   * @return a list containing all diffs.
+   */
+  public List<DiffEntry> diffRevisions(RevCommit a, RevCommit b)
+      throws IOException, GitAPIException {
+    return this.git
+        .diff()
+        .setOldTree(getCanonicalTreeParser(a))
+        .setNewTree(getCanonicalTreeParser(b))
+        .call();
+  }
+
+  /**
+   * Extract a list containing all Edits that exists between two revisions.
+   *
+   * @param entry a diffentry which contains information about a diff between two revisions.
+   * @return an EditList containing all Edits.
+   */
+  public EditList getDiffEditList(DiffEntry entry) throws IOException, GitAPIException {
+    DiffFormatter form = new DiffFormatter(DisabledOutputStream.INSTANCE);
+    form.setRepository(this.git.getRepository());
+
+    FileHeader fh = form.toFileHeader(entry);
+    return fh.toEditList();
+  }
+
+  /**
+   * Extract filechanges between one revision and another. The resulting lines are formatted in a
+   * git diff format. Each line starts with the line number separated with added or removerd
+   * indicating if its the old or new file.
+   *
+   * @param newTree the new revision that contains the new changes.
+   * @param oldTree the old revision that contains the old changes.
+   * @param entry an DiffEntry that contains the number of changes between the newTree and the
+   *     oldTree.
+   * @return a list containing all diffing lines.
+   */
+  public DiffLines diffFile(DiffEntry entry)
+      throws IOException, GitAPIException {
+    EditList edits = getDiffEditList(entry);
+
+    DiffingLines differ = new DiffingLines(this.repo, this.customContext);
+
+    return differ.getDiffingLines(entry, edits);
+  }
+
+  /**
+   * Parse the lines a commit recently made changes to compared to its parent.
+   *
+   * @param revc the current revision.
+   * @return a commit object containing all differences.
+   */
+  public Commit getCommitDiffingLines(RevCommit revc, RevCommit... revother)
+      throws IOException, GitAPIException {
+
+    if (revc.getId() == revc.zeroId()) return null;
+
+    RevCommit parent = null;
+    if (revother.length > 0) parent = revother[0];
+    else if (revc.getParents().length > 0) parent = revc.getParent(0);
+    else parent = revc;
+
+    if (parent.getId() == ObjectId.zeroId()) return null;
+
+    List<DiffEntry> diffEntries = diffRevisions(parent, revc);
+
+    Commit commit = new Commit(revc);
+
+    for (DiffEntry entry : diffEntries) {
+      DiffLines changedLines = diffFile(entry);
+
+      commit.diffWithParent.put(entry.getNewPath(), changedLines);
+      commit.changeTypes.put(entry.getNewPath(), entry.getChangeType());
+    }
+    return commit;
+  }
+
+  /**
+   * Returns a revision tree parser wich could be used to compare revisions and extract revision
+   * files.
+   *
+   * @param commitId a unique ID for a commit in the repository.
+   * @return a tree iterator that could iterate through the revision tree.
+   */
+  private AbstractTreeIterator getCanonicalTreeParser(ObjectId commitId) throws IOException {
+    try (RevWalk walk = new RevWalk(this.git.getRepository())) {
+      RevCommit commit = walk.parseCommit(commitId);
+      ObjectId treeId = commit.getTree().getId();
+      try (ObjectReader reader = git.getRepository().newObjectReader()) {
+        return new CanonicalTreeParser(null, reader, treeId);
+      }
+    }
+  }
+}

+ 221 - 0
code/szz/src/main/java/util/Configuration.java

@@ -0,0 +1,221 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package util;
+
+import heuristics.BugFinderFactory;
+import java.util.*;
+import org.apache.commons.cli.*;
+import org.slf4j.Logger;
+
+/**
+ * Global configuration file. Contains all commandline options.
+ *
+ * <p>The single instance is created by {@link #init(Logger, String...)} and afterwards available
+ * through {@link #getInstance()}.
+ *
+ * @author Oscar Svensson
+ */
+public class Configuration {
+
+  private static Configuration instance = null;
+
+  private int depth = 3;
+  private int cpus = 1;
+  private int bugFinder = BugFinderFactory.SIMPLE;
+  private int diffCustomContext = 0;
+
+  private String issuePath = null;
+  private String repoPath = null;
+  private String resPath = "results";
+  private String partialFixPattern = "fix";
+
+  protected Configuration() {}
+
+  /**
+   * Returns the global configuration.
+   *
+   * @return the initialized configuration.
+   * @throws IllegalStateException if {@link #init(Logger, String...)} has not been called yet.
+   */
+  public static Configuration getInstance() {
+    if (instance == null) {
+      throw new IllegalStateException("Configuration not initialized!");
+    }
+    return instance;
+  }
+
+  /**
+   * Creates the global configuration from the command line arguments. The process is terminated
+   * with exit code 1 when the arguments cannot be parsed or when the issue path (-i) or the
+   * repository path (-r) is missing.
+   *
+   * @param logger the logger used to report problems and progress.
+   * @param args the command line arguments.
+   * @return the initialized configuration.
+   * @throws IllegalStateException if the configuration has already been initialized.
+   */
+  public static Configuration init(Logger logger, String... args) {
+    if (instance != null) {
+      throw new IllegalStateException("Configuration already intialized!");
+    }
+
+    instance = new Configuration();
+
+    CommandLineParser parser = new DefaultParser();
+    CommandLine cmd = null;
+    try {
+      cmd = parser.parse(getCMDOptions(), args);
+    } catch (ParseException e) {
+      logger.warn(e.getMessage());
+      System.exit(1);
+    }
+
+    if (cmd.hasOption("i")) {
+      instance.setIssuePath(cmd.getOptionValue("i"));
+    } else {
+      logger.warn("No Issues specified! Please use -i <IssuePath>");
+      System.exit(1);
+    }
+
+    if (cmd.hasOption("r")) {
+      instance.setRepository(cmd.getOptionValue("r"));
+    } else {
+      logger.warn("No Repository specified! Please use -r <RepoPath>");
+      System.exit(1);
+    }
+
+    if (cmd.hasOption("d")) {
+      instance.setDepth(Integer.parseInt(cmd.getOptionValue("d")));
+    }
+
+    logger.info("Checking available processors...");
+    if (cmd.hasOption("c")) {
+      instance.setNumberOfCPUS(Integer.parseInt(cmd.getOptionValue("c")));
+      logger.info(String.format("Using %s cpus!", instance.getNumberOfCPUS()));
+    } else {
+      instance.setNumberOfCPUS(Runtime.getRuntime().availableProcessors());
+      logger.info(String.format("Found %s processes!", instance.getNumberOfCPUS()));
+    }
+
+    if (cmd.hasOption("b")) {
+      // Strings have to be compared with equals; == compares references and never matched the
+      // value read from the command line.
+      if ("distance".equals(cmd.getOptionValue("b"))) {
+        instance.setBugFinder(BugFinderFactory.DISTANCE);
+      }
+    }
+
+    if (cmd.hasOption("dc")) {
+      instance.setDiffCustomContext(Integer.parseInt(cmd.getOptionValue("dc")));
+    }
+
+    if (cmd.hasOption("p")) {
+      instance.setPartialFixPattern(cmd.getOptionValue("p"));
+    }
+
+    return instance;
+  }
+
+  public int getDepth() {
+    return depth;
+  }
+
+  protected void setDepth(int depth) {
+    this.depth = depth;
+  }
+
+  public String getIssuePath() {
+    return issuePath;
+  }
+
+  protected void setIssuePath(String issuePath) {
+    this.issuePath = issuePath;
+  }
+
+  public String getRepository() {
+    return repoPath;
+  }
+
+  protected void setRepository(String repoPath) {
+    this.repoPath = repoPath;
+  }
+
+  public int getNumberOfCPUS() {
+    return cpus;
+  }
+
+  protected void setNumberOfCPUS(int cpus) {
+    this.cpus = cpus;
+  }
+
+  public int getBugFinder() {
+    return bugFinder;
+  }
+
+  protected void setBugFinder(int bugFinder) {
+    this.bugFinder = bugFinder;
+  }
+
+  public int getDiffCustomContext() {
+    return diffCustomContext;
+  }
+
+  protected void setDiffCustomContext(int diffCustomContext) {
+    this.diffCustomContext = diffCustomContext;
+  }
+
+  public String getResultPath() {
+    return resPath;
+  }
+
+  protected void setResultPath(String resPath) {
+    this.resPath = resPath;
+  }
+
+  public String getPartialFixPattern() {
+    return partialFixPattern;
+  }
+
+  protected void setPartialFixPattern(String pattern) {
+    this.partialFixPattern = pattern;
+  }
+
+  /**
+   * Declares the supported command line options. All options take an argument and none is marked
+   * as required here; the mandatory ones are checked in {@link #init(Logger, String...)}.
+   *
+   * @return the options understood by the command line parser.
+   */
+  private static Options getCMDOptions() {
+    Options options = new Options();
+
+    Option issue_option = new Option("i", true, "Path to the issue file.");
+    issue_option.setRequired(false);
+    options.addOption(issue_option);
+
+    Option repo_option = new Option("r", true, "Path to a local git repository.");
+    repo_option.setRequired(false);
+    options.addOption(repo_option);
+
+    Option depth_option = new Option("d", true, "Depth for the line mapping graph.");
+    depth_option.setRequired(false);
+    options.addOption(depth_option);
+
+    Option cpu_option = new Option("c", true, "The number of cpus. Defaults to all.");
+    cpu_option.setRequired(false);
+    options.addOption(cpu_option);
+
+    Option bugFinderOption =
+        new Option("b", true, "The choice of bugfinder. Either simple or distance.");
+    bugFinderOption.setRequired(false);
+    options.addOption(bugFinderOption);
+
+    Option diffCustomContextOption =
+        new Option("dc", true, "How many lines the differ adds around a diff.");
+    diffCustomContextOption.setRequired(false);
+    options.addOption(diffCustomContextOption);
+
+    Option partialFixPatternOption =
+        new Option(
+            "p",
+            true,
+            "Specify the pattern that should be used when maching bug fixes. Defaults to \"fix\"");
+    partialFixPatternOption.setRequired(false);
+    options.addOption(partialFixPatternOption);
+
+    return options;
+  }
+}

+ 93 - 0
code/szz/src/main/java/util/JSONUtil.java

@@ -0,0 +1,93 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package util;
+
+import java.io.*;
+import java.util.*;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import parser.Commit;
+
+/**
+ * Util to perform operations on JSON objects.
+ *
+ * @author Oscar Svensson
+ */
+public class JSONUtil {
+
+  /**
+   * Method to save found commits to file.
+   *
+   * @param commits list of found commits.
+   * @param path the path to which the commits will be saved to.(Not filepath but directory)
+   */
+  public static void saveFoundCommits(List<Commit> commits, String path) {
+    JSONObject jCommits = new JSONObject();
+
+    for (Commit commit : commits) {
+      jCommits.put(commit.getHashString(), commit.toJson());
+    }
+
+    if (path != null) {
+      try (FileWriter writer = new FileWriter(path + "/" + "commits.json")) {
+        writer.write(jCommits.toJSONString());
+        writer.flush();
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    } else {
+      System.out.println(jCommits);
+    }
+  }
+
+  /**
+   * Save a list of bugIntroducing commits.
+   *
+   * @param commits the list of pairs of commits. commits[0]=FIX, commits[1]=INTRODUCER
+   */
+  public static void saveBugIntroducingCommits(List<String[]> commits, String path) {
+    JSONArray jCommits = new JSONArray();
+
+    for (String[] pair : commits) {
+      JSONArray jPair = new JSONArray();
+
+      jPair.add(pair[0]);
+      jPair.add(pair[1]);
+
+      jCommits.add(jPair);
+    }
+
+    if (path != null) {
+      try (FileWriter writer = new FileWriter(path + "/" + "fix_and_introducers_pairs.json")) {
+        writer.write(jCommits.toJSONString());
+        writer.flush();
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    } else {
+      System.out.println(jCommits);
+    }
+  }
+}

+ 174 - 0
code/szz/src/main/java/util/RevisionCombinationGenerator.java

@@ -0,0 +1,174 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2018 Axis Communications AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package util;
+
+import java.math.BigInteger;
+import java.util.*;
+import java.util.stream.*;
+
+/**
+ * Combinator for revisions.
+ *
+ * <p>Enumerates every {@code r}-element combination of the indices {@code 0 .. n-1} in
+ * lexicographic order. When constructed from two lists, it can additionally turn the 2-element
+ * combinations into (revision, issue) pairs, see {@link #generateRevIssuePairs()}.
+ *
+ * <p>Instances carry mutable iteration state ({@link #getNext()} advances it, {@link #reset()}
+ * rewinds it).
+ */
+public class RevisionCombinationGenerator {
+  /** The current combination; {@code a[i]} is the i:th chosen index. */
+  private final int[] a;
+  /** Number of elements to choose from. */
+  private final int n;
+  /** Number of elements in each combination. */
+  private final int r;
+  /** Number of combinations that have not been handed out yet. */
+  private BigInteger numLeft;
+  /** Total number of combinations, i.e. "n choose r". */
+  private final BigInteger total;
+
+  /** Snapshot of the first input list, or {@code null} when built from plain sizes. */
+  private final List<String> sa;
+  /** Snapshot of the second input list, or {@code null} when built from plain sizes. */
+  private final List<String> sb;
+
+  /**
+   * Creates a generator over the concatenation of two lists.
+   *
+   * <p>The number of elements to choose from is {@code sa.size() + sb.size()}. Both lists are
+   * copied, so later changes to them do not affect this generator.
+   *
+   * @param sa the first list; treated as the revisions by {@link #generateRevIssuePairs()}.
+   * @param sb the second list; treated as the issues by {@link #generateRevIssuePairs()}.
+   * @param r the size of each combination.
+   * @throws IllegalArgumentException if {@code r} is negative or larger than the combined size,
+   *     or if both lists are empty.
+   */
+  public RevisionCombinationGenerator(List<String> sa, List<String> sb, int r) {
+    this(sa.size() + sb.size(), r, sa, sb);
+  }
+
+  /**
+   * Creates a pure index generator that is not backed by any lists.
+   *
+   * @param n the number of elements to choose from.
+   * @param r the size of each combination.
+   * @throws IllegalArgumentException if {@code r} is negative or larger than {@code n}, or if
+   *     {@code n} is smaller than 1.
+   */
+  public RevisionCombinationGenerator(int n, int r) {
+    this(n, r, null, null);
+  }
+
+  /** Shared initialization for both public constructors. */
+  private RevisionCombinationGenerator(int n, int r, List<String> sa, List<String> sb) {
+    if (r < 0) {
+      // Reject explicitly instead of failing later with NegativeArraySizeException.
+      throw new IllegalArgumentException("r must not be negative: " + r);
+    }
+    if (r > n) {
+      throw new IllegalArgumentException("r (" + r + ") must not exceed n (" + n + ")");
+    }
+    if (n < 1) {
+      throw new IllegalArgumentException("n must be at least 1: " + n);
+    }
+    this.n = n;
+    this.r = r;
+    this.sa = sa == null ? null : new ArrayList<>(sa);
+    this.sb = sb == null ? null : new ArrayList<>(sb);
+
+    this.a = new int[r];
+    this.total = binomial(n, r);
+    reset();
+  }
+
+  /** Rewinds the generator so that the next combination is the first one again. */
+  public void reset() {
+    for (int i = 0; i < a.length; i++) {
+      a[i] = i;
+    }
+    // BigInteger is immutable, so the reference can be shared.
+    numLeft = total;
+  }
+
+  /**
+   * Return number of combinations not yet generated.
+   */
+  public BigInteger getNumLeft() {
+    return numLeft;
+  }
+
+  /**
+   * Are there more combinations?
+   */
+  public boolean hasMore() {
+    return numLeft.signum() > 0;
+  }
+
+  /**
+   * Returns the total number of combinations,
+   */
+  public BigInteger getTotal() {
+    return total;
+  }
+
+  /**
+   * Computes the binomial coefficient "n choose r".
+   *
+   * <p>Uses the multiplicative formula, which avoids building the full factorials. After step
+   * {@code i} the running value equals "(n - k + i) choose i", so every division is exact.
+   */
+  private static BigInteger binomial(int n, int r) {
+    int k = Math.min(r, n - r);
+    BigInteger result = BigInteger.ONE;
+    for (int i = 1; i <= k; i++) {
+      result = result.multiply(BigInteger.valueOf(n - k + i)).divide(BigInteger.valueOf(i));
+    }
+    return result;
+  }
+
+  /**
+   * Generates next combination (algorithm from Rosen p. 286)
+   *
+   * @return a fresh array holding the next combination of indices in ascending order. The array
+   *     is a copy, so it stays valid after further calls.
+   * @throws NoSuchElementException if all combinations have already been generated, i.e. {@link
+   *     #hasMore()} is {@code false}.
+   */
+  public int[] getNext() {
+    return advance().clone();
+  }
+
+  /**
+   * Moves to the next combination and returns the internal array without copying it.
+   *
+   * <p>The very first call after construction or {@link #reset()} hands out the initial
+   * combination unchanged.
+   */
+  private int[] advance() {
+    if (!hasMore()) {
+      throw new NoSuchElementException("All " + total + " combinations have been generated");
+    }
+
+    if (!numLeft.equals(total)) {
+      // Find the rightmost position that has not reached its maximum value yet. One exists
+      // because at least one combination is left.
+      int i = r - 1;
+      while (a[i] == n - r + i) {
+        i--;
+      }
+      a[i] = a[i] + 1;
+      // Everything to the right restarts directly after the incremented position.
+      for (int j = i + 1; j < r; j++) {
+        a[j] = a[i] + j - i;
+      }
+    }
+
+    numLeft = numLeft.subtract(BigInteger.ONE);
+    return a;
+  }
+
+  /**
+   * Builds (revision, issue) pairs from the remaining 2-element combinations.
+   *
+   * <p>The combinations are taken over the first list followed by the second list. A combination
+   * is kept when one of its elements occurs in the first list and the other occurs in the second
+   * list; the element from the first list is stored at index 0 of the pair.
+   *
+   * <p>This consumes the generator: it continues from the current position and leaves {@link
+   * #hasMore()} {@code false}. Call {@link #reset()} first to get all pairs again.
+   *
+   * @return the pairs in generation order; an empty list if {@code r} is not 2 or if this
+   *     generator was created without lists.
+   */
+  public List<String[]> generateRevIssuePairs() {
+    if (r != 2 || sa == null || sb == null) {
+      return Collections.emptyList();
+    }
+
+    Set<String> revs = new HashSet<>(sa);
+    Set<String> issues = new HashSet<>(sb);
+
+    List<String> allCommits = new ArrayList<>(sa.size() + sb.size());
+    allCommits.addAll(sa);
+    allCommits.addAll(sb);
+
+    List<String[]> combinations = new ArrayList<>();
+    while (hasMore()) {
+      // The indices are used immediately, so the internal array can be read without a copy.
+      int[] indices = advance();
+
+      String c1 = allCommits.get(indices[0]);
+      String c2 = allCommits.get(indices[1]);
+
+      if (revs.contains(c1) && issues.contains(c2)) {
+        combinations.add(new String[] {c1, c2});
+      } else if (revs.contains(c2) && issues.contains(c1)) {
+        combinations.add(new String[] {c2, c1});
+      }
+    }
+
+    return combinations;
+  }
+}