GitParser.java 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. /*
  2. * MIT License
  3. *
  4. * Copyright (c) 2018 Axis Communications AB
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in all
  14. * copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22. * SOFTWARE.
  23. */
  24. package parser;
  25. import data.Issues;
  26. import graph.AnnotationMap;
  27. import graph.FileAnnotationGraph;
  28. import org.eclipse.jgit.api.BlameCommand;
  29. import org.eclipse.jgit.api.errors.GitAPIException;
  30. import org.eclipse.jgit.blame.BlameResult;
  31. import org.eclipse.jgit.diff.DiffEntry;
  32. import org.eclipse.jgit.lib.ObjectId;
  33. import org.eclipse.jgit.lib.Repository;
  34. import org.eclipse.jgit.revwalk.RevCommit;
  35. import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
  36. import org.json.simple.JSONObject;
  37. import org.json.simple.parser.JSONParser;
  38. import org.json.simple.parser.ParseException;
  39. import org.slf4j.Logger;
  40. import util.CommitUtil;
  41. import util.JSONUtil;
  42. import java.io.File;
  43. import java.io.FileNotFoundException;
  44. import java.io.FileReader;
  45. import java.io.IOException;
  46. import java.util.*;
  47. import java.util.stream.Collectors;
  48. /**
  49. * A class which is capable to search and build line mapping graphs from a local repository. Uses
  50. * JGit to parse the repository and the revision trees.
  51. *
  52. * @author Oscar Svensson
  53. */
  54. public class GitParser {
  55. private CommitUtil util;
  56. private Repository repo;
  57. private Issues issues;
  58. private String resultPath;
  59. private Logger logger;
  60. private int depth;
  61. private BlameCommand blameCommand;
  62. /**
  63. * The constructor for the GitParser class. It requires the repository to exist and will fail if
  64. * its not. The resultPath is also created if it's not existing.
  65. *
  66. * @param path the path to where the local repository can be found.
  67. * @param resultPath the path to where the JSON files will be written.
  68. */
  69. public GitParser(String path, String resultPath, int depth, int customContext)
  70. throws IOException {
  71. FileRepositoryBuilder builder = new FileRepositoryBuilder();
  72. builder.setMustExist(true);
  73. builder.addCeilingDirectory(new File(path));
  74. builder.findGitDir(new File(path));
  75. this.repo = builder.build();
  76. this.blameCommand = new BlameCommand(this.repo);
  77. this.resultPath = resultPath;
  78. /*
  79. * Check if the resultpath exists otherwise create it.
  80. */
  81. if (this.resultPath != null) {
  82. File resDirectory = new File(resultPath);
  83. if (!resDirectory.exists()) resDirectory.mkdirs();
  84. } else {
  85. System.err.println("Resultpath not set! Using deafult directory instead.");
  86. this.resultPath = "./results";
  87. }
  88. this.util = new CommitUtil(this.repo, customContext);
  89. this.depth = depth;
  90. }
  91. public String getResultPath() {
  92. return this.resultPath;
  93. }
  94. public Repository getRepository() {
  95. return this.repo;
  96. }
  97. public Issues getIssues() {
  98. return this.issues;
  99. }
  100. public void useLogger(Logger logger) {
  101. this.logger = logger;
  102. }
  103. private int getSourceLine(BlameResult foundCommit, int index) throws IOException {
  104. foundCommit.computeAll();
  105. try {
  106. return foundCommit.getSourceLine(index);
  107. } catch (ArrayIndexOutOfBoundsException e) {
  108. return -1;
  109. }
  110. }
  111. /**
  112. * Traces a file change that have occured before a given commmit.
  113. *
  114. * @param filePath specifies which file to trace changes on.
  115. * @param source the source commit from which the trace should start at.
  116. */
  117. private FileAnnotationGraph traceFileChanges(String filePath, Commit source, int step)
  118. throws IOException, GitAPIException {
  119. if (step == 0) return null;
  120. /*
  121. * Save all line numbers for the source commits deletions.
  122. */
  123. List<Integer> delIndexes = null;
  124. if (source.diffWithParent.containsKey(filePath))
  125. delIndexes =
  126. source
  127. .diffWithParent
  128. .get(filePath)
  129. .deletions
  130. .stream()
  131. .map(s -> parseInt(s[0]))
  132. .collect(Collectors.toList());
  133. else return null;
  134. FileAnnotationGraph graph = createEmptyGraph(filePath);
  135. graph.revisions.add(ObjectId.toString(source.commit.toObjectId()));
  136. BlameResult found = callBlameCommand(filePath, source.commit.getParent(0));
  137. if (found == null) return graph;
  138. Map<RevCommit, Map<Integer, Integer>> foundRevisions = linkRevisionsWithLineNumbers(delIndexes, found);
  139. populateGraphWithMappings(graph, foundRevisions);
  140. populateSubgraphs(filePath, step, graph, foundRevisions);
  141. return graph;
  142. }
  143. /*
  144. * Start building subgraphs.
  145. */
  146. private void populateSubgraphs(String filePath, int step, FileAnnotationGraph graph, Map<RevCommit, Map<Integer, Integer>> foundRevisions) throws IOException, GitAPIException {
  147. for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
  148. Commit subCommit = this.util.getCommitDiffingLines(rev.getKey());
  149. FileAnnotationGraph subGraph = traceFileChanges(filePath, subCommit, step - 1);
  150. if (subGraph == null) break;
  151. graph.sub_graphs.put(subCommit.getHashString(), subGraph);
  152. }
  153. }
  154. /*
  155. * Save all mappings in the annotationgraph.
  156. */
  157. private void populateGraphWithMappings(FileAnnotationGraph graph, Map<RevCommit, Map<Integer, Integer>> foundRevisions) {
  158. for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
  159. String revSha = ObjectId.toString(rev.getKey().toObjectId());
  160. if (!graph.mappings.containsKey(revSha)) {
  161. graph.revisions.add(revSha);
  162. graph.mappings.put(revSha, rev.getValue());
  163. } else {
  164. Map<Integer, Integer> linemapping = graph.mappings.get(revSha);
  165. // Add missing mappings.
  166. for (Map.Entry<Integer, Integer> entry : rev.getValue().entrySet()) {
  167. if (!linemapping.containsKey(entry.getKey())) {
  168. linemapping.put(entry.getKey(), entry.getValue());
  169. }
  170. }
  171. }
  172. }
  173. }
  174. /*
  175. * Grab the blamed commits and get the line numbers.
  176. */
  177. private Map<RevCommit, Map<Integer, Integer>> linkRevisionsWithLineNumbers(List<Integer> delIndexes, BlameResult found) {
  178. int index;
  179. Map<RevCommit, Map<Integer, Integer>> foundRevisions = new HashMap<>();
  180. for (int i = 0; i < delIndexes.size(); i++) {
  181. index = delIndexes.get(i);
  182. if (index == -1) continue;
  183. try {
  184. RevCommit foundRev = found.getSourceCommit(index);
  185. if (!foundRevisions.containsKey(foundRev)) {
  186. Map<Integer, Integer> blamedLines = new LinkedHashMap<>();
  187. blamedLines.put(index, getSourceLine(found, index));
  188. foundRevisions.put(foundRev, blamedLines);
  189. } else {
  190. foundRevisions.get(foundRev).put(index, getSourceLine(found, index));
  191. }
  192. } catch (Exception e) {
  193. // This means that a row didn't exist in a previous revision..
  194. }
  195. }
  196. return foundRevisions;
  197. }
  198. private BlameResult callBlameCommand(String filePath, RevCommit startCommit) throws GitAPIException {
  199. blameCommand.setStartCommit(startCommit);
  200. blameCommand.setFilePath(filePath);
  201. return blameCommand.call();
  202. }
  203. /*
  204. * Create a graph to store line mappings in.
  205. */
  206. private FileAnnotationGraph createEmptyGraph(String filePath) {
  207. FileAnnotationGraph graph = new FileAnnotationGraph();
  208. graph.filePath = filePath;
  209. graph.revisions = new LinkedList<>();
  210. graph.mappings = new HashMap<>();
  211. graph.sub_graphs = new HashMap<>();
  212. return graph;
  213. }
  214. /**
  215. * With each revision, check all files and build their line mapping graphs for each changed line.
  216. *
  217. * @param commits list of commits that should be traced.
  218. * @return the map containing annotation graphs for each file change by a commit.
  219. */
  220. private AnnotationMap<String, List<FileAnnotationGraph>> buildLineMappingGraph(
  221. List<Commit> commits) throws IOException, GitAPIException {
  222. AnnotationMap<String, List<FileAnnotationGraph>> fileGraph = new AnnotationMap<>();
  223. for (Commit commit : commits) {
  224. List<FileAnnotationGraph> graphs = new LinkedList<>();
  225. for (Map.Entry<String, DiffEntry.ChangeType> file : commit.changeTypes.entrySet()) {
  226. String filePath = file.getKey();
  227. if (checkFileType(filePath)) {
  228. FileAnnotationGraph tracedCommits = traceFileChanges(filePath, commit, this.depth);
  229. graphs.add(tracedCommits);
  230. }
  231. }
  232. fileGraph.put(commit.getHashString(), graphs);
  233. }
  234. return fileGraph;
  235. }
  236. private boolean checkFileType(String filePath) {
  237. return !filePath.contains("src/test/") && !filePath.endsWith(".md");
  238. }
  239. /**
  240. * Wrapper method to catch a faulty value.
  241. *
  242. * @param value the string to convert to an int.
  243. * @return the value of the string as an int.
  244. */
  245. private int parseInt(String value) {
  246. try {
  247. return Integer.parseInt(value);
  248. } catch (Exception e) {
  249. return -1;
  250. }
  251. }
  252. /**
  253. * Searchs for commits that have certain keywords in their messages, indicating that they have
  254. * fiexd bugs.
  255. *
  256. * <p>It then saves the found commits and the line mapping graph to two JSON files.
  257. *
  258. * @param commits a set containing references to commits.
  259. */
  260. public AnnotationMap<String, List<FileAnnotationGraph>> annotateCommits(Set<RevCommit> commits)
  261. throws IOException, GitAPIException {
  262. this.logger.info("Parsing difflines for all found commits.");
  263. List<Commit> parsedCommits = this.util.getDiffingLines(commits);
  264. this.logger.info("Saving parsed commits to file");
  265. JSONUtil.saveFoundCommits(parsedCommits, this.resultPath);
  266. this.logger.info("Building line mapping graph.");
  267. AnnotationMap<String, List<FileAnnotationGraph>> mapping = buildLineMappingGraph(parsedCommits);
  268. this.logger.info("Saving results to file");
  269. mapping.saveToJSON(this.resultPath);
  270. return mapping;
  271. }
  272. /**
  273. * Use this method to use already found big fixing changes.
  274. *
  275. * @param path the path to the json file where the changes are stored.
  276. */
  277. public Set<RevCommit> readBugFixCommits(String path) throws IOException, GitAPIException {
  278. if (repo == null) return Collections.emptySet();
  279. this.issues = new Issues();
  280. JSONParser commitParser = new JSONParser();
  281. try {
  282. JSONObject object = (JSONObject) commitParser.parse(new FileReader(path));
  283. this.issues.revisions = new HashSet<>();
  284. this.issues.dates = new HashMap<>();
  285. for (Object issue : object.keySet()) {
  286. Map<String, String> issueInfo = (Map<String, String>) object.get(issue);
  287. String rev = issueInfo.get("hash");
  288. RevCommit revCommit = this.repo.parseCommit(this.repo.resolve(rev));
  289. Map<String, String> dates = new HashMap<>();
  290. dates.put("resolutiondate", issueInfo.get("resolutiondate"));
  291. dates.put("commitdate", issueInfo.get("commitdate"));
  292. dates.put("creationdate", issueInfo.get("creationdate"));
  293. this.issues.dates.put(rev, dates);
  294. this.issues.revisions.add(revCommit);
  295. }
  296. } catch (FileNotFoundException | ParseException e) {
  297. return Collections.emptySet();
  298. }
  299. this.logger.info(String.format("Found %d number of commits.", this.issues.revisions.size()));
  300. if (this.issues.revisions.size() == 0) return Collections.emptySet();
  301. return this.issues.revisions;
  302. }
  303. }