GitParser.java 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. /*
  2. * MIT License
  3. *
  4. * Copyright (c) 2018 Axis Communications AB
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in all
  14. * copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22. * SOFTWARE.
  23. */
  24. package parser;
  25. import data.Issues;
  26. import graph.AnnotationMap;
  27. import graph.FileAnnotationGraph;
  28. import java.io.*;
  29. import java.util.*;
  30. import java.util.stream.*;
  31. import org.eclipse.jgit.api.BlameCommand;
  32. import org.eclipse.jgit.api.errors.GitAPIException;
  33. import org.eclipse.jgit.blame.BlameResult;
  34. import org.eclipse.jgit.diff.DiffEntry;
  35. import org.eclipse.jgit.diff.RawText;
  36. import org.eclipse.jgit.lib.ObjectId;
  37. import org.eclipse.jgit.lib.Repository;
  38. import org.eclipse.jgit.revwalk.RevCommit;
  39. import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
  40. import org.json.simple.JSONObject;
  41. import org.json.simple.parser.JSONParser;
  42. import org.json.simple.parser.ParseException;
  43. import org.slf4j.Logger;
  44. import util.CommitUtil;
  45. import util.JSONUtil;
  46. /**
  47. * A class capable of searching a local repository and building line mapping graphs from it. Uses
  48. * JGit to parse the repository and the revision trees.
  49. *
  50. * @author Oscar Svensson
  51. */
  52. public class GitParser {
  53. private CommitUtil util;
  54. private Repository repo;
  55. private Issues issues;
  56. private String resultPath;
  57. private String DEFAULT_RES_PATH = "./results";
  58. private Logger logger;
  59. private int depth;
  60. private BlameCommand blameCommand;
  /**
   * Opens the Git repository found at {@code path} and prepares the directory the results are
   * written to.
   *
   * <p>The repository must exist: the builder is configured with {@code setMustExist(true)}. When
   * {@code resultPath} is {@code null} a notice is printed to {@code System.err} and the default
   * result directory is used instead. In both cases the directory is created when it is missing.
   *
   * @param path the path to where the local repository can be found.
   * @param resultPath the path to where the JSON files will be written, or {@code null} to use the
   *     default directory.
   * @param depth the initial step count used when tracing file changes.
   * @param customContext value forwarded unchanged to the {@link CommitUtil} constructor.
   * @throws IOException if the repository cannot be opened or the result directory cannot be
   *     created.
   * @throws GitAPIException declared in the signature for callers.
   */
  public GitParser(String path, String resultPath, int depth, int customContext)
      throws IOException, GitAPIException {
    FileRepositoryBuilder builder = new FileRepositoryBuilder();
    builder.setMustExist(true);
    builder.addCeilingDirectory(new File(path));
    builder.findGitDir(new File(path));
    this.repo = builder.build();
    this.blameCommand = new BlameCommand(this.repo);

    // Settle on the effective result path first so the default directory is created as well.
    if (resultPath == null) {
      System.err.println("Resultpath not set! Using deafult directory instead.");
      this.resultPath = this.DEFAULT_RES_PATH;
    } else {
      this.resultPath = resultPath;
    }

    File resDirectory = new File(this.resultPath);
    // The trailing isDirectory() check tolerates a directory created concurrently by someone else.
    if (!resDirectory.isDirectory() && !resDirectory.mkdirs() && !resDirectory.isDirectory()) {
      this.repo.close();
      throw new IOException("Could not create result directory: " + resDirectory.getPath());
    }

    this.util = new CommitUtil(this.repo, customContext);
    this.depth = depth;
  }
  90. public String getResultPath() {
  91. return this.resultPath;
  92. }
  93. public Repository getRepository() {
  94. return this.repo;
  95. }
  96. public Issues getIssues() {
  97. return this.issues;
  98. }
  /**
   * Sets the logger used by this parser.
   *
   * <p>The methods of this class that log do so through this field without a null check, so a
   * logger has to be set before they are called.
   *
   * @param logger the logger to use from now on.
   */
  public void useLogger(Logger logger) {
    this.logger = logger;
  }
  102. /**
  103. * Map lines between one commit and another.
  104. *
  105. * @param foundCommit a blameresult containing information about a commit that have made changes
  106. * to a file.
  107. * @param filePath the file that the commit have made changes to.
  108. * @return a mapping with the original revision file lines as keys and the values the
  109. * corresponding lines in the other commit.
  110. */
  111. private List<Integer> getLineMappings(BlameResult foundCommit, String filePath)
  112. throws IOException, GitAPIException {
  113. foundCommit.computeAll();
  114. RawText foundContent = foundCommit.getResultContents();
  115. /*
  116. * Easiest solution, maybe better with a list and a pair class?
  117. */
  118. List<Integer> lineMappings = new LinkedList<>();
  119. for (int line = 0; line < foundContent.size(); line++) {
  120. lineMappings.add(foundCommit.getSourceLine(line));
  121. }
  122. return lineMappings;
  123. }
  124. private int getSourceLine(BlameResult foundCommit, int index)
  125. throws IOException, GitAPIException {
  126. foundCommit.computeAll();
  127. try {
  128. return foundCommit.getSourceLine(index);
  129. } catch (ArrayIndexOutOfBoundsException e) {
  130. return -1;
  131. }
  132. }
  133. /**
  134. * Traces a file change that have occured before a given commmit.
  135. *
  136. * @param filePath specifies which file to trace changes on.
  137. * @param source the source commit from which the trace should start at.
  138. */
  139. private FileAnnotationGraph traceFileChanges(String filePath, Commit source, int step)
  140. throws IOException, GitAPIException {
  141. if (step == 0) return null;
  142. /*
  143. * Save all line numbers for the source commits deletions.
  144. */
  145. List<Integer> delIndexes = null;
  146. if (source.diffWithParent.containsKey(filePath))
  147. delIndexes =
  148. source
  149. .diffWithParent
  150. .get(filePath)
  151. .deletions
  152. .stream()
  153. .map(s -> parseInt(s[0]))
  154. .collect(Collectors.toList());
  155. else return null;
  156. FileAnnotationGraph graph = createEmptyGraph(filePath);
  157. graph.revisions.add(ObjectId.toString(source.commit.toObjectId()));
  158. BlameResult found = callBlameCommand(filePath, source.commit.getParent(0));
  159. if (found == null) return graph;
  160. Map<RevCommit, Map<Integer, Integer>> foundRevisions = linkRevisionsWithLineNumbers(delIndexes, found);
  161. populateGraphWithMappings(graph, foundRevisions);
  162. populateSubgraphs(filePath, step, graph, foundRevisions);
  163. return graph;
  164. }
  165. /*
  166. * Start building subgraphs.
  167. */
  168. private void populateSubgraphs(String filePath, int step, FileAnnotationGraph graph, Map<RevCommit, Map<Integer, Integer>> foundRevisions) throws IOException, GitAPIException {
  169. for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
  170. Commit subCommit = this.util.getCommitDiffingLines(rev.getKey());
  171. FileAnnotationGraph subGraph = traceFileChanges(filePath, subCommit, step - 1);
  172. if (subGraph == null) break;
  173. graph.sub_graphs.put(subCommit.getHashString(), subGraph);
  174. }
  175. }
  176. /*
  177. * Save all mappings in the annotationgraph.
  178. */
  179. private void populateGraphWithMappings(FileAnnotationGraph graph, Map<RevCommit, Map<Integer, Integer>> foundRevisions) {
  180. for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
  181. String revSha = ObjectId.toString(rev.getKey().toObjectId());
  182. if (!graph.mappings.containsKey(revSha)) {
  183. graph.revisions.add(revSha);
  184. graph.mappings.put(revSha, rev.getValue());
  185. } else {
  186. Map<Integer, Integer> linemapping = graph.mappings.get(revSha);
  187. // Add missing mappings.
  188. for (Map.Entry<Integer, Integer> entry : rev.getValue().entrySet()) {
  189. if (!linemapping.containsKey(entry.getKey())) {
  190. linemapping.put(entry.getKey(), entry.getValue());
  191. }
  192. }
  193. }
  194. }
  195. }
  196. /*
  197. * Grab the blamed commits and get the line numbers.
  198. */
  199. private Map<RevCommit, Map<Integer, Integer>> linkRevisionsWithLineNumbers(List<Integer> delIndexes, BlameResult found) {
  200. int index;
  201. Map<RevCommit, Map<Integer, Integer>> foundRevisions = new HashMap<>();
  202. for (int i = 0; i < delIndexes.size(); i++) {
  203. index = delIndexes.get(i);
  204. if (index == -1) continue;
  205. try {
  206. RevCommit foundRev = found.getSourceCommit(index);
  207. if (!foundRevisions.containsKey(foundRev)) {
  208. Map<Integer, Integer> blamedLines = new LinkedHashMap<>();
  209. blamedLines.put(index, getSourceLine(found, index));
  210. foundRevisions.put(foundRev, blamedLines);
  211. } else {
  212. foundRevisions.get(foundRev).put(index, getSourceLine(found, index));
  213. }
  214. } catch (Exception e) {
  215. // This means that a row didn't exist in a previous revision..
  216. }
  217. }
  218. return foundRevisions;
  219. }
  220. private BlameResult callBlameCommand(String filePath, RevCommit startCommit) throws GitAPIException {
  221. blameCommand.setStartCommit(startCommit);
  222. blameCommand.setFilePath(filePath);
  223. return blameCommand.call();
  224. }
  225. /*
  226. * Create a graph to store line mappings in.
  227. */
  228. private FileAnnotationGraph createEmptyGraph(String filePath) {
  229. FileAnnotationGraph graph = new FileAnnotationGraph();
  230. graph.filePath = filePath;
  231. graph.revisions = new LinkedList<>();
  232. graph.mappings = new HashMap<>();
  233. graph.sub_graphs = new HashMap<>();
  234. return graph;
  235. }
  236. /**
  237. * With each revision, check all files and build their line mapping graphs for each changed line.
  238. *
  239. * @param commits list of commits that should be traced.
  240. * @return the map containing annotation graphs for each file change by a commit.
  241. */
  242. private AnnotationMap<String, List<FileAnnotationGraph>> buildLineMappingGraph(
  243. List<Commit> commits) throws IOException, GitAPIException {
  244. AnnotationMap<String, List<FileAnnotationGraph>> fileGraph = new AnnotationMap<>();
  245. for (Commit commit : commits) {
  246. List<FileAnnotationGraph> graphs = new LinkedList<>();
  247. for (Map.Entry<String, DiffEntry.ChangeType> file : commit.changeTypes.entrySet()) {
  248. String filePath = file.getKey();
  249. if (checkFileType(filePath)) {
  250. FileAnnotationGraph tracedCommits = traceFileChanges(filePath, commit, this.depth);
  251. graphs.add(tracedCommits);
  252. }
  253. }
  254. fileGraph.put(commit.getHashString(), graphs);
  255. }
  256. return fileGraph;
  257. }
  258. private boolean checkFileType(String filePath) {
  259. return !filePath.contains("src/test/") && !filePath.endsWith(".md");
  260. }
  261. /**
  262. * Wrapper method to catch a faulty value.
  263. *
  264. * @param value the string to convert to an int.
  265. * @return the value of the string as an int.
  266. */
  267. private int parseInt(String value) {
  268. try {
  269. return Integer.parseInt(value);
  270. } catch (Exception e) {
  271. return -1;
  272. }
  273. }
  274. /**
  275. * Searchs for commits that have certain keywords in their messages, indicating that they have
  276. * fiexd bugs.
  277. *
  278. * <p>It then saves the found commits and the line mapping graph to two JSON files.
  279. *
  280. * @param commits a set containing references to commits.
  281. */
  282. public AnnotationMap<String, List<FileAnnotationGraph>> annotateCommits(Set<RevCommit> commits)
  283. throws IOException, GitAPIException {
  284. this.logger.info("Parsing difflines for all found commits.");
  285. List<Commit> parsedCommits = this.util.getDiffingLines(commits);
  286. this.logger.info("Saving parsed commits to file");
  287. JSONUtil.saveFoundCommits(parsedCommits, this.resultPath);
  288. this.logger.info("Building line mapping graph.");
  289. AnnotationMap<String, List<FileAnnotationGraph>> mapping = buildLineMappingGraph(parsedCommits);
  290. this.logger.info("Saving results to file");
  291. mapping.saveToJSON(this.resultPath);
  292. return mapping;
  293. }
  294. /**
  295. * Use this method to use already found big fixing changes.
  296. *
  297. * @param path the path to the json file where the changes are stored.
  298. */
  299. public Set<RevCommit> readBugFixCommits(String path) throws IOException, GitAPIException {
  300. if (repo == null) return Collections.emptySet();
  301. this.issues = new Issues();
  302. JSONParser commitParser = new JSONParser();
  303. try {
  304. JSONObject object = (JSONObject) commitParser.parse(new FileReader(path));
  305. this.issues.revisions = new HashSet<>();
  306. this.issues.dates = new HashMap<>();
  307. for (Object issue : object.keySet()) {
  308. Map<String, String> issueInfo = (Map<String, String>) object.get(issue);
  309. String rev = issueInfo.get("hash");
  310. RevCommit revCommit = this.repo.parseCommit(this.repo.resolve(rev));
  311. Map<String, String> dates = new HashMap<>();
  312. dates.put("resolutiondate", issueInfo.get("resolutiondate"));
  313. dates.put("commitdate", issueInfo.get("commitdate"));
  314. dates.put("creationdate", issueInfo.get("creationdate"));
  315. this.issues.dates.put(rev, dates);
  316. this.issues.revisions.add(revCommit);
  317. }
  318. } catch (FileNotFoundException | ParseException e) {
  319. return Collections.emptySet();
  320. }
  321. this.logger.info(String.format("Found %d number of commits.", this.issues.revisions.size()));
  322. if (this.issues.revisions.size() == 0) return Collections.emptySet();
  323. return this.issues.revisions;
  324. }
  325. /** Finds commits that indicates a bugfix and then builds a line mapping graph. */
  326. public Set<RevCommit> searchForBugFixes() throws IOException, GitAPIException {
  327. if (repo == null) {
  328. return Collections.emptySet();
  329. }
  330. SimpleCommitSearcher search = new SimpleCommitSearcher(this.repo);
  331. Set<RevCommit> foundCommits = search.filterOnBugPatterns();
  332. this.logger.info(String.format("Found %d number of commits", foundCommits.size()));
  333. if (foundCommits.size() == 0) {
  334. return Collections.emptySet();
  335. }
  336. return foundCommits;
  337. }
  338. }