GitParser.java 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. /*
  2. * MIT License
  3. *
  4. * Copyright (c) 2018 Axis Communications AB
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in all
  14. * copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22. * SOFTWARE.
  23. */
  24. package parser;
  25. import data.Issues;
  26. import diff.JavaFileExtension;
  27. import graph.AnnotationMap;
  28. import graph.FileAnnotationGraph;
  29. import org.eclipse.jgit.api.BlameCommand;
  30. import org.eclipse.jgit.api.errors.GitAPIException;
  31. import org.eclipse.jgit.blame.BlameResult;
  32. import org.eclipse.jgit.diff.DiffEntry;
  33. import org.eclipse.jgit.lib.ObjectId;
  34. import org.eclipse.jgit.lib.ObjectLoader;
  35. import org.eclipse.jgit.lib.ObjectReader;
  36. import org.eclipse.jgit.lib.Repository;
  37. import org.eclipse.jgit.revwalk.RevCommit;
  38. import org.eclipse.jgit.revwalk.RevTree;
  39. import org.eclipse.jgit.revwalk.RevWalk;
  40. import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
  41. import org.eclipse.jgit.treewalk.TreeWalk;
  42. import org.incava.diffj.lang.DiffJException;
  43. import org.json.simple.JSONObject;
  44. import org.json.simple.parser.JSONParser;
  45. import org.json.simple.parser.ParseException;
  46. import org.slf4j.Logger;
  47. import util.CommitUtil;
  48. import util.JSONUtil;
  49. import java.io.File;
  50. import java.io.FileNotFoundException;
  51. import java.io.FileReader;
  52. import java.io.IOException;
  53. import java.nio.charset.StandardCharsets;
  54. import java.util.*;
  55. import java.util.concurrent.atomic.AtomicBoolean;
  56. import java.util.stream.Collectors;
  57. /**
  58. * A class which is capable to search and build line mapping graphs from a local repository. Uses
  59. * JGit to parse the repository and the revision trees.
  60. *
  61. * @author Oscar Svensson
  62. */
  63. public class GitParser {
  64. private static final List<String> USELESS_FILE_EXTENSIONS = Arrays.asList(".md", ".txt", ".markdown");
  65. private CommitUtil util;
  66. private Repository repo;
  67. private RevWalk revWalk;
  68. private Issues issues;
  69. private String resultPath;
  70. private Logger logger;
  71. private int depth;
  72. private BlameCommand blameCommand;
  73. /**
  74. * The constructor for the GitParser class. It requires the repository to exist and will fail if
  75. * its not. The resultPath is also created if it's not existing.
  76. *
  77. * @param path the path to where the local repository can be found.
  78. * @param resultPath the path to where the JSON files will be written.
  79. */
  80. public GitParser(String path, String resultPath, int depth, int customContext)
  81. throws IOException {
  82. FileRepositoryBuilder builder = new FileRepositoryBuilder();
  83. builder.setMustExist(true);
  84. builder.addCeilingDirectory(new File(path));
  85. builder.findGitDir(new File(path));
  86. this.repo = builder.build();
  87. this.revWalk = new RevWalk(repo);
  88. this.blameCommand = new BlameCommand(this.repo);
  89. this.resultPath = resultPath;
  90. /*
  91. * Check if the resultpath exists otherwise create it.
  92. */
  93. if (this.resultPath != null) {
  94. File resDirectory = new File(resultPath);
  95. if (!resDirectory.exists()) resDirectory.mkdirs();
  96. } else {
  97. System.err.println("Resultpath not set! Using deafult directory instead.");
  98. this.resultPath = "./results";
  99. }
  100. this.util = new CommitUtil(this.repo, customContext);
  101. this.depth = depth;
  102. }
  103. public String getResultPath() {
  104. return this.resultPath;
  105. }
  106. public Repository getRepository() {
  107. return this.repo;
  108. }
  109. public Issues getIssues() {
  110. return this.issues;
  111. }
  112. public void useLogger(Logger logger) {
  113. this.logger = logger;
  114. }
  115. private int getSourceLine(BlameResult foundCommit, int index) throws IOException {
  116. foundCommit.computeAll();
  117. try {
  118. return foundCommit.getSourceLine(index);
  119. } catch (ArrayIndexOutOfBoundsException e) {
  120. return -1;
  121. }
  122. }
  123. /**
  124. * Traces a file change that have occured before a given commmit.
  125. *
  126. * @param filePath specifies which file to trace changes on.
  127. * @param source the source commit from which the trace should start at.
  128. */
  129. private FileAnnotationGraph traceFileChanges(String filePath, Commit source, int step)
  130. throws IOException, GitAPIException {
  131. if (step == 0 || !source.diffWithParent.containsKey(filePath)) return null;
  132. /*
  133. * Save all line numbers for the source commits deletions.
  134. */
  135. List<Integer> delIndexes = buildDelIndexes(filePath, source);
  136. FileAnnotationGraph graph = createEmptyGraph(filePath);
  137. graph.revisions.add(ObjectId.toString(source.commit.toObjectId()));
  138. BlameResult found = callBlameCommand(filePath, source.commit.getParent(0));
  139. if (found == null) return graph;
  140. Map<RevCommit, Map<Integer, Integer>> foundRevisions = linkRevisionsWithLineNumbers(delIndexes, found);
  141. populateGraphWithMappings(graph, foundRevisions);
  142. populateSubgraphs(filePath, step, graph, foundRevisions);
  143. return graph;
  144. }
  145. private List<Integer> buildDelIndexes(String filePath, Commit source) {
  146. List<Integer> delIndexes = source
  147. .diffWithParent
  148. .get(filePath)
  149. .deletions
  150. .stream()
  151. .map(s -> parseInt(s[0]))
  152. .collect(Collectors.toList());
  153. if(filePath.endsWith(".java")) {
  154. Set<Integer> changesFromDiffJ = changesFromDiffJ(filePath, source);
  155. delIndexes = delIndexes.stream().filter(changesFromDiffJ::contains).collect(Collectors.toList());
  156. }
  157. return delIndexes;
  158. }
  159. private Set<Integer> changesFromDiffJ(String filePath, Commit source) {
  160. try {
  161. JavaFileExtension revision = getFileContentAtRevision(filePath, source.commit);
  162. JavaFileExtension parentRev = getFileContentAtRevision(filePath, source.commit.getParent(0));
  163. // Converting line numbers to indexes.
  164. return revision.affectedLineNumbers(parentRev).stream().map(it ->
  165. it-1
  166. ).collect(Collectors.toSet());
  167. } catch (Exception e) {
  168. e.printStackTrace();
  169. return Collections.emptySet();
  170. }
  171. }
  172. private JavaFileExtension getFileContentAtRevision(String filePath, RevCommit revision) throws IOException, DiffJException {
  173. RevTree tree = revWalk.parseCommit(revision.getId()).getTree();
  174. TreeWalk treeWalk = TreeWalk.forPath(repo, filePath, tree);
  175. ObjectId blobId = treeWalk.getObjectId(0);
  176. ObjectReader objectReader = repo.newObjectReader();
  177. ObjectLoader objectLoader = objectReader.open(blobId);
  178. byte[] bytes = objectLoader.getBytes();
  179. return new JavaFileExtension(new String(bytes, StandardCharsets.UTF_8));
  180. }
  181. /*
  182. * Start building subgraphs.
  183. */
  184. private void populateSubgraphs(String filePath, int step, FileAnnotationGraph graph, Map<RevCommit, Map<Integer, Integer>> foundRevisions) throws IOException, GitAPIException {
  185. for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
  186. Commit subCommit = this.util.getCommitDiffingLines(rev.getKey());
  187. FileAnnotationGraph subGraph = traceFileChanges(filePath, subCommit, step - 1);
  188. if (subGraph == null) break;
  189. graph.sub_graphs.put(subCommit.getHashString(), subGraph);
  190. }
  191. }
  192. /*
  193. * Save all mappings in the annotationgraph.
  194. */
  195. private void populateGraphWithMappings(FileAnnotationGraph graph, Map<RevCommit, Map<Integer, Integer>> foundRevisions) {
  196. for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
  197. String revSha = ObjectId.toString(rev.getKey().toObjectId());
  198. if (!graph.mappings.containsKey(revSha)) {
  199. graph.revisions.add(revSha);
  200. graph.mappings.put(revSha, rev.getValue());
  201. } else {
  202. Map<Integer, Integer> linemapping = graph.mappings.get(revSha);
  203. // Add missing mappings.
  204. for (Map.Entry<Integer, Integer> entry : rev.getValue().entrySet()) {
  205. if (!linemapping.containsKey(entry.getKey())) {
  206. linemapping.put(entry.getKey(), entry.getValue());
  207. }
  208. }
  209. }
  210. }
  211. }
  212. /*
  213. * Grab the blamed commits and get the line numbers.
  214. */
  215. private Map<RevCommit, Map<Integer, Integer>> linkRevisionsWithLineNumbers(List<Integer> delIndexes, BlameResult found) {
  216. int index;
  217. Map<RevCommit, Map<Integer, Integer>> foundRevisions = new HashMap<>();
  218. for (Integer delIndex : delIndexes) {
  219. index = delIndex;
  220. if (index == -1) continue;
  221. try {
  222. RevCommit foundRev = found.getSourceCommit(index);
  223. if (!foundRevisions.containsKey(foundRev)) {
  224. Map<Integer, Integer> blamedLines = new LinkedHashMap<>();
  225. blamedLines.put(index, getSourceLine(found, index));
  226. foundRevisions.put(foundRev, blamedLines);
  227. } else {
  228. foundRevisions.get(foundRev).put(index, getSourceLine(found, index));
  229. }
  230. } catch (Exception e) {
  231. // This means that a row didn't exist in a previous revision..
  232. }
  233. }
  234. return foundRevisions;
  235. }
  236. private BlameResult callBlameCommand(String filePath, RevCommit startCommit) throws GitAPIException {
  237. blameCommand.setStartCommit(startCommit);
  238. blameCommand.setFilePath(filePath);
  239. return blameCommand.call();
  240. }
  241. /*
  242. * Create a graph to store line mappings in.
  243. */
  244. private FileAnnotationGraph createEmptyGraph(String filePath) {
  245. FileAnnotationGraph graph = new FileAnnotationGraph();
  246. graph.filePath = filePath;
  247. graph.revisions = new LinkedList<>();
  248. graph.mappings = new HashMap<>();
  249. graph.sub_graphs = new HashMap<>();
  250. return graph;
  251. }
  252. /**
  253. * With each revision, check all files and build their line mapping graphs for each changed line.
  254. *
  255. * @param commits list of commits that should be traced.
  256. * @return the map containing annotation graphs for each file change by a commit.
  257. */
  258. private AnnotationMap<String, List<FileAnnotationGraph>> buildLineMappingGraph(
  259. List<Commit> commits) throws IOException, GitAPIException {
  260. AnnotationMap<String, List<FileAnnotationGraph>> fileGraph = new AnnotationMap<>();
  261. for (Commit commit : commits) {
  262. List<FileAnnotationGraph> graphs = new LinkedList<>();
  263. for (Map.Entry<String, DiffEntry.ChangeType> file : commit.changeTypes.entrySet()) {
  264. String filePath = file.getKey();
  265. if (checkFileType(filePath)) {
  266. FileAnnotationGraph tracedCommits = traceFileChanges(filePath, commit, this.depth);
  267. graphs.add(tracedCommits);
  268. }
  269. }
  270. fileGraph.put(commit.getHashString(), graphs);
  271. }
  272. return fileGraph;
  273. }
  274. private boolean checkFileType(String filePath) {
  275. return !filePath.contains("/test/") && checkFileExtension(filePath);
  276. }
  277. private boolean checkFileExtension(String filePath) {
  278. AtomicBoolean validExtension = new AtomicBoolean(true);
  279. USELESS_FILE_EXTENSIONS.forEach(it -> {
  280. validExtension.set(!filePath.endsWith(it));
  281. });
  282. return validExtension.get();
  283. }
  284. /**
  285. * Wrapper method to catch a faulty value.
  286. *
  287. * @param value the string to convert to an int.
  288. * @return the value of the string as an int.
  289. */
  290. private int parseInt(String value) {
  291. try {
  292. return Integer.parseInt(value);
  293. } catch (Exception e) {
  294. return -1;
  295. }
  296. }
  297. /**
  298. * Searchs for commits that have certain keywords in their messages, indicating that they have
  299. * fiexd bugs.
  300. *
  301. * <p>It then saves the found commits and the line mapping graph to two JSON files.
  302. *
  303. * @param commits a set containing references to commits.
  304. */
  305. public AnnotationMap<String, List<FileAnnotationGraph>> annotateCommits(Set<RevCommit> commits)
  306. throws IOException, GitAPIException {
  307. this.logger.info("Parsing difflines for all found commits.");
  308. List<Commit> parsedCommits = this.util.getDiffingLines(commits);
  309. this.logger.info("Saving parsed commits to file");
  310. JSONUtil.saveFoundCommits(parsedCommits, this.resultPath);
  311. this.logger.info("Building line mapping graph.");
  312. AnnotationMap<String, List<FileAnnotationGraph>> mapping = buildLineMappingGraph(parsedCommits);
  313. this.logger.info("Saving results to file");
  314. mapping.saveToJSON(this.resultPath);
  315. return mapping;
  316. }
  317. /**
  318. * Use this method to use already found big fixing changes.
  319. *
  320. * @param path the path to the json file where the changes are stored.
  321. */
  322. public Set<RevCommit> readBugFixCommits(String path) throws IOException {
  323. if (repo == null) return Collections.emptySet();
  324. this.issues = new Issues();
  325. JSONParser commitParser = new JSONParser();
  326. try {
  327. JSONObject object = (JSONObject) commitParser.parse(new FileReader(path));
  328. this.issues.revisions = new HashSet<>();
  329. this.issues.dates = new HashMap<>();
  330. for (Object issue : object.keySet()) {
  331. Map<String, String> issueInfo = (Map<String, String>) object.get(issue);
  332. String rev = issueInfo.get("hash");
  333. RevCommit revCommit = this.repo.parseCommit(this.repo.resolve(rev));
  334. Map<String, String> dates = new HashMap<>();
  335. dates.put("resolutiondate", issueInfo.get("resolutiondate"));
  336. dates.put("commitdate", issueInfo.get("commitdate"));
  337. dates.put("creationdate", issueInfo.get("creationdate"));
  338. this.issues.dates.put(rev, dates);
  339. this.issues.revisions.add(revCommit);
  340. }
  341. } catch (FileNotFoundException | ParseException e) {
  342. return Collections.emptySet();
  343. }
  344. this.logger.info(String.format("Found %d number of commits.", this.issues.revisions.size()));
  345. if (this.issues.revisions.size() == 0) return Collections.emptySet();
  346. return this.issues.revisions;
  347. }
  348. }