GitParser.java 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  /*
   * MIT License
   *
   * Copyright (c) 2018 Axis Communications AB
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy
   * of this software and associated documentation files (the "Software"), to deal
   * in the Software without restriction, including without limitation the rights
   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   * copies of the Software, and to permit persons to whom the Software is
   * furnished to do so, subject to the following conditions:
   *
   * The above copyright notice and this permission notice shall be included in all
   * copies or substantial portions of the Software.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   * SOFTWARE.
   */
  24. package parser;
  import data.Issues;
  import diff.JavaFileExtension;
  import graph.AnnotationMap;
  import graph.FileAnnotationGraph;
  import java.io.File;
  import java.io.FileInputStream;
  import java.io.FileNotFoundException;
  import java.io.FileReader;
  import java.io.IOException;
  import java.io.InputStreamReader;
  import java.io.Reader;
  import java.nio.charset.StandardCharsets;
  import java.util.*;
  import java.util.concurrent.atomic.AtomicBoolean;
  import java.util.stream.Collectors;
  import org.eclipse.jgit.api.BlameCommand;
  import org.eclipse.jgit.api.errors.GitAPIException;
  import org.eclipse.jgit.blame.BlameResult;
  import org.eclipse.jgit.diff.DiffEntry;
  import org.eclipse.jgit.lib.ObjectId;
  import org.eclipse.jgit.lib.ObjectLoader;
  import org.eclipse.jgit.lib.ObjectReader;
  import org.eclipse.jgit.lib.Repository;
  import org.eclipse.jgit.revwalk.RevCommit;
  import org.eclipse.jgit.revwalk.RevTree;
  import org.eclipse.jgit.revwalk.RevWalk;
  import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
  import org.eclipse.jgit.treewalk.TreeWalk;
  import org.incava.diffj.lang.DiffJException;
  import org.json.simple.JSONObject;
  import org.json.simple.parser.JSONParser;
  import org.json.simple.parser.ParseException;
  import org.slf4j.Logger;
  import org.slf4j.LoggerFactory;
  import util.CommitUtil;
  import util.JSONUtil;
  57. /**
  58. * A class which is capable to search and build line mapping graphs from a local repository. Uses
  59. * JGit to parse the repository and the revision trees.
  60. *
  61. * @author Oscar Svensson
  62. */
  63. public class GitParser {
  64. private static final List<String> USELESS_FILE_EXTENSIONS = Arrays.asList(".md", ".txt", ".markdown");
  65. private CommitUtil util;
  66. private Repository repo;
  67. private RevWalk revWalk;
  68. private Issues issues;
  69. private String resultPath;
  70. private Logger logger;
  71. private int depth;
  72. private BlameCommand blameCommand;
  73. /**
  74. * The constructor for the GitParser class. It requires the repository to exist and will fail if
  75. * its not. The resultPath is also created if it's not existing.
  76. *
  77. * @param path the path to where the local repository can be found.
  78. * @param resultPath the path to where the JSON files will be written.
  79. */
  80. public GitParser(String path, String resultPath, int depth, int customContext)
  81. throws IOException {
  82. FileRepositoryBuilder builder = new FileRepositoryBuilder();
  83. builder.setMustExist(true);
  84. builder.addCeilingDirectory(new File(path));
  85. builder.findGitDir(new File(path));
  86. this.repo = builder.build();
  87. this.revWalk = new RevWalk(repo);
  88. this.blameCommand = new BlameCommand(this.repo);
  89. this.resultPath = resultPath;
  90. /*
  91. * Check if the resultpath exists otherwise create it.
  92. */
  93. if (this.resultPath != null) {
  94. File resDirectory = new File(resultPath);
  95. if (!resDirectory.exists()) resDirectory.mkdirs();
  96. } else {
  97. System.err.println("Resultpath not set! Using deafult directory instead.");
  98. this.resultPath = "./results";
  99. }
  100. this.util = new CommitUtil(this.repo, customContext);
  101. this.depth = depth;
  102. }
  103. public String getResultPath() {
  104. return this.resultPath;
  105. }
  106. public Repository getRepository() {
  107. return this.repo;
  108. }
  109. public Issues getIssues() {
  110. return this.issues;
  111. }
  112. public void useLogger(Logger logger) {
  113. this.logger = logger;
  114. }
  115. private int getSourceLine(BlameResult foundCommit, int index) throws IOException {
  116. foundCommit.computeAll();
  117. try {
  118. return foundCommit.getSourceLine(index);
  119. } catch (ArrayIndexOutOfBoundsException e) {
  120. return -1;
  121. }
  122. }
  123. /**
  124. * Traces a file change that have occured before a given commmit.
  125. *
  126. * @param filePath specifies which file to trace changes on.
  127. * @param source the source commit from which the trace should start at.
  128. */
  129. private FileAnnotationGraph traceFileChanges(String filePath, Commit source, int step)
  130. throws IOException, GitAPIException {
  131. if (step == 0 || !source.diffWithParent.containsKey(filePath)) return null;
  132. /*
  133. * Save all line numbers for the source commits deletions.
  134. */
  135. List<Integer> delIndexes = buildDelIndexes(filePath, source);
  136. FileAnnotationGraph graph = createEmptyGraph(filePath);
  137. graph.revisions.add(ObjectId.toString(source.commit.toObjectId()));
  138. BlameResult found = callBlameCommand(filePath, source.commit.getParent(0));
  139. if (found == null) return graph;
  140. Map<RevCommit, Map<Integer, Integer>> foundRevisions = linkRevisionsWithLineNumbers(delIndexes, found);
  141. populateGraphWithMappings(graph, foundRevisions);
  142. populateSubgraphs(filePath, step, graph, foundRevisions);
  143. return graph;
  144. }
  145. private List<Integer> buildDelIndexes(String filePath, Commit source) {
  146. List<Integer> delIndexes = source
  147. .diffWithParent
  148. .get(filePath)
  149. .deletions
  150. .stream()
  151. .map(s -> parseInt(s[0]))
  152. .collect(Collectors.toList());
  153. if(filePath.endsWith(".java")) {
  154. Set<Integer> changesFromDiffJ = changesFromDiffJ(filePath, source);
  155. delIndexes = delIndexes.stream().filter(changesFromDiffJ::contains).collect(Collectors.toList());
  156. }
  157. return delIndexes;
  158. }
  159. private Set<Integer> changesFromDiffJ(String filePath, Commit source) {
  160. try {
  161. JavaFileExtension revision = getFileContentAtRevision(filePath, source.commit);
  162. JavaFileExtension parentRev = getFileContentAtRevision(filePath, source.commit.getParent(0));
  163. if(revision == null || parentRev == null) {
  164. return Collections.emptySet();
  165. }
  166. // Converting line numbers to indexes.
  167. return revision.affectedLineNumbers(parentRev).stream().map(it ->
  168. it-1
  169. ).collect(Collectors.toSet());
  170. } catch (Exception e) {
  171. logger.warn(String.format("Exception ### File %s from: %s to: %s", filePath, source.commit.toString(), source.commit.getParent(0).toString()));
  172. return Collections.emptySet();
  173. }
  174. }
  175. private JavaFileExtension getFileContentAtRevision(String filePath, RevCommit revision) throws IOException, DiffJException {
  176. RevTree tree = revWalk.parseCommit(revision.getId()).getTree();
  177. TreeWalk treeWalk = TreeWalk.forPath(repo, filePath, tree);
  178. if(treeWalk == null) {
  179. return null;
  180. }
  181. ObjectId blobId = treeWalk.getObjectId(0);
  182. ObjectReader objectReader = repo.newObjectReader();
  183. ObjectLoader objectLoader = objectReader.open(blobId);
  184. byte[] bytes = objectLoader.getBytes();
  185. return new JavaFileExtension(new String(bytes, StandardCharsets.UTF_8));
  186. }
  187. /*
  188. * Start building subgraphs.
  189. */
  190. private void populateSubgraphs(String filePath, int step, FileAnnotationGraph graph, Map<RevCommit, Map<Integer, Integer>> foundRevisions) throws IOException, GitAPIException {
  191. for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
  192. Commit subCommit = this.util.getCommitDiffingLines(rev.getKey());
  193. FileAnnotationGraph subGraph = traceFileChanges(filePath, subCommit, step - 1);
  194. if (subGraph == null) break;
  195. graph.sub_graphs.put(subCommit.getHashString(), subGraph);
  196. }
  197. }
  198. /*
  199. * Save all mappings in the annotationgraph.
  200. */
  201. private void populateGraphWithMappings(FileAnnotationGraph graph, Map<RevCommit, Map<Integer, Integer>> foundRevisions) {
  202. for (Map.Entry<RevCommit, Map<Integer, Integer>> rev : foundRevisions.entrySet()) {
  203. String revSha = ObjectId.toString(rev.getKey().toObjectId());
  204. if (!graph.mappings.containsKey(revSha)) {
  205. graph.revisions.add(revSha);
  206. graph.mappings.put(revSha, rev.getValue());
  207. } else {
  208. Map<Integer, Integer> linemapping = graph.mappings.get(revSha);
  209. // Add missing mappings.
  210. for (Map.Entry<Integer, Integer> entry : rev.getValue().entrySet()) {
  211. if (!linemapping.containsKey(entry.getKey())) {
  212. linemapping.put(entry.getKey(), entry.getValue());
  213. }
  214. }
  215. }
  216. }
  217. }
  218. /*
  219. * Grab the blamed commits and get the line numbers.
  220. */
  221. private Map<RevCommit, Map<Integer, Integer>> linkRevisionsWithLineNumbers(List<Integer> delIndexes, BlameResult found) {
  222. int index;
  223. Map<RevCommit, Map<Integer, Integer>> foundRevisions = new HashMap<>();
  224. for (Integer delIndex : delIndexes) {
  225. index = delIndex;
  226. if (index == -1) continue;
  227. try {
  228. RevCommit foundRev = found.getSourceCommit(index);
  229. if (!foundRevisions.containsKey(foundRev)) {
  230. Map<Integer, Integer> blamedLines = new LinkedHashMap<>();
  231. blamedLines.put(index, getSourceLine(found, index));
  232. foundRevisions.put(foundRev, blamedLines);
  233. } else {
  234. foundRevisions.get(foundRev).put(index, getSourceLine(found, index));
  235. }
  236. } catch (Exception e) {
  237. // This means that a row didn't exist in a previous revision..
  238. }
  239. }
  240. return foundRevisions;
  241. }
  242. private BlameResult callBlameCommand(String filePath, RevCommit startCommit) throws GitAPIException {
  243. blameCommand.setStartCommit(startCommit);
  244. blameCommand.setFilePath(filePath);
  245. return blameCommand.call();
  246. }
  247. /*
  248. * Create a graph to store line mappings in.
  249. */
  250. private FileAnnotationGraph createEmptyGraph(String filePath) {
  251. FileAnnotationGraph graph = new FileAnnotationGraph();
  252. graph.filePath = filePath;
  253. graph.revisions = new LinkedList<>();
  254. graph.mappings = new HashMap<>();
  255. graph.sub_graphs = new HashMap<>();
  256. return graph;
  257. }
  258. /**
  259. * With each revision, check all files and build their line mapping graphs for each changed line.
  260. *
  261. * @param commits list of commits that should be traced.
  262. * @return the map containing annotation graphs for each file change by a commit.
  263. */
  264. private AnnotationMap<String, List<FileAnnotationGraph>> buildLineMappingGraph(
  265. List<Commit> commits) throws IOException, GitAPIException {
  266. AnnotationMap<String, List<FileAnnotationGraph>> fileGraph = new AnnotationMap<>();
  267. for (Commit commit : commits) {
  268. List<FileAnnotationGraph> graphs = new LinkedList<>();
  269. for (Map.Entry<String, DiffEntry.ChangeType> file : commit.changeTypes.entrySet()) {
  270. String filePath = file.getKey();
  271. if (checkFileType(filePath)) {
  272. FileAnnotationGraph tracedCommits = traceFileChanges(filePath, commit, this.depth);
  273. graphs.add(tracedCommits);
  274. }
  275. }
  276. fileGraph.put(commit.getHashString(), graphs);
  277. }
  278. return fileGraph;
  279. }
  280. private boolean checkFileType(String filePath) {
  281. return !filePath.contains("/test/") && checkFileExtension(filePath);
  282. }
  283. private boolean checkFileExtension(String filePath) {
  284. AtomicBoolean validExtension = new AtomicBoolean(true);
  285. USELESS_FILE_EXTENSIONS.forEach(it -> {
  286. validExtension.set(!filePath.endsWith(it));
  287. });
  288. return validExtension.get();
  289. }
  290. /**
  291. * Wrapper method to catch a faulty value.
  292. *
  293. * @param value the string to convert to an int.
  294. * @return the value of the string as an int.
  295. */
  296. private int parseInt(String value) {
  297. try {
  298. return Integer.parseInt(value);
  299. } catch (Exception e) {
  300. return -1;
  301. }
  302. }
  303. /**
  304. * Searchs for commits that have certain keywords in their messages, indicating that they have
  305. * fiexd bugs.
  306. *
  307. * <p>It then saves the found commits and the line mapping graph to two JSON files.
  308. *
  309. * @param commits a set containing references to commits.
  310. */
  311. public AnnotationMap<String, List<FileAnnotationGraph>> annotateCommits(Set<RevCommit> commits)
  312. throws IOException, GitAPIException {
  313. this.logger.info("Parsing difflines for all found commits.");
  314. List<Commit> parsedCommits = this.util.getDiffingLines(commits);
  315. this.logger.info("Saving parsed commits to file");
  316. JSONUtil.saveFoundCommits(parsedCommits, this.resultPath);
  317. this.logger.info("Building line mapping graph.");
  318. AnnotationMap<String, List<FileAnnotationGraph>> mapping = buildLineMappingGraph(parsedCommits);
  319. this.logger.info("Saving results to file");
  320. mapping.saveToJSON(this.resultPath);
  321. return mapping;
  322. }
  323. /**
  324. * Use this method to use already found big fixing changes.
  325. *
  326. * @param path the path to the json file where the changes are stored.
  327. */
  328. public Set<RevCommit> readBugFixCommits(String path) throws IOException {
  329. if (repo == null) return Collections.emptySet();
  330. this.issues = new Issues();
  331. JSONParser commitParser = new JSONParser();
  332. try {
  333. JSONObject object = (JSONObject) commitParser.parse(new FileReader(path));
  334. this.issues.revisions = new HashSet<>();
  335. this.issues.dates = new HashMap<>();
  336. for (Object issue : object.keySet()) {
  337. Map<String, String> issueInfo = (Map<String, String>) object.get(issue);
  338. String rev = issueInfo.get("hash");
  339. RevCommit revCommit = this.repo.parseCommit(this.repo.resolve(rev));
  340. Map<String, String> dates = new HashMap<>();
  341. dates.put("resolutiondate", issueInfo.get("resolutiondate"));
  342. dates.put("commitdate", issueInfo.get("commitdate"));
  343. dates.put("creationdate", issueInfo.get("creationdate"));
  344. this.issues.dates.put(rev, dates);
  345. this.issues.revisions.add(revCommit);
  346. }
  347. } catch (FileNotFoundException | ParseException e) {
  348. return Collections.emptySet();
  349. }
  350. this.logger.info(String.format("Found %d number of commits.", this.issues.revisions.size()));
  351. if (this.issues.revisions.size() == 0) return Collections.emptySet();
  352. return this.issues.revisions;
  353. }
  354. }