symbolize.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
  2. // Copyright (c) 2009, Google Inc.
  3. // All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // ---
  31. // Author: Craig Silverstein
  32. //
  33. // This forks out to pprof to do the actual symbolizing. We might
  34. // be better off writing our own in C++.
  35. #include "config.h"
  36. #include "symbolize.h"
  37. #include <stdlib.h>
  38. #ifdef HAVE_UNISTD_H
  39. #include <unistd.h> // for write()
  40. #endif
  41. #ifdef HAVE_SYS_SOCKET_H
  42. #include <sys/socket.h> // for socketpair() -- needed by Symbolize
  43. #endif
  44. #ifdef HAVE_SYS_WAIT_H
  45. #include <sys/wait.h> // for wait() -- needed by Symbolize
  46. #endif
  47. #ifdef HAVE_POLL_H
  48. #include <poll.h>
  49. #endif
  50. #ifdef __MACH__
  51. #include <mach-o/dyld.h> // for GetProgramInvocationName()
  52. #include <limits.h> // for PATH_MAX
  53. #endif
  54. #if defined(__CYGWIN__) || defined(__CYGWIN32__)
  55. #include <io.h> // for get_osfhandle()
  56. #endif
  57. #include <string>
  58. #include "base/commandlineflags.h"
  59. #include "base/logging.h"
  60. #ifndef TCMALLOC_SGX
  61. #include "base/sysinfo.h"
  62. #endif
  63. using std::string;
  64. #ifndef TCMALLOC_SGX
  65. using tcmalloc::DumpProcSelfMaps; // from sysinfo.h
  66. #endif
  // Command-line flag naming the pprof executable that Symbolize() runs to
  // turn addresses into function names.  The default is whatever
  // EnvToString("PPROF_PATH", "pprof") yields -- presumably the PPROF_PATH
  // environment variable, falling back to "pprof" when it is unset.
  DEFINE_string(symbolize_pprof,
                EnvToString("PPROF_PATH", "pprof"),
                "Path to pprof to call for reporting function names.");
  // The pprof path may be referenced after destructors are
  // called (since that's when leak-checking is done), so we make
  // a more-permanent copy that won't ever get destroyed.
  // NOTE: the copy is taken once, during static initialization, so later
  // changes to FLAGS_symbolize_pprof are not reflected in g_pprof_path.
  static string* g_pprof_path = new string(FLAGS_symbolize_pprof);
  74. #ifndef TCMALLOC_SGX /*IN SGX HAVE_SYS_SOCKET_H is disabled*/
  // Returns the name (or path) this executable was started with, or NULL on
  // platforms where we have no way of finding it out.
  //
  // The Mach branch caches its answer in a function-local static buffer.
  // That is acceptable because this routine is not called from multiple
  // threads.
  static const char* GetProgramInvocationName() {
  #if defined(HAVE_PROGRAM_INVOCATION_NAME)
  # if defined(__UCLIBC__)
    extern const char* program_invocation_name;  // uclibc provides this
  # else
    extern char* program_invocation_name;  // gcc provides this
  # endif
    return program_invocation_name;
  #elif defined(__MACH__)
    // A fixed-size static buffer is used on purpose: we must not allocate
    // here, since we may be running while the heap is corrupted.
    static char cached_path[PATH_MAX];
    if (cached_path[0] != '\0') {
      return cached_path;  // already computed by an earlier call
    }
    uint32_t capacity = sizeof(cached_path);
    // _NSGetExecutablePath() reports failure with a non-zero result.
    return (_NSGetExecutablePath(cached_path, &capacity) == 0) ? cached_path
                                                               : NULL;
  #else
    return NULL;  // figure out a way to get argv[0]
  #endif
  }
  99. #endif
  100. // Prints an error message when you can't run Symbolize().
  101. static void PrintError(const char* reason) {
  102. RAW_LOG(ERROR,
  103. "*** WARNING: Cannot convert addresses to symbols in output below.\n"
  104. "*** Reason: %s\n"
  105. "*** If you cannot fix this, try running pprof directly.\n",
  106. reason);
  107. }
  108. void SymbolTable::Add(const void* addr) {
  109. symbolization_table_[addr] = "";
  110. }
  111. const char* SymbolTable::GetSymbol(const void* addr) {
  112. return symbolization_table_[addr];
  113. }
  114. // Updates symbolization_table with the pointers to symbol names corresponding
  115. // to its keys. The symbol names are stored in out, which is allocated and
  116. // freed by the caller of this routine.
  117. // Note that the forking/etc is not thread-safe or re-entrant. That's
  118. // ok for the purpose we need -- reporting leaks detected by heap-checker
  119. // -- but be careful if you decide to use this routine for other purposes.
  120. // Returns number of symbols read on error. If can't symbolize, returns 0
  121. // and emits an error message about why.
  122. int SymbolTable::Symbolize() {
  123. #if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H)
  124. PrintError("Perftools does not know how to call a sub-process on this O/S");
  125. return 0;
  126. #else
  127. const char* argv0 = GetProgramInvocationName();
  128. if (argv0 == NULL) { // can't call symbolize if we can't figure out our name
  129. PrintError("Cannot figure out the name of this executable (argv0)");
  130. return 0;
  131. }
  132. if (access(g_pprof_path->c_str(), R_OK) != 0) {
  133. PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)");
  134. return 0;
  135. }
  136. // All this work is to do two-way communication. ugh.
  137. int *child_in = NULL; // file descriptors
  138. int *child_out = NULL; // for now, we don't worry about child_err
  139. int child_fds[5][2]; // socketpair may be called up to five times below
  140. // The client program may close its stdin and/or stdout and/or stderr
  141. // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
  142. // In this case the communication between the forked processes may be broken
  143. // if either the parent or the child tries to close or duplicate these
  144. // descriptors. The loop below produces two pairs of file descriptors, each
  145. // greater than 2 (stderr).
  146. for (int i = 0; i < 5; i++) {
  147. if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
  148. for (int j = 0; j < i; j++) {
  149. close(child_fds[j][0]);
  150. close(child_fds[j][1]);
  151. PrintError("Cannot create a socket pair");
  152. }
  153. return 0;
  154. } else {
  155. if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
  156. if (child_in == NULL) {
  157. child_in = child_fds[i];
  158. } else {
  159. child_out = child_fds[i];
  160. for (int j = 0; j < i; j++) {
  161. if (child_fds[j] == child_in) continue;
  162. close(child_fds[j][0]);
  163. close(child_fds[j][1]);
  164. }
  165. break;
  166. }
  167. }
  168. }
  169. }
  170. switch (fork()) {
  171. case -1: { // error
  172. close(child_in[0]);
  173. close(child_in[1]);
  174. close(child_out[0]);
  175. close(child_out[1]);
  176. PrintError("Unknown error calling fork()");
  177. return 0;
  178. }
  179. case 0: { // child
  180. close(child_in[1]); // child uses the 0's, parent uses the 1's
  181. close(child_out[1]); // child uses the 0's, parent uses the 1's
  182. close(0);
  183. close(1);
  184. if (dup2(child_in[0], 0) == -1) _exit(1);
  185. if (dup2(child_out[0], 1) == -1) _exit(2);
  186. // Unset vars that might cause trouble when we fork
  187. unsetenv("CPUPROFILE");
  188. unsetenv("HEAPPROFILE");
  189. unsetenv("HEAPCHECK");
  190. unsetenv("PERFTOOLS_VERBOSE");
  191. execlp(g_pprof_path->c_str(), g_pprof_path->c_str(),
  192. "--symbols", argv0, NULL);
  193. _exit(3); // if execvp fails, it's bad news for us
  194. }
  195. default: { // parent
  196. close(child_in[0]); // child uses the 0's, parent uses the 1's
  197. close(child_out[0]); // child uses the 0's, parent uses the 1's
  198. #ifdef HAVE_POLL_H
  199. // Waiting for 1ms seems to give the OS time to notice any errors.
  200. poll(0, 0, 1);
  201. // For maximum safety, we check to make sure the execlp
  202. // succeeded before trying to write. (Otherwise we'll get a
  203. // SIGPIPE.) For systems without poll.h, we'll just skip this
  204. // check, and trust that the user set PPROF_PATH correctly!
  205. struct pollfd pfd = { child_in[1], POLLOUT, 0 };
  206. if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) ||
  207. (pfd.revents & (POLLHUP|POLLERR))) {
  208. PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)");
  209. return 0;
  210. }
  211. #endif
  212. #if defined(__CYGWIN__) || defined(__CYGWIN32__)
  213. // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert.
  214. const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]);
  215. DumpProcSelfMaps(symbols_handle);
  216. #else
  217. DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin
  218. #endif
  219. // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each
  220. // address to feed to pprof.
  221. const int kOutBufSize = 24 * symbolization_table_.size();
  222. char *pprof_buffer = new char[kOutBufSize];
  223. int written = 0;
  224. for (SymbolMap::const_iterator iter = symbolization_table_.begin();
  225. iter != symbolization_table_.end(); ++iter) {
  226. written += snprintf(pprof_buffer + written, kOutBufSize - written,
  227. // pprof expects format to be 0xXXXXXX
  228. "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first));
  229. }
  230. write(child_in[1], pprof_buffer, strlen(pprof_buffer));
  231. close(child_in[1]); // that's all we need to write
  232. const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size();
  233. int total_bytes_read = 0;
  234. delete[] symbol_buffer_;
  235. symbol_buffer_ = new char[kSymbolBufferSize];
  236. memset(symbol_buffer_, '\0', kSymbolBufferSize);
  237. while (1) {
  238. int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read,
  239. kSymbolBufferSize - total_bytes_read);
  240. if (bytes_read < 0) {
  241. close(child_out[1]);
  242. PrintError("Cannot read data from pprof");
  243. return 0;
  244. } else if (bytes_read == 0) {
  245. close(child_out[1]);
  246. wait(NULL);
  247. break;
  248. } else {
  249. total_bytes_read += bytes_read;
  250. }
  251. }
  252. // We have successfully read the output of pprof into out. Make sure
  253. // the last symbol is full (we can tell because it ends with a \n).
  254. if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n')
  255. return 0;
  256. // make the symbolization_table_ values point to the output vector
  257. SymbolMap::iterator fill = symbolization_table_.begin();
  258. int num_symbols = 0;
  259. const char *current_name = symbol_buffer_;
  260. for (int i = 0; i < total_bytes_read; i++) {
  261. if (symbol_buffer_[i] == '\n') {
  262. fill->second = current_name;
  263. symbol_buffer_[i] = '\0';
  264. current_name = symbol_buffer_ + i + 1;
  265. fill++;
  266. num_symbols++;
  267. }
  268. }
  269. return num_symbols;
  270. }
  271. }
  272. PrintError("Unkown error (should never occur!)");
  273. return 0; // shouldn't be reachable
  274. #endif
  275. }