scanf.c 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. /* Copyright (c) 2003-2004, Roger Dingledine
  2. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  3. * Copyright (c) 2007-2018, The Tor Project, Inc. */
  4. /* See LICENSE for licensing information */
  5. /**
  6. * \file scanf.c
  7. * \brief Locale-independent minimal implementation of sscanf().
  8. **/
  9. #include "lib/string/scanf.h"
  10. #include "lib/string/compat_ctype.h"
  11. #include "lib/cc/torint.h"
  12. #include "lib/err/torerr.h"
  13. #include <stdlib.h>
  14. #define MAX_SCANF_WIDTH 9999
  /** Helper: convert the ASCII decimal digit <b>d</b> into the integer it
   * denotes (0 through 9).
   * NOTE: the caller must pass a character in the range '0'..'9'; any other
   * input trips the assertion below. */
  static int
  digit_to_num(char d)
  {
    const int value = (int)d - (int)'0';
    raw_assert(value >= 0 && value <= 9);
    return value;
  }
  24. /** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
  25. * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
  26. * success, store the result in <b>out</b>, advance bufp to the next
  27. * character, and return 0. On failure, return -1. */
  28. static int
  29. scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
  30. {
  31. unsigned long result = 0;
  32. int scanned_so_far = 0;
  33. const int hex = base==16;
  34. raw_assert(base == 10 || base == 16);
  35. if (!bufp || !*bufp || !out)
  36. return -1;
  37. if (width<0)
  38. width=MAX_SCANF_WIDTH;
  39. while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
  40. && scanned_so_far < width) {
  41. unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
  42. // Check for overflow beforehand, without actually causing any overflow
  43. // This preserves functionality on compilers that don't wrap overflow
  44. // (i.e. that trap or optimise away overflow)
  45. // result * base + digit > ULONG_MAX
  46. // result * base > ULONG_MAX - digit
  47. if (result > (ULONG_MAX - digit)/base)
  48. return -1; /* Processing this digit would overflow */
  49. result = result * base + digit;
  50. ++scanned_so_far;
  51. }
  52. if (!scanned_so_far) /* No actual digits scanned */
  53. return -1;
  54. *out = result;
  55. return 0;
  56. }
  57. /** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b>
  58. * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
  59. * success, store the result in <b>out</b>, advance bufp to the next
  60. * character, and return 0. On failure, return -1. */
  61. static int
  62. scan_signed(const char **bufp, long *out, int width)
  63. {
  64. int neg = 0;
  65. unsigned long result = 0;
  66. if (!bufp || !*bufp || !out)
  67. return -1;
  68. if (width<0)
  69. width=MAX_SCANF_WIDTH;
  70. if (**bufp == '-') {
  71. neg = 1;
  72. ++*bufp;
  73. --width;
  74. }
  75. if (scan_unsigned(bufp, &result, width, 10) < 0)
  76. return -1;
  77. if (neg && result > 0) {
  78. if (result > ((unsigned long)LONG_MAX) + 1)
  79. return -1; /* Underflow */
  80. else if (result == ((unsigned long)LONG_MAX) + 1)
  81. *out = LONG_MIN;
  82. else {
  83. /* We once had a far more clever no-overflow conversion here, but
  84. * some versions of GCC apparently ran it into the ground. Now
  85. * we just check for LONG_MIN explicitly.
  86. */
  87. *out = -(long)result;
  88. }
  89. } else {
  90. if (result > LONG_MAX)
  91. return -1; /* Overflow */
  92. *out = (long)result;
  93. }
  94. return 0;
  95. }
  96. /** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
  97. * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less
  98. * than 0.) On success, store the result in <b>out</b>, advance bufp to the
  99. * next character, and return 0. On failure, return -1. */
  100. static int
  101. scan_double(const char **bufp, double *out, int width)
  102. {
  103. int neg = 0;
  104. double result = 0;
  105. int scanned_so_far = 0;
  106. if (!bufp || !*bufp || !out)
  107. return -1;
  108. if (width<0)
  109. width=MAX_SCANF_WIDTH;
  110. if (**bufp == '-') {
  111. neg = 1;
  112. ++*bufp;
  113. }
  114. while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
  115. const int digit = digit_to_num(*(*bufp)++);
  116. result = result * 10 + digit;
  117. ++scanned_so_far;
  118. }
  119. if (**bufp == '.') {
  120. double fracval = 0, denominator = 1;
  121. ++*bufp;
  122. ++scanned_so_far;
  123. while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
  124. const int digit = digit_to_num(*(*bufp)++);
  125. fracval = fracval * 10 + digit;
  126. denominator *= 10;
  127. ++scanned_so_far;
  128. }
  129. result += fracval / denominator;
  130. }
  131. if (!scanned_so_far) /* No actual digits scanned */
  132. return -1;
  133. *out = neg ? -result : result;
  134. return 0;
  135. }
  /** Helper: copy up to <b>width</b> non-space characters from *<b>bufp</b>
   * into <b>out</b>, and always NUL-terminate <b>out</b>.  The destination
   * must therefore have room for <b>width</b>+1 bytes.
   *
   * Copying stops at the first space character, at the end of the string, or
   * once <b>width</b> characters have been copied, whichever comes first;
   * *<b>bufp</b> is left pointing at the first character that was not copied.
   * No whitespace is skipped.
   *
   * Return 0 on success (including when zero characters were copied).  Return
   * -1, without touching <b>out</b>, if any pointer argument is NULL or if
   * <b>width</b> is negative. */
  static int
  scan_string(const char **bufp, char *out, int width)
  {
    int scanned_so_far = 0;

    /* Check *bufp as well as bufp, like the other scan_* helpers: the loop
     * below dereferences it. */
    if (!bufp || !*bufp || !out || width < 0)
      return -1;

    while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
      *out++ = *(*bufp)++;
      ++scanned_so_far;
    }
    *out = '\0';
    return 0;
  }
  152. /** Locale-independent, minimal, no-surprises scanf variant, accepting only a
  153. * restricted pattern format. For more info on what it supports, see
  154. * tor_sscanf() documentation. */
  155. int
  156. tor_vsscanf(const char *buf, const char *pattern, va_list ap)
  157. {
  158. int n_matched = 0;
  159. while (*pattern) {
  160. if (*pattern != '%') {
  161. if (*buf == *pattern) {
  162. ++buf;
  163. ++pattern;
  164. continue;
  165. } else {
  166. return n_matched;
  167. }
  168. } else {
  169. int width = -1;
  170. int longmod = 0;
  171. ++pattern;
  172. if (TOR_ISDIGIT(*pattern)) {
  173. width = digit_to_num(*pattern++);
  174. while (TOR_ISDIGIT(*pattern)) {
  175. width *= 10;
  176. width += digit_to_num(*pattern++);
  177. if (width > MAX_SCANF_WIDTH)
  178. return -1;
  179. }
  180. if (!width) /* No zero-width things. */
  181. return -1;
  182. }
  183. if (*pattern == 'l') {
  184. longmod = 1;
  185. ++pattern;
  186. }
  187. if (*pattern == 'u' || *pattern == 'x') {
  188. unsigned long u;
  189. const int base = (*pattern == 'u') ? 10 : 16;
  190. if (!*buf)
  191. return n_matched;
  192. if (scan_unsigned(&buf, &u, width, base)<0)
  193. return n_matched;
  194. if (longmod) {
  195. unsigned long *out = va_arg(ap, unsigned long *);
  196. *out = u;
  197. } else {
  198. unsigned *out = va_arg(ap, unsigned *);
  199. if (u > UINT_MAX)
  200. return n_matched;
  201. *out = (unsigned) u;
  202. }
  203. ++pattern;
  204. ++n_matched;
  205. } else if (*pattern == 'f') {
  206. double *d = va_arg(ap, double *);
  207. if (!longmod)
  208. return -1; /* float not supported */
  209. if (!*buf)
  210. return n_matched;
  211. if (scan_double(&buf, d, width)<0)
  212. return n_matched;
  213. ++pattern;
  214. ++n_matched;
  215. } else if (*pattern == 'd') {
  216. long lng=0;
  217. if (scan_signed(&buf, &lng, width)<0)
  218. return n_matched;
  219. if (longmod) {
  220. long *out = va_arg(ap, long *);
  221. *out = lng;
  222. } else {
  223. int *out = va_arg(ap, int *);
  224. #if LONG_MAX > INT_MAX
  225. if (lng < INT_MIN || lng > INT_MAX)
  226. return n_matched;
  227. #endif
  228. *out = (int)lng;
  229. }
  230. ++pattern;
  231. ++n_matched;
  232. } else if (*pattern == 's') {
  233. char *s = va_arg(ap, char *);
  234. if (longmod)
  235. return -1;
  236. if (width < 0)
  237. return -1;
  238. if (scan_string(&buf, s, width)<0)
  239. return n_matched;
  240. ++pattern;
  241. ++n_matched;
  242. } else if (*pattern == 'c') {
  243. char *ch = va_arg(ap, char *);
  244. if (longmod)
  245. return -1;
  246. if (width != -1)
  247. return -1;
  248. if (!*buf)
  249. return n_matched;
  250. *ch = *buf++;
  251. ++pattern;
  252. ++n_matched;
  253. } else if (*pattern == '%') {
  254. if (*buf != '%')
  255. return n_matched;
  256. if (longmod)
  257. return -1;
  258. ++buf;
  259. ++pattern;
  260. } else {
  261. return -1; /* Unrecognized pattern component. */
  262. }
  263. }
  264. }
  265. return n_matched;
  266. }
  /** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
   * and store each converted value through the corresponding pointer
   * argument.  Return the number of values stored, or -1 if the pattern is
   * malformed.  Differs from sscanf in that:
   * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, %c, and
   *         the literal %%.
   *     <li>%s must be given an explicit width.
   *     <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
   *     <li>It does not handle arbitrarily long widths.
   *     <li>Numeric conversions (including %u and %x) do not consume any
   *         space characters.
   *     <li>It is locale-independent.
   *     <li>It returns -1 on malformed patterns.</ul>
   *
   * (As with other locale-independent functions, we need this to parse data
   * that is in ASCII without worrying that the C library's locale-handling
   * will make miscellaneous characters look like numbers, spaces, and so on.)
   */
  int
  tor_sscanf(const char *buf, const char *pattern, ...)
  {
    va_list args;
    int n_matched;

    va_start(args, pattern);
    n_matched = tor_vsscanf(buf, pattern, args);
    va_end(args);

    return n_matched;
  }