parsecommon.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. /* Copyright (c) 2016-2019, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. /**
  4. * \file parsecommon.c
  5. * \brief Common code to parse and validate various type of descriptors.
  6. **/
  7. #include "feature/dirparse/parsecommon.h"
  8. #include "lib/log/log.h"
  9. #include "lib/log/util_bug.h"
  10. #include "lib/encoding/binascii.h"
  11. #include "lib/container/smartlist.h"
  12. #include "lib/string/util_string.h"
  13. #include "lib/string/printf.h"
  14. #include "lib/memarea/memarea.h"
  15. #include "lib/crypt_ops/crypto_rsa.h"
  16. #include "lib/ctime/di_ops.h"
  17. #include <string.h>
  /* Bounds of the annotation range: tokenize_string() treats a token as an
   * annotation when its type lies between MIN_ANNOTATION and MAX_ANNOTATION,
   * inclusive. */
  #define MIN_ANNOTATION A_PURPOSE
  #define MAX_ANNOTATION A_UNKNOWN_

  /* Allocation shorthands.  Each expands to a memarea call on a variable
   * named `area`, which must be in scope at the point of use. */
  #define ALLOC_ZERO(sz) memarea_alloc_zero(area, (sz))
  #define ALLOC(sz) memarea_alloc(area, (sz))
  #define STRDUP(str) memarea_strdup(area, (str))
  #define STRNDUP(str, n) memarea_strndup(area, (str), (n))

  /* Abandon the token being built and report <b>msg</b> instead: clear the
   * current `tok` (if any), replace it with a zeroed token whose type is ERR_
   * and whose error field is a copy of <b>msg</b>, then jump to the
   * `done_tokenizing` label.  The enclosing function must provide `tok`,
   * `area` and that label. */
  #define RET_ERR(msg)                                  \
    STMT_BEGIN                                          \
      if (tok) {                                        \
        token_clear(tok);                               \
      }                                                 \
      tok = ALLOC_ZERO(sizeof(directory_token_t));      \
      tok->tp = ERR_;                                   \
      tok->error = STRDUP(msg);                         \
      goto done_tokenizing;                             \
    STMT_END
  32. /** Free all resources allocated for <b>tok</b> */
  33. void
  34. token_clear(directory_token_t *tok)
  35. {
  36. if (tok->key)
  37. crypto_pk_free(tok->key);
  38. }
  39. /** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
  40. * them to <b>out</b>. Parse according to the token rules in <b>table</b>.
  41. * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the
  42. * entire string.
  43. */
  44. int
  45. tokenize_string(memarea_t *area,
  46. const char *start, const char *end, smartlist_t *out,
  47. const token_rule_t *table, int flags)
  48. {
  49. const char **s;
  50. directory_token_t *tok = NULL;
  51. int counts[NIL_];
  52. int i;
  53. int first_nonannotation;
  54. int prev_len = smartlist_len(out);
  55. tor_assert(area);
  56. s = &start;
  57. if (!end) {
  58. end = start+strlen(start);
  59. } else {
  60. /* it's only meaningful to check for nuls if we got an end-of-string ptr */
  61. if (memchr(start, '\0', end-start)) {
  62. log_warn(LD_DIR, "parse error: internal NUL character.");
  63. return -1;
  64. }
  65. }
  66. for (i = 0; i < NIL_; ++i)
  67. counts[i] = 0;
  68. SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
  69. while (*s < end && (!tok || tok->tp != EOF_)) {
  70. tok = get_next_token(area, s, end, table);
  71. if (tok->tp == ERR_) {
  72. log_warn(LD_DIR, "parse error: %s", tok->error);
  73. token_clear(tok);
  74. return -1;
  75. }
  76. ++counts[tok->tp];
  77. smartlist_add(out, tok);
  78. *s = eat_whitespace_eos(*s, end);
  79. }
  80. if (flags & TS_NOCHECK)
  81. return 0;
  82. if ((flags & TS_ANNOTATIONS_OK)) {
  83. first_nonannotation = -1;
  84. for (i = 0; i < smartlist_len(out); ++i) {
  85. tok = smartlist_get(out, i);
  86. if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
  87. first_nonannotation = i;
  88. break;
  89. }
  90. }
  91. if (first_nonannotation < 0) {
  92. log_warn(LD_DIR, "parse error: item contains only annotations");
  93. return -1;
  94. }
  95. for (i=first_nonannotation; i < smartlist_len(out); ++i) {
  96. tok = smartlist_get(out, i);
  97. if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
  98. log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
  99. return -1;
  100. }
  101. }
  102. if ((flags & TS_NO_NEW_ANNOTATIONS)) {
  103. if (first_nonannotation != prev_len) {
  104. log_warn(LD_DIR, "parse error: Unexpected annotations.");
  105. return -1;
  106. }
  107. }
  108. } else {
  109. for (i=0; i < smartlist_len(out); ++i) {
  110. tok = smartlist_get(out, i);
  111. if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
  112. log_warn(LD_DIR, "parse error: no annotations allowed.");
  113. return -1;
  114. }
  115. }
  116. first_nonannotation = 0;
  117. }
  118. for (i = 0; table[i].t; ++i) {
  119. if (counts[table[i].v] < table[i].min_cnt) {
  120. log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
  121. return -1;
  122. }
  123. if (counts[table[i].v] > table[i].max_cnt) {
  124. log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
  125. return -1;
  126. }
  127. if (table[i].pos & AT_START) {
  128. if (smartlist_len(out) < 1 ||
  129. (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
  130. log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
  131. return -1;
  132. }
  133. }
  134. if (table[i].pos & AT_END) {
  135. if (smartlist_len(out) < 1 ||
  136. (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
  137. log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
  138. return -1;
  139. }
  140. }
  141. }
  142. return 0;
  143. }
  144. /** Helper: parse space-separated arguments from the string <b>s</b> ending at
  145. * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the
  146. * number of parsed elements into the n_args field of <b>tok</b>. Allocate
  147. * all storage in <b>area</b>. Return the number of arguments parsed, or
  148. * return -1 if there was an insanely high number of arguments. */
  149. static inline int
  150. get_token_arguments(memarea_t *area, directory_token_t *tok,
  151. const char *s, const char *eol)
  152. {
  153. /** Largest number of arguments we'll accept to any token, ever. */
  154. #define MAX_ARGS 512
  155. char *mem = memarea_strndup(area, s, eol-s);
  156. char *cp = mem;
  157. int j = 0;
  158. char *args[MAX_ARGS];
  159. while (*cp) {
  160. if (j == MAX_ARGS)
  161. return -1;
  162. args[j++] = cp;
  163. cp = (char*)find_whitespace(cp);
  164. if (!cp || !*cp)
  165. break; /* End of the line. */
  166. *cp++ = '\0';
  167. cp = (char*)eat_whitespace(cp);
  168. }
  169. tok->n_args = j;
  170. tok->args = memarea_memdup(area, args, j*sizeof(char*));
  171. return j;
  172. #undef MAX_ARGS
  173. }
  174. /** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
  175. * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>.
  176. * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
  177. * conform to the syntax we wanted.
  178. **/
  179. static inline directory_token_t *
  180. token_check_object(memarea_t *area, const char *kwd,
  181. directory_token_t *tok, obj_syntax o_syn)
  182. {
  183. char ebuf[128];
  184. switch (o_syn) {
  185. case NO_OBJ:
  186. /* No object is allowed for this token. */
  187. if (tok->object_body) {
  188. tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
  189. RET_ERR(ebuf);
  190. }
  191. if (tok->key) {
  192. tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
  193. RET_ERR(ebuf);
  194. }
  195. break;
  196. case NEED_OBJ:
  197. /* There must be a (non-key) object. */
  198. if (!tok->object_body) {
  199. tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
  200. RET_ERR(ebuf);
  201. }
  202. break;
  203. case NEED_KEY_1024: /* There must be a 1024-bit public key. */
  204. case NEED_SKEY_1024: /* There must be a 1024-bit private key. */
  205. if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
  206. tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
  207. kwd, crypto_pk_num_bits(tok->key));
  208. RET_ERR(ebuf);
  209. }
  210. /* fall through */
  211. case NEED_KEY: /* There must be some kind of key. */
  212. if (!tok->key) {
  213. tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
  214. RET_ERR(ebuf);
  215. }
  216. if (o_syn != NEED_SKEY_1024) {
  217. if (crypto_pk_key_is_private(tok->key)) {
  218. tor_snprintf(ebuf, sizeof(ebuf),
  219. "Private key given for %s, which wants a public key", kwd);
  220. RET_ERR(ebuf);
  221. }
  222. } else { /* o_syn == NEED_SKEY_1024 */
  223. if (!crypto_pk_key_is_private(tok->key)) {
  224. tor_snprintf(ebuf, sizeof(ebuf),
  225. "Public key given for %s, which wants a private key", kwd);
  226. RET_ERR(ebuf);
  227. }
  228. }
  229. break;
  230. case OBJ_OK:
  231. /* Anything goes with this token. */
  232. break;
  233. }
  234. done_tokenizing:
  235. return tok;
  236. }
  /** Return true iff the <b>memlen</b>-byte chunk of memory at <b>mem</b> is
   * exactly as long as the NUL-terminated string <b>token</b> (terminator
   * not counted) and holds the same bytes. */
  static bool
  mem_eq_token(const void *mem, size_t memlen, const char *token)
  {
    if (memlen != strlen(token))
      return false;

    return fast_memeq(mem, token, memlen);
  }
  246. /** Helper function: read the next token from *s, advance *s to the end of the
  247. * token, and return the parsed token. Parse *<b>s</b> according to the list
  248. * of tokens in <b>table</b>.
  249. */
  250. directory_token_t *
  251. get_next_token(memarea_t *area,
  252. const char **s, const char *eos, const token_rule_t *table)
  253. {
  254. /** Reject any object at least this big; it is probably an overflow, an
  255. * attack, a bug, or some other nonsense. */
  256. #define MAX_UNPARSED_OBJECT_SIZE (128*1024)
  257. /** Reject any line at least this big; it is probably an overflow, an
  258. * attack, a bug, or some other nonsense. */
  259. #define MAX_LINE_LENGTH (128*1024)
  260. const char *next, *eol;
  261. size_t obname_len;
  262. int i;
  263. directory_token_t *tok;
  264. obj_syntax o_syn = NO_OBJ;
  265. char ebuf[128];
  266. const char *kwd = "";
  267. tor_assert(area);
  268. tok = ALLOC_ZERO(sizeof(directory_token_t));
  269. tok->tp = ERR_;
  270. /* Set *s to first token, eol to end-of-line, next to after first token */
  271. *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
  272. tor_assert(eos >= *s);
  273. eol = memchr(*s, '\n', eos-*s);
  274. if (!eol)
  275. eol = eos;
  276. if (eol - *s > MAX_LINE_LENGTH) {
  277. RET_ERR("Line far too long");
  278. }
  279. next = find_whitespace_eos(*s, eol);
  280. if (mem_eq_token(*s, next-*s, "opt")) {
  281. /* Skip past an "opt" at the start of the line. */
  282. *s = eat_whitespace_eos_no_nl(next, eol);
  283. next = find_whitespace_eos(*s, eol);
  284. } else if (*s == eos) { /* If no "opt", and end-of-line, line is invalid */
  285. RET_ERR("Unexpected EOF");
  286. }
  287. /* Search the table for the appropriate entry. (I tried a binary search
  288. * instead, but it wasn't any faster.) */
  289. for (i = 0; table[i].t ; ++i) {
  290. if (mem_eq_token(*s, next-*s, table[i].t)) {
  291. /* We've found the keyword. */
  292. kwd = table[i].t;
  293. tok->tp = table[i].v;
  294. o_syn = table[i].os;
  295. *s = eat_whitespace_eos_no_nl(next, eol);
  296. /* We go ahead whether there are arguments or not, so that tok->args is
  297. * always set if we want arguments. */
  298. if (table[i].concat_args) {
  299. /* The keyword takes the line as a single argument */
  300. tok->args = ALLOC(sizeof(char*));
  301. tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
  302. tok->n_args = 1;
  303. } else {
  304. /* This keyword takes multiple arguments. */
  305. if (get_token_arguments(area, tok, *s, eol)<0) {
  306. tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
  307. RET_ERR(ebuf);
  308. }
  309. *s = eol;
  310. }
  311. if (tok->n_args < table[i].min_args) {
  312. tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
  313. RET_ERR(ebuf);
  314. } else if (tok->n_args > table[i].max_args) {
  315. tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
  316. RET_ERR(ebuf);
  317. }
  318. break;
  319. }
  320. }
  321. if (tok->tp == ERR_) {
  322. /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
  323. if (*s < eol && **s == '@')
  324. tok->tp = A_UNKNOWN_;
  325. else
  326. tok->tp = K_OPT;
  327. tok->args = ALLOC(sizeof(char*));
  328. tok->args[0] = STRNDUP(*s, eol-*s);
  329. tok->n_args = 1;
  330. o_syn = OBJ_OK;
  331. }
  332. /* Check whether there's an object present */
  333. *s = eat_whitespace_eos(eol, eos); /* Scan from end of first line */
  334. tor_assert(eos >= *s);
  335. eol = memchr(*s, '\n', eos-*s);
  336. if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
  337. goto check_object;
  338. if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
  339. !mem_eq_token(eol-5, 5, "-----") || /* nuls or invalid endings */
  340. (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */
  341. RET_ERR("Malformed object: bad begin line");
  342. }
  343. tok->object_type = STRNDUP(*s+11, eol-*s-16);
  344. obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
  345. *s = eol+1; /* Set *s to possible start of object data (could be eos) */
  346. /* Go to the end of the object */
  347. next = tor_memstr(*s, eos-*s, "-----END ");
  348. if (!next) {
  349. RET_ERR("Malformed object: missing object end line");
  350. }
  351. tor_assert(eos >= next);
  352. eol = memchr(next, '\n', eos-next);
  353. if (!eol) /* end-of-line marker, or eos if there's no '\n' */
  354. eol = eos;
  355. /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
  356. if ((size_t)(eol-next) != 9+obname_len+5 ||
  357. !mem_eq_token(next+9, obname_len, tok->object_type) ||
  358. !mem_eq_token(eol-5, 5, "-----")) {
  359. tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
  360. tok->object_type);
  361. ebuf[sizeof(ebuf)-1] = '\0';
  362. RET_ERR(ebuf);
  363. }
  364. if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
  365. RET_ERR("Couldn't parse object: missing footer or object much too big.");
  366. {
  367. int r;
  368. size_t maxsize = base64_decode_maxsize(next-*s);
  369. tok->object_body = ALLOC(maxsize);
  370. r = base64_decode(tok->object_body, maxsize, *s, next-*s);
  371. if (r<0)
  372. RET_ERR("Malformed object: bad base64-encoded data");
  373. tok->object_size = r;
  374. }
  375. if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
  376. tok->key = crypto_pk_asn1_decode(tok->object_body, tok->object_size);
  377. if (! tok->key)
  378. RET_ERR("Couldn't parse public key.");
  379. } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */
  380. tok->key = crypto_pk_asn1_decode_private(tok->object_body,
  381. tok->object_size);
  382. if (! tok->key)
  383. RET_ERR("Couldn't parse private key.");
  384. }
  385. *s = eol;
  386. check_object:
  387. tok = token_check_object(area, kwd, tok, o_syn);
  388. done_tokenizing:
  389. return tok;
  390. #undef RET_ERR
  391. #undef ALLOC
  392. #undef ALLOC_ZERO
  393. #undef STRDUP
  394. #undef STRNDUP
  395. }
  396. /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
  397. * with an assert if no such keyword is found.
  398. */
  399. directory_token_t *
  400. find_by_keyword_(smartlist_t *s, directory_keyword keyword,
  401. const char *keyword_as_string)
  402. {
  403. directory_token_t *tok = find_opt_by_keyword(s, keyword);
  404. if (PREDICT_UNLIKELY(!tok)) {
  405. log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
  406. "been validated. Internal error.", keyword_as_string, (int)keyword);
  407. tor_assert(tok);
  408. }
  409. return tok;
  410. }
  411. /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
  412. * NULL if no such keyword is found.
  413. */
  414. directory_token_t *
  415. find_opt_by_keyword(const smartlist_t *s, directory_keyword keyword)
  416. {
  417. SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
  418. return NULL;
  419. }
  420. /** If there are any directory_token_t entries in <b>s</b> whose keyword is
  421. * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
  422. * in the same order in which they occur in <b>s</b>. Otherwise return
  423. * NULL. */
  424. smartlist_t *
  425. find_all_by_keyword(const smartlist_t *s, directory_keyword k)
  426. {
  427. smartlist_t *out = NULL;
  428. SMARTLIST_FOREACH(s, directory_token_t *, t,
  429. if (t->tp == k) {
  430. if (!out)
  431. out = smartlist_new();
  432. smartlist_add(out, t);
  433. });
  434. return out;
  435. }