Browse Source

Extract key string manipulation functions into a new library.

Nick Mathewson 5 years ago
parent
commit
1abadee3fd

+ 2 - 0
.gitignore

@@ -175,6 +175,8 @@ uptime-*.json
 /src/lib/libtor-err-testing.a
 /src/lib/libtor-malloc.a
 /src/lib/libtor-malloc-testing.a
+/src/lib/libtor-string.a
+/src/lib/libtor-string-testing.a
 /src/lib/libtor-tls.a
 /src/lib/libtor-tls-testing.a
 /src/lib/libtor-trace.a

+ 2 - 0
Makefile.am

@@ -41,6 +41,7 @@ endif
 TOR_UTIL_LIBS = \
 	src/common/libor.a \
 	src/lib/libtor-container.a \
+        src/lib/libtor-string.a \
 	src/lib/libtor-malloc.a \
         src/lib/libtor-wallclock.a \
         src/lib/libtor-err.a \
@@ -51,6 +52,7 @@ TOR_UTIL_LIBS = \
 TOR_UTIL_TESTING_LIBS = \
 	src/common/libor-testing.a \
 	src/lib/libtor-container-testing.a \
+        src/lib/libtor-string-testing.a \
 	src/lib/libtor-malloc-testing.a \
         src/lib/libtor-wallclock-testing.a \
         src/lib/libtor-err-testing.a \

+ 0 - 202
src/common/compat.c

@@ -404,147 +404,6 @@ tor_munmap_file(tor_mmap_t *handle)
 #error "cannot implement tor_mmap_file"
 #endif /* defined(HAVE_MMAP) || ... || ... */
 
-/** Replacement for snprintf.  Differs from platform snprintf in two
- * ways: First, always NUL-terminates its output.  Second, always
- * returns -1 if the result is truncated.  (Note that this return
- * behavior does <i>not</i> conform to C99; it just happens to be
- * easier to emulate "return -1" with conformant implementations than
- * it is to emulate "return number that would be written" with
- * non-conformant implementations.) */
-int
-tor_snprintf(char *str, size_t size, const char *format, ...)
-{
-  va_list ap;
-  int r;
-  va_start(ap,format);
-  r = tor_vsnprintf(str,size,format,ap);
-  va_end(ap);
-  return r;
-}
-
-/** Replacement for vsnprintf; behavior differs as tor_snprintf differs from
- * snprintf.
- */
-int
-tor_vsnprintf(char *str, size_t size, const char *format, va_list args)
-{
-  int r;
-  if (size == 0)
-    return -1; /* no place for the NUL */
-  if (size > SIZE_T_CEILING)
-    return -1;
-#ifdef _WIN32
-  r = _vsnprintf(str, size, format, args);
-#else
-  r = vsnprintf(str, size, format, args);
-#endif
-  str[size-1] = '\0';
-  if (r < 0 || r >= (ssize_t)size)
-    return -1;
-  return r;
-}
-
-/**
- * Portable asprintf implementation.  Does a printf() into a newly malloc'd
- * string.  Sets *<b>strp</b> to this string, and returns its length (not
- * including the terminating NUL character).
- *
- * You can treat this function as if its implementation were something like
-   <pre>
-     char buf[_INFINITY_];
-     tor_snprintf(buf, sizeof(buf), fmt, args);
-     *strp = tor_strdup(buf);
-     return strlen(*strp);
-   </pre>
- * Where _INFINITY_ is an imaginary constant so big that any string can fit
- * into it.
- */
-int
-tor_asprintf(char **strp, const char *fmt, ...)
-{
-  int r;
-  va_list args;
-  va_start(args, fmt);
-  r = tor_vasprintf(strp, fmt, args);
-  va_end(args);
-  if (!*strp || r < 0) {
-    /* LCOV_EXCL_START */
-    log_err(LD_BUG, "Internal error in asprintf");
-    tor_assert(0);
-    /* LCOV_EXCL_STOP */
-  }
-  return r;
-}
-
-/**
- * Portable vasprintf implementation.  Does a printf() into a newly malloc'd
- * string.  Differs from regular vasprintf in the same ways that
- * tor_asprintf() differs from regular asprintf.
- */
-int
-tor_vasprintf(char **strp, const char *fmt, va_list args)
-{
-  /* use a temporary variable in case *strp is in args. */
-  char *strp_tmp=NULL;
-#ifdef HAVE_VASPRINTF
-  /* If the platform gives us one, use it. */
-  int r = vasprintf(&strp_tmp, fmt, args);
-  if (r < 0)
-    *strp = NULL;
-  else
-    *strp = strp_tmp;
-  return r;
-#elif defined(HAVE__VSCPRINTF)
-  /* On Windows, _vsnprintf won't tell us the length of the string if it
-   * overflows, so we need to use _vscprintf to tell how much to allocate */
-  int len, r;
-  va_list tmp_args;
-  va_copy(tmp_args, args);
-  len = _vscprintf(fmt, tmp_args);
-  va_end(tmp_args);
-  if (len < 0) {
-    *strp = NULL;
-    return -1;
-  }
-  strp_tmp = tor_malloc(len + 1);
-  r = _vsnprintf(strp_tmp, len+1, fmt, args);
-  if (r != len) {
-    tor_free(strp_tmp);
-    *strp = NULL;
-    return -1;
-  }
-  *strp = strp_tmp;
-  return len;
-#else
-  /* Everywhere else, we have a decent vsnprintf that tells us how many
-   * characters we need.  We give it a try on a short buffer first, since
-   * it might be nice to avoid the second vsnprintf call.
-   */
-  char buf[128];
-  int len, r;
-  va_list tmp_args;
-  va_copy(tmp_args, args);
-  /* Use tor_vsnprintf() so the result is NUL-terminated.  NOTE(review): it
-   * returns -1 when truncated, not the needed length; len may be -1 here. */
-  len = tor_vsnprintf(buf, sizeof(buf), fmt, tmp_args);
-  va_end(tmp_args);
-  if (len < (int)sizeof(buf)) {
-    *strp = tor_strdup(buf);
-    return len;
-  }
-  strp_tmp = tor_malloc(len+1);
-  /* use of tor_vsnprintf() will ensure string is null terminated */
-  r = tor_vsnprintf(strp_tmp, len+1, fmt, args);
-  if (r != len) {
-    tor_free(strp_tmp);
-    *strp = NULL;
-    return -1;
-  }
-  *strp = strp_tmp;
-  return len;
-#endif /* defined(HAVE_VASPRINTF) || ... */
-}
-
 /** Given <b>hlen</b> bytes at <b>haystack</b> and <b>nlen</b> bytes at
  * <b>needle</b>, return a pointer to the first occurrence of the needle
  * within the haystack, or NULL if there is no such occurrence.
@@ -591,67 +450,6 @@ tor_memmem(const void *_haystack, size_t hlen,
 #endif /* defined(HAVE_MEMMEM) && (!defined(__GNUC__) || __GNUC__ >= 2) */
 }
 
-/**
- * Tables to implement ctypes-replacement TOR_IS*() functions.  Each table
- * has 256 bits to look up whether a character is in some set or not.  This
- * fails on non-ASCII platforms, but it is hard to find a platform whose
- * character set is not a superset of ASCII nowadays. */
-
-/**@{*/
-const uint32_t TOR_ISALPHA_TABLE[8] =
-  { 0, 0, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 };
-const uint32_t TOR_ISALNUM_TABLE[8] =
-  { 0, 0x3ff0000, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 };
-const uint32_t TOR_ISSPACE_TABLE[8] = { 0x3e00, 0x1, 0, 0, 0, 0, 0, 0 };
-const uint32_t TOR_ISXDIGIT_TABLE[8] =
-  { 0, 0x3ff0000, 0x7e, 0x7e, 0, 0, 0, 0 };
-const uint32_t TOR_ISDIGIT_TABLE[8] = { 0, 0x3ff0000, 0, 0, 0, 0, 0, 0 };
-const uint32_t TOR_ISPRINT_TABLE[8] =
-  { 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 };
-const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 };
-const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 };
-
-/** Upper-casing and lowercasing tables to map characters to upper/lowercase
- * equivalents.  Used by tor_toupper() and tor_tolower(). */
-/**@{*/
-const uint8_t TOR_TOUPPER_TABLE[256] = {
-  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
-  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
-  32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
-  48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
-  64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
-  80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
-  96,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
-  80,81,82,83,84,85,86,87,88,89,90,123,124,125,126,127,
-  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
-  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
-  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
-  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
-  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
-  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
-  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
-  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
-};
-const uint8_t TOR_TOLOWER_TABLE[256] = {
-  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
-  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
-  32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
-  48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
-  64,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
-  112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,95,
-  96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
-  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
-  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
-  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
-  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
-  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
-  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
-  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
-  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
-  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
-};
-/**@}*/
-
 /** Helper for tor_strtok_r_impl: Advances cp past all characters in
  * <b>sep</b>, and returns its new value. */
 static char *

+ 2 - 32
src/common/compat.h

@@ -47,6 +47,8 @@
 
 #include "lib/cc/compat_compiler.h"
 #include "common/compat_time.h"
+#include "lib/string/compat_ctype.h"
+#include "lib/string/printf.h"
 
 #include <stdio.h>
 #include <errno.h>
@@ -97,16 +99,6 @@ typedef struct tor_mmap_t {
 tor_mmap_t *tor_mmap_file(const char *filename) ATTR_NONNULL((1));
 int tor_munmap_file(tor_mmap_t *handle) ATTR_NONNULL((1));
 
-int tor_snprintf(char *str, size_t size, const char *format, ...)
-  CHECK_PRINTF(3,4) ATTR_NONNULL((1,3));
-int tor_vsnprintf(char *str, size_t size, const char *format, va_list args)
-  CHECK_PRINTF(3,0) ATTR_NONNULL((1,3));
-
-int tor_asprintf(char **strp, const char *fmt, ...)
-  CHECK_PRINTF(2,3);
-int tor_vasprintf(char **strp, const char *fmt, va_list args)
-  CHECK_PRINTF(2,0);
-
 const void *tor_memmem(const void *haystack, size_t hlen, const void *needle,
                        size_t nlen) ATTR_NONNULL((1,3));
 static const void *tor_memstr(const void *haystack, size_t hlen,
@@ -117,28 +109,6 @@ tor_memstr(const void *haystack, size_t hlen, const char *needle)
   return tor_memmem(haystack, hlen, needle, strlen(needle));
 }
 
-/* Much of the time when we're checking ctypes, we're doing spec compliance,
- * which all assumes we're doing ASCII. */
-#define DECLARE_CTYPE_FN(name)                                          \
-  static int TOR_##name(char c);                                        \
-  extern const uint32_t TOR_##name##_TABLE[];                           \
-  static inline int TOR_##name(char c) {                                \
-    uint8_t u = c;                                                      \
-    return !!(TOR_##name##_TABLE[(u >> 5) & 7] & (1u << (u & 31)));     \
-  }
-DECLARE_CTYPE_FN(ISALPHA)
-DECLARE_CTYPE_FN(ISALNUM)
-DECLARE_CTYPE_FN(ISSPACE)
-DECLARE_CTYPE_FN(ISDIGIT)
-DECLARE_CTYPE_FN(ISXDIGIT)
-DECLARE_CTYPE_FN(ISPRINT)
-DECLARE_CTYPE_FN(ISLOWER)
-DECLARE_CTYPE_FN(ISUPPER)
-extern const uint8_t TOR_TOUPPER_TABLE[];
-extern const uint8_t TOR_TOLOWER_TABLE[];
-#define TOR_TOLOWER(c) (TOR_TOLOWER_TABLE[(uint8_t)c])
-#define TOR_TOUPPER(c) (TOR_TOUPPER_TABLE[(uint8_t)c])
-
 char *tor_strtok_r_impl(char *str, const char *sep, char **lasts);
 #ifdef HAVE_STRTOK_R
 #define tor_strtok_r(str, sep, lasts) strtok_r(str, sep, lasts)

+ 0 - 629
src/common/util.c

@@ -385,22 +385,6 @@ n_bits_set_u8(uint8_t v)
  * String manipulation
  * ===== */
 
-/** Remove from the string <b>s</b> every character which appears in
- * <b>strip</b>. */
-void
-tor_strstrip(char *s, const char *strip)
-{
-  char *readp = s;
-  while (*readp) {
-    if (strchr(strip, *readp)) {
-      ++readp;
-    } else {
-      *s++ = *readp++;
-    }
-  }
-  *s = '\0';
-}
-
 /** Return a pointer to a NUL-terminated hexadecimal string encoding
  * the first <b>fromlen</b> bytes of <b>from</b>. (fromlen must be \<= 32.) The
  * result does not need to be deallocated, but repeated calls to
@@ -416,145 +400,6 @@ hex_str(const char *from, size_t fromlen)
   return buf;
 }
 
-/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to
- * lowercase. */
-void
-tor_strlower(char *s)
-{
-  while (*s) {
-    *s = TOR_TOLOWER(*s);
-    ++s;
-  }
-}
-
-/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to
- * uppercase. */
-void
-tor_strupper(char *s)
-{
-  while (*s) {
-    *s = TOR_TOUPPER(*s);
-    ++s;
-  }
-}
-
-/** Return 1 if every character in <b>s</b> is printable, else return 0.
- */
-int
-tor_strisprint(const char *s)
-{
-  while (*s) {
-    if (!TOR_ISPRINT(*s))
-      return 0;
-    s++;
-  }
-  return 1;
-}
-
-/** Return 1 if no character in <b>s</b> is uppercase, else return 0.
- */
-int
-tor_strisnonupper(const char *s)
-{
-  while (*s) {
-    if (TOR_ISUPPER(*s))
-      return 0;
-    s++;
-  }
-  return 1;
-}
-
-/** Return true iff every character in <b>s</b> is whitespace; else
- * return false. */
-int
-tor_strisspace(const char *s)
-{
-  while (*s) {
-    if (!TOR_ISSPACE(*s))
-      return 0;
-    s++;
-  }
-  return 1;
-}
-
-/** As strcmp, except that either string may be NULL.  The NULL string is
- * considered to be before any non-NULL string. */
-int
-strcmp_opt(const char *s1, const char *s2)
-{
-  if (!s1) {
-    if (!s2)
-      return 0;
-    else
-      return -1;
-  } else if (!s2) {
-    return 1;
-  } else {
-    return strcmp(s1, s2);
-  }
-}
-
-/** Compares the first strlen(s2) characters of s1 with s2.  Returns as for
- * strcmp.
- */
-int
-strcmpstart(const char *s1, const char *s2)
-{
-  size_t n = strlen(s2);
-  return strncmp(s1, s2, n);
-}
-
-/** Compare the s1_len-byte string <b>s1</b> with <b>s2</b>,
- * without depending on a terminating nul in s1.  Sorting order is first by
- * length, then lexically; return values are as for strcmp.
- */
-int
-strcmp_len(const char *s1, const char *s2, size_t s1_len)
-{
-  size_t s2_len = strlen(s2);
-  if (s1_len < s2_len)
-    return -1;
-  if (s1_len > s2_len)
-    return 1;
-  return fast_memcmp(s1, s2, s2_len);
-}
-
-/** Compares the first strlen(s2) characters of s1 with s2.  Returns as for
- * strcasecmp.
- */
-int
-strcasecmpstart(const char *s1, const char *s2)
-{
-  size_t n = strlen(s2);
-  return strncasecmp(s1, s2, n);
-}
-
-/** Compares the last strlen(s2) characters of s1 with s2.  Returns as for
- * strcmp.
- */
-int
-strcmpend(const char *s1, const char *s2)
-{
-  size_t n1 = strlen(s1), n2 = strlen(s2);
-  if (n2>n1)
-    return strcmp(s1,s2);
-  else
-    return strncmp(s1+(n1-n2), s2, n2);
-}
-
-/** Compares the last strlen(s2) characters of s1 with s2.  Returns as for
- * strcasecmp.
- */
-int
-strcasecmpend(const char *s1, const char *s2)
-{
-  size_t n1 = strlen(s1), n2 = strlen(s2);
-  if (n2>n1) /* then they can't be the same; figure out which is bigger */
-    return strcasecmp(s1,s2);
-  else
-    return strncasecmp(s1+(n1-n2), s2, n2);
-}
-
 /** Compare the value of the string <b>prefix</b> with the start of the
  * <b>memlen</b>-byte memory chunk at <b>mem</b>.  Return as for strcmp.
  *
@@ -571,179 +416,6 @@ fast_memcmpstart(const void *mem, size_t memlen,
   return fast_memcmp(mem, prefix, plen);
 }
 
-/** Return a pointer to the first char of s that is not whitespace and
- * not a comment, or to the terminating NUL if no such character exists.
- */
-const char *
-eat_whitespace(const char *s)
-{
-  tor_assert(s);
-
-  while (1) {
-    switch (*s) {
-    case '\0':
-    default:
-      return s;
-    case ' ':
-    case '\t':
-    case '\n':
-    case '\r':
-      ++s;
-      break;
-    case '#':
-      ++s;
-      while (*s && *s != '\n')
-        ++s;
-    }
-  }
-}
-
-/** As eat_whitespace, but stop at <b>eos</b> whether we have found a
- * non-whitespace, non-comment character or not.
- */
-const char *
-eat_whitespace_eos(const char *s, const char *eos)
-{
-  tor_assert(s);
-  tor_assert(eos && s <= eos);
-
-  while (s < eos) {
-    switch (*s) {
-    case '\0':
-    default:
-      return s;
-    case ' ':
-    case '\t':
-    case '\n':
-    case '\r':
-      ++s;
-      break;
-    case '#':
-      ++s;
-      while (s < eos && *s && *s != '\n')
-        ++s;
-    }
-  }
-  return s;
-}
-
-/** Return a pointer to the first char of s that is not a space or a tab
- * or a \\r, or to the terminating NUL if no such character exists. */
-const char *
-eat_whitespace_no_nl(const char *s)
-{
-  while (*s == ' ' || *s == '\t' || *s == '\r')
-    ++s;
-  return s;
-}
-
-/** As eat_whitespace_no_nl, but stop at <b>eos</b> whether we have
- * found a non-whitespace character or not. */
-const char *
-eat_whitespace_eos_no_nl(const char *s, const char *eos)
-{
-  while (s < eos && (*s == ' ' || *s == '\t' || *s == '\r'))
-    ++s;
-  return s;
-}
-
-/** Return a pointer to the first char of s that is whitespace or <b>#</b>,
- * or to the terminating NUL if no such character exists.
- */
-const char *
-find_whitespace(const char *s)
-{
-  /* tor_assert(s); */
-  while (1) {
-    switch (*s)
-    {
-    case '\0':
-    case '#':
-    case ' ':
-    case '\r':
-    case '\n':
-    case '\t':
-      return s;
-    default:
-      ++s;
-    }
-  }
-}
-
-/** As find_whitespace, but stop at <b>eos</b> whether we have found a
- * whitespace or not. */
-const char *
-find_whitespace_eos(const char *s, const char *eos)
-{
-  /* tor_assert(s); */
-  while (s < eos) {
-    switch (*s)
-    {
-    case '\0':
-    case '#':
-    case ' ':
-    case '\r':
-    case '\n':
-    case '\t':
-      return s;
-    default:
-      ++s;
-    }
-  }
-  return s;
-}
-
-/** Return the first occurrence of <b>needle</b> in <b>haystack</b> that
- * occurs at the start of a line (that is, at the beginning of <b>haystack</b>
- * or immediately after a newline).  Return NULL if no such string is found.
- */
-const char *
-find_str_at_start_of_line(const char *haystack, const char *needle)
-{
-  size_t needle_len = strlen(needle);
-
-  do {
-    if (!strncmp(haystack, needle, needle_len))
-      return haystack;
-
-    haystack = strchr(haystack, '\n');
-    if (!haystack)
-      return NULL;
-    else
-      ++haystack;
-  } while (*haystack);
-
-  return NULL;
-}
-
-/** Returns true if <b>string</b> could be a C identifier.
-    A C identifier must begin with a letter or an underscore and the
-    rest of its characters can be letters, numbers or underscores. No
-    length limit is imposed. */
-int
-string_is_C_identifier(const char *string)
-{
-  size_t iter;
-  size_t length = strlen(string);
-  if (!length)
-    return 0;
-
-  for (iter = 0; iter < length ; iter++) {
-    if (iter == 0) {
-      if (!(TOR_ISALPHA(string[iter]) ||
-            string[iter] == '_'))
-        return 0;
-    } else {
-      if (!(TOR_ISALPHA(string[iter]) ||
-            TOR_ISDIGIT(string[iter]) ||
-            string[iter] == '_'))
-        return 0;
-    }
-  }
-
-  return 1;
-}
-
 /** Return true iff the 'len' bytes at 'mem' are all zero. */
 int
 tor_mem_is_zero(const char *mem, size_t len)
@@ -2923,307 +2595,6 @@ expand_filename(const char *filename)
 #endif /* defined(_WIN32) */
 }
 
-#define MAX_SCANF_WIDTH 9999
-
-/** Helper: given an ASCII-encoded decimal digit, return its numeric value.
- * NOTE: requires that its input be in-bounds. */
-static int
-digit_to_num(char d)
-{
-  int num = ((int)d) - (int)'0';
-  tor_assert(num <= 9 && num >= 0);
-  return num;
-}
-
-/** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
- * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
- * success, store the result in <b>out</b>, advance bufp to the next
- * character, and return 0.  On failure, return -1. */
-static int
-scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
-{
-  unsigned long result = 0;
-  int scanned_so_far = 0;
-  const int hex = base==16;
-  tor_assert(base == 10 || base == 16);
-  if (!bufp || !*bufp || !out)
-    return -1;
-  if (width<0)
-    width=MAX_SCANF_WIDTH;
-
-  while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
-         && scanned_so_far < width) {
-    unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
-    // Check for overflow beforehand, without actually causing any overflow
-    // This preserves functionality on compilers that don't wrap overflow
-    // (i.e. that trap or optimise away overflow)
-    // result * base + digit > ULONG_MAX
-    // result * base > ULONG_MAX - digit
-    if (result > (ULONG_MAX - digit)/base)
-      return -1; /* Processing this digit would overflow */
-    result = result * base + digit;
-    ++scanned_so_far;
-  }
-
-  if (!scanned_so_far) /* No actual digits scanned */
-    return -1;
-
-  *out = result;
-  return 0;
-}
-
-/** Helper: Read a signed int from *<b>bufp</b> of up to <b>width</b>
- * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
- * success, store the result in <b>out</b>, advance bufp to the next
- * character, and return 0.  On failure, return -1. */
-static int
-scan_signed(const char **bufp, long *out, int width)
-{
-  int neg = 0;
-  unsigned long result = 0;
-
-  if (!bufp || !*bufp || !out)
-    return -1;
-  if (width<0)
-    width=MAX_SCANF_WIDTH;
-
-  if (**bufp == '-') {
-    neg = 1;
-    ++*bufp;
-    --width;
-  }
-
-  if (scan_unsigned(bufp, &result, width, 10) < 0)
-    return -1;
-
-  if (neg && result > 0) {
-    if (result > ((unsigned long)LONG_MAX) + 1)
-      return -1; /* Underflow */
-    else if (result == ((unsigned long)LONG_MAX) + 1)
-      *out = LONG_MIN;
-    else {
-      /* We once had a far more clever no-overflow conversion here, but
-       * some versions of GCC apparently ran it into the ground.  Now
-       * we just check for LONG_MIN explicitly.
-       */
-      *out = -(long)result;
-    }
-  } else {
-    if (result > LONG_MAX)
-      return -1; /* Overflow */
-    *out = (long)result;
-  }
-
-  return 0;
-}
-
-/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
- * <b>width</b> characters.  (Handle arbitrary width if <b>width</b> is less
- * than 0.)  On success, store the result in <b>out</b>, advance bufp to the
- * next character, and return 0.  On failure, return -1. */
-static int
-scan_double(const char **bufp, double *out, int width)
-{
-  int neg = 0;
-  double result = 0;
-  int scanned_so_far = 0;
-
-  if (!bufp || !*bufp || !out)
-    return -1;
-  if (width<0)
-    width=MAX_SCANF_WIDTH;
-
-  if (**bufp == '-') {
-    neg = 1;
-    ++*bufp;
-  }
-
-  while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
-    const int digit = digit_to_num(*(*bufp)++);
-    result = result * 10 + digit;
-    ++scanned_so_far;
-  }
-  if (**bufp == '.') {
-    double fracval = 0, denominator = 1;
-    ++*bufp;
-    ++scanned_so_far;
-    while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
-      const int digit = digit_to_num(*(*bufp)++);
-      fracval = fracval * 10 + digit;
-      denominator *= 10;
-      ++scanned_so_far;
-    }
-    result += fracval / denominator;
-  }
-
-  if (!scanned_so_far) /* No actual digits scanned */
-    return -1;
-
-  *out = neg ? -result : result;
-  return 0;
-}
-
-/** Helper: copy up to <b>width</b> non-space characters from <b>bufp</b> to
- * <b>out</b>.  Make sure <b>out</b> is nul-terminated. Advance <b>bufp</b>
- * past the copied characters (to the next space, the width limit, or EOS). */
-static int
-scan_string(const char **bufp, char *out, int width)
-{
-  int scanned_so_far = 0;
-  if (!bufp || !out || width < 0)
-    return -1;
-  while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
-    *out++ = *(*bufp)++;
-    ++scanned_so_far;
-  }
-  *out = '\0';
-  return 0;
-}
-
-/** Locale-independent, minimal, no-surprises scanf variant, accepting only a
- * restricted pattern format.  For more info on what it supports, see
- * tor_sscanf() documentation.  */
-int
-tor_vsscanf(const char *buf, const char *pattern, va_list ap)
-{
-  int n_matched = 0;
-
-  while (*pattern) {
-    if (*pattern != '%') {
-      if (*buf == *pattern) {
-        ++buf;
-        ++pattern;
-        continue;
-      } else {
-        return n_matched;
-      }
-    } else {
-      int width = -1;
-      int longmod = 0;
-      ++pattern;
-      if (TOR_ISDIGIT(*pattern)) {
-        width = digit_to_num(*pattern++);
-        while (TOR_ISDIGIT(*pattern)) {
-          width *= 10;
-          width += digit_to_num(*pattern++);
-          if (width > MAX_SCANF_WIDTH)
-            return -1;
-        }
-        if (!width) /* No zero-width things. */
-          return -1;
-      }
-      if (*pattern == 'l') {
-        longmod = 1;
-        ++pattern;
-      }
-      if (*pattern == 'u' || *pattern == 'x') {
-        unsigned long u;
-        const int base = (*pattern == 'u') ? 10 : 16;
-        if (!*buf)
-          return n_matched;
-        if (scan_unsigned(&buf, &u, width, base)<0)
-          return n_matched;
-        if (longmod) {
-          unsigned long *out = va_arg(ap, unsigned long *);
-          *out = u;
-        } else {
-          unsigned *out = va_arg(ap, unsigned *);
-          if (u > UINT_MAX)
-            return n_matched;
-          *out = (unsigned) u;
-        }
-        ++pattern;
-        ++n_matched;
-      } else if (*pattern == 'f') {
-        double *d = va_arg(ap, double *);
-        if (!longmod)
-          return -1; /* float not supported */
-        if (!*buf)
-          return n_matched;
-        if (scan_double(&buf, d, width)<0)
-          return n_matched;
-        ++pattern;
-        ++n_matched;
-      } else if (*pattern == 'd') {
-        long lng=0;
-        if (scan_signed(&buf, &lng, width)<0)
-          return n_matched;
-        if (longmod) {
-          long *out = va_arg(ap, long *);
-          *out = lng;
-        } else {
-          int *out = va_arg(ap, int *);
-#if LONG_MAX > INT_MAX
-          if (lng < INT_MIN || lng > INT_MAX)
-            return n_matched;
-#endif
-          *out = (int)lng;
-        }
-        ++pattern;
-        ++n_matched;
-      } else if (*pattern == 's') {
-        char *s = va_arg(ap, char *);
-        if (longmod)
-          return -1;
-        if (width < 0)
-          return -1;
-        if (scan_string(&buf, s, width)<0)
-          return n_matched;
-        ++pattern;
-        ++n_matched;
-      } else if (*pattern == 'c') {
-        char *ch = va_arg(ap, char *);
-        if (longmod)
-          return -1;
-        if (width != -1)
-          return -1;
-        if (!*buf)
-          return n_matched;
-        *ch = *buf++;
-        ++pattern;
-        ++n_matched;
-      } else if (*pattern == '%') {
-        if (*buf != '%')
-          return n_matched;
-        if (longmod)
-          return -1;
-        ++buf;
-        ++pattern;
-      } else {
-        return -1; /* Unrecognized pattern component. */
-      }
-    }
-  }
-
-  return n_matched;
-}
-
-/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
- * and store the results in the corresponding argument fields.  Differs from
- * sscanf in that:
- * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c.
- *     <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
- *     <li>It does not handle arbitrarily long widths.
- *     <li>Numbers do not consume any space characters.
- *     <li>It is locale-independent.
- *     <li>%u and %x do not consume any space.
- *     <li>It returns -1 on malformed patterns.</ul>
- *
- * (As with other locale-independent functions, we need this to parse data that
- * is in ASCII without worrying that the C library's locale-handling will make
- * miscellaneous characters look like numbers, spaces, and so on.)
- */
-int
-tor_sscanf(const char *buf, const char *pattern, ...)
-{
-  int r;
-  va_list ap;
-  va_start(ap, pattern);
-  r = tor_vsscanf(buf, pattern, ap);
-  va_end(ap);
-  return r;
-}
-
 /** Append the string produced by tor_asprintf(<b>pattern</b>, <b>...</b>)
  * to <b>sl</b>. */
 void

+ 5 - 30
src/common/util.h

@@ -25,6 +25,8 @@
 #include "lib/err/torerr.h"
 #include "lib/malloc/util_malloc.h"
 #include "lib/wallclock/approx_time.h"
+#include "lib/string/util_string.h"
+#include "lib/string/scanf.h"
 #include "common/util_bug.h"
 
 #ifndef O_BINARY
@@ -96,22 +98,6 @@ uint32_t tor_add_u32_nowrap(uint32_t a, uint32_t b);
 
 /* String manipulation */
 
-/** Allowable characters in a hexadecimal string. */
-#define HEX_CHARACTERS "0123456789ABCDEFabcdef"
-void tor_strlower(char *s) ATTR_NONNULL((1));
-void tor_strupper(char *s) ATTR_NONNULL((1));
-int tor_strisprint(const char *s) ATTR_NONNULL((1));
-int tor_strisnonupper(const char *s) ATTR_NONNULL((1));
-int tor_strisspace(const char *s);
-int strcmp_opt(const char *s1, const char *s2);
-int strcmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2));
-int strcmp_len(const char *s1, const char *s2, size_t len) ATTR_NONNULL((1,2));
-int strcasecmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2));
-int strcmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2));
-int strcasecmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2));
-int fast_memcmpstart(const void *mem, size_t memlen, const char *prefix);
-
-void tor_strstrip(char *s, const char *strip) ATTR_NONNULL((1,2));
 long tor_parse_long(const char *s, int base, long min,
                     long max, int *ok, char **next);
 unsigned long tor_parse_ulong(const char *s, int base, unsigned long min,
@@ -120,16 +106,9 @@ double tor_parse_double(const char *s, double min, double max, int *ok,
                         char **next);
 uint64_t tor_parse_uint64(const char *s, int base, uint64_t min,
                          uint64_t max, int *ok, char **next);
+
 const char *hex_str(const char *from, size_t fromlen) ATTR_NONNULL((1));
-const char *eat_whitespace(const char *s);
-const char *eat_whitespace_eos(const char *s, const char *eos);
-const char *eat_whitespace_no_nl(const char *s);
-const char *eat_whitespace_eos_no_nl(const char *s, const char *eos);
-const char *find_whitespace(const char *s);
-const char *find_whitespace_eos(const char *s, const char *eos);
-const char *find_str_at_start_of_line(const char *haystack,
-                                      const char *needle);
-int string_is_C_identifier(const char *string);
+
 int string_is_key_value(int severity, const char *string);
 int string_is_valid_dest(const char *string);
 int string_is_valid_nonrfc_hostname(const char *string);
@@ -139,6 +118,7 @@ int string_is_valid_ipv6_address(const char *string);
 int tor_mem_is_zero(const char *mem, size_t len);
 int tor_digest_is_zero(const char *digest);
 int tor_digest256_is_zero(const char *digest);
+
 char *esc_for_log(const char *string) ATTR_MALLOC;
 char *esc_for_log_len(const char *chars, size_t n) ATTR_MALLOC;
 const char *escaped(const char *string);
@@ -147,11 +127,6 @@ char *tor_escape_str_for_pt_args(const char *string,
                                  const char *chars_to_escape);
 
 struct smartlist_t;
-int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \
-  CHECK_SCANF(2, 0);
-int tor_sscanf(const char *buf, const char *pattern, ...)
-  CHECK_SCANF(2, 3);
-
 void smartlist_add_asprintf(struct smartlist_t *sl, const char *pattern, ...)
   CHECK_PRINTF(2, 3);
 void smartlist_add_vasprintf(struct smartlist_t *sl, const char *pattern,

+ 2 - 35
src/common/util_format.c

@@ -465,39 +465,6 @@ base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
   *cp = '\0';
 }
 
-/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */
-static inline int
-hex_decode_digit_(char c)
-{
-  switch (c) {
-    case '0': return 0;
-    case '1': return 1;
-    case '2': return 2;
-    case '3': return 3;
-    case '4': return 4;
-    case '5': return 5;
-    case '6': return 6;
-    case '7': return 7;
-    case '8': return 8;
-    case '9': return 9;
-    case 'A': case 'a': return 10;
-    case 'B': case 'b': return 11;
-    case 'C': case 'c': return 12;
-    case 'D': case 'd': return 13;
-    case 'E': case 'e': return 14;
-    case 'F': case 'f': return 15;
-    default:
-      return -1;
-  }
-}
-
-/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */
-int
-hex_decode_digit(char c)
-{
-  return hex_decode_digit_(c);
-}
-
 /** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode
  * it and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>.
  * Return the number of bytes decoded on success, -1 on failure. If
@@ -520,8 +487,8 @@ base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
 
   end = src+srclen;
   while (src<end) {
-    v1 = hex_decode_digit_(*src);
-    v2 = hex_decode_digit_(*(src+1));
+    v1 = hex_decode_digit(*src);
+    v2 = hex_decode_digit(*(src+1));
     if (v1<0||v2<0)
       return -1;
     *(uint8_t*)dest = (v1<<4)|v2;

+ 0 - 2
src/common/util_format.h

@@ -44,9 +44,7 @@ void base32_encode(char *dest, size_t destlen, const char *src, size_t srclen);
 int base32_decode(char *dest, size_t destlen, const char *src, size_t srclen);
 size_t base32_encoded_size(size_t srclen);
 
-int hex_decode_digit(char c);
 void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen);
 int base16_decode(char *dest, size_t destlen, const char *src, size_t srclen);
 
 #endif /* !defined(TOR_UTIL_FORMAT_H) */
-

+ 1 - 0
src/include.am

@@ -8,6 +8,7 @@ include src/lib/crypt_ops/include.am
 include src/lib/defs/include.am
 include src/lib/include.libdonna.am
 include src/lib/malloc/include.am
+include src/lib/string/include.am
 include src/lib/testsupport/include.am
 include src/lib/tls/include.am
 include src/lib/trace/include.am

+ 1 - 0
src/lib/container/.may_include

@@ -5,6 +5,7 @@ lib/ctime/*.h
 lib/defs/*.h
 lib/malloc/*.h
 lib/err/*.h
+lib/string/*.h
 lib/testsupport/testsupport.h
 
 ht.h

+ 2 - 1
src/lib/container/map.c

@@ -14,9 +14,10 @@
 #include "lib/container/map.h"
 #include "lib/ctime/di_ops.h"
 #include "lib/defs/digest_sizes.h"
+#include "lib/string/util_string.h"
+#include "lib/malloc/util_malloc.h"
 
 #include "common/util_bug.h"
-#include "common/util.h" // For strlower
 
 #include <stdlib.h>
 #include <string.h>

+ 2 - 1
src/lib/container/smartlist.c

@@ -14,9 +14,10 @@
 #include "lib/malloc/util_malloc.h"
 #include "lib/container/smartlist.h"
 #include "lib/err/torerr.h"
-#include "common/util.h" // For strstrip.
+#include "lib/malloc/util_malloc.h"
 #include "lib/defs/digest_sizes.h"
 #include "lib/ctime/di_ops.h"
+#include "lib/string/util_string.h"
 
 #include <stdlib.h>
 #include <string.h>

+ 1 - 0
src/lib/crypt_ops/.may_include

@@ -6,6 +6,7 @@ lib/ctime/*.h
 lib/defs/*.h
 lib/malloc/*.h
 lib/err/*.h
+lib/string/*.h
 lib/testsupport/testsupport.h
 
 trunnel/pwbox.h

+ 1 - 0
src/lib/crypt_ops/crypto_format.c

@@ -20,6 +20,7 @@
 #include "lib/crypt_ops/crypto_ed25519.h"
 #include "lib/crypt_ops/crypto_format.h"
 #include "lib/crypt_ops/crypto_util.h"
+#include "lib/string/util_string.h"
 #include "common/util.h"
 #include "common/util_format.h"
 #include "common/torlog.h"

+ 1 - 1
src/lib/crypt_ops/crypto_openssl_mgt.c

@@ -12,6 +12,7 @@
 
 #include "lib/crypt_ops/compat_openssl.h"
 #include "lib/crypt_ops/crypto_openssl_mgt.h"
+#include "lib/string/util_string.h"
 
 DISABLE_GCC_WARNING(redundant-decls)
 
@@ -158,4 +159,3 @@ crypto_openssl_free_all(void)
   }
 #endif /* !defined(NEW_THREAD_API) */
 }
-

+ 6 - 0
src/lib/string/.may_include

@@ -0,0 +1,6 @@
+orconfig.h
+lib/cc/*.h
+lib/err/*.h
+lib/malloc/*.h
+lib/ctime/*.h
+lib/string/*.h

+ 67 - 0
src/lib/string/compat_ctype.c

@@ -0,0 +1,67 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#include "lib/string/compat_ctype.h"
+
+/**
+ * Tables to implement ctypes-replacement TOR_IS*() functions.  Each table
+ * has 256 bits to look up whether a character is in some set or not.  This
+ * fails on non-ASCII platforms, but it is hard to find a platform whose
+ * character set is not a superset of ASCII nowadays. */
+
+/**@{*/
+const uint32_t TOR_ISALPHA_TABLE[8] =
+  { 0, 0, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 };
+const uint32_t TOR_ISALNUM_TABLE[8] =
+  { 0, 0x3ff0000, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 };
+const uint32_t TOR_ISSPACE_TABLE[8] = { 0x3e00, 0x1, 0, 0, 0, 0, 0, 0 };
+const uint32_t TOR_ISXDIGIT_TABLE[8] =
+  { 0, 0x3ff0000, 0x7e, 0x7e, 0, 0, 0, 0 };
+const uint32_t TOR_ISDIGIT_TABLE[8] = { 0, 0x3ff0000, 0, 0, 0, 0, 0, 0 };
+const uint32_t TOR_ISPRINT_TABLE[8] =
+  { 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 };
+const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 };
+const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 };
+
+/** Upper-casing and lowercasing tables to map characters to upper/lowercase
+ * equivalents.  Used by tor_toupper() and tor_tolower(). */
+/**@{*/
+const uint8_t TOR_TOUPPER_TABLE[256] = {
+  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+  32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+  48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
+  64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
+  80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
+  96,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
+  80,81,82,83,84,85,86,87,88,89,90,123,124,125,126,127,
+  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
+};
+const uint8_t TOR_TOLOWER_TABLE[256] = {
+  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+  32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+  48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
+  64,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
+  112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,95,
+  96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
+  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
+};
+/**@}*/

+ 60 - 0
src/lib/string/compat_ctype.h

@@ -0,0 +1,60 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_COMPAT_CTYPE_H
+#define TOR_COMPAT_CTYPE_H
+
+#include "orconfig.h"
+#include "lib/cc/torint.h"
+
+/* Much of the time when we're checking ctypes, we're doing spec compliance,
+ * which all assumes we're doing ASCII. */
+#define DECLARE_CTYPE_FN(name)                                          \
+  static int TOR_##name(char c);                                        \
+  extern const uint32_t TOR_##name##_TABLE[];                           \
+  static inline int TOR_##name(char c) {                                \
+    uint8_t u = c;                                                      \
+    return !!(TOR_##name##_TABLE[(u >> 5) & 7] & (1u << (u & 31)));     \
+  }
+DECLARE_CTYPE_FN(ISALPHA)
+DECLARE_CTYPE_FN(ISALNUM)
+DECLARE_CTYPE_FN(ISSPACE)
+DECLARE_CTYPE_FN(ISDIGIT)
+DECLARE_CTYPE_FN(ISXDIGIT)
+DECLARE_CTYPE_FN(ISPRINT)
+DECLARE_CTYPE_FN(ISLOWER)
+DECLARE_CTYPE_FN(ISUPPER)
+/* Case-mapping tables and lookup macros.  The argument is parenthesized
+ * before the cast so that a compound expression (e.g. TOR_TOLOWER(a + b))
+ * is converted to uint8_t as a whole rather than only its first operand. */
+extern const uint8_t TOR_TOUPPER_TABLE[];
+extern const uint8_t TOR_TOLOWER_TABLE[];
+#define TOR_TOLOWER(c) (TOR_TOLOWER_TABLE[(uint8_t)(c)])
+#define TOR_TOUPPER(c) (TOR_TOUPPER_TABLE[(uint8_t)(c)])
+
+/** Helper: given a hex digit, return its value, or -1 if it isn't hex.
+ *
+ * Accepts '0'-'9' and both cases of 'a'-'f'.  Defined <b>static</b> inline
+ * because it lives in a header: a plain "inline" definition provides no
+ * external definition under C99 (undefined reference whenever a call is not
+ * inlined) and a duplicate external one per translation unit under gnu89
+ * semantics. */
+static inline int
+hex_decode_digit(char c)
+{
+  switch (c) {
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    case 'A': case 'a': return 10;
+    case 'B': case 'b': return 11;
+    case 'C': case 'c': return 12;
+    case 'D': case 'd': return 13;
+    case 'E': case 'e': return 14;
+    case 'F': case 'f': return 15;
+    default:
+      return -1;
+  }
+}
+
+#endif /* !defined(TOR_COMPAT_CTYPE_H) */

+ 23 - 0
src/lib/string/include.am

@@ -0,0 +1,23 @@
+
+noinst_LIBRARIES += src/lib/libtor-string.a
+
+if UNITTESTS_ENABLED
+noinst_LIBRARIES += src/lib/libtor-string-testing.a
+endif
+
+src_lib_libtor_string_a_SOURCES =			\
+	src/lib/string/compat_ctype.c			\
+	src/lib/string/util_string.c			\
+	src/lib/string/printf.c				\
+	src/lib/string/scanf.c
+
+src_lib_libtor_string_testing_a_SOURCES = \
+	$(src_lib_libtor_string_a_SOURCES)
+src_lib_libtor_string_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS)
+src_lib_libtor_string_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
+
+noinst_HEADERS +=					\
+	src/lib/string/compat_ctype.h			\
+	src/lib/string/util_string.h			\
+	src/lib/string/printf.h				\
+	src/lib/string/scanf.h

+ 152 - 0
src/lib/string/printf.c

@@ -0,0 +1,152 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#include "lib/string/printf.h"
+#include "lib/err/torerr.h"
+#include "lib/cc/torint.h"
+#include "lib/malloc/util_malloc.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/** Replacement for snprintf.  Differs from platform snprintf in two
+ * ways: First, always NUL-terminates its output.  Second, always
+ * returns -1 if the result is truncated.  (Note that this return
+ * behavior does <i>not</i> conform to C99; it just happens to be
+ * easier to emulate "return -1" with conformant implementations than
+ * it is to emulate "return number that would be written" with
+ * non-conformant implementations.) */
+int
+tor_snprintf(char *str, size_t size, const char *format, ...)
+{
+  va_list ap;
+  int r;
+  va_start(ap,format);
+  r = tor_vsnprintf(str,size,format,ap);
+  va_end(ap);
+  return r;
+}
+
+/** Replacement for vsnprintf; behavior differs as tor_snprintf differs from
+ * snprintf.
+ */
+int
+tor_vsnprintf(char *str, size_t size, const char *format, va_list args)
+{
+  int r;
+  if (size == 0)
+    return -1; /* no place for the NUL */
+  if (size > SIZE_T_CEILING)
+    return -1;
+#ifdef _WIN32
+  r = _vsnprintf(str, size, format, args);
+#else
+  r = vsnprintf(str, size, format, args);
+#endif
+  str[size-1] = '\0';
+  if (r < 0 || r >= (ssize_t)size)
+    return -1;
+  return r;
+}
+
+/**
+ * Portable asprintf implementation.  Does a printf() into a newly malloc'd
+ * string.  Sets *<b>strp</b> to this string, and returns its length (not
+ * including the terminating NUL character).
+ *
+ * You can treat this function as if its implementation were something like
+   <pre>
+     char buf[_INFINITY_];
+     tor_snprintf(buf, sizeof(buf), fmt, args);
+     *strp = tor_strdup(buf);
+     return strlen(*strp);
+   </pre>
+ * Where _INFINITY_ is an imaginary constant so big that any string can fit
+ * into it.
+ */
+int
+tor_asprintf(char **strp, const char *fmt, ...)
+{
+  int r;
+  va_list args;
+  va_start(args, fmt);
+  r = tor_vasprintf(strp, fmt, args);
+  va_end(args);
+  if (!*strp || r < 0) {
+    /* LCOV_EXCL_START */
+    raw_assert_unreached_msg("Internal error in asprintf");
+    /* LCOV_EXCL_STOP */
+  }
+  return r;
+}
+
+/**
+ * Portable vasprintf implementation.  Does a printf() into a newly malloc'd
+ * string.  Differs from regular vasprintf in the same ways that
+ * tor_asprintf() differs from regular asprintf.
+ *
+ * On success, store the newly allocated string in *<b>strp</b> and return
+ * its length (not counting the terminating NUL).  On failure, set
+ * *<b>strp</b> to NULL and return a negative value.
+ */
+int
+tor_vasprintf(char **strp, const char *fmt, va_list args)
+{
+  /* use a temporary variable in case *strp is in args. */
+  char *strp_tmp=NULL;
+#ifdef HAVE_VASPRINTF
+  /* If the platform gives us one, use it. */
+  int r = vasprintf(&strp_tmp, fmt, args);
+  if (r < 0)
+    *strp = NULL;
+  else
+    *strp = strp_tmp;
+  return r;
+#elif defined(HAVE__VSCPRINTF)
+  /* On Windows, _vsnprintf won't tell us the length of the string if it
+   * overflows, so we need to use _vscprintf to tell how much to allocate */
+  int len, r;
+  va_list tmp_args;
+  va_copy(tmp_args, args);
+  len = _vscprintf(fmt, tmp_args);
+  va_end(tmp_args);
+  if (len < 0) {
+    *strp = NULL;
+    return -1;
+  }
+  strp_tmp = tor_malloc(len + 1);
+  r = _vsnprintf(strp_tmp, len+1, fmt, args);
+  if (r != len) {
+    tor_free(strp_tmp);
+    *strp = NULL;
+    return -1;
+  }
+  *strp = strp_tmp;
+  return len;
+#else
+  /* Everywhere else, we have a decent vsnprintf that tells us how many
+   * characters we need.  We give it a try on a short buffer first, since
+   * it might be nice to avoid the second vsnprintf call.
+   */
+  char buf[128];
+  int len, r;
+  va_list tmp_args;
+  va_copy(tmp_args, args);
+  /* Use the platform vsnprintf() to learn the needed length.
+   * tor_vsnprintf() is not an option here: it returns -1 whenever the
+   * output is truncated, so we could never tell how much to allocate and
+   * would hand back a truncated copy of buf with a length of -1.
+   */
+  len = vsnprintf(buf, sizeof(buf), fmt, tmp_args);
+  va_end(tmp_args);
+  buf[sizeof(buf) - 1] = '\0';
+  if (len < 0) {
+    *strp = NULL;
+    return -1;
+  }
+  if (len < (int)sizeof(buf)) {
+    *strp = tor_strdup(buf);
+    return len;
+  }
+  strp_tmp = tor_malloc(len+1);
+  /* use of tor_vsnprintf() will ensure string is null terminated */
+  r = tor_vsnprintf(strp_tmp, len+1, fmt, args);
+  if (r != len) {
+    tor_free(strp_tmp);
+    *strp = NULL;
+    return -1;
+  }
+  *strp = strp_tmp;
+  return len;
+#endif /* defined(HAVE_VASPRINTF) || ... */
+}

+ 25 - 0
src/lib/string/printf.h

@@ -0,0 +1,25 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_UTIL_PRINTF_H
+#define TOR_UTIL_PRINTF_H
+
+#include "orconfig.h"
+#include "lib/cc/compat_compiler.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+
+int tor_snprintf(char *str, size_t size, const char *format, ...)
+  CHECK_PRINTF(3,4) ATTR_NONNULL((1,3));
+int tor_vsnprintf(char *str, size_t size, const char *format, va_list args)
+  CHECK_PRINTF(3,0) ATTR_NONNULL((1,3));
+
+int tor_asprintf(char **strp, const char *fmt, ...)
+  CHECK_PRINTF(2,3);
+int tor_vasprintf(char **strp, const char *fmt, va_list args)
+  CHECK_PRINTF(2,0);
+
+#endif /* !defined(TOR_UTIL_PRINTF_H) */

+ 312 - 0
src/lib/string/scanf.c

@@ -0,0 +1,312 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#include "lib/string/scanf.h"
+#include "lib/string/compat_ctype.h"
+#include "lib/cc/torint.h"
+#include "lib/err/torerr.h"
+
+#include <stdlib.h>
+
+#define MAX_SCANF_WIDTH 9999
+
+/** Helper: given an ASCII-encoded decimal digit, return its numeric value.
+ * NOTE: requires that its input be in-bounds. */
+static int
+digit_to_num(char d)
+{
+  int num = ((int)d) - (int)'0';
+  raw_assert(num <= 9 && num >= 0);
+  return num;
+}
+
+/** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
+ * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
+ * success, store the result in <b>out</b>, advance bufp to the next
+ * character, and return 0.  On failure, return -1. */
+static int
+scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
+{
+  unsigned long result = 0;
+  int scanned_so_far = 0;
+  const int hex = base==16;
+  raw_assert(base == 10 || base == 16);
+  if (!bufp || !*bufp || !out)
+    return -1;
+  if (width<0)
+    width=MAX_SCANF_WIDTH;
+
+  while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
+         && scanned_so_far < width) {
+    unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
+    // Check for overflow beforehand, without actually causing any overflow
+    // This preserves functionality on compilers that don't wrap overflow
+    // (i.e. that trap or optimise away overflow)
+    // result * base + digit > ULONG_MAX
+    // result * base > ULONG_MAX - digit
+    if (result > (ULONG_MAX - digit)/base)
+      return -1; /* Processing this digit would overflow */
+    result = result * base + digit;
+    ++scanned_so_far;
+  }
+
+  if (!scanned_so_far) /* No actual digits scanned */
+    return -1;
+
+  *out = result;
+  return 0;
+}
+
+/** Helper: Read a signed long from *<b>bufp</b> of up to <b>width</b>
+ * characters.  (Use MAX_SCANF_WIDTH if <b>width</b> is less than 0.)  A
+ * leading '-' is accepted and counts as one character of the width; the
+ * remaining digits are parsed as decimal by scan_unsigned().  On
+ * success, store the result in <b>out</b>, advance bufp to the next
+ * character, and return 0.  On failure (bad arguments, no digits, or a
+ * value outside LONG_MIN..LONG_MAX), return -1; *<b>bufp</b> may already
+ * have been advanced past a '-' in that case. */
+static int
+scan_signed(const char **bufp, long *out, int width)
+{
+  int neg = 0;
+  unsigned long result = 0;
+
+  if (!bufp || !*bufp || !out)
+    return -1;
+  if (width<0)
+    width=MAX_SCANF_WIDTH;
+
+  if (**bufp == '-') {
+    neg = 1;
+    ++*bufp;
+    --width;
+  }
+
+  if (scan_unsigned(bufp, &result, width, 10) < 0)
+    return -1;
+
+  if (neg && result > 0) {
+    if (result > ((unsigned long)LONG_MAX) + 1)
+      return -1; /* Underflow */
+    else if (result == ((unsigned long)LONG_MAX) + 1)
+      *out = LONG_MIN;
+    else {
+      /* We once had a far more clever no-overflow conversion here, but
+       * some versions of GCC apparently ran it into the ground.  Now
+       * we just check for LONG_MIN explicitly.
+       */
+      *out = -(long)result;
+    }
+  } else {
+    if (result > LONG_MAX)
+      return -1; /* Overflow */
+    *out = (long)result;
+  }
+
+  return 0;
+}
+
+/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
+ * <b>width</b> characters.  (Use MAX_SCANF_WIDTH if <b>width</b> is less
+ * than 0.)  Accepts an optional leading '-', a run of digits, and an
+ * optional '.' followed by more digits; exponents are not handled.  On
+ * success, store the result in <b>out</b>, advance bufp to the next
+ * character, and return 0.  On failure, return -1.
+ *
+ * NOTE(review): the leading '-' is consumed without being counted against
+ * <b>width</b>, and the '.' is consumed (and counted) without first checking
+ * that the width has not been reached.  Because the '.' counts as a scanned
+ * character, an input of just "." is reported as success with value 0.
+ * Confirm whether callers rely on any of this before changing it. */
+static int
+scan_double(const char **bufp, double *out, int width)
+{
+  int neg = 0;
+  double result = 0;
+  int scanned_so_far = 0;
+
+  if (!bufp || !*bufp || !out)
+    return -1;
+  if (width<0)
+    width=MAX_SCANF_WIDTH;
+
+  if (**bufp == '-') {
+    neg = 1;
+    ++*bufp;
+  }
+
+  while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
+    const int digit = digit_to_num(*(*bufp)++);
+    result = result * 10 + digit;
+    ++scanned_so_far;
+  }
+  if (**bufp == '.') {
+    double fracval = 0, denominator = 1;
+    ++*bufp;
+    ++scanned_so_far;
+    while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
+      const int digit = digit_to_num(*(*bufp)++);
+      fracval = fracval * 10 + digit;
+      denominator *= 10;
+      ++scanned_so_far;
+    }
+    result += fracval / denominator;
+  }
+
+  if (!scanned_so_far) /* No actual digits scanned */
+    return -1;
+
+  *out = neg ? -result : result;
+  return 0;
+}
+
+/** Helper: copy up to <b>width</b> non-space characters from *<b>bufp</b> to
+ * <b>out</b>, then nul-terminate <b>out</b>; the caller must therefore
+ * provide room for <b>width</b>+1 bytes.  Copying stops at the first
+ * whitespace character, at the EOS, or after <b>width</b> characters, and
+ * *<b>bufp</b> is left pointing at the first character that was not copied.
+ * Return 0 on success (even if nothing was copied), or -1 if <b>bufp</b> or
+ * <b>out</b> is NULL or <b>width</b> is negative. */
+static int
+scan_string(const char **bufp, char *out, int width)
+{
+  int scanned_so_far = 0;
+  if (!bufp || !out || width < 0)
+    return -1;
+  while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
+    *out++ = *(*bufp)++;
+    ++scanned_so_far;
+  }
+  *out = '\0';
+  return 0;
+}
+
+/** Locale-independent, minimal, no-surprises scanf variant, accepting only a
+ * restricted pattern format.  For more info on what it supports, see
+ * tor_sscanf() documentation.  */
+int
+tor_vsscanf(const char *buf, const char *pattern, va_list ap)
+{
+  int n_matched = 0;
+
+  while (*pattern) {
+    if (*pattern != '%') {
+      if (*buf == *pattern) {
+        ++buf;
+        ++pattern;
+        continue;
+      } else {
+        return n_matched;
+      }
+    } else {
+      int width = -1;
+      int longmod = 0;
+      ++pattern;
+      if (TOR_ISDIGIT(*pattern)) {
+        width = digit_to_num(*pattern++);
+        while (TOR_ISDIGIT(*pattern)) {
+          width *= 10;
+          width += digit_to_num(*pattern++);
+          if (width > MAX_SCANF_WIDTH)
+            return -1;
+        }
+        if (!width) /* No zero-width things. */
+          return -1;
+      }
+      if (*pattern == 'l') {
+        longmod = 1;
+        ++pattern;
+      }
+      if (*pattern == 'u' || *pattern == 'x') {
+        unsigned long u;
+        const int base = (*pattern == 'u') ? 10 : 16;
+        if (!*buf)
+          return n_matched;
+        if (scan_unsigned(&buf, &u, width, base)<0)
+          return n_matched;
+        if (longmod) {
+          unsigned long *out = va_arg(ap, unsigned long *);
+          *out = u;
+        } else {
+          unsigned *out = va_arg(ap, unsigned *);
+          if (u > UINT_MAX)
+            return n_matched;
+          *out = (unsigned) u;
+        }
+        ++pattern;
+        ++n_matched;
+      } else if (*pattern == 'f') {
+        double *d = va_arg(ap, double *);
+        if (!longmod)
+          return -1; /* float not supported */
+        if (!*buf)
+          return n_matched;
+        if (scan_double(&buf, d, width)<0)
+          return n_matched;
+        ++pattern;
+        ++n_matched;
+      } else if (*pattern == 'd') {
+        long lng=0;
+        if (scan_signed(&buf, &lng, width)<0)
+          return n_matched;
+        if (longmod) {
+          long *out = va_arg(ap, long *);
+          *out = lng;
+        } else {
+          int *out = va_arg(ap, int *);
+#if LONG_MAX > INT_MAX
+          if (lng < INT_MIN || lng > INT_MAX)
+            return n_matched;
+#endif
+          *out = (int)lng;
+        }
+        ++pattern;
+        ++n_matched;
+      } else if (*pattern == 's') {
+        char *s = va_arg(ap, char *);
+        if (longmod)
+          return -1;
+        if (width < 0)
+          return -1;
+        if (scan_string(&buf, s, width)<0)
+          return n_matched;
+        ++pattern;
+        ++n_matched;
+      } else if (*pattern == 'c') {
+        char *ch = va_arg(ap, char *);
+        if (longmod)
+          return -1;
+        if (width != -1)
+          return -1;
+        if (!*buf)
+          return n_matched;
+        *ch = *buf++;
+        ++pattern;
+        ++n_matched;
+      } else if (*pattern == '%') {
+        if (*buf != '%')
+          return n_matched;
+        if (longmod)
+          return -1;
+        ++buf;
+        ++pattern;
+      } else {
+        return -1; /* Unrecognized pattern component. */
+      }
+    }
+  }
+
+  return n_matched;
+}
+
+/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
+ * and store the results in the corresponding argument fields.  Differs from
+ * sscanf in that:
+ * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c.
+ *     <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
+ *     <li>It does not handle arbitrarily long widths.
+ *     <li>Numbers do not consume any space characters.
+ *     <li>It is locale-independent.
+ *     <li>%u and %x do not consume any space.
+ *     <li>It returns -1 on malformed patterns.</ul>
+ *
+ * (As with other locale-independent functions, we need this to parse data that
+ * is in ASCII without worrying that the C library's locale-handling will make
+ * miscellaneous characters look like numbers, spaces, and so on.)
+ */
+int
+tor_sscanf(const char *buf, const char *pattern, ...)
+{
+  int r;
+  va_list ap;
+  va_start(ap, pattern);
+  r = tor_vsscanf(buf, pattern, ap);
+  va_end(ap);
+  return r;
+}

+ 19 - 0
src/lib/string/scanf.h

@@ -0,0 +1,19 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_UTIL_SCANF_H
+#define TOR_UTIL_SCANF_H
+
+#include "orconfig.h"
+#include "lib/cc/compat_compiler.h"
+
+#include <stdarg.h>
+
+int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \
+  CHECK_SCANF(2, 0);
+int tor_sscanf(const char *buf, const char *pattern, ...)
+  CHECK_SCANF(2, 3);
+
+#endif /* !defined(TOR_UTIL_SCANF_H) */

+ 340 - 0
src/lib/string/util_string.c

@@ -0,0 +1,340 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#include "lib/string/util_string.h"
+#include "lib/string/compat_ctype.h"
+#include "lib/err/torerr.h"
+#include "lib/ctime/di_ops.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+/** Remove from the string <b>s</b> every character which appears in
+ * <b>strip</b>. */
+void
+tor_strstrip(char *s, const char *strip)
+{
+  char *readp = s;
+  while (*readp) {
+    if (strchr(strip, *readp)) {
+      ++readp;
+    } else {
+      *s++ = *readp++;
+    }
+  }
+  *s = '\0';
+}
+
+/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to
+ * lowercase. */
+void
+tor_strlower(char *s)
+{
+  while (*s) {
+    *s = TOR_TOLOWER(*s);
+    ++s;
+  }
+}
+
+/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to
+ * uppercase, in place, by mapping each character through TOR_TOUPPER(). */
+void
+tor_strupper(char *s)
+{
+  while (*s) {
+    *s = TOR_TOUPPER(*s);
+    ++s;
+  }
+}
+
+/** Return 1 if every character in <b>s</b> is printable, else return 0.
+ */
+int
+tor_strisprint(const char *s)
+{
+  while (*s) {
+    if (!TOR_ISPRINT(*s))
+      return 0;
+    s++;
+  }
+  return 1;
+}
+
+/** Return 1 if no character in <b>s</b> is uppercase, else return 0.
+ */
+int
+tor_strisnonupper(const char *s)
+{
+  while (*s) {
+    if (TOR_ISUPPER(*s))
+      return 0;
+    s++;
+  }
+  return 1;
+}
+
+/** Return true iff every character in <b>s</b> is whitespace (as decided by
+ * TOR_ISSPACE()); else return false.  An empty string yields true. */
+int
+tor_strisspace(const char *s)
+{
+  while (*s) {
+    if (!TOR_ISSPACE(*s))
+      return 0;
+    s++;
+  }
+  return 1;
+}
+
+/** As strcmp, except that either string may be NULL.  The NULL string is
+ * considered to be before any non-NULL string. */
+int
+strcmp_opt(const char *s1, const char *s2)
+{
+  if (!s1) {
+    if (!s2)
+      return 0;
+    else
+      return -1;
+  } else if (!s2) {
+    return 1;
+  } else {
+    return strcmp(s1, s2);
+  }
+}
+
+/** Compares the first strlen(s2) characters of s1 with s2.  Returns as for
+ * strcmp.
+ */
+int
+strcmpstart(const char *s1, const char *s2)
+{
+  size_t n = strlen(s2);
+  return strncmp(s1, s2, n);
+}
+
+/** Compare the s1_len-byte string <b>s1</b> with <b>s2</b>,
+ * without depending on a terminating nul in s1.  Sorting order is first by
+ * length, then lexically; return values are as for strcmp.
+ */
+int
+strcmp_len(const char *s1, const char *s2, size_t s1_len)
+{
+  size_t s2_len = strlen(s2);
+  if (s1_len < s2_len)
+    return -1;
+  if (s1_len > s2_len)
+    return 1;
+  return fast_memcmp(s1, s2, s2_len);
+}
+
+/** Compares the first strlen(s2) characters of s1 with s2.  Returns as for
+ * strcasecmp.
+ */
+int
+strcasecmpstart(const char *s1, const char *s2)
+{
+  size_t n = strlen(s2);
+  return strncasecmp(s1, s2, n);
+}
+
+/** Compares the last strlen(s2) characters of s1 with s2.  Returns as for
+ * strcmp.
+ */
+int
+strcmpend(const char *s1, const char *s2)
+{
+  size_t n1 = strlen(s1), n2 = strlen(s2);
+  if (n2>n1)
+    return strcmp(s1,s2);
+  else
+    return strncmp(s1+(n1-n2), s2, n2);
+}
+
+/** Compares the last strlen(s2) characters of s1 with s2.  Returns as for
+ * strcasecmp.
+ */
+int
+strcasecmpend(const char *s1, const char *s2)
+{
+  size_t n1 = strlen(s1), n2 = strlen(s2);
+  if (n2>n1) /* then they can't be the same; figure out which is bigger */
+    return strcasecmp(s1,s2);
+  else
+    return strncasecmp(s1+(n1-n2), s2, n2);
+}
+
+/** Return a pointer to the first char of s that is not whitespace and
+ * not a comment, or to the terminating NUL if no such character exists.
+ */
+const char *
+eat_whitespace(const char *s)
+{
+  raw_assert(s);
+
+  while (1) {
+    switch (*s) {
+    case '\0':
+    default:
+      return s;
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      ++s;
+      break;
+    case '#':
+      ++s;
+      while (*s && *s != '\n')
+        ++s;
+    }
+  }
+}
+
+/** As eat_whitespace, but never read at or beyond <b>eos</b>: return a
+ * pointer to the first char of s that is not whitespace and not part of a
+ * '#' comment (a NUL also stops the scan), or return <b>eos</b> if no such
+ * character is found before it.  Requires s <= eos.
+ */
+const char *
+eat_whitespace_eos(const char *s, const char *eos)
+{
+  raw_assert(s);
+  raw_assert(eos && s <= eos);
+
+  while (s < eos) {
+    switch (*s) {
+    case '\0':
+    default:
+      return s;
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      ++s;
+      break;
+    case '#':
+      ++s;
+      while (s < eos && *s && *s != '\n')
+        ++s;
+    }
+  }
+  return s;
+}
+
+/** Return a pointer to the first char of s that is not a space or a tab
+ * or a \\r, or to the terminating NUL if no such character exists. */
+const char *
+eat_whitespace_no_nl(const char *s)
+{
+  while (*s == ' ' || *s == '\t' || *s == '\r')
+    ++s;
+  return s;
+}
+
+/** As eat_whitespace_no_nl, but stop at <b>eos</b> whether we have
+ * found a non-whitespace character or not. */
+const char *
+eat_whitespace_eos_no_nl(const char *s, const char *eos)
+{
+  while (s < eos && (*s == ' ' || *s == '\t' || *s == '\r'))
+    ++s;
+  return s;
+}
+
+/** Return a pointer to the first char of s that is whitespace or <b>#</b>,
+ * or to the terminating NUL if no such character exists.
+ */
+const char *
+find_whitespace(const char *s)
+{
+  /* tor_assert(s); */
+  while (1) {
+    switch (*s)
+    {
+    case '\0':
+    case '#':
+    case ' ':
+    case '\r':
+    case '\n':
+    case '\t':
+      return s;
+    default:
+      ++s;
+    }
+  }
+}
+
+/** As find_whitespace, but stop at <b>eos</b> whether we have found a
+ * whitespace or not. */
+const char *
+find_whitespace_eos(const char *s, const char *eos)
+{
+  /* tor_assert(s); */
+  while (s < eos) {
+    switch (*s)
+    {
+    case '\0':
+    case '#':
+    case ' ':
+    case '\r':
+    case '\n':
+    case '\t':
+      return s;
+    default:
+      ++s;
+    }
+  }
+  return s;
+}
+
+/** Return the first occurrence of <b>needle</b> in <b>haystack</b> that
+ * occurs at the start of a line (that is, at the beginning of <b>haystack</b>
+ * or immediately after a newline).  Return NULL if no such string is found.
+ */
+const char *
+find_str_at_start_of_line(const char *haystack, const char *needle)
+{
+  size_t needle_len = strlen(needle);
+
+  do {
+    if (!strncmp(haystack, needle, needle_len))
+      return haystack;
+
+    haystack = strchr(haystack, '\n');
+    if (!haystack)
+      return NULL;
+    else
+      ++haystack;
+  } while (*haystack);
+
+  return NULL;
+}
+
+/** Returns true if <b>string</b> could be a C identifier.
+    A C identifier must begin with a letter or an underscore and the
+    rest of its characters can be letters, numbers or underscores. No
+    length limit is imposed. */
+int
+string_is_C_identifier(const char *string)
+{
+  size_t iter;
+  size_t length = strlen(string);
+  if (!length)
+    return 0;
+
+  for (iter = 0; iter < length ; iter++) {
+    if (iter == 0) {
+      if (!(TOR_ISALPHA(string[iter]) ||
+            string[iter] == '_'))
+        return 0;
+    } else {
+      if (!(TOR_ISALPHA(string[iter]) ||
+            TOR_ISDIGIT(string[iter]) ||
+            string[iter] == '_'))
+        return 0;
+    }
+  }
+
+  return 1;
+}

+ 42 - 0
src/lib/string/util_string.h

@@ -0,0 +1,42 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_UTIL_STRING_H
+#define TOR_UTIL_STRING_H
+
+#include "orconfig.h"
+#include "lib/cc/compat_compiler.h"
+
+#include <stddef.h>
+
+/** Allowable characters in a hexadecimal string. */
+#define HEX_CHARACTERS "0123456789ABCDEFabcdef"
+void tor_strlower(char *s) ATTR_NONNULL((1));
+void tor_strupper(char *s) ATTR_NONNULL((1));
+int tor_strisprint(const char *s) ATTR_NONNULL((1));
+int tor_strisnonupper(const char *s) ATTR_NONNULL((1));
+int tor_strisspace(const char *s);
+int strcmp_opt(const char *s1, const char *s2);
+int strcmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2));
+int strcmp_len(const char *s1, const char *s2, size_t len) ATTR_NONNULL((1,2));
+int strcasecmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2));
+int strcmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2));
+int strcasecmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2));
+int fast_memcmpstart(const void *mem, size_t memlen, const char *prefix);
+
+void tor_strstrip(char *s, const char *strip) ATTR_NONNULL((1,2));
+
+const char *eat_whitespace(const char *s);
+const char *eat_whitespace_eos(const char *s, const char *eos);
+const char *eat_whitespace_no_nl(const char *s);
+const char *eat_whitespace_eos_no_nl(const char *s, const char *eos);
+const char *find_whitespace(const char *s);
+const char *find_whitespace_eos(const char *s, const char *eos);
+const char *find_str_at_start_of_line(const char *haystack,
+                                      const char *needle);
+
+int string_is_C_identifier(const char *string);
+
+#endif /* !defined(TOR_UTIL_STRING_H) */