diff --git a/crypto/asn1/a_mbstr.c b/crypto/asn1/a_mbstr.c index a2789ed1..1bbcd1bc 100644 --- a/crypto/asn1/a_mbstr.c +++ b/crypto/asn1/a_mbstr.c @@ -56,23 +56,16 @@ #include +#include #include +#include #include #include #include "asn1_locl.h" +#include "../bytestring/internal.h" -static int traverse_string(const unsigned char *p, int len, int inform, - int (*rfunc) (uint32_t value, void *in), - void *arg); -static int in_utf8(uint32_t value, void *arg); -static int out_utf8(uint32_t value, void *arg); -static int type_str(uint32_t value, void *arg); -static int cpy_asc(uint32_t value, void *arg); -static int cpy_bmp(uint32_t value, void *arg); -static int cpy_univ(uint32_t value, void *arg); -static int cpy_utf8(uint32_t value, void *arg); static int is_printable(uint32_t value); /* @@ -90,55 +83,45 @@ int ASN1_mbstring_copy(ASN1_STRING **out, const unsigned char *in, int len, return ASN1_mbstring_ncopy(out, in, len, inform, mask, 0, 0); } +OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_BMPSTRING) +OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_UNIVERSALSTRING) +OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_UTF8STRING) + int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, int inform, unsigned long mask, long minsize, long maxsize) { int str_type; - int ret; char free_out; - int outform, outlen = 0; ASN1_STRING *dest; - unsigned char *p; - int nchar; + size_t nchar = 0; char strbuf[32]; - int (*cpyfunc) (uint32_t, void *) = NULL; if (len == -1) len = strlen((const char *)in); if (!mask) mask = DIRSTRING_TYPE; - /* First do a string check and work out the number of characters */ + int (*decode_func)(CBS *, uint32_t*); + int error; switch (inform) { - case MBSTRING_BMP: - if (len & 1) { - OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING_LENGTH); - return -1; - } - nchar = len >> 1; + decode_func = cbs_get_ucs2_be; + error = ASN1_R_INVALID_BMPSTRING; break; case MBSTRING_UNIV: - if (len & 3) { - OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING_LENGTH); - return -1; - } - nchar = len >> 2; + decode_func = cbs_get_utf32_be; + error = ASN1_R_INVALID_UNIVERSALSTRING; break; case MBSTRING_UTF8: - nchar = 0; - /* This counts the characters and does utf8 syntax checking */ - ret = traverse_string(in, len, MBSTRING_UTF8, in_utf8, &nchar); - if (ret < 0) { - OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UTF8STRING); - return -1; - } + decode_func = cbs_get_utf8; + error = ASN1_R_INVALID_UTF8STRING; break; case MBSTRING_ASC: - nchar = len; + decode_func = cbs_get_latin1; + error = ERR_R_INTERNAL_ERROR; // Latin-1 inputs are never invalid. break; default: @@ -146,44 +129,92 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, return -1; } - if ((minsize > 0) && (nchar < minsize)) { + /* Check |minsize| and |maxsize| and work out the minimal type, if any. */ + CBS cbs; + CBS_init(&cbs, in, len); + size_t utf8_len = 0; + while (CBS_len(&cbs) != 0) { + uint32_t c; + if (!decode_func(&cbs, &c)) { + OPENSSL_PUT_ERROR(ASN1, error); + return -1; + } + if (nchar == 0 && + (inform == MBSTRING_BMP || inform == MBSTRING_UNIV) && + c == 0xfeff) { + /* Reject byte-order mark. We could drop it but that would mean + * adding ambiguity around whether a BOM was included or not when + * matching strings. + * + * For a little-endian UCS-2 string, the BOM will appear as 0xfffe + * and will be rejected as noncharacter, below. */ + OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS); + return -1; + } + + /* Update which output formats are still possible. */ + if ((mask & B_ASN1_PRINTABLESTRING) && !is_printable(c)) { + mask &= ~B_ASN1_PRINTABLESTRING; + } + if ((mask & B_ASN1_IA5STRING) && (c > 127)) { + mask &= ~B_ASN1_IA5STRING; + } + if ((mask & B_ASN1_T61STRING) && (c > 0xff)) { + mask &= ~B_ASN1_T61STRING; + } + if ((mask & B_ASN1_BMPSTRING) && (c > 0xffff)) { + mask &= ~B_ASN1_BMPSTRING; + } + if (!mask) { + OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS); + return -1; + } + + nchar++; + utf8_len += cbb_get_utf8_len(c); + } + + if (minsize > 0 && nchar < (size_t)minsize) { OPENSSL_PUT_ERROR(ASN1, ASN1_R_STRING_TOO_SHORT); BIO_snprintf(strbuf, sizeof strbuf, "%ld", minsize); ERR_add_error_data(2, "minsize=", strbuf); return -1; } - if ((maxsize > 0) && (nchar > maxsize)) { + if (maxsize > 0 && nchar > (size_t)maxsize) { OPENSSL_PUT_ERROR(ASN1, ASN1_R_STRING_TOO_LONG); BIO_snprintf(strbuf, sizeof strbuf, "%ld", maxsize); ERR_add_error_data(2, "maxsize=", strbuf); return -1; } - /* Now work out minimal type (if any) */ - if (traverse_string(in, len, inform, type_str, &mask) < 0) { - OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS); - return -1; - } - /* Now work out output format and string type */ - outform = MBSTRING_ASC; - if (mask & B_ASN1_PRINTABLESTRING) + int (*encode_func)(CBB *, uint32_t) = cbb_add_latin1; + size_t size_estimate = nchar; + int outform = MBSTRING_ASC; + if (mask & B_ASN1_PRINTABLESTRING) { str_type = V_ASN1_PRINTABLESTRING; - else if (mask & B_ASN1_IA5STRING) + } else if (mask & B_ASN1_IA5STRING) { str_type = V_ASN1_IA5STRING; - else if (mask & B_ASN1_T61STRING) + } else if (mask & B_ASN1_T61STRING) { str_type = V_ASN1_T61STRING; - else if (mask & B_ASN1_BMPSTRING) { + } else if (mask & B_ASN1_BMPSTRING) { str_type = V_ASN1_BMPSTRING; outform = MBSTRING_BMP; + encode_func = cbb_add_ucs2_be; + size_estimate = 2 * nchar; } else if (mask & B_ASN1_UNIVERSALSTRING) { str_type = V_ASN1_UNIVERSALSTRING; + encode_func = cbb_add_utf32_be; + size_estimate = 4 * nchar; outform = MBSTRING_UNIV; } else { str_type = V_ASN1_UTF8STRING; outform = MBSTRING_UTF8; + encode_func = cbb_add_utf8; + size_estimate = utf8_len; } + if (!out) return str_type; if (*out) { @@ -204,6 +235,7 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, } *out = dest; } + /* If both the same type just copy across */ if (inform == outform) { if (!ASN1_STRING_set(dest, in, len)) { @@ -213,179 +245,41 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, return str_type; } - /* Work out how much space the destination will need */ - switch (outform) { - case MBSTRING_ASC: - outlen = nchar; - cpyfunc = cpy_asc; - break; - - case MBSTRING_BMP: - outlen = nchar << 1; - cpyfunc = cpy_bmp; - break; - - case MBSTRING_UNIV: - outlen = nchar << 2; - cpyfunc = cpy_univ; - break; - - case MBSTRING_UTF8: - outlen = 0; - traverse_string(in, len, inform, out_utf8, &outlen); - cpyfunc = cpy_utf8; - break; - } - if (!(p = OPENSSL_malloc(outlen + 1))) { - if (free_out) - ASN1_STRING_free(dest); + CBB cbb; + if (!CBB_init(&cbb, size_estimate + 1)) { OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE); - return -1; + goto err; } - dest->length = outlen; - dest->data = p; - p[outlen] = 0; - traverse_string(in, len, inform, cpyfunc, &p); + CBS_init(&cbs, in, len); + while (CBS_len(&cbs) != 0) { + uint32_t c; + if (!decode_func(&cbs, &c) || + !encode_func(&cbb, c)) { + OPENSSL_PUT_ERROR(ASN1, ERR_R_INTERNAL_ERROR); + goto err; + } + } + uint8_t *data = NULL; + size_t data_len; + if (/* OpenSSL historically NUL-terminated this value with a single byte, + * even for |MBSTRING_BMP| and |MBSTRING_UNIV|. */ + !CBB_add_u8(&cbb, 0) || + !CBB_finish(&cbb, &data, &data_len) || + data_len < 1 || + data_len > INT_MAX) { + OPENSSL_PUT_ERROR(ASN1, ERR_R_INTERNAL_ERROR); + OPENSSL_free(data); + goto err; + } + dest->length = (int)(data_len - 1); + dest->data = data; return str_type; -} -/* - * This function traverses a string and passes the value of each character to - * an optional function along with a void * argument. - */ - -static int traverse_string(const unsigned char *p, int len, int inform, - int (*rfunc) (uint32_t value, void *in), - void *arg) -{ - uint32_t value; - int ret; - while (len) { - if (inform == MBSTRING_ASC) { - value = *p++; - len--; - } else if (inform == MBSTRING_BMP) { - value = *p++ << 8; - value |= *p++; - len -= 2; - } else if (inform == MBSTRING_UNIV) { - value = ((uint32_t)*p++) << 24; - value |= ((uint32_t)*p++) << 16; - value |= *p++ << 8; - value |= *p++; - len -= 4; - } else { - ret = UTF8_getc(p, len, &value); - if (ret < 0) - return -1; - len -= ret; - p += ret; - } - if (rfunc) { - ret = rfunc(value, arg); - if (ret <= 0) - return ret; - } - } - return 1; -} - -/* Various utility functions for traverse_string */ - -/* Just count number of characters */ - -static int in_utf8(uint32_t value, void *arg) -{ - int *nchar; - nchar = arg; - (*nchar)++; - return 1; -} - -/* Determine size of output as a UTF8 String */ - -static int out_utf8(uint32_t value, void *arg) -{ - int *outlen; - outlen = arg; - *outlen += UTF8_putc(NULL, -1, value); - return 1; -} - -/* - * Determine the "type" of a string: check each character against a supplied - * "mask". - */ - -static int type_str(uint32_t value, void *arg) -{ - unsigned long types; - types = *((unsigned long *)arg); - if ((types & B_ASN1_PRINTABLESTRING) && !is_printable(value)) - types &= ~B_ASN1_PRINTABLESTRING; - if ((types & B_ASN1_IA5STRING) && (value > 127)) - types &= ~B_ASN1_IA5STRING; - if ((types & B_ASN1_T61STRING) && (value > 0xff)) - types &= ~B_ASN1_T61STRING; - if ((types & B_ASN1_BMPSTRING) && (value > 0xffff)) - types &= ~B_ASN1_BMPSTRING; - if (!types) - return -1; - *((unsigned long *)arg) = types; - return 1; -} - -/* Copy one byte per character ASCII like strings */ - -static int cpy_asc(uint32_t value, void *arg) -{ - unsigned char **p, *q; - p = arg; - q = *p; - *q = (unsigned char)value; - (*p)++; - return 1; -} - -/* Copy two byte per character BMPStrings */ - -static int cpy_bmp(uint32_t value, void *arg) -{ - unsigned char **p, *q; - p = arg; - q = *p; - *q++ = (unsigned char)((value >> 8) & 0xff); - *q = (unsigned char)(value & 0xff); - *p += 2; - return 1; -} - -/* Copy four byte per character UniversalStrings */ - -static int cpy_univ(uint32_t value, void *arg) -{ - unsigned char **p, *q; - p = arg; - q = *p; - *q++ = (unsigned char)((value >> 24) & 0xff); - *q++ = (unsigned char)((value >> 16) & 0xff); - *q++ = (unsigned char)((value >> 8) & 0xff); - *q = (unsigned char)(value & 0xff); - *p += 4; - return 1; -} - -/* Copy to a UTF8String */ - -static int cpy_utf8(uint32_t value, void *arg) -{ - unsigned char **p; - int ret; - p = arg; - /* We already know there is enough room so pass 0xff as the length */ - ret = UTF8_putc(*p, 0xff, value); - *p += ret; - return 1; + err: + if (free_out) + ASN1_STRING_free(dest); + CBB_cleanup(&cbb); + return -1; } /* Return 1 if the character is permitted in a PrintableString */ diff --git a/crypto/err/asn1.errordata b/crypto/err/asn1.errordata index 56cbbe52..271561bc 100644 --- a/crypto/err/asn1.errordata +++ b/crypto/err/asn1.errordata @@ -40,14 +40,14 @@ ASN1,138,ILLEGAL_TIME_VALUE ASN1,139,INTEGER_NOT_ASCII_FORMAT ASN1,140,INTEGER_TOO_LARGE_FOR_LONG ASN1,141,INVALID_BIT_STRING_BITS_LEFT -ASN1,142,INVALID_BMPSTRING_LENGTH +ASN1,142,INVALID_BMPSTRING ASN1,143,INVALID_DIGIT ASN1,144,INVALID_MODIFIER ASN1,145,INVALID_NUMBER ASN1,146,INVALID_OBJECT_ENCODING ASN1,147,INVALID_SEPARATOR ASN1,148,INVALID_TIME_FORMAT -ASN1,149,INVALID_UNIVERSALSTRING_LENGTH +ASN1,149,INVALID_UNIVERSALSTRING ASN1,150,INVALID_UTF8STRING ASN1,151,LIST_ERROR ASN1,152,MISSING_ASN1_EOS diff --git a/crypto/test/test_util.cc b/crypto/test/test_util.cc index 493b1247..4ad777f5 100644 --- a/crypto/test/test_util.cc +++ b/crypto/test/test_util.cc @@ -30,6 +30,10 @@ void hexdump(FILE *fp, const char *msg, const void *in, size_t len) { } std::ostream &operator<<(std::ostream &os, const Bytes &in) { + if (in.len == 0) { + return os << ""; + } + // Print a byte slice as hex. static const char hex[] = "0123456789abcdef"; for (size_t i = 0; i < in.len; i++) { diff --git a/crypto/x509/a_strex.c b/crypto/x509/a_strex.c index c0c346df..6dc183ac 100644 --- a/crypto/x509/a_strex.c +++ b/crypto/x509/a_strex.c @@ -189,13 +189,13 @@ static int do_buf(unsigned char *buf, int buflen, switch (charwidth) { case 4: if (buflen & 3) { - OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING_LENGTH); + OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING); return -1; } break; case 2: if (buflen & 1) { - OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING_LENGTH); + OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING); return -1; } break; diff --git a/crypto/x509/x509_test.cc b/crypto/x509/x509_test.cc index 7615324d..9c99aee4 100644 --- a/crypto/x509/x509_test.cc +++ b/crypto/x509/x509_test.cc @@ -20,8 +20,8 @@ #include #include -#include #include +#include #include #include #include @@ -30,6 +30,7 @@ #include #include "../internal.h" +#include "../test/test_util.h" std::string GetTestData(const char *path); @@ -1283,3 +1284,62 @@ TEST(X509Test, X509NameSet) { EXPECT_EQ(X509_NAME_ENTRY_set(X509_NAME_get_entry(name.get(), 1)), 1); EXPECT_EQ(X509_NAME_ENTRY_set(X509_NAME_get_entry(name.get(), 2)), 2); } + +TEST(X509Test, StringDecoding) { + static const struct { + std::vector in; + int type; + const char *expected; + } kTests[] = { + // Non-minimal, two-byte UTF-8. + {{0xc0, 0x81}, V_ASN1_UTF8STRING, nullptr}, + // Non-minimal, three-byte UTF-8. + {{0xe0, 0x80, 0x81}, V_ASN1_UTF8STRING, nullptr}, + // Non-minimal, four-byte UTF-8. + {{0xf0, 0x80, 0x80, 0x81}, V_ASN1_UTF8STRING, nullptr}, + // Truncated, four-byte UTF-8. + {{0xf0, 0x80, 0x80}, V_ASN1_UTF8STRING, nullptr}, + // Low-surrogate value. + {{0xed, 0xa0, 0x80}, V_ASN1_UTF8STRING, nullptr}, + // High-surrogate value. + {{0xed, 0xb0, 0x81}, V_ASN1_UTF8STRING, nullptr}, + // Initial BOMs should be rejected from UCS-2 and UCS-4. + {{0xfe, 0xff, 0, 88}, V_ASN1_BMPSTRING, nullptr}, + {{0, 0, 0xfe, 0xff, 0, 0, 0, 88}, V_ASN1_UNIVERSALSTRING, nullptr}, + // Otherwise, BOMs should pass through. + {{0, 88, 0xfe, 0xff}, V_ASN1_BMPSTRING, "X\xef\xbb\xbf"}, + {{0, 0, 0, 88, 0, 0, 0xfe, 0xff}, V_ASN1_UNIVERSALSTRING, + "X\xef\xbb\xbf"}, + // The maximum code-point should pass though. + {{0, 16, 0xff, 0xfd}, V_ASN1_UNIVERSALSTRING, "\xf4\x8f\xbf\xbd"}, + // Values outside the Unicode space should not. + {{0, 17, 0, 0}, V_ASN1_UNIVERSALSTRING, nullptr}, + // Non-characters should be rejected. + {{0, 1, 0xff, 0xff}, V_ASN1_UNIVERSALSTRING, nullptr}, + {{0, 1, 0xff, 0xfe}, V_ASN1_UNIVERSALSTRING, nullptr}, + {{0, 0, 0xfd, 0xd5}, V_ASN1_UNIVERSALSTRING, nullptr}, + // BMPString is UCS-2, not UTF-16, so surrogate pairs are invalid. + {{0xd8, 0, 0xdc, 1}, V_ASN1_BMPSTRING, nullptr}, + }; + + for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kTests); i++) { + SCOPED_TRACE(i); + const auto& test = kTests[i]; + ASN1_STRING s; + s.type = test.type; + s.data = const_cast(test.in.data()); + s.length = test.in.size(); + + uint8_t *utf8; + const int utf8_len = ASN1_STRING_to_UTF8(&utf8, &s); + EXPECT_EQ(utf8_len < 0, test.expected == nullptr); + if (utf8_len >= 0) { + if (test.expected != nullptr) { + EXPECT_EQ(Bytes(test.expected), Bytes(utf8, utf8_len)); + } + OPENSSL_free(utf8); + } else { + ERR_clear_error(); + } + } +} diff --git a/include/openssl/asn1.h b/include/openssl/asn1.h index f2e92a77..f7b6b861 100644 --- a/include/openssl/asn1.h +++ b/include/openssl/asn1.h @@ -152,6 +152,9 @@ extern "C" { /* For use with ASN1_mbstring_copy() */ #define MBSTRING_FLAG 0x1000 #define MBSTRING_UTF8 (MBSTRING_FLAG) +/* |MBSTRING_ASC| refers to Latin-1, not ASCII. It is used with TeletexString + * which, in turn, is treated as Latin-1 rather than T.61 by OpenSSL and most + * other software. */ #define MBSTRING_ASC (MBSTRING_FLAG|1) #define MBSTRING_BMP (MBSTRING_FLAG|2) #define MBSTRING_UNIV (MBSTRING_FLAG|4) @@ -926,14 +929,14 @@ BORINGSSL_MAKE_DELETER(ASN1_TYPE, ASN1_TYPE_free) #define ASN1_R_INTEGER_NOT_ASCII_FORMAT 139 #define ASN1_R_INTEGER_TOO_LARGE_FOR_LONG 140 #define ASN1_R_INVALID_BIT_STRING_BITS_LEFT 141 -#define ASN1_R_INVALID_BMPSTRING_LENGTH 142 +#define ASN1_R_INVALID_BMPSTRING 142 #define ASN1_R_INVALID_DIGIT 143 #define ASN1_R_INVALID_MODIFIER 144 #define ASN1_R_INVALID_NUMBER 145 #define ASN1_R_INVALID_OBJECT_ENCODING 146 #define ASN1_R_INVALID_SEPARATOR 147 #define ASN1_R_INVALID_TIME_FORMAT 148 -#define ASN1_R_INVALID_UNIVERSALSTRING_LENGTH 149 +#define ASN1_R_INVALID_UNIVERSALSTRING 149 #define ASN1_R_INVALID_UTF8STRING 150 #define ASN1_R_LIST_ERROR 151 #define ASN1_R_MISSING_ASN1_EOS 152