Use new encoding functions in ASN1_mbstring_ncopy.

Update-Note: This changes causes BoringSSL to be stricter about handling
Unicode strings:
  · Reject code points outside of Unicode
  · Reject surrogate values
  · Don't allow invalid UTF-8 to pass through when the source claims to
    be UTF-8 already.
  · Drop byte-order marks.

Previously, for example, a UniversalString could contain a large-valued
code point that would cause the UTF-8 encoder to emit invalid UTF-8.

Change-Id: I94d9db7796b70491b04494be84249907ff8fb46c
Reviewed-on: https://boringssl-review.googlesource.com/28325
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
This commit is contained in:
David Benjamin 2018-05-10 16:23:03 -04:00 committed by Adam Langley
parent 99767ecdd4
commit ae153bb9a6
6 changed files with 185 additions and 224 deletions

View File

@ -56,23 +56,16 @@
#include <openssl/asn1.h>
#include <limits.h>
#include <string.h>
#include <openssl/bytestring.h>
#include <openssl/err.h>
#include <openssl/mem.h>
#include "asn1_locl.h"
#include "../bytestring/internal.h"
static int traverse_string(const unsigned char *p, int len, int inform,
int (*rfunc) (uint32_t value, void *in),
void *arg);
static int in_utf8(uint32_t value, void *arg);
static int out_utf8(uint32_t value, void *arg);
static int type_str(uint32_t value, void *arg);
static int cpy_asc(uint32_t value, void *arg);
static int cpy_bmp(uint32_t value, void *arg);
static int cpy_univ(uint32_t value, void *arg);
static int cpy_utf8(uint32_t value, void *arg);
static int is_printable(uint32_t value);
/*
@ -90,55 +83,45 @@ int ASN1_mbstring_copy(ASN1_STRING **out, const unsigned char *in, int len,
return ASN1_mbstring_ncopy(out, in, len, inform, mask, 0, 0);
}
OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_BMPSTRING)
OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_UNIVERSALSTRING)
OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_UTF8STRING)
int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
int inform, unsigned long mask,
long minsize, long maxsize)
{
int str_type;
int ret;
char free_out;
int outform, outlen = 0;
ASN1_STRING *dest;
unsigned char *p;
int nchar;
size_t nchar = 0;
char strbuf[32];
int (*cpyfunc) (uint32_t, void *) = NULL;
if (len == -1)
len = strlen((const char *)in);
if (!mask)
mask = DIRSTRING_TYPE;
/* First do a string check and work out the number of characters */
int (*decode_func)(CBS *, uint32_t*);
int error;
switch (inform) {
case MBSTRING_BMP:
if (len & 1) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING_LENGTH);
return -1;
}
nchar = len >> 1;
decode_func = cbs_get_ucs2_be;
error = ASN1_R_INVALID_BMPSTRING;
break;
case MBSTRING_UNIV:
if (len & 3) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING_LENGTH);
return -1;
}
nchar = len >> 2;
decode_func = cbs_get_utf32_be;
error = ASN1_R_INVALID_UNIVERSALSTRING;
break;
case MBSTRING_UTF8:
nchar = 0;
/* This counts the characters and does utf8 syntax checking */
ret = traverse_string(in, len, MBSTRING_UTF8, in_utf8, &nchar);
if (ret < 0) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UTF8STRING);
return -1;
}
decode_func = cbs_get_utf8;
error = ASN1_R_INVALID_UTF8STRING;
break;
case MBSTRING_ASC:
nchar = len;
decode_func = cbs_get_latin1;
error = ERR_R_INTERNAL_ERROR; // Latin-1 inputs are never invalid.
break;
default:
@ -146,44 +129,92 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
return -1;
}
if ((minsize > 0) && (nchar < minsize)) {
/* Check |minsize| and |maxsize| and work out the minimal type, if any. */
CBS cbs;
CBS_init(&cbs, in, len);
size_t utf8_len = 0;
while (CBS_len(&cbs) != 0) {
uint32_t c;
if (!decode_func(&cbs, &c)) {
OPENSSL_PUT_ERROR(ASN1, error);
return -1;
}
if (nchar == 0 &&
(inform == MBSTRING_BMP || inform == MBSTRING_UNIV) &&
c == 0xfeff) {
/* Reject byte-order mark. We could drop it but that would mean
* adding ambiguity around whether a BOM was included or not when
* matching strings.
*
* For a little-endian UCS-2 string, the BOM will appear as 0xfffe
* and will be rejected as noncharacter, below. */
OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS);
return -1;
}
/* Update which output formats are still possible. */
if ((mask & B_ASN1_PRINTABLESTRING) && !is_printable(c)) {
mask &= ~B_ASN1_PRINTABLESTRING;
}
if ((mask & B_ASN1_IA5STRING) && (c > 127)) {
mask &= ~B_ASN1_IA5STRING;
}
if ((mask & B_ASN1_T61STRING) && (c > 0xff)) {
mask &= ~B_ASN1_T61STRING;
}
if ((mask & B_ASN1_BMPSTRING) && (c > 0xffff)) {
mask &= ~B_ASN1_BMPSTRING;
}
if (!mask) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS);
return -1;
}
nchar++;
utf8_len += cbb_get_utf8_len(c);
}
if (minsize > 0 && nchar < (size_t)minsize) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_STRING_TOO_SHORT);
BIO_snprintf(strbuf, sizeof strbuf, "%ld", minsize);
ERR_add_error_data(2, "minsize=", strbuf);
return -1;
}
if ((maxsize > 0) && (nchar > maxsize)) {
if (maxsize > 0 && nchar > (size_t)maxsize) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_STRING_TOO_LONG);
BIO_snprintf(strbuf, sizeof strbuf, "%ld", maxsize);
ERR_add_error_data(2, "maxsize=", strbuf);
return -1;
}
/* Now work out minimal type (if any) */
if (traverse_string(in, len, inform, type_str, &mask) < 0) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS);
return -1;
}
/* Now work out output format and string type */
outform = MBSTRING_ASC;
if (mask & B_ASN1_PRINTABLESTRING)
int (*encode_func)(CBB *, uint32_t) = cbb_add_latin1;
size_t size_estimate = nchar;
int outform = MBSTRING_ASC;
if (mask & B_ASN1_PRINTABLESTRING) {
str_type = V_ASN1_PRINTABLESTRING;
else if (mask & B_ASN1_IA5STRING)
} else if (mask & B_ASN1_IA5STRING) {
str_type = V_ASN1_IA5STRING;
else if (mask & B_ASN1_T61STRING)
} else if (mask & B_ASN1_T61STRING) {
str_type = V_ASN1_T61STRING;
else if (mask & B_ASN1_BMPSTRING) {
} else if (mask & B_ASN1_BMPSTRING) {
str_type = V_ASN1_BMPSTRING;
outform = MBSTRING_BMP;
encode_func = cbb_add_ucs2_be;
size_estimate = 2 * nchar;
} else if (mask & B_ASN1_UNIVERSALSTRING) {
str_type = V_ASN1_UNIVERSALSTRING;
encode_func = cbb_add_utf32_be;
size_estimate = 4 * nchar;
outform = MBSTRING_UNIV;
} else {
str_type = V_ASN1_UTF8STRING;
outform = MBSTRING_UTF8;
encode_func = cbb_add_utf8;
size_estimate = utf8_len;
}
if (!out)
return str_type;
if (*out) {
@ -204,6 +235,7 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
}
*out = dest;
}
/* If both the same type just copy across */
if (inform == outform) {
if (!ASN1_STRING_set(dest, in, len)) {
@ -213,179 +245,41 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
return str_type;
}
/* Work out how much space the destination will need */
switch (outform) {
case MBSTRING_ASC:
outlen = nchar;
cpyfunc = cpy_asc;
break;
case MBSTRING_BMP:
outlen = nchar << 1;
cpyfunc = cpy_bmp;
break;
case MBSTRING_UNIV:
outlen = nchar << 2;
cpyfunc = cpy_univ;
break;
case MBSTRING_UTF8:
outlen = 0;
traverse_string(in, len, inform, out_utf8, &outlen);
cpyfunc = cpy_utf8;
break;
}
if (!(p = OPENSSL_malloc(outlen + 1))) {
if (free_out)
ASN1_STRING_free(dest);
CBB cbb;
if (!CBB_init(&cbb, size_estimate + 1)) {
OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
return -1;
goto err;
}
dest->length = outlen;
dest->data = p;
p[outlen] = 0;
traverse_string(in, len, inform, cpyfunc, &p);
CBS_init(&cbs, in, len);
while (CBS_len(&cbs) != 0) {
uint32_t c;
if (!decode_func(&cbs, &c) ||
!encode_func(&cbb, c)) {
OPENSSL_PUT_ERROR(ASN1, ERR_R_INTERNAL_ERROR);
goto err;
}
}
uint8_t *data = NULL;
size_t data_len;
if (/* OpenSSL historically NUL-terminated this value with a single byte,
* even for |MBSTRING_BMP| and |MBSTRING_UNIV|. */
!CBB_add_u8(&cbb, 0) ||
!CBB_finish(&cbb, &data, &data_len) ||
data_len < 1 ||
data_len > INT_MAX) {
OPENSSL_PUT_ERROR(ASN1, ERR_R_INTERNAL_ERROR);
OPENSSL_free(data);
goto err;
}
dest->length = (int)(data_len - 1);
dest->data = data;
return str_type;
}
/*
* This function traverses a string and passes the value of each character to
* an optional function along with a void * argument.
*/
static int traverse_string(const unsigned char *p, int len, int inform,
int (*rfunc) (uint32_t value, void *in),
void *arg)
{
uint32_t value;
int ret;
while (len) {
if (inform == MBSTRING_ASC) {
value = *p++;
len--;
} else if (inform == MBSTRING_BMP) {
value = *p++ << 8;
value |= *p++;
len -= 2;
} else if (inform == MBSTRING_UNIV) {
value = ((uint32_t)*p++) << 24;
value |= ((uint32_t)*p++) << 16;
value |= *p++ << 8;
value |= *p++;
len -= 4;
} else {
ret = UTF8_getc(p, len, &value);
if (ret < 0)
return -1;
len -= ret;
p += ret;
}
if (rfunc) {
ret = rfunc(value, arg);
if (ret <= 0)
return ret;
}
}
return 1;
}
/* Various utility functions for traverse_string */
/* Just count number of characters */
static int in_utf8(uint32_t value, void *arg)
{
int *nchar;
nchar = arg;
(*nchar)++;
return 1;
}
/* Determine size of output as a UTF8 String */
static int out_utf8(uint32_t value, void *arg)
{
int *outlen;
outlen = arg;
*outlen += UTF8_putc(NULL, -1, value);
return 1;
}
/*
* Determine the "type" of a string: check each character against a supplied
* "mask".
*/
static int type_str(uint32_t value, void *arg)
{
unsigned long types;
types = *((unsigned long *)arg);
if ((types & B_ASN1_PRINTABLESTRING) && !is_printable(value))
types &= ~B_ASN1_PRINTABLESTRING;
if ((types & B_ASN1_IA5STRING) && (value > 127))
types &= ~B_ASN1_IA5STRING;
if ((types & B_ASN1_T61STRING) && (value > 0xff))
types &= ~B_ASN1_T61STRING;
if ((types & B_ASN1_BMPSTRING) && (value > 0xffff))
types &= ~B_ASN1_BMPSTRING;
if (!types)
return -1;
*((unsigned long *)arg) = types;
return 1;
}
/* Copy one byte per character ASCII like strings */
static int cpy_asc(uint32_t value, void *arg)
{
unsigned char **p, *q;
p = arg;
q = *p;
*q = (unsigned char)value;
(*p)++;
return 1;
}
/* Copy two byte per character BMPStrings */
static int cpy_bmp(uint32_t value, void *arg)
{
unsigned char **p, *q;
p = arg;
q = *p;
*q++ = (unsigned char)((value >> 8) & 0xff);
*q = (unsigned char)(value & 0xff);
*p += 2;
return 1;
}
/* Copy four byte per character UniversalStrings */
static int cpy_univ(uint32_t value, void *arg)
{
unsigned char **p, *q;
p = arg;
q = *p;
*q++ = (unsigned char)((value >> 24) & 0xff);
*q++ = (unsigned char)((value >> 16) & 0xff);
*q++ = (unsigned char)((value >> 8) & 0xff);
*q = (unsigned char)(value & 0xff);
*p += 4;
return 1;
}
/* Copy to a UTF8String */
static int cpy_utf8(uint32_t value, void *arg)
{
unsigned char **p;
int ret;
p = arg;
/* We already know there is enough room so pass 0xff as the length */
ret = UTF8_putc(*p, 0xff, value);
*p += ret;
return 1;
err:
if (free_out)
ASN1_STRING_free(dest);
CBB_cleanup(&cbb);
return -1;
}
/* Return 1 if the character is permitted in a PrintableString */

View File

@ -40,14 +40,14 @@ ASN1,138,ILLEGAL_TIME_VALUE
ASN1,139,INTEGER_NOT_ASCII_FORMAT
ASN1,140,INTEGER_TOO_LARGE_FOR_LONG
ASN1,141,INVALID_BIT_STRING_BITS_LEFT
ASN1,142,INVALID_BMPSTRING_LENGTH
ASN1,142,INVALID_BMPSTRING
ASN1,143,INVALID_DIGIT
ASN1,144,INVALID_MODIFIER
ASN1,145,INVALID_NUMBER
ASN1,146,INVALID_OBJECT_ENCODING
ASN1,147,INVALID_SEPARATOR
ASN1,148,INVALID_TIME_FORMAT
ASN1,149,INVALID_UNIVERSALSTRING_LENGTH
ASN1,149,INVALID_UNIVERSALSTRING
ASN1,150,INVALID_UTF8STRING
ASN1,151,LIST_ERROR
ASN1,152,MISSING_ASN1_EOS

View File

@ -30,6 +30,10 @@ void hexdump(FILE *fp, const char *msg, const void *in, size_t len) {
}
std::ostream &operator<<(std::ostream &os, const Bytes &in) {
if (in.len == 0) {
return os << "<empty Bytes>";
}
// Print a byte slice as hex.
static const char hex[] = "0123456789abcdef";
for (size_t i = 0; i < in.len; i++) {

View File

@ -189,13 +189,13 @@ static int do_buf(unsigned char *buf, int buflen,
switch (charwidth) {
case 4:
if (buflen & 3) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING_LENGTH);
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING);
return -1;
}
break;
case 2:
if (buflen & 1) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING_LENGTH);
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING);
return -1;
}
break;

View File

@ -20,8 +20,8 @@
#include <openssl/bio.h>
#include <openssl/bytestring.h>
#include <openssl/curve25519.h>
#include <openssl/crypto.h>
#include <openssl/curve25519.h>
#include <openssl/digest.h>
#include <openssl/err.h>
#include <openssl/pem.h>
@ -30,6 +30,7 @@
#include <openssl/x509v3.h>
#include "../internal.h"
#include "../test/test_util.h"
std::string GetTestData(const char *path);
@ -1283,3 +1284,62 @@ TEST(X509Test, X509NameSet) {
EXPECT_EQ(X509_NAME_ENTRY_set(X509_NAME_get_entry(name.get(), 1)), 1);
EXPECT_EQ(X509_NAME_ENTRY_set(X509_NAME_get_entry(name.get(), 2)), 2);
}
TEST(X509Test, StringDecoding) {
static const struct {
std::vector<uint8_t> in;
int type;
const char *expected;
} kTests[] = {
// Non-minimal, two-byte UTF-8.
{{0xc0, 0x81}, V_ASN1_UTF8STRING, nullptr},
// Non-minimal, three-byte UTF-8.
{{0xe0, 0x80, 0x81}, V_ASN1_UTF8STRING, nullptr},
// Non-minimal, four-byte UTF-8.
{{0xf0, 0x80, 0x80, 0x81}, V_ASN1_UTF8STRING, nullptr},
// Truncated, four-byte UTF-8.
{{0xf0, 0x80, 0x80}, V_ASN1_UTF8STRING, nullptr},
// Low-surrogate value.
{{0xed, 0xa0, 0x80}, V_ASN1_UTF8STRING, nullptr},
// High-surrogate value.
{{0xed, 0xb0, 0x81}, V_ASN1_UTF8STRING, nullptr},
// Initial BOMs should be rejected from UCS-2 and UCS-4.
{{0xfe, 0xff, 0, 88}, V_ASN1_BMPSTRING, nullptr},
{{0, 0, 0xfe, 0xff, 0, 0, 0, 88}, V_ASN1_UNIVERSALSTRING, nullptr},
// Otherwise, BOMs should pass through.
{{0, 88, 0xfe, 0xff}, V_ASN1_BMPSTRING, "X\xef\xbb\xbf"},
{{0, 0, 0, 88, 0, 0, 0xfe, 0xff}, V_ASN1_UNIVERSALSTRING,
"X\xef\xbb\xbf"},
// The maximum code-point should pass though.
{{0, 16, 0xff, 0xfd}, V_ASN1_UNIVERSALSTRING, "\xf4\x8f\xbf\xbd"},
// Values outside the Unicode space should not.
{{0, 17, 0, 0}, V_ASN1_UNIVERSALSTRING, nullptr},
// Non-characters should be rejected.
{{0, 1, 0xff, 0xff}, V_ASN1_UNIVERSALSTRING, nullptr},
{{0, 1, 0xff, 0xfe}, V_ASN1_UNIVERSALSTRING, nullptr},
{{0, 0, 0xfd, 0xd5}, V_ASN1_UNIVERSALSTRING, nullptr},
// BMPString is UCS-2, not UTF-16, so surrogate pairs are invalid.
{{0xd8, 0, 0xdc, 1}, V_ASN1_BMPSTRING, nullptr},
};
for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kTests); i++) {
SCOPED_TRACE(i);
const auto& test = kTests[i];
ASN1_STRING s;
s.type = test.type;
s.data = const_cast<uint8_t*>(test.in.data());
s.length = test.in.size();
uint8_t *utf8;
const int utf8_len = ASN1_STRING_to_UTF8(&utf8, &s);
EXPECT_EQ(utf8_len < 0, test.expected == nullptr);
if (utf8_len >= 0) {
if (test.expected != nullptr) {
EXPECT_EQ(Bytes(test.expected), Bytes(utf8, utf8_len));
}
OPENSSL_free(utf8);
} else {
ERR_clear_error();
}
}
}

View File

@ -152,6 +152,9 @@ extern "C" {
/* For use with ASN1_mbstring_copy() */
#define MBSTRING_FLAG 0x1000
#define MBSTRING_UTF8 (MBSTRING_FLAG)
/* |MBSTRING_ASC| refers to Latin-1, not ASCII. It is used with TeletexString
* which, in turn, is treated as Latin-1 rather than T.61 by OpenSSL and most
* other software. */
#define MBSTRING_ASC (MBSTRING_FLAG|1)
#define MBSTRING_BMP (MBSTRING_FLAG|2)
#define MBSTRING_UNIV (MBSTRING_FLAG|4)
@ -926,14 +929,14 @@ BORINGSSL_MAKE_DELETER(ASN1_TYPE, ASN1_TYPE_free)
#define ASN1_R_INTEGER_NOT_ASCII_FORMAT 139
#define ASN1_R_INTEGER_TOO_LARGE_FOR_LONG 140
#define ASN1_R_INVALID_BIT_STRING_BITS_LEFT 141
#define ASN1_R_INVALID_BMPSTRING_LENGTH 142
#define ASN1_R_INVALID_BMPSTRING 142
#define ASN1_R_INVALID_DIGIT 143
#define ASN1_R_INVALID_MODIFIER 144
#define ASN1_R_INVALID_NUMBER 145
#define ASN1_R_INVALID_OBJECT_ENCODING 146
#define ASN1_R_INVALID_SEPARATOR 147
#define ASN1_R_INVALID_TIME_FORMAT 148
#define ASN1_R_INVALID_UNIVERSALSTRING_LENGTH 149
#define ASN1_R_INVALID_UNIVERSALSTRING 149
#define ASN1_R_INVALID_UTF8STRING 150
#define ASN1_R_LIST_ERROR 151
#define ASN1_R_MISSING_ASN1_EOS 152