|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- /* Copyright (c) 2018, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
- #include <openssl/bytestring.h>
-
- #include "internal.h"
-
-
// is_valid_code_point returns one if |v| lies inside the Unicode code space
// and is neither a surrogate nor a permanently reserved value, and zero
// otherwise. Section references are to Unicode 9.0.0.
static int is_valid_code_point(uint32_t v) {
  // The Unicode space runs from zero to 0x10ffff (3.4 D9).
  if (v > 0x10ffff) {
    return 0;
  }

  // Surrogate code points are invalid (3.2 C1).
  if (v >= 0xd800 && v <= 0xdfff) {
    return 0;
  }

  // 0xfdd0-0xfdef are permanently reserved (3.4 D14).
  if (v >= 0xfdd0 && v <= 0xfdef) {
    return 0;
  }

  // Values of the form 0x...fffe and 0x...ffff are permanently reserved as
  // well (3.4 D14). Masking with 0xfffe folds both endings into one compare.
  if ((v & 0xfffe) == 0xfffe) {
    return 0;
  }

  return 1;
}
-
// BOTTOM_BITS returns a byte with the bottom |n| bits set. The result is only
// meaningful for |n| from 0 to 8 inclusive, because the value is truncated to
// a |uint8_t|. The whole expansion is parenthesized so that the macro behaves
// as a single operand regardless of the operators that surround it.
#define BOTTOM_BITS(n) ((uint8_t)((1u << (n)) - 1))

// TOP_BITS returns a byte with the top |n| bits set. It is computed as the
// complement of the bottom |8 - n| bits, so |n| must be from 0 to 8 inclusive.
#define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n)))
-
- int cbs_get_utf8(CBS *cbs, uint32_t *out) {
- uint8_t c;
- if (!CBS_get_u8(cbs, &c)) {
- return 0;
- }
- if (c <= 0x7f) {
- *out = c;
- return 1;
- }
- uint32_t v, lower_bound;
- size_t len;
- if ((c & TOP_BITS(3)) == TOP_BITS(2)) {
- v = c & BOTTOM_BITS(5);
- len = 1;
- lower_bound = 0x80;
- } else if ((c & TOP_BITS(4)) == TOP_BITS(3)) {
- v = c & BOTTOM_BITS(4);
- len = 2;
- lower_bound = 0x800;
- } else if ((c & TOP_BITS(5)) == TOP_BITS(4)) {
- v = c & BOTTOM_BITS(3);
- len = 3;
- lower_bound = 0x10000;
- } else {
- return 0;
- }
- for (size_t i = 0; i < len; i++) {
- if (!CBS_get_u8(cbs, &c) ||
- (c & TOP_BITS(2)) != TOP_BITS(1)) {
- return 0;
- }
- v <<= 6;
- v |= c & BOTTOM_BITS(6);
- }
- if (!is_valid_code_point(v) ||
- v < lower_bound) {
- return 0;
- }
- *out = v;
- return 1;
- }
-
- int cbs_get_latin1(CBS *cbs, uint32_t *out) {
- uint8_t c;
- if (!CBS_get_u8(cbs, &c)) {
- return 0;
- }
- *out = c;
- return 1;
- }
-
- int cbs_get_ucs2_be(CBS *cbs, uint32_t *out) {
- // Note UCS-2 (used by BMPString) does not support surrogates.
- uint16_t c;
- if (!CBS_get_u16(cbs, &c) ||
- !is_valid_code_point(c)) {
- return 0;
- }
- *out = c;
- return 1;
- }
-
- int cbs_get_utf32_be(CBS *cbs, uint32_t *out) {
- return CBS_get_u32(cbs, out) && is_valid_code_point(*out);
- }
-
// cbb_get_utf8_len returns the number of bytes, from one to four, that the
// UTF-8 encoding of |u| occupies. It does not check that |u| is a valid code
// point; any value above 0xffff is reported as four bytes.
size_t cbb_get_utf8_len(uint32_t u) {
  // Start at one byte and add one for each encoding-length threshold that |u|
  // exceeds.
  size_t len = 1;
  if (u > 0x7f) {
    len++;
  }
  if (u > 0x7ff) {
    len++;
  }
  if (u > 0xffff) {
    len++;
  }
  return len;
}
-
- int cbb_add_utf8(CBB *cbb, uint32_t u) {
- if (!is_valid_code_point(u)) {
- return 0;
- }
- if (u <= 0x7f) {
- return CBB_add_u8(cbb, (uint8_t)u);
- }
- if (u <= 0x7ff) {
- return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) &&
- CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
- }
- if (u <= 0xffff) {
- return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) &&
- CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
- CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
- }
- if (u <= 0x10ffff) {
- return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) &&
- CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) &&
- CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
- CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
- }
- return 0;
- }
-
- int cbb_add_latin1(CBB *cbb, uint32_t u) {
- return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u);
- }
-
- int cbb_add_ucs2_be(CBB *cbb, uint32_t u) {
- return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u);
- }
-
- int cbb_add_utf32_be(CBB *cbb, uint32_t u) {
- return is_valid_code_point(u) && CBB_add_u32(cbb, u);
- }
|