libcbor 0.12.0
libcbor is a C library for parsing and generating CBOR, the general-purpose schema-less binary data format.
Loading...
Searching...
No Matches
unicode.c
Go to the documentation of this file.
1/*
2 * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
3 *
4 * libcbor is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
6 */
7
8#include "unicode.h"
9#include <stdint.h>
10
11#define UTF8_ACCEPT 0
12#define UTF8_REJECT 1
13
/*
 * Lookup table driving the UTF-8 validating decoder.
 *
 * Entries 0..255 map an input byte to a character class (0..11). Entries
 * from 256 onward form the transition table: sixteen entries per state,
 * indexed by character class, each holding the next state. State 0 is
 * UTF8_ACCEPT and state 1 is UTF8_REJECT.
 */
static const uint8_t utf8d[] = {
    /* Byte -> character class */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..0f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10..1f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..2f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 30..3f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..4f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50..5f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..6f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70..7f */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80..8f */
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 90..9f */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..af */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* b0..bf */
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..cf */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* d0..df */
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, /* e0..ef */
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, /* f0..ff */

    /* (state, character class) -> next state */
    0, 1, 2, 3, 5, 8, 7, 1, 1, 1, 4, 6, 1, 1, 1, 1, /* s0 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s1 */
    1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s2 */
    1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, /* s3 */
    1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s4 */
    1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, /* s5 */
    1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s6 */
    1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s7 */
    1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */
};
55
56/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann
57 * <bjoern@hoehrmann.de> */
58/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
59uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
60 uint32_t type = utf8d[byte];
61
62 *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
63 : (0xff >> type) & (byte);
64
65 *state = utf8d[256 + *state * 16 + type];
66 return *state;
67}
68
69size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length,
70 struct _cbor_unicode_status* status) {
71 *status =
72 (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK};
73 uint32_t codepoint, state = UTF8_ACCEPT, res;
74 size_t pos = 0, count = 0;
75
76 for (; pos < source_length; pos++) {
77 res = _cbor_unicode_decode(&state, &codepoint, source[pos]);
78
79 if (res == UTF8_ACCEPT) {
80 count++;
81 } else if (res == UTF8_REJECT) {
82 goto error;
83 }
84 }
85
86 /* Unfinished multibyte codepoint */
87 if (state != UTF8_ACCEPT) goto error;
88
89 return count;
90
91error:
92 *status = (struct _cbor_unicode_status){.location = pos,
93 .status = _CBOR_UNICODE_BADCP};
94 return 0;
95}
const unsigned char * cbor_data
Definition data.h:20
Signals unicode validation error and possibly its location.
Definition unicode.h:20
#define UTF8_REJECT
Definition unicode.c:12
#define UTF8_ACCEPT
Definition unicode.c:11
size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length, struct _cbor_unicode_status *status)
Definition unicode.c:69
uint32_t _cbor_unicode_decode(uint32_t *state, uint32_t *codep, uint32_t byte)
Definition unicode.c:59
@ _CBOR_UNICODE_BADCP
Definition unicode.h:17
@ _CBOR_UNICODE_OK
Definition unicode.h:17