mirror of
https://github.com/zint/zint
synced 2024-11-16 20:57:25 +13:00
get_best_eci: check UTF-8 before returning 26; move is_valid_utf8 to common
This commit is contained in:
parent
7f74414666
commit
2d962c6321
@ -335,6 +335,20 @@ INTERNAL unsigned int decode_utf8(unsigned int *state, unsigned int *codep, cons
|
|||||||
return *state;
|
return *state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Is string valid UTF-8? */
|
||||||
|
INTERNAL int is_valid_utf8(const unsigned char source[], const int length) {
|
||||||
|
int i;
|
||||||
|
unsigned int codepoint, state = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < length; i++) {
|
||||||
|
if (decode_utf8(&state, &codepoint, source[i]) == 12) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return state == 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Convert UTF-8 to Unicode. If `disallow_4byte` unset, allow all values (UTF-32). If `disallow_4byte` set,
|
/* Convert UTF-8 to Unicode. If `disallow_4byte` unset, allow all values (UTF-32). If `disallow_4byte` set,
|
||||||
* only allow codepoints <= U+FFFF (ie four-byte sequences not allowed) (UTF-16, no surrogates) */
|
* only allow codepoints <= U+FFFF (ie four-byte sequences not allowed) (UTF-16, no surrogates) */
|
||||||
INTERNAL int utf8_to_unicode(struct zint_symbol *symbol, const unsigned char source[], unsigned int vals[],
|
INTERNAL int utf8_to_unicode(struct zint_symbol *symbol, const unsigned char source[], unsigned int vals[],
|
||||||
|
@ -112,6 +112,7 @@ extern "C" {
|
|||||||
INTERNAL int is_composite(const int symbology);
|
INTERNAL int is_composite(const int symbology);
|
||||||
INTERNAL int istwodigits(const unsigned char source[], const int length, const int position);
|
INTERNAL int istwodigits(const unsigned char source[], const int length, const int position);
|
||||||
INTERNAL unsigned int decode_utf8(unsigned int *state, unsigned int *codep, const unsigned char byte);
|
INTERNAL unsigned int decode_utf8(unsigned int *state, unsigned int *codep, const unsigned char byte);
|
||||||
|
INTERNAL int is_valid_utf8(const unsigned char source[], const int length);
|
||||||
INTERNAL int utf8_to_unicode(struct zint_symbol *symbol, const unsigned char source[], unsigned int vals[],
|
INTERNAL int utf8_to_unicode(struct zint_symbol *symbol, const unsigned char source[], unsigned int vals[],
|
||||||
int *length, const int disallow_4byte);
|
int *length, const int disallow_4byte);
|
||||||
INTERNAL void set_minimum_height(struct zint_symbol *symbol, const int min_height);
|
INTERNAL void set_minimum_height(struct zint_symbol *symbol, const int min_height);
|
||||||
|
@ -271,5 +271,9 @@ INTERNAL int get_best_eci(const unsigned char source[], int length) {
|
|||||||
eci++;
|
eci++;
|
||||||
} while (eci < 25);
|
} while (eci < 25);
|
||||||
|
|
||||||
|
if (!is_valid_utf8(source, length)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
return 26; // If all of these fail, use Unicode!
|
return 26; // If all of these fail, use Unicode!
|
||||||
}
|
}
|
||||||
|
@ -1072,20 +1072,6 @@ static int escape_char_process(struct zint_symbol *symbol, unsigned char *input_
|
|||||||
return error_number;
|
return error_number;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Is string valid UTF-8? */
|
|
||||||
STATIC_UNLESS_ZINT_TEST int is_valid_utf8(const unsigned char source[], const int length) {
|
|
||||||
int i;
|
|
||||||
unsigned int codepoint, state = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < length; i++) {
|
|
||||||
if (decode_utf8(&state, &codepoint, source[i]) == 12) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return state == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int ZBarcode_Encode(struct zint_symbol *symbol, const unsigned char *source, int in_length) {
|
int ZBarcode_Encode(struct zint_symbol *symbol, const unsigned char *source, int in_length) {
|
||||||
int error_number, warn_number;
|
int error_number, warn_number;
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
@ -1327,9 +1313,8 @@ int ZBarcode_Encode(struct zint_symbol *symbol, const unsigned char *source, int
|
|||||||
&& (symbol->input_mode & 0x07) == UNICODE_MODE) {
|
&& (symbol->input_mode & 0x07) == UNICODE_MODE) {
|
||||||
/* Try another ECI mode */
|
/* Try another ECI mode */
|
||||||
symbol->eci = get_best_eci(local_source, in_length);
|
symbol->eci = get_best_eci(local_source, in_length);
|
||||||
|
if (symbol->eci != 0) {
|
||||||
error_number = extended_or_reduced_charset(symbol, local_source, in_length);
|
error_number = extended_or_reduced_charset(symbol, local_source, in_length);
|
||||||
|
|
||||||
if (error_number == 0) {
|
if (error_number == 0) {
|
||||||
error_number = ZINT_WARN_USES_ECI;
|
error_number = ZINT_WARN_USES_ECI;
|
||||||
if (!(symbol->debug & ZINT_DEBUG_TEST)) {
|
if (!(symbol->debug & ZINT_DEBUG_TEST)) {
|
||||||
@ -1338,6 +1323,7 @@ int ZBarcode_Encode(struct zint_symbol *symbol, const unsigned char *source, int
|
|||||||
if (symbol->debug & ZINT_DEBUG_PRINT) printf("Added ECI %d\n", symbol->eci);
|
if (symbol->debug & ZINT_DEBUG_PRINT) printf("Added ECI %d\n", symbol->eci);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (error_number == 0) {
|
if (error_number == 0) {
|
||||||
error_number = warn_number;
|
error_number = warn_number;
|
||||||
|
@ -79,6 +79,45 @@ static void test_utf8_to_unicode(int index, int debug) {
|
|||||||
testFinish();
|
testFinish();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Exercise `is_valid_utf8()` against a table of byte strings, each paired with its expected return value.
   `index` selects a single table row to run, or -1 to run them all */
static void test_is_valid_utf8(int index) {

    testStart("");

    int ret;
    struct item {
        const char *data; /* Input bytes (string literal, so pointer-to-const) */
        int length; /* Byte count, or -1 to use `strlen(data)` */
        int ret; /* Expected `is_valid_utf8()` return value */
        const char *comment; /* Description of the case; not used by the test itself */
    };
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
        /*  0*/ { "", -1, 1, "" },
        /*  1*/ { "abcdefghijklmnopqrstuvwxyz", -1, 1, "" },
        /*  2*/ { "éa", -1, 1, "" },
        /*  3*/ { "a\000b", 3, 1, "Embedded nul" },
        /*  4*/ { "\357\273\277a", -1, 1, "Bom" },

        /*  5*/ { "a\xC2", -1, 0, "Missing 2nd byte" },
        /*  6*/ { "a\200b", -1, 0, "Orphan continuation 0x80" },
        /*  7*/ { "\300\201", -1, 0, "Overlong 0xC081" },
        /*  8*/ { "\355\240\200", -1, 0, "Surrogate 0xEDA080" },
    };
    int data_size = ARRAY_SIZE(data);

    for (int i = 0; i < data_size; i++) {

        if (index != -1 && i != index) continue;

        /* Explicit lengths allow embedded NULs, which `strlen()` would stop at */
        int length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length;

        ret = is_valid_utf8((const unsigned char *) data[i].data, length);
        assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret);
    }

    testFinish();
}
|
||||||
|
|
||||||
static void test_debug_test_codeword_dump_int(int index, int debug) {
|
static void test_debug_test_codeword_dump_int(int index, int debug) {
|
||||||
|
|
||||||
testStart("");
|
testStart("");
|
||||||
@ -115,6 +154,7 @@ int main(int argc, char *argv[]) {
|
|||||||
testFunction funcs[] = { /* name, func, has_index, has_generate, has_debug */
|
testFunction funcs[] = { /* name, func, has_index, has_generate, has_debug */
|
||||||
{ "test_utf8_to_unicode", test_utf8_to_unicode, 1, 0, 1 },
|
{ "test_utf8_to_unicode", test_utf8_to_unicode, 1, 0, 1 },
|
||||||
{ "test_debug_test_codeword_dump_int", test_debug_test_codeword_dump_int, 1, 0, 1 },
|
{ "test_debug_test_codeword_dump_int", test_debug_test_codeword_dump_int, 1, 0, 1 },
|
||||||
|
{ "test_is_valid_utf8", test_is_valid_utf8, 1, 0, 0 },
|
||||||
};
|
};
|
||||||
|
|
||||||
testRun(argc, argv, funcs, ARRAY_SIZE(funcs));
|
testRun(argc, argv, funcs, ARRAY_SIZE(funcs));
|
||||||
|
@ -791,6 +791,42 @@ static void test_utf8_to_eci_ucs2be(void) {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Exercise `get_best_eci()` against a table of byte strings, each paired with the return value expected for it.
   `index` selects a single table row to run, or -1 to run them all */
static void test_get_best_eci(int index) {

    testStart("");

    struct item {
        const char *data; /* Input bytes */
        int length; /* Byte count, or -1 to use `strlen(data)` */
        int ret; /* Expected `get_best_eci()` return value */
    };
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
        /*  0*/ { "\300\301", -1, 0 },
        /*  1*/ { "ÀÁ", -1, 3 },
        /*  2*/ { "Ђ", -1, 7 },
        /*  3*/ { "Ѐ", -1, 26 }, // Cyrillic U+0400 not in single-byte code pages
        /*  4*/ { "β", -1, 9 },
        /*  5*/ { "˜", -1, 23 },
        /*  6*/ { "βЂ", -1, 26 },
        /*  7*/ { "AB\200", -1, 0 },
    };
    const int data_size = ARRAY_SIZE(data);
    int i;

    for (i = 0; i < data_size; i++) {
        const struct item *tc = &data[i];
        int length;
        int ret;

        if (index != -1 && i != index) continue;

        if (tc->length == -1) {
            length = (int) strlen(tc->data);
        } else {
            length = tc->length;
        }

        ret = get_best_eci((const unsigned char *) tc->data, length);
        assert_equal(ret, tc->ret, "i:%d get_best_eci ret %d != %d\n", i, ret, tc->ret);
    }

    testFinish();
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
testFunction funcs[] = { /* name, func, has_index, has_generate, has_debug */
|
testFunction funcs[] = { /* name, func, has_index, has_generate, has_debug */
|
||||||
@ -800,6 +836,7 @@ int main(int argc, char *argv[]) {
|
|||||||
{ "test_utf8_to_eci_sb", test_utf8_to_eci_sb, 1, 0, 0 },
|
{ "test_utf8_to_eci_sb", test_utf8_to_eci_sb, 1, 0, 0 },
|
||||||
{ "test_utf8_to_eci_ascii", test_utf8_to_eci_ascii, 0, 0, 0 },
|
{ "test_utf8_to_eci_ascii", test_utf8_to_eci_ascii, 0, 0, 0 },
|
||||||
{ "test_utf8_to_eci_ucs2be", test_utf8_to_eci_ucs2be, 0, 0, 0 },
|
{ "test_utf8_to_eci_ucs2be", test_utf8_to_eci_ucs2be, 0, 0, 0 },
|
||||||
|
{ "test_get_best_eci", test_get_best_eci, 1, 0, 0 },
|
||||||
};
|
};
|
||||||
|
|
||||||
testRun(argc, argv, funcs, ARRAY_SIZE(funcs));
|
testRun(argc, argv, funcs, ARRAY_SIZE(funcs));
|
||||||
|
@ -589,47 +589,6 @@ static void test_strip_bom(void) {
|
|||||||
testFinish();
|
testFinish();
|
||||||
}
|
}
|
||||||
|
|
||||||
STATIC_UNLESS_ZINT_TEST int is_valid_utf8(const unsigned char source[], const int length);
|
|
||||||
|
|
||||||
/* Exercise `is_valid_utf8()` against a table of byte strings, each paired with its expected return value.
   `index` selects a single table row to run, or -1 to run them all */
static void test_is_valid_utf8(int index) {

    testStart("");

    int ret;
    struct item {
        const char *data; /* Input bytes (string literal, so pointer-to-const) */
        int length; /* Byte count, or -1 to use `strlen(data)` */
        int ret; /* Expected `is_valid_utf8()` return value */
        const char *comment; /* Description of the case; not used by the test itself */
    };
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
        /*  0*/ { "", -1, 1, "" },
        /*  1*/ { "abcdefghijklmnopqrstuvwxyz", -1, 1, "" },
        /*  2*/ { "éa", -1, 1, "" },
        /*  3*/ { "a\000b", 3, 1, "Embedded nul" },
        /*  4*/ { "\357\273\277a", -1, 1, "Bom" },

        /*  5*/ { "a\xC2", -1, 0, "Missing 2nd byte" },
        /*  6*/ { "a\200b", -1, 0, "Orphan continuation 0x80" },
        /*  7*/ { "\300\201", -1, 0, "Overlong 0xC081" },
        /*  8*/ { "\355\240\200", -1, 0, "Surrogate 0xEDA080" },
    };
    int data_size = ARRAY_SIZE(data);

    for (int i = 0; i < data_size; i++) {

        if (index != -1 && i != index) continue;

        /* Explicit lengths allow embedded NULs, which `strlen()` would stop at */
        int length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length;

        ret = is_valid_utf8((const unsigned char *) data[i].data, length);
        assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret);
    }

    testFinish();
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
testFunction funcs[] = { /* name, func, has_index, has_generate, has_debug */
|
testFunction funcs[] = { /* name, func, has_index, has_generate, has_debug */
|
||||||
@ -643,7 +602,6 @@ int main(int argc, char *argv[]) {
|
|||||||
{ "test_valid_id", test_valid_id, 0, 0, 0 },
|
{ "test_valid_id", test_valid_id, 0, 0, 0 },
|
||||||
{ "test_error_tag", test_error_tag, 1, 0, 0 },
|
{ "test_error_tag", test_error_tag, 1, 0, 0 },
|
||||||
{ "test_strip_bom", test_strip_bom, 0, 0, 0 },
|
{ "test_strip_bom", test_strip_bom, 0, 0, 0 },
|
||||||
{ "test_is_valid_utf8", test_is_valid_utf8, 1, 0, 0 },
|
|
||||||
};
|
};
|
||||||
|
|
||||||
testRun(argc, argv, funcs, ARRAY_SIZE(funcs));
|
testRun(argc, argv, funcs, ARRAY_SIZE(funcs));
|
||||||
|
Loading…
Reference in New Issue
Block a user