ECI conversion for GRIDMATRIX; GB 2312 LIBICONV port

This commit is contained in:
gitlost 2019-11-28 19:15:29 +00:00
parent fed7378675
commit 8be13283db
17 changed files with 17010 additions and 7527 deletions

View File

@ -4,7 +4,7 @@ project(zint)
find_package(PNG)
set(zint_COMMON_SRCS common.c library.c large.c reedsol.c gs1.c eci.c general_field.c sjis.c)
set(zint_COMMON_SRCS common.c library.c large.c reedsol.c gs1.c eci.c general_field.c sjis.c gb2312.c)
set(zint_ONEDIM_SRCS code.c code128.c 2of5.c upcean.c telepen.c medical.c plessey.c rss.c)
set(zint_POSTAL_SRCS postal.c auspost.c imail.c mailmark.c)
set(zint_TWODIM_SRCS code16k.c codablock.c dmatrix.c pdf417.c qr.c maxicode.c composite.c aztec.c code49.c code1.c gridmtx.c hanxin.c dotcode.c ultra.c)

1627
backend/gb2312.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,7 @@
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
/* vim: set ts=4 sw=4 et : */
/* This file implements Grid Matrix as specified in
AIM Global Document Number AIMD014 Rev. 1.63 Revised 9 Dec 2008 */
@ -43,7 +44,7 @@
#include "gridmtx.h"
#include "gb2312.h"
int number_lat(int gbdata[], const size_t length, const size_t position) {
int number_lat(unsigned int gbdata[], const size_t length, const size_t position) {
/* Attempt to calculate the 'cost' of using numeric mode from a given position in number of bits */
/* Also ensures that numeric mode is not selected when it cannot be used: for example in
a string which has "2.2.0" (cannot have more than one non-numeric character for each
@ -118,7 +119,7 @@ int number_lat(int gbdata[], const size_t length, const size_t position) {
return tally;
}
static int seek_forward(int gbdata[], const size_t length, const size_t position, int current_mode) {
static int seek_forward(unsigned int gbdata[], const size_t length, const size_t position, int current_mode, int debug) {
/* In complete contrast to the method recommended in Annex D of the ANSI standard this
code uses a look-ahead test in the same manner as Data Matrix. This decision was made
because the "official" algorithm does not provide clear methods for dealing with all
@ -126,9 +127,8 @@ static int seek_forward(int gbdata[], const size_t length, const size_t position
int number_count, byte_count, mixed_count, upper_count, lower_count, chinese_count;
int best_mode;
size_t sp;
size_t sp;
int best_count, last = -1;
int debug = 0;
if (gbdata[position] > 0xff) {
return GM_CHINESE;
@ -277,7 +277,7 @@ static int seek_forward(int gbdata[], const size_t length, const size_t position
/* Numeric mode is more complex */
number_count += number_lat(gbdata, length, position);
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("C %d / B %d / M %d / U %d / L %d / N %d\n", chinese_count, byte_count, mixed_count, upper_count, lower_count, number_count);
}
@ -316,7 +316,7 @@ static int seek_forward(int gbdata[], const size_t length, const size_t position
static void add_byte_count(char binary[], const size_t byte_count_posn, const int byte_count) {
int p;
for (p = 0; p < 8; p++) {
for (p = 0; p < 9; p++) {
if (byte_count & (0x100 >> p)) {
binary[byte_count_posn + p] = '0';
} else {
@ -326,24 +326,25 @@ static void add_byte_count(char binary[], const size_t byte_count_posn, const in
}
/* Add a control character to the data stream */
void add_shift_char(char binary[], int shifty) {
int i, debug = 0;
void add_shift_char(char binary[], int shifty, int debug) {
int i;
int glyph = 0;
for (i = 0; i < 64; i++) {
if (shift_set[i] == shifty) {
glyph = i;
break;
}
}
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("SHIFT [%d] ", glyph);
}
bin_append(glyph, 6, binary);
}
static int gm_encode(int gbdata[], const size_t length, char binary[],const int reader,const int eci, int debug) {
static int gm_encode(unsigned int gbdata[], const size_t length, char binary[], const int reader, const int eci, int debug) {
/* Create a binary stream representation of the input data.
7 sets are defined - Chinese characters, Numerals, Lower case letters, Upper case letters,
Mixed numerals and letters, Control characters and 8-bit binary data */
@ -383,7 +384,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
}
do {
int next_mode = seek_forward(gbdata, length, sp, current_mode);
int next_mode = seek_forward(gbdata, length, sp, current_mode, debug);
if (next_mode != current_mode) {
switch (current_mode) {
@ -491,7 +492,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
}
break;
}
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
switch (next_mode) {
case GM_CHINESE: printf("CHIN ");
break;
@ -519,13 +520,12 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
c1 = (gbdata[sp] & 0xff00) >> 8;
c2 = gbdata[sp] & 0xff;
if ((c1 >= 0xa0) && (c1 <= 0xa9)) {
if ((c1 >= 0xa1) && (c1 <= 0xa9)) {
glyph = (0x60 * (c1 - 0xa1)) + (c2 - 0xa0);
}
if ((c1 >= 0xb0) && (c1 <= 0xf7)) {
} else if ((c1 >= 0xb0) && (c1 <= 0xf7)) {
glyph = (0x60 * (c1 - 0xb0 + 9)) + (c2 - 0xa0);
}
done = 1;
done = 1; /* GB 2312 always within above ranges */
}
if (!(done)) {
if (sp != (length - 1)) {
@ -533,8 +533,8 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
/* End of Line */
glyph = 7776;
sp++;
done = 1;
}
done = 1;
}
}
if (!(done)) {
@ -544,6 +544,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
/* Two digits */
glyph = 8033 + (10 * (gbdata[sp] - '0')) + (gbdata[sp + 1] - '0');
sp++;
done = 1;
}
}
}
@ -552,7 +553,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
glyph = 7777 + gbdata[sp];
}
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("[%d] ", glyph);
}
@ -620,7 +621,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
glyph += ppos;
glyph += 1000;
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("[%d] ", glyph);
}
@ -628,7 +629,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
}
glyph = (100 * (numbuf[0] - '0')) + (10 * (numbuf[1] - '0')) + (numbuf[2] - '0');
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("[%d] ", glyph);
}
@ -651,7 +652,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
}
glyph = gbdata[sp];
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("[%d] ", glyph);
}
bin_append(glyph, 8, binary);
@ -677,7 +678,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
if (shift == 0) {
/* Mixed Mode character */
glyph = posn(EUROPIUM, gbdata[sp]);
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("[%d] ", glyph);
}
@ -685,7 +686,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
} else {
/* Shift Mode character */
bin_append(1014, 10, binary); /* shift indicator */
add_shift_char(binary, gbdata[sp]);
add_shift_char(binary, gbdata[sp], debug);
}
sp++;
@ -703,7 +704,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
if (shift == 0) {
/* Upper Case character */
glyph = posn("ABCDEFGHIJKLMNOPQRSTUVWXYZ ", gbdata[sp]);
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("[%d] ", glyph);
}
@ -711,7 +712,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
} else {
/* Shift Mode character */
bin_append(125, 7, binary); /* shift indicator */
add_shift_char(binary, gbdata[sp]);
add_shift_char(binary, gbdata[sp], debug);
}
sp++;
@ -729,7 +730,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
if (shift == 0) {
/* Lower Case character */
glyph = posn("abcdefghijklmnopqrstuvwxyz ", gbdata[sp]);
if (debug) {
if (debug & ZINT_DEBUG_PRINT) {
printf("[%d] ", glyph);
}
@ -737,7 +738,7 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
} else {
/* Shift Mode character */
bin_append(125, 7, binary); /* shift indicator */
add_shift_char(binary, gbdata[sp]);
add_shift_char(binary, gbdata[sp], debug);
}
sp++;
@ -786,10 +787,9 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
/* Add padding bits if required */
p = 7 - (strlen(binary) % 7);
if (p == 7) {
p = 0;
if (p % 7) {
bin_append(0, p, binary);
}
bin_append(0, p, binary);
if (strlen(binary) > 9191) {
return ZINT_ERROR_TOO_LONG;
@ -797,6 +797,14 @@ static int gm_encode(int gbdata[], const size_t length, char binary[],const int
return 0;
}
/* Dump up to the first 33 entries of `codewords` into symbol->errtxt as space-separated
   upper-case hex ("%02X"), with no trailing space.
   symbol    - symbol whose errtxt buffer receives the dump
   codewords - values to dump
   length    - number of entries in `codewords`; if <= 0, errtxt is set to the empty string */
static void gm_test_codeword_dump(struct zint_symbol *symbol, int* codewords, int length) {
    int i;
    size_t dump_len;

    /* Start from an empty string so the result is well-defined when nothing is dumped */
    symbol->errtxt[0] = '\0';

    for (i = 0; i < length && i < 33; i++) { /* 33*3 < errtxt 100 chars */
        /* %X expects an unsigned argument */
        sprintf(symbol->errtxt + i * 3, "%02X ", (unsigned int) codewords[i]);
    }

    /* Only zap the trailing space if something was written; unguarded, an empty
       errtxt would give strlen() - 1 == SIZE_MAX and an out-of-bounds write */
    dump_len = strlen(symbol->errtxt);
    if (dump_len > 0) {
        symbol->errtxt[dump_len - 1] = '\0'; /* Zap last space */
    }
}
static void gm_add_ecc(const char binary[], const size_t data_posn, const int layers, const int ecc_level, int word[]) {
int data_cw, i, j, wp, p;
int n1, b1, n2, b2, e1, b3, e2;
@ -810,7 +818,7 @@ static void gm_add_ecc(const char binary[], const size_t data_posn, const int la
data[i] = 0;
}
/* Convert from binary sream to 7-bit codewords */
/* Convert from binary stream to 7-bit codewords */
for (i = 0; i < data_posn; i++) {
for (p = 0; p < 7; p++) {
if (binary[i * 7 + p] == '1') {
@ -1009,46 +1017,35 @@ int grid_matrix(struct zint_symbol *symbol, const unsigned char source[], size_t
int word[1460], data_max, reader = 0;
#ifndef _MSC_VER
int utfdata[length + 1];
int gbdata[length + 1];
unsigned int gbdata[length + 1];
#else
char* grid;
int* utfdata = (int *) _alloca((length + 1) * sizeof (int));
int* gbdata = (int *) _alloca((length + 1) * sizeof (int));
unsigned int* gbdata = (unsigned int *) _alloca((length + 1) * sizeof (unsigned int));
#endif
for (i = 0; i < 1460; i++) {
word[i] = 0;
}
if ((symbol->input_mode == DATA_MODE) || (symbol->eci != 0)) {
for (i = 0; i < length; i++) {
gbdata[i] = (int) source[i];
}
if ((symbol->input_mode & 0x07) == DATA_MODE) {
gb2312_cpy(source, &length, gbdata);
} else {
/* Convert Unicode input to GB-2312 */
error_number = utf8toutf16(symbol, source, utfdata, &length);
if (error_number != 0) {
return error_number;
int done = 0;
if (symbol->eci != 29) { /* Unless ECI 29 (GB) */
/* Try single byte (Latin) conversion first */
int error_number = gb2312_utf8tosb(symbol->eci && symbol->eci <= 899 ? symbol->eci : 3, source, &length, gbdata);
if (error_number == 0) {
done = 1;
} else if (symbol->eci && symbol->eci <= 899) {
strcpy(symbol->errtxt, "575: Invalid characters in input data");
return error_number;
}
}
for (i = 0; i < length; i++) {
if (utfdata[i] <= 0xff) {
gbdata[i] = utfdata[i];
} else {
int j = 0;
int glyph = 0;
do {
if (gb2312_lookup[j * 2] == utfdata[i]) {
glyph = gb2312_lookup[(j * 2) + 1];
}
j++;
} while ((j < 7445) && (glyph == 0));
if (glyph == 0) {
strcpy(symbol->errtxt, "530: Invalid character in input data");
return ZINT_ERROR_INVALID_DATA;
}
gbdata[i] = glyph;
if (!done) {
/* Try GB 2312 (EUC-CN) */
int error_number = gb2312_utf8tomb(symbol, source, &length, gbdata);
if (error_number != 0) {
return error_number;
}
}
}
@ -1155,6 +1152,7 @@ int grid_matrix(struct zint_symbol *symbol, const unsigned char source[], size_t
}
gm_add_ecc(binary, data_cw, layers, ecc_level, word);
if (symbol->debug & ZINT_DEBUG_TEST) gm_test_codeword_dump(symbol, word, data_cw);
size = 6 + (layers * 12);
modules = 1 + (layers * 2);

View File

@ -1257,6 +1257,7 @@ int han_xin(struct zint_symbol *symbol, const unsigned char source[], size_t len
}
} else {
int posn;
unsigned char gb2312_buf[2]; /* Temporary use until gb18030_utf8tomb() implemented */
/* Convert Unicode input to GB-18030 */
int error_number = utf8toutf16(symbol, source, utfdata, &length);
if (error_number != 0) {
@ -1277,15 +1278,11 @@ int han_xin(struct zint_symbol *symbol, const unsigned char source[], size_t len
/* Two bytes characters in GB-2312 */
if (done == 0) {
j = 0;
do {
if (gb2312_lookup[j * 2] == utfdata[i]) {
gbdata[posn] = gb2312_lookup[(j * 2) + 1];
posn++;
done = 1;
}
j++;
} while ((j < 7445) && (done == 0));
if (gb2312_wctomb_zint(gb2312_buf, utfdata[i], 2) == 2) { /* Temporary use until gb18030_utf8tomb() implemented */
gbdata[posn] = (gb2312_buf[0] << 8) | gb2312_buf[1];
posn++;
done = 1;
}
}
/* Two byte characters in GB-18030 */

View File

@ -52,8 +52,6 @@
* License along with the GNU LIBICONV Library; see the file COPYING.LIB.
* If not, see <https://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "common.h"
#include "sjis.h"
@ -100,7 +98,7 @@ static int jisx0201_wctomb(unsigned char* r, unsigned int wc, size_t n) {
* JISX0208.1990-0 (libiconv-1.16/lib/jisx0208.h)
*/
/* ZINT: Table converted from JIS X 0208 to Shift JIS values using php:
/* ZINT: Table converted from JIS X 0208 to Shift JIS values using tools/cnv_sjis.php:
*
while ($line = fgets(STDIN)) {
echo preg_replace_callback('/0x([0-9a-f]{4})/', function ($matches) {

View File

@ -51,6 +51,8 @@ zint_add_test(common, test_common)
zint_add_test(composite, test_composite)
zint_add_test(dmatrix, test_dmatrix)
zint_add_test(eci, test_eci)
zint_add_test(gb2312, test_gb2312)
zint_add_test(gridmtx, test_gridmtx)
zint_add_test(gs1, test_gs1)
zint_add_test(imail, test_imail)
zint_add_test(library, test_library)

264
backend/tests/test_gb2312.c Normal file
View File

@ -0,0 +1,264 @@
/*
libzint - the open source barcode library
Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
/* vim: set ts=4 sw=4 et : */
#include "testcommon.h"
#include "test_gb2312_tab.h"
#include "../gb2312.h"
// Control implementation: convert a Unicode code point to GB 2312 (EUC-CN form) by linear search of a
// simple table generated from unicode.org GB2312.TXT plus simple processing.
// GB2312.TXT no longer on unicode.org site but available from https://haible.de/bruno/charsets/conversion-tables/GB2312.html
// On a match writes the two EUC-CN bytes (high byte first) to `r` and returns 2; otherwise returns 0.
// `n` is accepted for signature parity with gb2312_wctomb_zint() but is not consulted.
static int gb2312_wctomb_zint2(unsigned char* r, unsigned int wc, size_t n)
{
    // Shortcut: code point ranges that the checks below rule out without searching the table
    if (wc > 0xFFE5) {
        return 0;
    }
    if (wc > 0x0451 && wc < 0x2015) {
        return 0;
    }
    if (wc > 0x3229 && wc < 0x4E00) {
        return 0;
    }
    if (wc > 0x9FA0 && wc < 0xFF01) {
        return 0;
    }

    // Table is laid out as pairs: [GB 2312 value, Unicode code point]
    const int entries = sizeof(test_gb2312_tab) / sizeof(unsigned short);
    int idx = 0;
    while (idx < entries) {
        if (test_gb2312_tab[idx + 1] == wc) {
            const unsigned short euc = test_gb2312_tab[idx] + 0x8080; // Table in GB 2312 not EUC-CN
            r[0] = (euc >> 8);
            r[1] = euc & 0xFF;
            return 2;
        }
        idx += 2;
    }
    return 0;
}
// Cross-check gb2312_wctomb_zint() against the table-driven control gb2312_wctomb_zint2() for every
// code point from 0x80 up to (but excluding) 0xFFFE, skipping UTF-16 surrogates. Two code points
// (U+00B7, U+2014) have extra mappings that the control table lacks and are asserted explicitly.
static void test_gb2312_wctomb_zint(void)
{
    testStart("");

    int ret, ret2;
    unsigned char buf[2], buf2[2];
    unsigned int val, val2;

    // ASCII (< 0x80) is straight through and not dealt with by gb2312_wctomb_zint(), so begin at 0x80
    for (unsigned int i = 0x80; i < 0xFFFE; i++) {
        if (i >= 0xD800 && i <= 0xDFFF) { // UTF-16 surrogates
            continue;
        }

        // Clear both output buffers so stale bytes never leak into val/val2
        buf[0] = 0;
        buf[1] = 0;
        buf2[0] = 0;
        buf2[1] = 0;

        ret = gb2312_wctomb_zint(buf, i, 2);
        if (ret == 1) {
            val = buf[0];
        } else {
            val = (buf[0] << 8) | buf[1];
        }

        ret2 = gb2312_wctomb_zint2(buf2, i, 2);
        if (ret2 == 1) {
            val2 = buf2[0];
        } else {
            val2 = (buf2[0] << 8) | buf2[1];
        }

        switch (i) {
            case 0xB7: // Extra mapping middle dot U+00B7 to 0xA1A4, duplicate of U+30FB (Katakana middle dot)
                assert_equal(ret, 2, "i:%d 0x%04X ret %d != 2, val 0x%04X\n", i, i, ret, val);
                assert_equal(val, 0xA1A4, "i:%d 0x%04X val 0x%04X != 0xA1A4\n", i, i, val);
                assert_zero(ret2, "i:%d 0x%04X ret2 %d != 0, val2 0x%04X\n", i, i, ret2, val2);
                break;
            case 0x2014: // Extra mapping em dash U+2014 to 0xA1AA, duplicate of U+2015 (horizontal bar)
                assert_equal(ret, 2, "i:%d 0x%04X ret %d != 2, val 0x%04X\n", i, i, ret, val);
                assert_equal(val, 0xA1AA, "i:%d 0x%04X val 0x%04X != 0xA1AA\n", i, i, val);
                assert_zero(ret2, "i:%d 0x%04X ret2 %d != 0, val2 0x%04X\n", i, i, ret2, val2);
                break;
            default: // Everywhere else both implementations must agree on the byte count
                assert_equal(ret, ret2, "i:%d 0x%04X ret %d != ret2 %d, val 0x%04X, val2 0x%04X\n", i, i, ret, ret2, val, val2);
                break;
        }

        // Whenever the control found a mapping, the converted values must match too
        if (ret2) {
            assert_equal(val, val2, "i:%d 0x%04X val 0x%04X != val2 0x%04X\n", i, i, val, val2);
        }
    }

    testFinish();
}
static void test_gb2312_utf8tomb(void)
{
testStart("");
int ret;
struct item {
unsigned char* data;
int length;
int ret;
size_t ret_length;
unsigned int expected_gbdata[20];
char* comment;
};
// é U+00E9 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 2312 0xA8A6, UTF-8 C3A9
// β U+03B2 in ISO 8859-7 Greek (but not other ISO 8859 or Win page), in GB 2312 0xA6C2, UTF-8 CEB2
// ¤ U+00A4 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 2312 0xA1E8, UTF-8 C2A4
// ¥ U+00A5 in ISO 8859-1 0xA5, not in GB 2312, UTF-8 C2A5
// ・ U+30FB katakana middle dot, not in any ISO or Win page, in GB 2312 "GB2312.TXT" 0xA1A4, duplicate of mapping of U+00B7, UTF-8 E383BB
// · U+00B7 middle dot in ISO 8859-1 0xB7, in GB 2312 "GB 18030 subset" 0xA1A4, duplicate of mapping of U+30FB, UTF-8 C2B7
// ― U+2015 horizontal bar in ISO 8859-7 Greek and ISO 8859-10 Nordic, not in any Win page, in GB 2312 "GB2312.TXT" 0xA1AA, duplicate of mapping of U+2014, UTF-8 E28095
// — U+2014 em dash, not in any ISO, in Win 1250 and other Win, in GB 2312 "GB 18030 subset" 0xA1AA, duplicate of mapping of U+2015, UTF-8 E28094
// s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
struct item data[] = {
/* 0*/ { "é", -1, 0, 1, { 0xA8A6 }, "" },
/* 1*/ { "β", -1, 0, 1, { 0xA6C2 }, "" },
/* 2*/ { "¤", -1, 0, 1, { 0xA1E8 }, "" },
/* 3*/ { "¥", -1, ZINT_ERROR_INVALID_DATA, -1, {}, "" },
/* 4*/ { "", -1, 0, 1, { 0xA1A4 }, "GB2312.TXT mapping" },
/* 5*/ { "·", -1, 0, 1, { 0xA1A4 }, "GB 18030 subset mapping" },
/* 6*/ { "", -1, 0, 1, { 0xA1AA }, "GB2312.TXT mapping" },
/* 7*/ { "", -1, 0, 1, { 0xA1AA }, "GB 18030 subset mapping" },
/* 8*/ { "aβc・·—é—Z", -1, 0, 9, { 'a', 0xA6C2, 'c', 0xA1A4, 0xA1A4, 0xA1AA, 0xA8A6, 0xA1AA, 'Z' }, "" },
};
int data_size = sizeof(data) / sizeof(struct item);
struct zint_symbol symbol;
unsigned int gbdata[20];
for (int i = 0; i < data_size; i++) {
int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
size_t ret_length = length;
ret = gb2312_utf8tomb(&symbol, data[i].data, &ret_length, gbdata);
assert_equal(ret, data[i].ret, "i:%d ret %d != %d (%s)\n", i, ret, data[i].ret, symbol.errtxt);
if (ret == 0) {
assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
for (int j = 0; j < ret_length; j++) {
assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] %04X != %04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
}
}
}
testFinish();
}
// Table-driven test of gb2312_utf8tosb(): each row gives an ECI, UTF-8 input, the expected return
// code and, on success, the expected output. Per the rows below, converted single bytes that form a
// pair inside the GRIDMATRIX Chinese mode range are expected to come back combined into one value.
static void test_gb2312_utf8tosb(void)
{
    testStart("");

    int ret;
    struct item {
        int eci;                            // ECI selecting the single-byte code page
        unsigned char* data;                // UTF-8 input
        int length;                         // Input length in bytes, or -1 to use strlen()
        int ret;                            // Expected return value
        size_t ret_length;                  // Expected output length (only checked when ret == 0)
        unsigned int expected_gbdata[20];   // Expected output values
        char* comment;
    };
    // é U+00E9 in ISO 8859-1 0xE9, Win 1250 plus other Win, in GRIDMATRIX Chinese mode first byte range 0xA1..A9, 0xB0..F7
    // β U+03B2 in ISO 8859-7 Greek 0xE2 (but not other ISO 8859 or Win page)
    // ¥ U+00A5 in ISO 8859-1 0xA5, in first byte range 0xA1..A9, 0xB0..F7
    // NBSP U+00A0 in ISO 8859-1 0xA0, outside first byte and second byte range 0xA1..FE, UTF-8 C2A0 (\302\240)
    // ¡ U+00A1 in ISO 8859-1 0xA1, in first byte range
    // © U+00A9 in ISO 8859-1 0xA9, in first byte range
    // ª U+00AA in ISO 8859-1 0xAA, outside first byte range
    // ¯ U+00AF in ISO 8859-1 0xAF, outside first byte range
    // ° U+00B0 in ISO 8859-1 0xB0, in first byte range
    // ÷ U+00F7 in ISO 8859-1 0xF7, in first byte range
    // ø U+00F8 in ISO 8859-1 0xF8, outside first byte range
    // ÿ U+00FF in ISO 8859-1 0xFF, outside first byte and second byte range
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
        /*  0*/ { 3, "é", -1, 0, 1, { 0xE9 }, "First byte in range but only one byte" },
        /*  1*/ { 3, "β", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "Not in ECI 3 (ISO 8859-1)" },
        /*  2*/ { 9, "β", -1, 0, 1, { 0xE2 }, "In ECI 9 (ISO 8859-7)" },
        /*  3*/ { 3, "¥", -1, 0, 1, { 0xA5 }, "First byte in range but only one byte" },
        /*  4*/ { 3, "¡é", -1, 0, 1, { 0xA1E9 }, "In GRIDMATRIX Chinese mode range" },
        /*  5*/ { 3, "¡\302\240", -1, 0, 2, { 0xA1, 0xA0 }, "First byte in range but not second" },
        /*  6*/ { 3, "©é", -1, 0, 1, { 0xA9E9 }, "In GRIDMATRIX Chinese mode range" },
        /*  7*/ { 3, "©ÿ", -1, 0, 2, { 0xA9, 0xFF }, "First byte in range but not second" },
        /*  8*/ { 3, "éaé驪ª©¯é°°é÷éø", -1, 0, 10, { 0xE9, 0x61, 0xE9E9, 0xA9AA, 0xAA, 0xA9AF, 0xE9B0, 0xB0E9, 0xF7E9, 0xF8 }, "" },
    };

    int data_size = sizeof(data) / sizeof(struct item);

    unsigned int gbdata[20];

    for (int i = 0; i < data_size; i++) {

        int length = data[i].length == -1 ? (int) strlen((const char *) data[i].data) : data[i].length;
        size_t ret_length = length; // In: input byte count; out: number of values written to gbdata

        ret = gb2312_utf8tosb(data[i].eci, data[i].data, &ret_length, gbdata);
        assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret);
        if (ret == 0) {
            assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
            for (int j = 0; (size_t) j < ret_length; j++) {
                assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] %04X != %04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
            }
        }
    }

    testFinish();
}
// Table-driven test of gb2312_cpy(): each row gives raw input bytes and the expected output. Per the
// rows below, a byte pair inside the GRIDMATRIX Chinese mode range is expected to come back combined
// into one value, while every other byte is copied through on its own.
static void test_gb2312_cpy(void)
{
    testStart("");

    struct item {
        unsigned char* data;                // Raw input bytes
        int length;                         // Input length in bytes, or -1 to use strlen()
        int ret;                            // Not checked here: gb2312_cpy()'s result is not captured
        size_t ret_length;                  // Expected output length
        unsigned int expected_gbdata[20];   // Expected output values
        char* comment;
    };
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
        /*  0*/ { "\351", -1, 0, 1, { 0xE9 }, "In GRIDMATRIX Chinese mode first-byte range but only one byte" },
        /*  1*/ { "\351\241", -1, 0, 1, { 0xE9A1 }, "In GRIDMATRIX Chinese range" },
        /*  2*/ { "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" },
        /*  3*/ { "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" },
        /*  4*/ { "\241\240\241\376\367\376\367\377", -1, 0, 6, { 0xA1, 0xA0, 0xA1FE, 0xF7FE, 0xF7, 0xFF }, "" },
    };

    int data_size = sizeof(data) / sizeof(struct item);

    unsigned int gbdata[20];

    for (int i = 0; i < data_size; i++) {

        int length = data[i].length == -1 ? (int) strlen((const char *) data[i].data) : data[i].length;
        size_t ret_length = length; // In: input byte count; out: number of values written to gbdata

        gb2312_cpy(data[i].data, &ret_length, gbdata);
        assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
        for (int j = 0; (size_t) j < ret_length; j++) {
            assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] %04X != %04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
        }
    }

    testFinish();
}
int main()
{
test_gb2312_wctomb_zint();
test_gb2312_utf8tomb();
test_gb2312_utf8tosb();
test_gb2312_cpy();
testReport();
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -189,14 +189,15 @@ static void test_sjis_utf8tosb(void)
unsigned int expected_jisdata[20];
char* comment;
};
// é U+00E9 in ISO 8859-1 0xE9, Win 1250 plus other Win
// é U+00E9 in ISO 8859-1 0xE9, Win 1250 plus other Win, in QR Kanji mode first byte range 0x81..9F, 0xE0..EB
// β U+03B2 in ISO 8859-7 Greek 0xE2 (but not other ISO 8859 or Win page)
// ¥ U+00A5 in ISO 8859-1 0xA5, in QR Kanji mode first byte range 0x81..9F, 0xE0..EB
// ¥ U+00A5 in ISO 8859-1 0xA5, outside first byte range 0x81..9F, 0xE0..EB
// ú U+00FA in ISO 8859-1 0xFA, outside first byte range
// à U+00FA in ISO 8859-1 0xE0, in first byte range
// ë U+00FA in ISO 8859-1 0xEB, in first byte range
// ì U+00FA in ISO 8859-1 0xEC, outside first byte range
// à U+00E0 in ISO 8859-1 0xE0, in first byte range
// ë U+00EB in ISO 8859-1 0xEB, in first byte range
// ì U+00EC in ISO 8859-1 0xEC, outside first byte range
// µ U+00B5 in ISO 8859-1 0xB5, outside first byte range
// À U+00C0 in ISO 8859-1 0xC0, outside first byte range and 0xEBxx second byte range
// s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
struct item data[] = {
/* 0*/ { 3, "é", -1, 0, 1, { 0xE9 }, "" },
@ -205,6 +206,7 @@ static void test_sjis_utf8tosb(void)
/* 3*/ { 3, "¥", -1, 0, 1, { 0xA5 }, "" },
/* 4*/ { 3, "éa", -1, 0, 1, { 0xE961 }, "In QR Kanji mode range" },
/* 5*/ { 3, "éaúbàcëdìeµ", -1, 0, 8, { 0xE961, 0xFA, 0x62, 0xE063, 0xEB64, 0xEC, 0x65, 0xB5 }, "" },
/* 6*/ { 3, "ëÀ", -1, 0, 2, { 0xEB, 0xC0 }, "Outside QR Kanji mode range" },
};
int data_size = sizeof(data) / sizeof(struct item);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,25 @@
<?php
/* Convert libiconv-1.16/lib/gb2312.h to EUC-CN values (+ 0x8080). */
/*
    libzint - the open source barcode library
    Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
*/
/* vim: set ts=4 sw=4 et : */

/*
 * Reads the header on STDIN and echoes to STDOUT only the `gb2312_2charset` table: its declaration
 * line, each table line with every 4-digit lower-case hex value raised by 0x8080, and the closing
 * `};` line. Everything before the table is dropped and reading stops after the table.
 */

$in = false; /* True once the table's declaration line has been seen */

/* Compare against false explicitly: a bare truthiness test would also stop on a line that
   evaluates as falsy (e.g. a final "0" with no newline) */
while (($line = fgets(STDIN)) !== false) {
    if ($in) {
        if (strncmp($line, '};', 2) === 0) { /* End of table */
            echo $line;
            break;
        }
        echo preg_replace_callback('/0x([0-9a-f]{4})/', function ($matches) {
            return '0x' . dechex(hexdec($matches[1]) + 0x8080);
        }, $line);
    } else {
        if (strpos($line, 'short gb2312_2charset') !== false) { /* Start of table */
            echo $line;
            $in = true;
        }
    }
}

View File

@ -0,0 +1,31 @@
<?php
/* Convert libiconv-1.16/lib/jisx0208.h to Shift JIS values. */
/*
    libzint - the open source barcode library
    Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
*/
/* vim: set ts=4 sw=4 et : */

/*
 * Reads the header on STDIN and echoes to STDOUT only the `jisx0208_2charset` table: its declaration
 * line, each table line with every 4-digit lower-case hex value converted to Shift JIS, and the
 * closing `};` line. Everything before the table is dropped and reading stops after the table.
 */

$in = false; /* True once the table's declaration line has been seen */

/* Compare against false explicitly: a bare truthiness test would also stop on a line that
   evaluates as falsy (e.g. a final "0" with no newline) */
while (($line = fgets(STDIN)) !== false) {
    if ($in) {
        if (strncmp($line, '};', 2) === 0) { /* End of table */
            echo $line;
            break;
        }
        echo preg_replace_callback('/0x([0-9a-f]{4})/', function ($matches) {
            $dec = hexdec($matches[1]);
            $c1 = $dec >> 8;        /* High byte of the table value */
            $c2 = ($dec & 0xFF);    /* Low byte of the table value */
            /* Two consecutive high-byte values share one Shift JIS lead byte; the odd one of
               each pair has its trail-byte offset raised by 0x5e */
            $t1 = ($c1 - 0x21) >> 1;
            $t2 = ((($c1 - 0x21) & 1) ? 0x5e : 0) + ($c2 - 0x21);
            /* Lead byte is based at 0x81 or, from offset 0x1f, at 0xc1; trail byte is based at 0x40
               or, from offset 0x3f, at 0x41 */
            $r = (($t1 < 0x1f ? ($t1 + 0x81) : ($t1 + 0xc1)) << 8) | ($t2 < 0x3f ? ($t2 + 0x40) : ($t2 + 0x41));
            return '0x' . dechex($r);
        }, $line);
    } else {
        if (strpos($line, 'jisx0208_2charset') !== false) { /* Start of table */
            echo $line;
            $in = true;
        }
    }
}

View File

@ -60,6 +60,7 @@ HEADERS += ../backend/aztec.h \
../backend/dmatrix.h \
../backend/eci.h \
../backend/font.h \
../backend/gb2312.h \
../backend/gridmtx.h \
../backend/gs1.h \
../backend/hanxin.h \
@ -89,6 +90,7 @@ SOURCES += ../backend/2of5.c \
../backend/dotcode.c \
../backend/eci.c \
../backend/emf.c \
../backend/gb2312.c \
../backend/general_field.c \
../backend/gif.c \
../backend/gridmtx.c \

View File

@ -156,6 +156,10 @@ SOURCE=..\backend\emf.c
# End Source File
# Begin Source File
SOURCE=..\backend\gb2312.c
# End Source File
# Begin Source File
SOURCE=..\backend\general_field.c
# End Source File
# Begin Source File

View File

@ -319,6 +319,7 @@
<ClCompile Include="..\backend\dotcode.c" />
<ClCompile Include="..\backend\eci.c" />
<ClCompile Include="..\backend\emf.c" />
<ClCompile Include="..\backend\gb2312.c" />
<ClCompile Include="..\backend\general_field.c" />
<ClCompile Include="..\backend\gif.c" />
<ClCompile Include="..\backend\gridmtx.c" />

View File

@ -152,6 +152,10 @@ SOURCE=..\..\backend\emf.c
# End Source File
# Begin Source File
SOURCE=..\..\backend\gb2312.c
# End Source File
# Begin Source File
SOURCE=..\..\backend\general_field.c
# End Source File
# Begin Source File