HANXIN ECI conversion, GB 18030 LIBICONV port; some codeword fixes; optimized encoding modes

This commit is contained in:
gitlost 2019-12-08 16:15:34 +00:00
parent ce8aa92846
commit 889e786d95
35 changed files with 67955 additions and 23877 deletions

View File

@ -4,7 +4,7 @@ project(zint)
find_package(PNG)
set(zint_COMMON_SRCS common.c library.c large.c reedsol.c gs1.c eci.c general_field.c sjis.c gb2312.c)
set(zint_COMMON_SRCS common.c library.c large.c reedsol.c gs1.c eci.c general_field.c sjis.c gb2312.c gb18030.c)
set(zint_ONEDIM_SRCS code.c code128.c 2of5.c upcean.c telepen.c medical.c plessey.c rss.c)
set(zint_POSTAL_SRCS postal.c auspost.c imail.c mailmark.c)
set(zint_TWODIM_SRCS code16k.c codablock.c dmatrix.c pdf417.c qr.c maxicode.c composite.c aztec.c code49.c code1.c gridmtx.c hanxin.c dotcode.c ultra.c)

View File

@ -310,8 +310,9 @@ unsigned int decode_utf8(unsigned int* state, unsigned int* codep, const unsigne
return *state;
}
/* Convert UTF-8 to UTF-16 for codepoints <= U+FFFF (ie four-byte sequences (requiring UTF-16 surrogates) not allowed) */
int utf8toutf16(struct zint_symbol *symbol, const unsigned char source[], int vals[], size_t *length) {
/* Convert UTF-8 to Unicode. If `disallow_4byte` unset, allow all values (UTF-32).
* If `disallow_4byte` set, only allow codepoints <= U+FFFF (ie four-byte sequences not allowed) (UTF-16, no surrogates) */
int utf8_to_unicode(struct zint_symbol *symbol, const unsigned char source[], unsigned int vals[], size_t *length, int disallow_4byte) {
size_t bpos;
int jpos;
unsigned int codepoint, state = 0;
@ -328,7 +329,7 @@ int utf8toutf16(struct zint_symbol *symbol, const unsigned char source[], int va
strcpy(symbol->errtxt, "240: Corrupt Unicode data");
return ZINT_ERROR_INVALID_DATA;
}
if (codepoint > 0xffff) {
if (disallow_4byte && codepoint > 0xffff) {
strcpy(symbol->errtxt, "242: Unicode sequences of more than 3 bytes not supported");
return ZINT_ERROR_INVALID_DATA;
}
@ -342,9 +343,8 @@ int utf8toutf16(struct zint_symbol *symbol, const unsigned char source[], int va
return 0;
}
/* Enforce minimum permissable height of rows */
void set_minimum_height(struct zint_symbol *symbol, const int min_height) {
/* Enforce minimum permissible height of rows */
int fixed_height = 0;
int zero_count = 0;
int i;
@ -368,3 +368,94 @@ void set_minimum_height(struct zint_symbol *symbol, const int min_height) {
}
}
/* Calculate optimized encoding modes. Adapted from Project Nayuki.
 *
 * Fills `mode[0..length-1]` with one character from `mode_types` per element of `data`, chosen by
 * dynamic programming so that the total cost reported by the callbacks is minimal. Cost units are
 * whatever the callbacks use (e.g. Grid Matrix uses 1/6 (1/GM_MULT) bit units).
 *
 *   mode        - output; must have room for `length` chars (it is not NUL-terminated here)
 *   data        - input values, passed through to `cur_cost`
 *   length      - number of elements in `data`; if 0 nothing is written to `mode`
 *   debug       - if ZINT_DEBUG_PRINT is set, the chosen modes are printed to stdout
 *   state       - caller-owned scratch area handed unchanged to every callback
 *   mode_types  - `num_modes` mode characters, indexed by mode; need not be NUL-terminated
 *   num_modes   - number of modes, must be at least 1
 *   head_costs  - returns the `num_modes` initial costs
 *   switch_cost - cost of switching from mode index (2nd arg) to mode index (3rd arg)
 *   eod_cost    - end-of-data cost for a mode index; may be NULL if there is none
 *   cur_cost    - for position `i`, sets `cur_costs[j]` and a non-zero `char_modes[i * num_modes + j]`
 *                 for each mode `j` that can encode the element
 */
void pn_define_mode(char* mode, const unsigned int data[], const size_t length, const int debug,
            unsigned int state[], const char mode_types[], const int num_modes, pn_head_costs head_costs, pn_switch_cost switch_cost, pn_eod_cost eod_cost, pn_cur_cost cur_cost) {
    /*
     * Copyright (c) Project Nayuki. (MIT License)
     * https://www.nayuki.io/page/qr-code-generator-library
     *
     * Permission is hereby granted, free of charge, to any person obtaining a copy of
     * this software and associated documentation files (the "Software"), to deal in
     * the Software without restriction, including without limitation the rights to
     * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
     * the Software, and to permit persons to whom the Software is furnished to do so,
     * subject to the following conditions:
     * - The above copyright notice and this permission notice shall be included in
     *   all copies or substantial portions of the Software.
     */
    int i, j, k, cm_i;
    int mode_idx = 0;
    const int len = (int) length; /* Signed copy so loop indices are not compared against a `size_t` */
    unsigned int min_cost;
    char cur_mode;
    const char* found;
#ifdef _MSC_VER
    unsigned int* prev_costs;
    char* char_modes;
    unsigned int* cur_costs;
#endif

    /* Nothing to do for empty input; returning here also avoids declaring zero-length arrays below */
    if (length == 0) {
        if (debug & ZINT_DEBUG_PRINT) {
            printf(" Mode: %.*s\n", 0, "");
        }
        return;
    }

#ifndef _MSC_VER
    unsigned int prev_costs[num_modes];
    char char_modes[length * num_modes];
    unsigned int cur_costs[num_modes];
#else
    prev_costs = (unsigned int*) _alloca(num_modes * sizeof(unsigned int));
    char_modes = (char*) _alloca(length * num_modes);
    cur_costs = (unsigned int*) _alloca(num_modes * sizeof(unsigned int));
#endif

    /* char_modes[i * num_modes + j] represents the mode to encode the code point at index i such that the final segment ends in
     * mode_types[j] and the total cost is minimized over all possible choices. Zero means "mode j not usable at i" */
    memset(char_modes, 0, length * num_modes);

    /* At the beginning of each iteration of the loop below, prev_costs[j] is the minimum cost needed
     * to encode the entire string prefix of length i, and end in mode_types[j] */
    memcpy(prev_costs, (*head_costs)(state), num_modes * sizeof(unsigned int));

    /* Calculate costs using dynamic programming */
    for (i = 0, cm_i = 0; i < len; i++, cm_i += num_modes) {
        memset(cur_costs, 0, num_modes * sizeof(unsigned int));

        (*cur_cost)(state, data, length, i, char_modes, prev_costs, cur_costs);

        if (eod_cost && i == len - 1) { /* Add end of data costs if last character */
            for (j = 0; j < num_modes; j++) {
                if (char_modes[cm_i + j]) {
                    cur_costs[j] += (*eod_cost)(state, j);
                }
            }
        }

        /* Start new segment at the end to switch modes */
        for (j = 0; j < num_modes; j++) { /* To mode */
            for (k = 0; k < num_modes; k++) { /* From mode */
                if (j != k && char_modes[cm_i + k]) {
                    unsigned int new_cost = cur_costs[k] + (*switch_cost)(state, k, j);
                    if (!char_modes[cm_i + j] || new_cost < cur_costs[j]) {
                        cur_costs[j] = new_cost;
                        char_modes[cm_i + j] = mode_types[k];
                    }
                }
            }
        }

        memcpy(prev_costs, cur_costs, num_modes * sizeof(unsigned int));
    }

    /* Find optimal ending mode */
    min_cost = prev_costs[0];
    cur_mode = mode_types[0];
    for (i = 1; i < num_modes; i++) {
        if (prev_costs[i] < min_cost) {
            min_cost = prev_costs[i];
            cur_mode = mode_types[i];
        }
    }

    /* Get optimal mode for each code point by tracing backwards */
    for (i = len - 1, cm_i = i * num_modes; i >= 0; i--, cm_i -= num_modes) {
        /* `mode_types` is a plain array of `num_modes` chars, so search it with an explicit bound
         * rather than treating it as a NUL-terminated string */
        found = (const char*) memchr(mode_types, cur_mode, (size_t) num_modes);
        if (found) {
            mode_idx = (int) (found - mode_types);
        } /* else keep the previous index rather than index out of bounds */
        cur_mode = char_modes[cm_i + mode_idx];
        mode[i] = cur_mode;
    }

    if (debug & ZINT_DEBUG_PRINT) {
        printf(" Mode: %.*s\n", (int)length, mode);
    }
}

View File

@ -75,8 +75,15 @@ extern "C" {
extern int is_extendable(const int symbology);
extern int is_composite(const int symbology);
extern unsigned int decode_utf8(unsigned int* state, unsigned int* codep, const unsigned char byte);
extern int utf8toutf16(struct zint_symbol *symbol, const unsigned char source[], int vals[], size_t *length);
extern int utf8_to_unicode(struct zint_symbol *symbol, const unsigned char source[], unsigned int vals[], size_t *length, int disallow_4byte);
extern void set_minimum_height(struct zint_symbol *symbol, const int min_height);
typedef unsigned int* (*pn_head_costs)(unsigned int state[]);
typedef unsigned int (*pn_switch_cost)(unsigned int state[], const int j, const int k);
typedef unsigned int (*pn_eod_cost)(unsigned int state[], const int k);
typedef void (*pn_cur_cost)(unsigned int state[], const unsigned int data[], const size_t length, const int i, char* char_modes, unsigned int prev_costs[], unsigned int cur_costs[]);
extern void pn_define_mode(char* mode, const unsigned int data[], const size_t length, const int debug,
unsigned int state[], const char mode_types[], const int num_modes, pn_head_costs head_costs, pn_switch_cost switch_cost, pn_eod_cost eod_cost, pn_cur_cost cur_cost);
#ifdef __cplusplus
}
#endif /* __cplusplus */

2956
backend/gb18030.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1499,23 +1499,17 @@ static const Summary16 gb2312_uni2indx_pageff[15] = {
{ 7441, 0x0000 }, { 7441, 0x0000 }, { 7441, 0x002b },
};
int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
int gb2312_wctomb_zint(unsigned int* r, unsigned int wc) {
const Summary16 *summary = NULL;
if (wc >= 0x0000 && wc < 0x0460) {
if (wc == 0x00b7) { /* ZINT: Patched to duplicate map to 0xA1A4 */
if (n < 2) {
return -1;
}
r[0] = 0xA1; r[1] = 0xA4;
*r = 0xA1A4;
return 2;
}
summary = &gb2312_uni2indx_page00[(wc>>4)];
} else if (wc >= 0x2000 && wc < 0x2650) {
if (wc == 0x2014) { /* ZINT: Patched to duplicate map to 0xA1AA */
if (n < 2) {
return -1;
}
r[0] = 0xA1; r[1] = 0xAA;
*r = 0xA1AA;
return 2;
}
summary = &gb2312_uni2indx_page20[(wc>>4)-0x200];
@ -1532,10 +1526,6 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
unsigned short used = summary->used;
unsigned int i = wc & 0x0f;
if (used & ((unsigned short) 1 << i)) {
unsigned short c;
if (n < 2) {
return -1;
}
/* Keep in 'used' only the bits 0..i-1. */
used &= ((unsigned short) 1 << i) - 1;
/* Add 'summary->indx' and the number of bits set in 'used'. */
@ -1543,8 +1533,7 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
used = (used & 0x3333) + ((used & 0xcccc) >> 2);
used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);
used = (used & 0x00ff) + (used >> 8);
c = gb2312_2charset[summary->indx + used];
r[0] = (c >> 8); r[1] = (c & 0xff);
*r = gb2312_2charset[summary->indx + used];
return 2;
}
}
@ -1553,16 +1542,15 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
/* Convert UTF-8 string to GB 2312 (EUC-CN) and place in array of ints */
int gb2312_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* gbdata) {
int i, error_number, ret;
int i, error_number;
unsigned int length;
unsigned char buf[2];
#ifndef _MSC_VER
int utfdata[*p_length + 1]; /* Leave signed for the moment until `utf8toutf16()` signature changed */
unsigned int utfdata[*p_length + 1];
#else
int* utfdata = (int*) _alloca((*p_length + 1) * sizeof(int));
unsigned int* utfdata = (unsigned int*) _alloca((*p_length + 1) * sizeof(unsigned int));
#endif
error_number = utf8toutf16(symbol, source, utfdata, p_length);
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
if (error_number != 0) {
return error_number;
}
@ -1571,12 +1559,10 @@ int gb2312_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], si
if (utfdata[i] < 0x80) {
gbdata[i] = utfdata[i];
} else {
ret = gb2312_wctomb_zint(buf, utfdata[i], 2);
if (ret != 2) {
if (!gb2312_wctomb_zint(gbdata + i, utfdata[i])) {
strcpy(symbol->errtxt, "810: Invalid character in input data");
return ZINT_ERROR_INVALID_DATA;
}
gbdata[i] = (buf[0] << 8) | buf[1];
}
}

View File

@ -37,7 +37,7 @@
extern "C" {
#endif /* __cplusplus */
int gb2312_wctomb_zint(unsigned char* r, unsigned int wc, size_t n);
int gb2312_wctomb_zint(unsigned int* r, unsigned int wc);
int gb2312_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* gbdata);
int gb2312_utf8tosb(int eci, const unsigned char source[], size_t* p_length, unsigned int* gbdata);
void gb2312_cpy(const unsigned char source[], size_t* p_length, unsigned int* gbdata);

View File

@ -30,7 +30,7 @@
*/
/* vim: set ts=4 sw=4 et : */
/* This file impliments Grid Matrix as specified in
/* This file implements Grid Matrix as specified in
AIM Global Document Number AIMD014 Rev. 1.63 Revised 9 Dec 2008 */
#include <stdio.h>
@ -44,13 +44,15 @@
#include "gridmtx.h"
#include "gb2312.h"
/* define_mode() stuff */
/* Bits multiplied by this for costs, so as to be whole integer divisible by 2 and 3 */
#define GM_MULT 6
static char numeral_nondigits[] = " +-.,"; /* Non-digit numeral set, excluding EOL (carriage return/linefeed) */
/* Whether in numeral or not. If in numeral, *p_numeral_end is set to position after numeral, and *p_numeral_cost is set to per-numeral cost */
static int numeral_lat(unsigned int gbdata[], const size_t length, const int posn, int* p_numeral_end, int* p_numeral_cost) {
static int in_numeral(const unsigned int gbdata[], const size_t length, const int posn, unsigned int* p_numeral_end, unsigned int* p_numeral_cost) {
int i, nondigit, nondigit_posn, digit_cnt;
if (posn < *p_numeral_end) {
@ -82,7 +84,7 @@ static int numeral_lat(unsigned int gbdata[], const size_t length, const int pos
}
}
if (digit_cnt == 0) { /* Must have at least one digit */
*p_numeral_end = -1;
*p_numeral_end = 0;
return 0;
}
if (nondigit && nondigit_posn == i - 1) { /* Non-digit can't be at end */
@ -119,30 +121,18 @@ static int numeral_lat(unsigned int gbdata[], const size_t length, const int pos
#define GM_NUM_MODES 6
/* Calculate optimized encoding modes. Adapted from Project Nayuki */
/*
* Copyright (c) Project Nayuki. (MIT License)
* https://www.nayuki.io/page/qr-code-generator-library
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
* - The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*/
static void define_mode(unsigned int gbdata[], const size_t length, char* mode, int debug) {
static char mode_types[] = { GM_CHINESE, GM_NUMBER, GM_LOWER, GM_UPPER, GM_MIXED, GM_BYTE }; /* Must be in same order as GM_H etc */
/* Initial mode costs */
static unsigned int head_costs[GM_NUM_MODES] = {
/* H N (+pad prefix) L U M B (+byte count) */
/* Initial mode costs */
static unsigned int head_costs[GM_NUM_MODES] = {
/* H N (+pad prefix) L U M B (+byte count) */
4 * GM_MULT, (4 + 2) * GM_MULT, 4 * GM_MULT, 4 * GM_MULT, 4 * GM_MULT, (4 + 9) * GM_MULT
};
};
/* Costs of switching modes - see AIMD014 Rev. 1.63 Table 9 Type conversion codes */
/* Initial-cost callback handed to `pn_define_mode()`: returns the static `head_costs` table
 * (one entry per mode). The `state` argument is not used */
static unsigned int* gm_head_costs(unsigned int state[]) {
return head_costs;
}
/* Cost of switching modes - see AIMD014 Rev. 1.63 Table 9 Type conversion codes */
static unsigned int gm_switch_cost(unsigned int state[], const int j, const int k) {
static unsigned int switch_costs[GM_NUM_MODES][GM_NUM_MODES] = {
/* H N L U M B */
/*H*/ { 0, (13 + 2) * GM_MULT, 13 * GM_MULT, 13 * GM_MULT, 13 * GM_MULT, (13 + 9) * GM_MULT },
@ -153,36 +143,26 @@ static void define_mode(unsigned int gbdata[], const size_t length, char* mode,
/*B*/ { 4 * GM_MULT, (4 + 2) * GM_MULT, 4 * GM_MULT, 4 * GM_MULT, 4 * GM_MULT, 0 },
};
/* Final end-of-data costs - see AIMD014 Rev. 1.63 Table 9 Type conversion codes */
return switch_costs[j][k];
}
/* Final end-of-data cost - see AIMD014 Rev. 1.63 Table 9 Type conversion codes */
static unsigned int gm_eod_cost(unsigned int state[], const int k) {
static unsigned int eod_costs[GM_NUM_MODES] = {
/* H N L U M B */
13 * GM_MULT, 10 * GM_MULT, 5 * GM_MULT, 5 * GM_MULT, 10 * GM_MULT, 4 * GM_MULT
};
unsigned int prev_costs[GM_NUM_MODES];
int i, j, k;
int byte_count = 0;
int numeral_end = -1, numeral_cost;
int cur_mode_index;
unsigned int min_cost;
return eod_costs[k];
}
/* char_modes[i][j] represents the mode to encode the code point at index i such that the final segment ends in mode_types[j] and the
* total number of bits is minimized over all possible choices */
#ifndef _MSC_VER
char char_modes[length][GM_NUM_MODES];
#else
char** char_modes = (char**) _alloca(length * GM_NUM_MODES);
#endif
memset(char_modes, 0, length * GM_NUM_MODES);
/* At the beginning of each iteration of the loop below, prev_costs[j] is the minimum number of 1/6 (1/GM_MULT) bits needed
* to encode the entire string prefix of length i, and end in mode_types[j] */
memcpy(prev_costs, head_costs, sizeof(head_costs));
/* Calculate costs using dynamic programming */
for (i = 0; i < length; i++) {
/* Calculate cost of encoding current character */
static void gm_cur_cost(unsigned int state[], const unsigned int gbdata[], const size_t length, const int i, char* char_modes, unsigned int prev_costs[], unsigned int cur_costs[]) {
int cm_row = i * GM_NUM_MODES;
int double_byte, space, numeric, lower, upper, control, double_digit, eol;
unsigned int cur_costs[GM_NUM_MODES] = { 0, 0, 0, 0, 0, 0 };
unsigned int* p_numeral_end = &state[0];
unsigned int* p_numeral_cost = &state[1];
unsigned int* p_byte_count = &state[2];
double_byte = gbdata[i] > 0xFF;
space = gbdata[i] == ' ';
@ -195,92 +175,55 @@ static void define_mode(unsigned int gbdata[], const size_t length, char* mode,
/* Hanzi mode can encode anything */
cur_costs[GM_H] = prev_costs[GM_H] + (double_digit || eol ? 39 : 78); /* (6.5 : 13) * GM_MULT */
char_modes[i][GM_H] = 'H';
char_modes[cm_row + GM_H] = GM_CHINESE;
/* Byte mode can encode anything */
if (byte_count == 512 || (double_byte && byte_count == 511)) {
if (*p_byte_count == 512 || (double_byte && *p_byte_count == 511)) {
cur_costs[GM_B] = head_costs[GM_B];
if (double_byte && byte_count == 511) {
if (double_byte && *p_byte_count == 511) {
cur_costs[GM_B] += 48; /* 8 * GM_MULT */
double_byte = 0; /* Splitting double-byte so mark as single */
}
byte_count = 0;
*p_byte_count = 0;
}
cur_costs[GM_B] += prev_costs[GM_B] + (double_byte ? 96 : 48); /* (16 : 8) * GM_MULT */
char_modes[i][GM_B] = 'B';
byte_count += double_byte ? 2 : 1;
char_modes[cm_row + GM_B] = GM_BYTE;
*p_byte_count += double_byte ? 2 : 1;
if (numeral_lat(gbdata, length, i, &numeral_end, &numeral_cost)) {
cur_costs[GM_N] = prev_costs[GM_N] + numeral_cost;
char_modes[i][GM_N] = 'N';
if (in_numeral(gbdata, length, i, p_numeral_end, p_numeral_cost)) {
cur_costs[GM_N] = prev_costs[GM_N] + *p_numeral_cost;
char_modes[cm_row + GM_N] = GM_NUMBER;
}
if (control) {
cur_costs[GM_L] = prev_costs[GM_L] + 78; /* (7 + 6) * GM_MULT */
char_modes[i][GM_L] = 'L';
char_modes[cm_row + GM_L] = GM_LOWER;
cur_costs[GM_U] = prev_costs[GM_U] + 78; /* (7 + 6) * GM_MULT */
char_modes[i][GM_U] = 'U';
char_modes[cm_row + GM_U] = GM_UPPER;
cur_costs[GM_M] = prev_costs[GM_M] + 96; /* (10 + 6) * GM_MULT */
char_modes[i][GM_M] = 'M';
char_modes[cm_row + GM_M] = GM_MIXED;
} else {
if (lower || space) {
cur_costs[GM_L] = prev_costs[GM_L] + 30; /* 5 * GM_MULT */
char_modes[i][GM_L] = 'L';
char_modes[cm_row + GM_L] = GM_LOWER;
}
if (upper || space) {
cur_costs[GM_U] = prev_costs[GM_U] + 30; /* 5 * GM_MULT */
char_modes[i][GM_U] = 'U';
char_modes[cm_row + GM_U] = GM_UPPER;
}
if (numeric || lower || upper || space) {
cur_costs[GM_M] = prev_costs[GM_M] + 36; /* 6 * GM_MULT */
char_modes[i][GM_M] = 'M';
char_modes[cm_row + GM_M] = GM_MIXED;
}
}
}
if (i == length - 1) { /* Add end of data costs if last character */
for (j = 0; j < GM_NUM_MODES; j++) {
if (char_modes[i][j]) {
cur_costs[j] += eod_costs[j];
}
}
}
/* Calculate optimized encoding modes */
static void define_mode(char* mode, const unsigned int gbdata[], const size_t length, const int debug) {
static char mode_types[] = { GM_CHINESE, GM_NUMBER, GM_LOWER, GM_UPPER, GM_MIXED, GM_BYTE }; /* Must be in same order as GM_H etc */
unsigned int state[3] = { 0 /*numeral_end*/, 0 /*numeral_cost*/, 0 /*byte_count*/ };
/* Start new segment at the end to switch modes */
for (j = 0; j < GM_NUM_MODES; j++) { /* To mode */
for (k = 0; k < GM_NUM_MODES; k++) { /* From mode */
if (j != k && char_modes[i][k]) {
unsigned int new_cost = cur_costs[k] + switch_costs[k][j];
if (!char_modes[i][j] || new_cost < cur_costs[j]) {
cur_costs[j] = new_cost;
char_modes[i][j] = mode_types[k];
}
}
}
}
memcpy(prev_costs, cur_costs, sizeof(cur_costs));
}
/* Find optimal ending mode */
cur_mode_index = 0;
min_cost = prev_costs[0];
for (i = 1; i < GM_NUM_MODES; i++) {
if (prev_costs[i] < min_cost) {
min_cost = prev_costs[i];
cur_mode_index = i;
}
}
/* Get optimal mode for each code point by tracing backwards */
for (i = length - 1; i >= 0; i--) {
char cur_mode = char_modes[i][cur_mode_index];
cur_mode_index = strchr(mode_types, cur_mode) - mode_types;
mode[i] = cur_mode;
}
if (debug & ZINT_DEBUG_PRINT) {
printf(" Mode: %.*s\n", (int)length, mode);
}
pn_define_mode(mode, gbdata, length, debug, state, mode_types, GM_NUM_MODES, gm_head_costs, gm_switch_cost, gm_eod_cost, gm_cur_cost);
}
/* Add the length indicator for byte encoded blocks */
@ -352,7 +295,7 @@ static int gm_encode(unsigned int gbdata[], const size_t length, char binary[],
}
}
define_mode(gbdata, length, mode, debug);
define_mode(mode, gbdata, length, debug);
do {
int next_mode = mode[sp];
@ -774,14 +717,11 @@ static int gm_encode(unsigned int gbdata[], const size_t length, char binary[],
}
static void gm_test_codeword_dump(struct zint_symbol *symbol, int* codewords, int length) {
int i, max, cnt_len;
if (length >= 33) {
int i, max = length, cnt_len = 0;
if (length > 33) {
sprintf(symbol->errtxt, "(%d) ", length); /* Place the number of codewords at the front */
cnt_len = strlen(symbol->errtxt);
max = 33 - (cnt_len + 2) / 3;
} else {
max = length > 33 ? 33 : length;
cnt_len = 0;
}
for (i = 0; i < max; i++) { /* 33*3 < errtxt 100 chars */
sprintf(symbol->errtxt + cnt_len + i * 3, "%02X ", codewords[i]);

File diff suppressed because it is too large Load Diff

View File

@ -51,6 +51,8 @@
#include <stdio.h> // only needed for debug (main)
#ifdef _MSC_VER
#include <malloc.h>
#else
#include <stdlib.h>
#endif
#include "reedsol.h"
static int logmod; // 2**symsize - 1

View File

@ -62,32 +62,20 @@ extern int utf_to_eci(const int eci, const unsigned char source[], unsigned char
* JISX0201.1976-0 (libiconv-1.16/lib/jisx0201.h)
*/
static int jisx0201_wctomb(unsigned char* r, unsigned int wc, size_t n) {
static int jisx0201_wctomb(unsigned int* r, unsigned int wc) {
if (wc < 0x0080 && !(wc == 0x005c || wc == 0x007e)) {
if (n < 1) {
return -1;
}
*r = wc;
return 1;
}
if (wc == 0x00a5) {
if (n < 1) {
return -1;
}
*r = 0x5c;
return 1;
}
if (wc == 0x203e) {
if (n < 1) {
return -1;
}
*r = 0x7e;
return 1;
}
if (wc >= 0xff61 && wc < 0xffa0) {
if (n < 1) {
return -1;
}
*r = wc - 0xfec0;
return 1;
}
@ -1453,7 +1441,7 @@ static const Summary16 jisx0208_uni2indx_pageff[15] = {
{ 6877, 0x0000 }, { 6877, 0x0000 }, { 6877, 0x0028 },
};
static int jisx0208_wctomb(unsigned char* r, unsigned int wc, size_t n) {
static int jisx0208_wctomb(unsigned int* r, unsigned int wc) {
const Summary16 *summary = NULL;
if (wc >= 0x0000 && wc < 0x0100) {
summary = &jisx0208_uni2indx_page00[(wc>>4)];
@ -1474,10 +1462,6 @@ static int jisx0208_wctomb(unsigned char* r, unsigned int wc, size_t n) {
unsigned short used = summary->used;
unsigned int i = wc & 0x0f;
if (used & ((unsigned short) 1 << i)) {
unsigned short c;
if (n < 2) {
return -1;
}
/* Keep in 'used' only the bits 0..i-1. */
used &= ((unsigned short) 1 << i) - 1;
/* Add 'summary->indx' and the number of bits set in 'used'. */
@ -1485,8 +1469,7 @@ static int jisx0208_wctomb(unsigned char* r, unsigned int wc, size_t n) {
used = (used & 0x3333) + ((used & 0xcccc) >> 2);
used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);
used = (used & 0x00ff) + (used >> 8);
c = jisx0208_2charset[summary->indx + used];
r[0] = (c >> 8); r[1] = (c & 0xff);
*r = jisx0208_2charset[summary->indx + used];
return 2;
}
}
@ -1497,19 +1480,19 @@ static int jisx0208_wctomb(unsigned char* r, unsigned int wc, size_t n) {
* SHIFT_JIS (libiconv-1.16/lib/sjis.h)
*/
/* Returns 1 or 2 on success, 0 if no mapping, -1 if buffer too small */
int sjis_wctomb_zint(unsigned char* r, unsigned int wc, size_t n) {
/* Returns 1 or 2 on success, 0 if no mapping */
int sjis_wctomb_zint(unsigned int* r, unsigned int wc) {
int ret;
/* Try JIS X 0201-1976. */
ret = jisx0201_wctomb(r, wc, n);
ret = jisx0201_wctomb(r, wc);
if (ret) {
return ret;
}
/* Try JIS X 0208-1990. */
/* ZINT: Note leaving mapping of full-width reverse solidus U+FF3C to 0x815F (duplicate of patched U+005C) to avoid having to regen tables */
ret = jisx0208_wctomb(r, wc, n);
ret = jisx0208_wctomb(r, wc);
if (ret) {
return ret;
}
@ -1519,13 +1502,9 @@ int sjis_wctomb_zint(unsigned char* r, unsigned int wc, size_t n) {
/* ZINT: https://file.allitebooks.com/20160708/CJKV%20Information%20Processing.pdf (table 4-86, p. 286, 2nd ed.) */
if (wc >= 0xe000 && wc < 0xe758) {
unsigned char c1, c2;
if (n < 2) {
return -1;
}
c1 = (unsigned int) (wc - 0xe000) / 188;
c2 = (unsigned int) (wc - 0xe000) % 188;
r[0] = c1 + 0xf0;
r[1] = c2 < 0x3f ? c2 + 0x40 : c2 + 0x41;
*r = ((c1 + 0xf0) << 8) | (c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
return 2;
}
@ -1534,31 +1513,24 @@ int sjis_wctomb_zint(unsigned char* r, unsigned int wc, size_t n) {
/* Convert UTF-8 string to Shift JIS and place in array of ints */
int sjis_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* jisdata) {
int i, error_number, ret;
int i, error_number;
unsigned int length;
unsigned char buf[2];
#ifndef _MSC_VER
int utfdata[*p_length + 1]; /* Leave signed for the moment until `utf8toutf16()` signature changed */
unsigned int utfdata[*p_length + 1];
#else
int* utfdata = (int*) _alloca((*p_length + 1) * sizeof(int));
unsigned int* utfdata = (unsigned int*) _alloca((*p_length + 1) * sizeof(unsigned int));
#endif
error_number = utf8toutf16(symbol, source, utfdata, p_length);
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
if (error_number != 0) {
return error_number;
}
for (i = 0, length = *p_length; i < length; i++) {
ret = sjis_wctomb_zint(buf, utfdata[i], 2);
if (ret <= 0) {
if (!sjis_wctomb_zint(jisdata + i, utfdata[i])) {
strcpy(symbol->errtxt, "800: Invalid character in input data");
return ZINT_ERROR_INVALID_DATA;
}
if (ret == 1) {
jisdata[i] = buf[0];
} else {
jisdata[i] = (buf[0] << 8) | buf[1];
}
}
return 0;

View File

@ -37,7 +37,7 @@
extern "C" {
#endif /* __cplusplus */
int sjis_wctomb_zint(unsigned char* r, unsigned int wc, size_t n);
int sjis_wctomb_zint(unsigned int* r, unsigned int wc);
int sjis_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* jisdata);
int sjis_utf8tosb(int eci, const unsigned char source[], size_t* p_length, unsigned int* jisdata);
void sjis_cpy(const unsigned char source[], size_t* p_length, unsigned int* jisdata);

View File

@ -51,9 +51,11 @@ zint_add_test(common, test_common)
zint_add_test(composite, test_composite)
zint_add_test(dmatrix, test_dmatrix)
zint_add_test(eci, test_eci)
zint_add_test(gb18030, test_gb18030)
zint_add_test(gb2312, test_gb2312)
zint_add_test(gridmtx, test_gridmtx)
zint_add_test(gs1, test_gs1)
zint_add_test(hanxin, test_hanxin)
zint_add_test(imail, test_imail)
zint_add_test(library, test_library)
zint_add_test(mailmark, test_mailmark)

View File

@ -31,7 +31,7 @@
#include "testcommon.h"
static void test_utf8toutf16(void)
static void test_utf8_to_unicode(void)
{
testStart("");
@ -39,6 +39,7 @@ static void test_utf8toutf16(void)
struct item {
unsigned char* data;
int length;
int disallow_4byte;
int ret;
size_t ret_length;
int expected_vals[20];
@ -46,11 +47,11 @@ static void test_utf8toutf16(void)
};
// s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
struct item data[] = {
/* 0*/ { "", -1, 0, 0, {}, "" },
/* 1*/ { "\000a\302\200\340\240\200", 7, 0, 4, { 0, 'a', 0x80, 0x800 }, "NUL a C280 E0A080" },
/* 2*/ { "\357\277\277", -1, 0, 1, { 0xFFFF }, "EFBFBF" },
/* 3*/ { "\360\220\200\200", -1, ZINT_ERROR_INVALID_DATA, -1, {}, "Four-byte F0908080" },
/* 4*/ { "a\200b", -1, ZINT_ERROR_INVALID_DATA, -1, {}, "Orphan continuation 0x80" },
/* 0*/ { "", -1, 1, 0, 0, {}, "" },
/* 1*/ { "\000a\302\200\340\240\200", 7, 1, 0, 4, { 0, 'a', 0x80, 0x800 }, "NUL a C280 E0A080" },
/* 2*/ { "\357\277\277", -1, 1, 0, 1, { 0xFFFF }, "EFBFBF" },
/* 3*/ { "\360\220\200\200", -1, 1, ZINT_ERROR_INVALID_DATA, -1, {}, "Four-byte F0908080" },
/* 4*/ { "a\200b", -1, 1, ZINT_ERROR_INVALID_DATA, -1, {}, "Orphan continuation 0x80" },
};
int data_size = sizeof(data) / sizeof(struct item);
@ -62,7 +63,7 @@ static void test_utf8toutf16(void)
int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
size_t ret_length = length;
ret = utf8toutf16(&symbol, data[i].data, vals, &ret_length);
ret = utf8_to_unicode(&symbol, data[i].data, vals, &ret_length, data[i].disallow_4byte);
assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret);
if (ret == 0) {
assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %ld != %ld\n", i, ret_length, data[i].ret_length);
@ -77,7 +78,7 @@ static void test_utf8toutf16(void)
int main()
{
test_utf8toutf16();
test_utf8_to_unicode();
testReport();

View File

@ -0,0 +1,317 @@
/*
libzint - the open source barcode library
Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
/* vim: set ts=4 sw=4 et : */
#include "testcommon.h"
#include "test_gb18030_tab.h"
#include "../gb18030.h"
// As control convert to GB 18030 using table generated from GB18030.TXT plus simple processing.
// The version of GB18030.TXT is libiconv-1.11/GB18030.TXT taken from https://haible.de/bruno/charsets/conversion-tables/GB18030.html
// The generated file backend/tests/test_gb18030_tab.h does not include U+10000..10FFFF codepoints to save space.
// See also backend/tests/tools/data/GB18030.TXT.README and backend/tests/tools/gen_test_tab.php.
// Control conversion of Unicode codepoint `wc` to GB 18030.
// Returns the number of bytes of the encoding (1, 2 or 4), or 0 if `wc` is not found.
// For 1 and 2 byte results the value is placed in `*r1` (`*r2` is left untouched);
// for 4 byte results the high 2 bytes go in `*r1` and the low 2 bytes in `*r2`.
static int gb18030_wctomb_zint2(unsigned int* r1, unsigned int* r2, unsigned int wc)
{
    // GB18030 two-byte extension (libiconv-1.16/lib/gb18030ext.h)
    // GB 18030-2005 change, were PUA, see Table 3-37, p.108, Lunde 2nd ed. Indexed by `wc - 0x9FB4`
    static const unsigned int cjk_two_byte[8] = {
        0xFE59, 0xFE61, 0xFE66, 0xFE67, 0xFE6D, 0xFE7E, 0xFE90, 0xFEA0
    };
    // GB18030 two-byte extension (libiconv-1.16/lib/gb18030ext.h)
    // GB 18030-2005 change, were PUA, see Table 3-37, p.108, Lunde 2nd ed. Indexed by `wc - 0xFE10`
    static const unsigned int vertical_two_byte[10] = {
        0xA6D9, 0xA6DB, 0xA6DA, 0xA6DC, 0xA6DD, 0xA6DE, 0xA6DF, 0xA6EC, 0xA6ED, 0xA6F3
    };
    unsigned int packed;
    int pos, num_entries;

    switch (wc) {
        case 0x1E3F:
            // GB18030 two-byte extension (libiconv-1.16/lib/gb18030ext.h)
            // GB 18030-2005 change, was PUA U+E7C7 below, see Table 3-39, p.111, Lunde 2nd ed.
            *r1 = 0xA8BC;
            return 2;
        case 0xE7C7:
            // GB18030 four-byte extension (libiconv-1.16/lib/gb18030uni.h), PUA
            *r1 = 0x8135;
            *r2 = 0xF437;
            return 4;
        default:
            break;
    }

    if (wc >= 0x9FB4 && wc <= 0x9FBB) {
        *r1 = cjk_two_byte[wc - 0x9FB4];
        return 2;
    }

    if (wc >= 0xFE10 && wc <= 0xFE19) {
        *r1 = vertical_two_byte[wc - 0xFE10];
        return 2;
    }

    // GB18030 four-byte extension (libiconv-1.16/lib/gb18030uni.h)
    // These are Vertical Forms (U+FE1A..FE1F unassigned) and Combining Half Marks (U+FE20..FE2F)
    if (wc >= 0xFE1A && wc <= 0xFE2F) {
        if (wc <= 0xFE1D) {
            packed = 0x84318336 + (wc - 0xFE1A);
        } else if (wc <= 0xFE27) {
            packed = 0x84318430 + (wc - 0xFE1E);
        } else {
            packed = 0x84318530 + (wc - 0xFE28);
        }
        *r1 = packed >> 16;
        *r2 = packed & 0xFFFF;
        return 4;
    }

    // GB18030 (libiconv-1.16/lib/gb18030.h)
    // Code set 3 (Unicode U+10000..U+10FFFF), only the first 0x400 handled here
    if (wc >= 0x10000 && wc < 0x10400) {
        const unsigned int offset = wc - 0x10000;
        *r1 = 0x9030;
        *r2 = 0x8130 + (offset % 10) + 0x100 * (offset / 10);
        return 4;
    }

    // Everything else: scan the generated table of (GB 18030, Unicode) pairs, starting at the
    // index given for the codepoint's top bits
    num_entries = sizeof(test_gb18030_tab) / sizeof(unsigned int);
    for (pos = test_gb18030_tab_ind[wc >> 12]; pos < num_entries; pos += 2) {
        if (test_gb18030_tab[pos + 1] != wc) {
            continue;
        }
        packed = test_gb18030_tab[pos];
        if (packed > 0xFFFF) {
            *r1 = packed >> 16;
            *r2 = packed & 0xFFFF;
            return 4;
        }
        *r1 = packed;
        return packed <= 0xFF ? 1 : 2;
    }

    return 0;
}
/*
 * Cross-checks `gb18030_wctomb_zint()` against the table-driven control `gb18030_wctomb_zint2()` for every
 * codepoint U+0000..U+103FF except the UTF-16 surrogates: the byte counts must agree, and when the control
 * finds a mapping both output values must agree too.
 */
static void test_gb18030_wctomb_zint(void)
{
    testStart("");

    for (unsigned int i = 0; i < 0x10400; i++) { // Don't bother with U+10400..U+10FFFF, programmatically filled
        if (i < 0xD800 || i > 0xDFFF) { // Skip UTF-16 surrogates
            // Outputs of the function under test (val1_*) and of the control (val2_*), zeroed per codepoint
            unsigned int val1_1 = 0, val1_2 = 0;
            unsigned int val2_1 = 0, val2_2 = 0;

            int ret = gb18030_wctomb_zint(&val1_1, &val1_2, i);
            int ret2 = gb18030_wctomb_zint2(&val2_1, &val2_2, i);

            assert_equal(ret, ret2, "i:%d 0x%04X ret %d != ret2 %d, val1_1 0x%04X, val2_1 0x%04X, val1_2 0x%04X, val2_2 0x%04X\n", i, i, ret, ret2, val1_1, val2_1, val1_2, val2_2);
            if (ret2 != 0) {
                assert_equal(val1_1, val2_1, "i:%d 0x%04X val1_1 0x%04X != val2_1 0x%04X\n", i, i, val1_1, val2_1);
                assert_equal(val1_2, val2_2, "i:%d 0x%04X val1_2 0x%04X != val2_2 0x%04X\n", i, i, val1_2, val2_2);
            }
        }
    }

    testFinish();
}
static void test_gb18030_utf8tomb(void)
{
testStart("");
int ret;
struct item {
unsigned char* data;
int length;
int ret;
size_t ret_length;
unsigned int expected_gbdata[20];
char* comment;
};
// é U+00E9 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 18030 0xA8A6, UTF-8 C3A9
// β U+03B2 in ISO 8859-7 Greek (but not other ISO 8859 or Win page), in GB 18030 0xA6C2, UTF-8 CEB2
// ¤ U+00A4 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 18030 0xA1E8, UTF-8 C2A4
// ¥ U+00A5 in ISO 8859-1 0xA5, in GB 18030 4-byte 0x81308436, UTF-8 C2A5
// ・ U+30FB katakana middle dot, not in any ISO or Win page, in GB 18030 0xA1A4, duplicate of mapping of U+00B7, UTF-8 E383BB
// · U+00B7 middle dot in ISO 8859-1 0xB7, in GB 18030 "GB 18030 subset" 0xA1A4, duplicate of mapping of U+30FB, UTF-8 C2B7
// ― U+2015 horizontal bar in ISO 8859-7 Greek and ISO 8859-10 Nordic, not in any Win page, in GB 18030 "GB18030.TXT" 0xA1AA, duplicate of mapping of U+2014, UTF-8 E28095
// — U+2014 em dash, not in any ISO, in Win 1250 and other Win, in GB 18030 "GB 18030 subset" 0xA1AA, duplicate of mapping of U+2015, UTF-8 E28094
// s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
struct item data[] = {
/* 0*/ { "é", -1, 0, 1, { 0xA8A6 }, "" },
/* 1*/ { "β", -1, 0, 1, { 0xA6C2 }, "" },
/* 2*/ { "¤", -1, 0, 1, { 0xA1E8 }, "" },
/* 3*/ { "¥", -1, 0, 2, { 0x8130, 0x8436 }, "0x81308436" },
/* 4*/ { "", -1, 0, 2, { 0x8139, 0xA739 }, "" },
/* 5*/ { "·", -1, 0, 1, { 0xA1A4 }, "GB 18030 subset mapping" },
/* 6*/ { "", -1, 0, 1, { 0xA844 }, "GB18030.TXT mapping" },
/* 7*/ { "", -1, 0, 1, { 0xA1AA }, "GB 18030 subset mapping" },
/* 8*/ { "aβc・·—é—Z", -1, 0, 10, { 'a', 0xA6C2, 'c', 0x8139, 0xA739, 0xA1A4, 0xA1AA, 0xA8A6, 0xA1AA, 'Z' }, "" },
};
int data_size = sizeof(data) / sizeof(struct item);
struct zint_symbol symbol;
unsigned int gbdata[20];
for (int i = 0; i < data_size; i++) {
int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
size_t ret_length = length;
ret = gb18030_utf8tomb(&symbol, data[i].data, &ret_length, gbdata);
assert_equal(ret, data[i].ret, "i:%d ret %d != %d (%s)\n", i, ret, data[i].ret, symbol.errtxt);
if (ret == 0) {
assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
for (int j = 0; j < ret_length; j++) {
assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] 0x%04X != 0x%04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
}
}
}
testFinish();
}
/*
 * Checks `gb18030_utf8tosb()`: each UTF-8 input is converted to single bytes using the given ECI, and the
 * resulting values are compared with the expected ones. The expectations show byte sequences that fall in the
 * GB 18030 double-byte or quad-byte ranges being packed into 16-bit values (two per quad), and all other bytes
 * left as single values.
 */
static void test_gb18030_utf8tosb(void)
{
    testStart("");
    int ret;
    struct item {
        int eci;
        unsigned char* data;
        int length;
        int ret;
        size_t ret_length;
        unsigned int expected_gbdata[20];
        char* comment;
    };
    // é U+00E9 in ISO 8859-1 0xE9, Win 1250 plus other Win, in HANXIN Chinese mode first byte range 0x81..FE
    // β U+03B2 in ISO 8859-7 Greek 0xE2 (but not other ISO 8859 or Win page)
    // ¥ U+00A5 in ISO 8859-1 0xA5, in first byte range 0x81..FE
    // ÿ U+00FF in ISO 8859-1 0xFF, outside first byte and second/third/fourth byte ranges
    // @ U+0040 in ASCII 0x40, outside first byte range, in double-byte second byte range, outside quad-byte second/third/fourth byte ranges
    // 9 U+0039 in ASCII 0x39, outside first byte range, outside double-byte second byte range and quad-byte third byte range, in quad-byte second/fourth byte ranges
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
        /*  0*/ { 3, "é", -1, 0, 1, { 0xE9 }, "First byte in range but only one byte" },
        // `{0}` not `{}`: an empty initializer list is not valid ISO C before C23
        /*  1*/ { 3, "β", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "Not in ECI 3 (ISO 8859-1)" },
        /*  2*/ { 9, "β", -1, 0, 1, { 0xE2 }, "In ECI 9 (ISO 8859-7)" },
        /*  3*/ { 3, "¥", -1, 0, 1, { 0xA5 }, "First byte in range but only one byte" },
        /*  4*/ { 3, "¥é", -1, 0, 1, { 0xA5E9 }, "In double-byte range" },
        /*  5*/ { 3, "¥ÿ", -1, 0, 2, { 0xA5, 0xFF }, "First byte in range but not second" },
        /*  6*/ { 3, "¥9é9", -1, 0, 2, { 0xA539, 0xE939 }, "In quad-byte range" },
        /*  7*/ { 3, "¥9", -1, 0, 2, { 0xA5, 0x39 }, "In quad-byte first/second range but only 2 bytes, not in double-byte range" },
        /*  8*/ { 3, "¥9é", -1, 0, 3, { 0xA5, 0x39, 0xE9 }, "In quad-byte first/second/third range but only 3 bytes, no bytes in double-byte range" },
        /*  9*/ { 3, "¥9é@", -1, 0, 3, { 0xA5, 0x39, 0xE940 }, "In quad-byte first/second/third range but not fourth, second 2 bytes in double-byte range" },
        /* 10*/ { 3, "¥@é9", -1, 0, 3, { 0xA540, 0xE9, 0x39 }, "In quad-byte first/third/fourth range but not second, first 2 bytes in double-byte range" },
        /* 11*/ { 3, "¥9@9", -1, 0, 4, { 0xA5, 0x39, 0x40, 0x39 }, "In quad-byte first/second/fourth range but not third, no bytes in double-byte range" },
        /* 12*/ { 3, "é9éé¥9é@¥9é9¥9é0é@@¥¥é0é1", -1, 0, 15, { 0xE9, 0x39, 0xE9E9, 0xA5, 0x39, 0xE940, 0xA539, 0xE939, 0xA539, 0xE930, 0xE940, 0x40, 0xA5A5, 0xE930, 0xE931 }, "" },
    };
    int data_size = sizeof(data) / sizeof(struct item);
    unsigned int gbdata[20];
    for (int i = 0; i < data_size; i++) {
        int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
        // In: input length in bytes; out: number of values written to `gbdata`
        size_t ret_length = length;
        ret = gb18030_utf8tosb(data[i].eci, data[i].data, &ret_length, gbdata);
        assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret);
        if (ret == 0) {
            assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
            for (int j = 0; j < ret_length; j++) {
                assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] %04X != %04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
            }
        }
    }
    testFinish();
}
/*
 * Checks `gb18030_cpy()`: raw byte input is copied into an array of values, and both the resulting count and
 * each value are compared with the expected ones (the expectations show in-range byte pairs combined into
 * one 16-bit value and other bytes kept single).
 */
static void test_gb18030_cpy(void)
{
    testStart("");

    struct item {
        unsigned char* data;
        int length;
        int ret;
        size_t ret_length;
        unsigned int expected_jisdata[20];
        char* comment;
    };
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
        /*  0*/ { "\351", -1, 0, 1, { 0xE9 }, "In HANXIN Chinese mode first-byte range but only one byte" },
        /*  1*/ { "\351\241", -1, 0, 1, { 0xE9A1 }, "In HANXIN Chinese range" },
        /*  2*/ { "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" },
        /*  3*/ { "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" },
        /*  4*/ { "\241\240\241\376\367\376\367\377\2012\2013", -1, 0, 7, { 0xA1A0, 0xA1FE, 0xF7FE, 0xF7, 0xFF, 0x8132, 0x8133 }, "" },
    };
    const int item_count = sizeof(data) / sizeof(struct item);
    unsigned int out[40];

    for (int i = 0; i < item_count; i++) {
        // Length of -1 means "use strlen()"
        int length;
        if (data[i].length == -1) {
            length = strlen(data[i].data);
        } else {
            length = data[i].length;
        }

        // In: input length in bytes; out: number of values written to `out`
        size_t ret_length = length;
        gb18030_cpy(data[i].data, &ret_length, out);

        assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
        for (int j = 0; j < ret_length; j++) {
            assert_equal(out[j], data[i].expected_jisdata[j], "i:%d jisdata[%d] %04X != %04X\n", i, j, out[j], data[i].expected_jisdata[j]);
        }
    }

    testFinish();
}
int main()
{
test_gb18030_wctomb_zint();
test_gb18030_utf8tomb();
test_gb18030_utf8tosb();
test_gb18030_cpy();
testReport();
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -35,18 +35,16 @@
// As control convert to GB 2312 using simple table generated from unicode.org GB2312.TXT plus simple processing
// GB2312.TXT no longer on unicode.org site but available from https://haible.de/bruno/charsets/conversion-tables/GB2312.html
static int gb2312_wctomb_zint2(unsigned char* r, unsigned int wc, size_t n)
static int gb2312_wctomb_zint2(unsigned int* r, unsigned int wc)
{
// Shortcut
if ((wc > 0x0451 && wc < 0x2015) || (wc > 0x3229 && wc < 0x4E00) || (wc > 0x9FA0 && wc < 0xFF01) || wc > 0xFFE5) {
return 0;
}
int tab_length = sizeof(test_gb2312_tab) / sizeof(unsigned short);
for (int i = 0; i < tab_length; i += 2) {
int tab_length = sizeof(test_gb2312_tab) / sizeof(unsigned int);
for (int i = test_gb2312_tab_ind[wc >> 12]; i < tab_length; i += 2) {
if (test_gb2312_tab[i + 1] == wc) {
unsigned short c = test_gb2312_tab[i] + 0x8080; // Table in GB 2312 not EUC-CN
r[0] = (c >> 8);
r[1] = c & 0xFF;
*r = test_gb2312_tab[i] + 0x8080; // Table in GB 2312 not EUC-CN
return 2;
}
}
@ -68,11 +66,9 @@ static void test_gb2312_wctomb_zint(void)
if (i >= 0xD800 && i <= 0xDFFF) { // UTF-16 surrogates
continue;
}
buf[0] = buf[1] = buf2[0] = buf2[1] = 0;
ret = gb2312_wctomb_zint(buf, i, 2);
val = ret == 1 ? buf[0] : (buf[0] << 8) | buf[1];
ret2 = gb2312_wctomb_zint2(buf2, i, 2);
val2 = ret2 == 1 ? buf2[0] : (buf2[0] << 8) | buf2[1];
val = val2 = 0;
ret = gb2312_wctomb_zint(&val, i);
ret2 = gb2312_wctomb_zint2(&val2, i);
if (i == 0xB7) { // Extra mapping middle dot U+00B7 to 0xA1A4, duplicate of U+30FB (Katakana middle dot)
assert_equal(ret, 2, "i:%d 0x%04X ret %d != 2, val 0x%04X\n", i, i, ret, val);
assert_equal(val, 0xA1A4, "i:%d 0x%04X val 0x%04X != 0xA1A4\n", i, i, val);
@ -227,9 +223,9 @@ static void test_gb2312_cpy(void)
struct item data[] = {
/* 0*/ { "\351", -1, 0, 1, { 0xE9 }, "In GRIDMATRIX Chinese mode first-byte range but only one byte" },
/* 1*/ { "\351\241", -1, 0, 1, { 0xE9A1 }, "In GRIDMATRIX Chinese range" },
/* 0*/ { "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" },
/* 0*/ { "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" },
/* 0*/ { "\241\240\241\376\367\376\367\377", -1, 0, 6, { 0xA1, 0xA0, 0xA1FE, 0xF7FE, 0xF7, 0xFF }, "" },
/* 2*/ { "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" },
/* 3*/ { "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" },
/* 4*/ { "\241\240\241\376\367\376\367\377", -1, 0, 6, { 0xA1, 0xA0, 0xA1FE, 0xF7FE, 0xF7, 0xFF }, "" },
};
int data_size = sizeof(data) / sizeof(struct item);

View File

@ -1,5 +1,5 @@
/* Generated by gen_test_tab.php from GB2312.TXT */
static const unsigned short test_gb2312_tab[] = {
static const unsigned int test_gb2312_tab[] = {
0x2168, 0x00A4,
0x216C, 0x00A7,
0x2127, 0x00A8,
@ -7446,3 +7446,22 @@ static const unsigned short test_gb2312_tab[] = {
0x237E, 0xFFE3,
0x2324, 0xFFE5,
};
static const unsigned int test_gb2312_tab_ind[] = {
0,
298,
298,
694,
1168,
1708,
4686,
7508,
9962,
12638,
14694,
14694,
14694,
14694,
14694,
14694,
};

View File

@ -180,7 +180,7 @@ static void test_input(void)
#ifdef TEST_INPUT_GENERATE_EXPECTED
printf(" /*%3d*/ { %s, %d, \"%s\", %s, %d, \"%s\", \"%s\" },\n",
i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
ret < 5 ? symbol->eci : -1, symbol->errtxt, data[i].comment);
#else
if (ret < 5) {

409
backend/tests/test_hanxin.c Normal file
View File

@ -0,0 +1,409 @@
/*
libzint - the open source barcode library
Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
/* vim: set ts=4 sw=4 et : */
#include "testcommon.h"
// Uncomment to make the corresponding test print regenerated `data[]` rows instead of checking them.
// Both must be commented out in committed code, otherwise the test's assertions are compiled out.
//#define TEST_INPUT_GENERATE_EXPECTED 1
//#define TEST_ENCODE_GENERATE_EXPECTED 1
/*
 * Checks how Han Xin handles `option_1` (ECC level) and `option_2` (version): for each item the data is encoded
 * with the given options (-1 leaves the symbol default) and the encode return code is compared; unless
 * `ret_vector` is -1 the symbol is also rendered to a vector and its width and rows must both equal
 * `expected_size`.
 */
static void test_options(void)
{
    testStart("");

    struct item {
        unsigned char* data;
        int option_1;
        int option_2;
        int ret_encode;
        int ret_vector;
        int expected_size;
    };
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
        /*  0*/ { "12345", -1, -1, 0, 0, 23 }, // Default version 1, ECC auto-set to 4
        /*  1*/ { "12345", 1, -1, 0, 0, 23 },
        /*  2*/ { "12345", -1, 2, 0, 0, 25 },
        /*  3*/ { "12345", -1, 85, 0, 0, 23 }, // Version > max version 85 so ignored
        /*  4*/ { "12345", -1, 84, 0, 0, 189 },
        /*  5*/ { "1234567890123456789012345678901234567890123", 1, 1, ZINT_ERROR_TOO_LONG, -1, -1 },
        /*  6*/ { "1234567890123456", 4, 1, ZINT_ERROR_TOO_LONG, -1, -1 },
        /*  7*/ { "12345678901234567", 4, 2, 0, 0, 25 },
        /*  8*/ { "12345678901234567", 4, -1, 0, 0, 25 }, // Version auto-set to 2
        /*  9*/ { "12345678901234567", -1, -1, 0, 0, 23 }, // Version auto-set to 1, ECC auto-set to 3
        /* 10*/ { "12345678901234567", 5, -1, 0, 0, 23 }, // ECC > max ECC 4 so ignored and auto-settings version 1, ECC 3 used
        /* 11*/ { "1234567890123456789012345678901234567890123", -1, -1, 0, 0, 25 }, // Version auto-set to 2, ECC auto-set to 2
    };
    const int item_count = sizeof(data) / sizeof(struct item);

    for (int i = 0; i < item_count; i++) {
        const struct item* cur = &data[i];

        struct zint_symbol* symbol = ZBarcode_Create();
        assert_nonnull(symbol, "Symbol not created\n");

        symbol->symbology = BARCODE_HANXIN;
        // -1 means "don't set", i.e. keep whatever ZBarcode_Create() initialised
        if (cur->option_1 != -1) {
            symbol->option_1 = cur->option_1;
        }
        if (cur->option_2 != -1) {
            symbol->option_2 = cur->option_2;
        }

        int length = strlen(cur->data);
        int ret = ZBarcode_Encode(symbol, cur->data, length);
        assert_equal(ret, cur->ret_encode, "i:%d ZBarcode_Encode ret %d != %d (%s)\n", i, ret, cur->ret_encode, symbol->errtxt);

        // Only render when a vector result is expected (items expecting an encode error use -1)
        if (cur->ret_vector != -1) {
            ret = ZBarcode_Buffer_Vector(symbol, 0);
            assert_equal(ret, cur->ret_vector, "i:%d ZBarcode_Buffer_Vector ret %d != %d\n", i, ret, cur->ret_vector);
            assert_equal(symbol->width, cur->expected_size, "i:%d symbol->width %d != %d\n", i, symbol->width, cur->expected_size);
            assert_equal(symbol->rows, cur->expected_size, "i:%d symbol->rows %d != %d\n", i, symbol->rows, cur->expected_size);
        }

        ZBarcode_Delete(symbol);
    }

    testFinish();
}
/*
 * Checks Han Xin input handling: each item is encoded with the given input mode and ECI, with
 * `ZINT_DEBUG_TEST` set so that the codeword dump is returned in `errtxt`. The return code is always checked;
 * unless `TEST_INPUT_GENERATE_EXPECTED` is defined (in which case regenerated table rows are printed instead),
 * the resulting ECI and the codeword dump are compared with the expected values.
 */
static void test_input(void)
{
    testStart("");
    int ret;
    struct item {
        int input_mode;
        int eci;
        unsigned char* data;
        int length;
        int ret;
        int expected_eci;
        char* expected;
        char* comment;
    };
    // é U+00E9 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 18030 0xA8A6, UTF-8 C3A9
    // β U+03B2 in ISO 8859-7 Greek 0xE2 (but not other ISO 8859 or Win page), in GB 18030 0xA6C2, UTF-8 CEB2
    // ÿ U+00FF in ISO 8859-1 0xFF, not in GB 18030, outside first byte and second byte range, UTF-8 C3BF
    // 啊 U+554A GB 18030 Region One 0xB0A1, UTF-8 E5958A
    // 亍 U+4E8D GB 18030 Region Two 0xD8A1, UTF-8 E4BA8D
    // 齄 U+9F44 GB 18030 Region Two 0xF7FE, UTF-8 E9BD84
    // 丂 U+4E02 GB 18030 2-byte Region 0x8140, UTF-8 E4B882
    // PAD U+0080 GB 18030 4-byte Region 0x81308130, UTF-8 C280 (\302\200)
    // REPLACEMENT CHARACTER U+FFFD GB 18030 4-byte Region 0x8431A437, UTF-8 EFBFBD (\357\277\275)
    struct item data[] = {
        /*  0*/ { UNICODE_MODE, 0, "é", -1, 0, 0, "30 00 F4 80 00 00 00 00 00", "B1 (ISO 8859-1)" },
        /*  1*/ { UNICODE_MODE, 3, "é", -1, 0, 3, "80 33 00 0F 48 00 00 00 00", "ECI-3 B1 (ISO 8859-1)" },
        /*  2*/ { UNICODE_MODE, 29, "é", -1, 0, 29, "81 D4 FC FF FF 00 00 00 00", "ECI-29 H(1)1 (GB 18030) (Region One)" },
        /*  3*/ { UNICODE_MODE, 26, "é", -1, 0, 26, "81 A4 70 2F FF 00 00 00 00", "ECI-26 H(1)1 (UTF-8) (Region One)" },
        /*  4*/ { DATA_MODE, 0, "é", -1, 0, 0, "47 02 FF F0 00 00 00 00 00", "H(1)1 (UTF-8) (Region One)" },
        /*  5*/ { DATA_MODE, 0, "\351", -1, 0, 0, "30 00 F4 80 00 00 00 00 00", "B1 (ISO 8859-1) (0xE9)" },
        /*  6*/ { UNICODE_MODE, 0, "β", -1, 0, 0, "30 01 53 61 00 00 00 00 00", "B2 (GB 18030) (2-byte Region)" },
        /*  7*/ { UNICODE_MODE, 9, "β", -1, 0, 9, "80 93 00 0F 10 00 00 00 00", "ECI-9 B1 (ISO 8859-7)" },
        /*  8*/ { UNICODE_MODE, 29, "β", -1, 0, 29, "81 D3 00 15 36 10 00 00 00", "ECI-29 B2 (GB 18030) (2-byte Region)" },
        /*  9*/ { UNICODE_MODE, 26, "β", -1, 0, 26, "81 A4 B1 5F FF 00 00 00 00", "ECI-26 H(1)1 (UTF-8) (Region One)" },
        /* 10*/ { DATA_MODE, 0, "β", -1, 0, 0, "4B 15 FF F0 00 00 00 00 00", "H(1)1 (UTF-8) (Region One)" },
        /* 11*/ { UNICODE_MODE, 0, "ÿ", -1, 0, 0, "30 00 FF 80 00 00 00 00 00", "B1 (ISO 8859-1)" },
        /* 12*/ { UNICODE_MODE, 0, "ÿÿÿ", -1, 0, 0, "30 01 FF FF FF 80 00 00 00", "B3 (ISO 8859-1)" },
        /* 13*/ { UNICODE_MODE, 0, "\302\200", -1, 0, 0, "70 00 00 00 00 00 00 00 00", "H(f)1 (GB 18030) (4-byte Region)" },
        // U+0080 followed by U+FFFD, the latter escaped so it cannot be mangled; the expected codewords carry
        // 4-byte indices 0 and 39417, i.e. 0x81308130 and 0x8431A437
        /* 14*/ { UNICODE_MODE, 0, "\302\200\357\277\275", -1, 0, 0, "70 00 00 38 26 7E 40 00 00", "H(f)2 (GB 18030) (both 4-byte Region)" },
        /* 15*/ { UNICODE_MODE, 0, "啊亍齄丂\302\200", -1, 0, 0, "64 68 50 3C AC 28 80 00 FF FE E0 00 00 00 00 00 00", "H(d)4 H(f)1 (GB 18030)" },
        /* 16*/ { DATA_MODE, 0, "\177\177", -1, 0, 0, "2F BD F7 F0 00 00 00 00 00", "T2 (ASCII)" },
        /* 17*/ { DATA_MODE, 0, "\177\177\177", -1, 0, 0, "2F BD F7 DF C0 00 00 00 00", "T3 (ASCII)" },
        /* 18*/ { UNICODE_MODE, 0, "123", -1, 0, 0, "11 EF FF 00 00 00 00 00 00", "N3 (ASCII)" },
        /* 19*/ { UNICODE_MODE, 0, "12345", -1, 0, 0, "11 EC 2D FF 80 00 00 00 00", "N5 (ASCII)" },
        /* 20*/ { UNICODE_MODE, 0, "Aa%$Bb9", -1, 0, 0, "22 A4 FA 18 3E 2E 52 7F 00", "T7 (ASCII)" },
        /* 21*/ { UNICODE_MODE, 0, "Summer Palace Ticket for 6 June 2015 13:00;2015年6月6日夜01時00分PM頤和園のチケット;2015년6월6일13시오후여름궁전티켓.2015年6月6号下午13:00的颐和园门票;", -1, 0, 0, "(189) 27 38 C3 0A 35 F9 CF 99 92 F9 26 A3 E7 3E 76 C9 AE A3 7F CC 08 04 0C CD EE 44 06 C4 09 4E AD", "T20 B64 N4 H(f)1 T1 H(f)1 T1 H(f)1 T2 H(f)9 B35 (GB 18030)" },
        /* 22*/ { UNICODE_MODE, 0, "\000\014\033 #/059:<@AMZ", 15, 0, 0, "2F 80 31 B7 1F AF E0 05 27 EB 2E CB E2 96 8F F0 00", "T15 (ASCII)" },
        /* 23*/ { UNICODE_MODE, 0, "Z[\\`alz{~\177", -1, 0, 0, "28 FE CF 4E 3E 92 FF 7E E7 CF 7F 00 00", "T10 (ASCII)" },
    };
    int data_size = sizeof(data) / sizeof(struct item);
    char escaped[1024]; // Only used when generating expected rows
    for (int i = 0; i < data_size; i++) {
        struct zint_symbol* symbol = ZBarcode_Create();
        assert_nonnull(symbol, "Symbol not created\n");
        symbol->symbology = BARCODE_HANXIN;
        symbol->input_mode = data[i].input_mode;
        symbol->eci = data[i].eci;
        symbol->debug = ZINT_DEBUG_TEST; // Needed to get codeword dump in errtxt
        // Explicit length is needed for data with embedded NULs (item 22); -1 means "use strlen()"
        int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
        ret = ZBarcode_Encode(symbol, data[i].data, length);
        assert_equal(ret, data[i].ret, "i:%d ZBarcode_Encode ret %d != %d (%s)\n", i, ret, data[i].ret, symbol->errtxt);
#ifdef TEST_INPUT_GENERATE_EXPECTED
        printf(" /*%3d*/ { %s, %d, \"%s\", %s, %d, \"%s\", \"%s\" },\n",
                i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
                ret < 5 ? symbol->eci : -1, symbol->errtxt, data[i].comment);
#else
        if (ret < 5) {
            assert_equal(symbol->eci, data[i].expected_eci, "i:%d eci %d != %d\n", i, symbol->eci, data[i].expected_eci);
            assert_zero(strcmp(symbol->errtxt, data[i].expected), "i:%d strcmp(%s, %s) != 0\n", i, symbol->errtxt, data[i].expected);
        }
#endif
        ZBarcode_Delete(symbol);
    }
    testFinish();
}
// Checks complete Han Xin symbols: each item is encoded with the given input mode and options (-1 leaves the
// symbol default for `option_1`/`option_2`), the return code is compared, and - unless
// TEST_ENCODE_GENERATE_EXPECTED is defined, in which case a regenerated table row is printed instead -
// the rows, width and module matrix are compared with the expected values.
static void test_encode(void)
{
testStart("");
int ret;
struct item {
unsigned char* data;
int input_mode;
int option_1;
int option_2;
int ret;
int expected_rows;
int expected_width;
char* comment;
char* expected;
};
// `expected` is the module matrix, one string of '0'/'1' per row, concatenated by the compiler into a single string
struct item data[] = {
/* 0*/ { "1234", UNICODE_MODE, -1, -1, 0, 23, 23, "",
"11111110101000101111111"
"10000000010001000000001"
"10111110101110001111101"
"10100000101011000000101"
"10101110010011101110101"
"10101110101111101110101"
"10101110011010001110101"
"00000000110001100000000"
"00010101111011001010101"
"01001010101101000111110"
"10101101001010101010100"
"11011101010100100010010"
"10101010010011101010100"
"01010100111110000011110"
"10101010001011110101000"
"00000000110111100000000"
"11111110011001001110101"
"00000010110000101110101"
"11111010101110001110101"
"00001010010110100000101"
"11101010001100101111101"
"11101010000011000000001"
"11101010101010101111111"
},
/* 1*/ { "1234567890", UNICODE_MODE, 1, 1, 0, 23, 23, "**NOT SAME** as ISO 20830 Draft K.1 Example 1, uses masking pattern 2 instead of pattern 1; however matches pattern 2 example (excluding Function Info)",
"11111110001011101111111"
"10000000100000000000001"
"10111110011111001111101"
"10100000000111000000101"
"10101110011110101110101"
"10101110011111101110101"
"10101110101111101110101"
"00000000001100000000000"
"00010101011111010010101"
"01010001100001001001101"
"00100100100100101011100"
"11111000010111111111111"
"01001001001001001001000"
"11110010100100110010000"
"10101001011111010101000"
"00000000001000000000000"
"11111110110000101110101"
"00000010111111001110101"
"11111010101001001110101"
"00001010000011000000101"
"11101010010111001111101"
"11101010001001100000001"
"11101010100100001111111"
},
/* 2*/ { "1234567890ABCDEFGabcdefg,Han Xin Code", UNICODE_MODE, 3, 10, 0, 41, 41, "Same as ISO 20830 Draft K.2 Example 2 (happens to use same mask pattern, 2)",
"11111110001011000000100000010101101111111"
"10000000001011110010000000011010100000001"
"10111110111111111010111011101111101111101"
"10100000101001001001001001001001100000101"
"10101110000100100101010010010111001110101"
"10101110010111110111111011101100101110101"
"10101110101111001001011101110011001110101"
"00000000011001100100100100100100100000000"
"00011110111111111111111001101111110010101"
"10110011011100110010001001100000001001001"
"11001100100100100100100100100100100100100"
"11111111001110101101011011110011011110110"
"10100001001001000001100001010010001001001"
"10100100100100100100100101100100001110011"
"10011111100010110000001111111111101000001"
"10110000001001001001001001001001001001001"
"10101011011110000011001101111001100100100"
"11111111000110010101111011111111111111111"
"11001001001001000110011001110111100000010"
"10111110100100100100100110001110011011000"
"11111111111111111111111111111111111111101"
"00000000000000000000101001001001001011000"
"01110110101001001010101000010110100100100"
"11111111101110010010100011111111111111111"
"01001001001001001100110001101111011101011"
"00101111100100100100100100001101001101110"
"11111111111111111110111111111111111001111"
"10010110100000101110111010011001001001001"
"00101001101100100010100100100100100100100"
"11111111111110110000111000110000110000101"
"00000001001001001000101011011100000100101"
"00100100100100100100100100100100101011111"
"10101001100101110100111011100010101111000"
"00000000110001001000101001001111000000000"
"11111110001011100100100100100100101110101"
"00000010111111111110111111111111001110101"
"11111010001001000000111111111011001110101"
"00001010100101111000101010100100100000101"
"11101010111111111100101100001111101111101"
"11101010101001001000101001001001000000001"
"11101010100100100100111111111111001111111"
},
/* 3*/ { "Summer Palace Ticket for 6 June 2015 13:00;2015年6月6日夜01時00分PM頤和園のチケット;2015년6월6일13시오후여름궁전티켓.2015年6月6号下午13:00的颐和园门票;", UNICODE_MODE, 2, 17, 0, 55, 55, "**NOT SAME** as ISO 20830 Draft K.3 Example 3, different encoding modes; if same encoding modes forced, uses masking pattern 1 instead of pattern 2, but matches pattern 1 example (excluding Function Info)",
"1111111001111111111011100100110101101010101100101111111"
"1000000000000000001100011000011001000010101111100000001"
"1011111011110010101110010110100000111010101101101111101"
"1010000001010100001101011100001101100100010100000000101"
"1010111000011011001111001000010010110010101010001110101"
"1010111011010101001101010100001010011001000110001110101"
"1010111001101001001001110010001001100100001001001110101"
"0000000011100111101101111010001010001100110011000000000"
"0010010101010100001100111100101010101111010001101010101"
"1111111011101110101000110010100010000101010101010101010"
"1010100111011011001101110110100101100011101000111110110"
"0011100111010001101001111011100001001111110010000011001"
"0011000100100010101011000001101101010000001010011010000"
"1100111101010101001101010101010100010100001110110101000"
"0000000100111001001010101100101100000001011111001110100"
"1101000010010001001101110001010101101100101110001110111"
"0101010101011100001110010001111110101010101010101010101"
"0001011000101000101011010011111000010010000011110101100"
"1001000100000110001111111111111111111000011101001110001"
"0010110011110110100000000000000000001101011101001100000"
"1111111100000100100000111011111001001111011001011100101"
"1101010101010101010101010100111011001110101010100111101"
"1101010011001001100110000001001000101000001011111110000"
"1000111001010111001010111111101100101101010000111001101"
"1110101100000011001001001011001010101010101010101010101"
"1101001110000100101100101011001100001001110111011001000"
"1001101010000000001000010010101100001011101001110010101"
"1101001100101100100011100010110000101101110100110010110"
"1000001010101010101010101010111101001010101100011001100"
"1101000101010110010101100101011000101001001000001000001"
"1010011101101101010001000111011011101011111010101111001"
"1101101100100000010010000011011001001101010101010101010"
"1010111000110111100101100011101010001010001001101110011"
"1100110000001101010011010000001011101011110011010101001"
"1011101111011001101010110111101100101100110001101100101"
"1000110111010101010101010101010100001101011110111010101"
"1111111111111111111110001011001010101111111111111111111"
"0000000000000000001110000010010000000000000000000000001"
"0100011110001011001010000111010001001101001001010010101"
"0101011111000100101000110110001100101111110100110101001"
"0100011010101010101010100110110101100111011001011000101"
"1000110011110001101000011011111101100010100001110000101"
"1111001110101101101000100011011010001010011000010000001"
"0001110000001011001100000011111101010101010101010101001"
"1010011101001101001101011101000010010100010000001110101"
"1101100101011001101001000100010000001101111001000111001"
"1010101010110101101010010110010011001111101011010100100"
"0000000000000100001001100011100100010101010101100000000"
"1111111000101010101010101110111100111011011111001110101"
"0000001000101011001010000100010101001110001111101110101"
"1111101000100001101101000001100001001011101001001110101"
"0000101001000100101010110010010101010110011001000000101"
"1110101010111000101011111101011101111100001110101111101"
"1110101011010101001101010101010101000000001010000000001"
"1110101011010001001111111111111111101000001110001111111"
},
};
int data_size = sizeof(data) / sizeof(struct item);
for (int i = 0; i < data_size; i++) {
struct zint_symbol* symbol = ZBarcode_Create();
assert_nonnull(symbol, "Symbol not created\n");
symbol->symbology = BARCODE_HANXIN;
symbol->input_mode = data[i].input_mode;
// -1 means "don't set", i.e. keep whatever ZBarcode_Create() initialised
if (data[i].option_1 != -1) {
symbol->option_1 = data[i].option_1;
}
if (data[i].option_2 != -1) {
symbol->option_2 = data[i].option_2;
}
int length = strlen(data[i].data);
ret = ZBarcode_Encode(symbol, data[i].data, length);
assert_equal(ret, data[i].ret, "i:%d ZBarcode_Encode ret %d != %d (%s)\n", i, ret, data[i].ret, symbol->errtxt);
#ifdef TEST_ENCODE_GENERATE_EXPECTED
// Generate mode: print this item as a ready-to-paste `data[]` row (header line plus module dump) instead of checking it
printf(" /*%3d*/ { \"%s\", %s, %d, %d, %s, %d, %d, \"%s\",\n",
i, data[i].data, testUtilInputModeName(data[i].input_mode), data[i].option_1, data[i].option_2, testUtilErrorName(data[i].ret),
symbol->rows, symbol->width, data[i].comment);
testUtilModulesDump(symbol, " ", "\n");
printf(" },\n");
#else
// Dimensions are checked for any return code below 5; the module matrix only when encoding returned 0
if (ret < 5) {
assert_equal(symbol->rows, data[i].expected_rows, "i:%d symbol->rows %d != %d (%s)\n", i, symbol->rows, data[i].expected_rows, data[i].data);
assert_equal(symbol->width, data[i].expected_width, "i:%d symbol->width %d != %d (%s)\n", i, symbol->width, data[i].expected_width, data[i].data);
if (ret == 0) {
int width, row;
ret = testUtilModulesCmp(symbol, data[i].expected, &width, &row);
assert_zero(ret, "i:%d testUtilModulesCmp ret %d != 0 width %d row %d (%s)\n", i, ret, width, row, data[i].data);
}
}
#endif
ZBarcode_Delete(symbol);
}
testFinish();
}
int main()
{
test_options();
test_input();
test_encode();
testReport();
return 0;
}

View File

@ -107,7 +107,7 @@ static void test_best_supported_set(void)
#ifdef TEST_GENERATE_EXPECTED
printf(" /*%2d*/ { %s, \"%s\", %d, %.0f, %.0f, %d, %d, %d, \"%s\",\n",
i, testUtilBarcodeName(data[i].symbology), testUtilEscape(data[i].data, escaped_data, sizeof(escaped_data)), ret,
i, testUtilBarcodeName(data[i].symbology), testUtilEscape(data[i].data, length, escaped_data, sizeof(escaped_data)), ret,
data[i].w, data[i].h, data[i].ret_vector, symbol->rows, symbol->width, data[i].comment);
testUtilModulesDump(symbol, " ", "\n");
printf(" },\n");

View File

@ -255,7 +255,7 @@ static void test_qr_input(void)
#ifdef TEST_QR_INPUT_GENERATE_EXPECTED
printf(" /*%3d*/ { %s, %d, \"%s\", %s, %d, \"%s\", \"%s\" },\n",
i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
ret < 5 ? symbol->eci : -1, symbol->errtxt, data[i].comment);
#else
if (ret < 5) {
@ -348,7 +348,7 @@ static void test_microqr_input(void)
#ifdef TEST_MICROQR_INPUT_GENERATE_EXPECTED
printf(" /*%3d*/ { %s, \"%s\", %s, \"%s\", \"%s\" },\n",
i, testUtilInputModeName(data[i].input_mode), testUtilEscape(data[i].data, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
i, testUtilInputModeName(data[i].input_mode), testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
symbol->errtxt, data[i].comment);
#else
if (ret < 5) {
@ -404,7 +404,7 @@ static void test_upnqr_input(void)
#ifdef TEST_UPNQR_INPUT_GENERATE_EXPECTED
printf(" /*%3d*/ { %s, \"%s\", %s, \"%s\", \"%s\" },\n",
i, testUtilInputModeName(data[i].input_mode), testUtilEscape(data[i].data, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
i, testUtilInputModeName(data[i].input_mode), testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
symbol->errtxt, data[i].comment);
#else
if (ret < 5) {

View File

@ -34,10 +34,10 @@
#include "../sjis.h"
// As control convert to Shift JIS using simple table generated from https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT plus simple processing
static int sjis_wctomb_zint2(unsigned char* r, unsigned int wc, size_t n)
static int sjis_wctomb_zint2(unsigned int* r, unsigned int wc)
{
if (wc < 0x20 || wc == 0x7F) {
r[0] = wc;
*r = wc;
return 1;
}
// Shortcut
@ -45,43 +45,34 @@ static int sjis_wctomb_zint2(unsigned char* r, unsigned int wc, size_t n)
return 0;
}
if (wc >= 0xE000 && wc <= 0xE757) { // PUA mappings, not in SHIFTJIS.TXT
unsigned short c;
if (wc <= 0xE0BB) {
c = wc - 0xE000 + 0xF040 + (wc >= 0xE000 + 0x3F);
*r = wc - 0xE000 + 0xF040 + (wc >= 0xE000 + 0x3F);
} else if (wc <= 0xE177) {
c = wc - 0xE0BC + 0xF140 + (wc >= 0xE0BC + 0x3F);
*r = wc - 0xE0BC + 0xF140 + (wc >= 0xE0BC + 0x3F);
} else if (wc <= 0xE233) {
c = wc - 0xE178 + 0xF240 + (wc >= 0xE178 + 0x3F);
*r = wc - 0xE178 + 0xF240 + (wc >= 0xE178 + 0x3F);
} else if (wc <= 0xE2EF) {
c = wc - 0xE234 + 0xF340 + (wc >= 0xE234 + 0x3F);
*r = wc - 0xE234 + 0xF340 + (wc >= 0xE234 + 0x3F);
} else if (wc <= 0xE3AB) {
c = wc - 0xE2F0 + 0xF440 + (wc >= 0xE2F0 + 0x3F);
*r = wc - 0xE2F0 + 0xF440 + (wc >= 0xE2F0 + 0x3F);
} else if (wc <= 0xE467) {
c = wc - 0xE3AC + 0xF540 + (wc >= 0xE3AC + 0x3F);
*r = wc - 0xE3AC + 0xF540 + (wc >= 0xE3AC + 0x3F);
} else if (wc <= 0xE523) {
c = wc - 0xE468 + 0xF640 + (wc >= 0xE468 + 0x3F);
*r = wc - 0xE468 + 0xF640 + (wc >= 0xE468 + 0x3F);
} else if (wc <= 0xE5DF) {
c = wc - 0xE524 + 0xF740 + (wc >= 0xE524 + 0x3F);
*r = wc - 0xE524 + 0xF740 + (wc >= 0xE524 + 0x3F);
} else if (wc <= 0xE69B) {
c = wc - 0xE5E0 + 0xF840 + (wc >= 0xE5E0 + 0x3F);
*r = wc - 0xE5E0 + 0xF840 + (wc >= 0xE5E0 + 0x3F);
} else {
c = wc - 0xE69C + 0xF940 + (wc >= 0xE69C + 0x3F);
*r = wc - 0xE69C + 0xF940 + (wc >= 0xE69C + 0x3F);
}
r[0] = (c >> 8);
r[1] = c & 0xFF;
return 2;
}
int tab_length = sizeof(test_sjis_tab) / sizeof(unsigned short);
for (int i = 0; i < tab_length; i += 2) {
int tab_length = sizeof(test_sjis_tab) / sizeof(unsigned int);
for (int i = test_sjis_tab_ind[wc >> 12]; i < tab_length; i += 2) {
if (test_sjis_tab[i + 1] == wc) {
unsigned short c = test_sjis_tab[i];
if (c < 0xFF) {
r[0] = c;
return 1;
}
r[0] = (c >> 8);
r[1] = c & 0xFF;
return 2;
*r = test_sjis_tab[i];
return *r > 0xFF ? 2 : 1;
}
}
return 0;
@ -92,18 +83,15 @@ static void test_sjis_wctomb_zint(void)
testStart("");
int ret, ret2;
unsigned char buf[2], buf2[2];
unsigned int val, val2;
for (unsigned int i = 0; i < 0xFFFE; i++) {
if (i >= 0xD800 && i <= 0xDFFF) { // UTF-16 surrogates
continue;
}
buf[0] = buf[1] = buf2[0] = buf2[1] = 0;
ret = sjis_wctomb_zint(buf, i, 2);
val = ret == 1 ? buf[0] : (buf[0] << 8) | buf[1];
ret2 = sjis_wctomb_zint2(buf2, i, 2);
val2 = ret2 == 1 ? buf2[0] : (buf2[0] << 8) | buf2[1];
val = val2 = 0;
ret = sjis_wctomb_zint(&val, i);
ret2 = sjis_wctomb_zint2(&val2, i);
if (i == 0xFF3C) { // Extra mapping full-width reverse solidus U+FF3C to 0x815F, duplicate of U+005C (backslash)
assert_equal(ret, 2, "i:%d 0x%04X ret %d != 2, val 0x%04X\n", i, i, ret, val);
assert_equal(val, 0x815F, "i:%d 0x%04X val 0x%04X != 0x815F\n", i, i, val);

View File

@ -1,5 +1,5 @@
/* Generated by gen_test_tab.php from SHIFTJIS.TXT */
static const unsigned short test_sjis_tab[] = {
static const unsigned int test_sjis_tab[] = {
0x0020, 0x0020,
0x0021, 0x0021,
0x0022, 0x0022,
@ -7038,3 +7038,22 @@ static const unsigned short test_sjis_tab[] = {
0x8150, 0xFFE3,
0x818F, 0xFFE5,
};
/* Start offsets into test_sjis_tab, one per 0x1000-wide Unicode block (looked up with `wc >> 12`).
 * Each value is an element index (the table holds 2 elements per mapping) from which a search
 * for a codepoint in that block begins. Where consecutive values are equal, the lower block
 * has no mappings of its own and simply shares the start of the next populated block. */
static const unsigned int test_sjis_tab_ind[] = {
    0, 440, 440, 656,           /* U+0000, U+1000, U+2000, U+3000 */
    1054, 1466, 4228, 7042,     /* U+4000, U+5000, U+6000, U+7000 */
    9404, 11812, 13766, 13766,  /* U+8000, U+9000, U+A000, U+B000 */
    13766, 13766, 13766, 13766, /* U+C000, U+D000, U+E000, U+F000 */
};

View File

@ -253,6 +253,7 @@ char* testUtilBarcodeName(int symbology) {
{ BARCODE_GRIDMATRIX, "BARCODE_GRIDMATRIX", 142 },
{ BARCODE_UPNQR, "BARCODE_UPNQR", 143 },
{ BARCODE_ULTRA, "BARCODE_ULTRA", 144 },
{ BARCODE_RMQR, "BARCODE_RMQR", 145 },
};
int data_size = sizeof(data) / sizeof(struct item);
@ -365,13 +366,14 @@ int testUtilIsValidUTF8(const unsigned char str[], const size_t length) {
return state == 0;
}
char* testUtilEscape(char* buffer, char* escaped, int escaped_size)
char* testUtilEscape(char* buffer, int length, char* escaped, int escaped_size)
{
int i;
unsigned char* b = buffer;
int non_utf8 = !testUtilIsValidUTF8(buffer, strlen(buffer));
unsigned char* be = buffer + length;
int non_utf8 = !testUtilIsValidUTF8(buffer, length);
for (i = 0; i < escaped_size && *b; b++) {
for (i = 0; b < be && i < escaped_size; b++) {
if (non_utf8 || *b < ' ' || *b == '\177') {
if (i < escaped_size - 4) {
sprintf(escaped + i, "\\%.3o", *b);
@ -383,6 +385,12 @@ char* testUtilEscape(char* buffer, char* escaped, int escaped_size)
escaped[i + 1] = *b;
}
i += 2;
} else if (b + 1 < be && *b == 0xC2 && *(b + 1) < 0xA0) {
if (i < escaped_size - 8) {
sprintf(escaped + i, "\\%.3o\\%.3o", *b, *(b + 1));
}
i += 8;
b++;
} else {
escaped[i++] = *b;
}

View File

@ -70,7 +70,7 @@ char* testUtilBarcodeName(int symbology);
char* testUtilErrorName(int error_number);
char* testUtilInputModeName(int input_mode);
int testUtilDAFTConvert(const struct zint_symbol* symbol, char* buffer, int buffer_size);
char* testUtilEscape(char* buffer, char* escaped, int escaped_size);
char* testUtilEscape(char* buffer, int length, char* escaped, int escaped_size);
char* testUtilReadCSVField(char* buffer, char* field, int field_size);
int testUtilSymbolCmp(const struct zint_symbol* a, const struct zint_symbol* b);
struct zint_vector* testUtilVectorCpy(const struct zint_vector* in);

View File

@ -0,0 +1,3 @@
# GB18030.TXT is not included as it is 21MB in size. It can be downloaded from
# https://haible.de/bruno/charsets/conversion-tables/GB18030.html
# The version used is libiconv-1.11/GB18030.TXT

View File

@ -4,6 +4,20 @@
libzint - the open source barcode library
Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
*/
/* To create backend/tests/test_sjis_tab.h (from backend/tests/build directory):
*
* php ../tools/gen_test_tab.php
*
 * To create backend/tests/test_gb2312_tab.h:
*
* php ../tools/gen_test_tab.php -f GB2312.TXT -s gb2312_tab
*
* To create backend/tests/test_gb18030_tab.h (note that backend/tests/tools/data/GB18030.TXT
* will have to be downloaded first from https://haible.de/bruno/charsets/conversion-tables/GB18030.html
* using the version libiconv-1.11/GB18030.TXT):
*
* php ../tools/gen_test_tab.php -f GB18030.TXT -s gb18030_tab
*/
/* vim: set ts=4 sw=4 et : */
$basename = basename(__FILE__);
@ -35,7 +49,10 @@ foreach ($lines as $line) {
if ($line === '' || strncmp($line, '0x', 2) !== 0) {
continue;
}
$tab_lines[] = preg_replace_callback('/^0x([0-9A-F]{2,4})[ \t]+0x([0-9A-F]{4}).*$/', function ($matches) {
if (preg_match('/^0x([0-9A-F]{2,8})[ \t]+0x([0-9A-F]{5})/', $line)) { // Exclude U+10000..10FFFF to save space
continue;
}
$tab_lines[] = preg_replace_callback('/^0x([0-9A-F]{2,8})[ \t]+0x([0-9A-F]{4}).*$/', function ($matches) {
global $sort;
$mb = hexdec($matches[1]);
$unicode = hexdec($matches[2]);
@ -50,8 +67,20 @@ array_multisort($sort, $tab_lines);
$out = array();
$out[] = '/* Generated by ' . $basename . ' from ' . $file_name . ' */';
$out[] = 'static const unsigned short test_' . $suffix_name . '[] = {';
$out[] = 'static const unsigned int test_' . $suffix_name . '[] = {';
$out = array_merge($out, $tab_lines);
$out[] = '};';
$out[] = '';
$out[] = 'static const unsigned int test_' . $suffix_name . '_ind[] = {';
$first = 0;
foreach ($sort as $ind => $unicode) {
$div = (int)($unicode / 0x1000);
while ($div >= $first) {
$out[] = ($ind * 2) . ',';
$first++;
}
}
$out[] = '};';
file_put_contents($out_dirname . '/test_' . $suffix_name . '.h', implode("\n", $out) . "\n");

View File

@ -60,6 +60,7 @@ HEADERS += ../backend/aztec.h \
../backend/dmatrix.h \
../backend/eci.h \
../backend/font.h \
../backend/gb18030.h \
../backend/gb2312.h \
../backend/gridmtx.h \
../backend/gs1.h \
@ -90,6 +91,7 @@ SOURCES += ../backend/2of5.c \
../backend/dotcode.c \
../backend/eci.c \
../backend/emf.c \
../backend/gb18030.c \
../backend/gb2312.c \
../backend/general_field.c \
../backend/gif.c \

View File

@ -24,6 +24,7 @@ HEADERS += ../backend/aztec.h \
../backend/composite.h \
../backend/dmatrix.h \
../backend/font.h \
../backend/gb18030.h \
../backend/gb2312.h \
../backend/gridmtx.h \
../backend/gs1.h \
@ -50,6 +51,8 @@ SOURCES += ../backend/2of5.c \
../backend/common.c \
../backend/composite.c \
../backend/dmatrix.c \
../backend/gb18030.c \
../backend/gb2312.c \
../backend/gridmtx.c \
../backend/gs1.c \
../backend/imail.c \
@ -63,7 +66,6 @@ SOURCES += ../backend/2of5.c \
../backend/ps.c \
../backend/qr.c \
../backend/reedsol.c \
../backend/render.c \
../backend/rss.c \
../backend/svg.c \
../backend/telepen.c \

View File

@ -84,6 +84,7 @@ TEA_ADD_SOURCES([
../backend/dmatrix.c
../backend/dotcode.c
../backend/eci.c
../backend/gb18030.c
../backend/gb2312.c
../backend/general_field.c
../backend/gif.c

View File

@ -156,6 +156,10 @@ SOURCE=..\backend\emf.c
# End Source File
# Begin Source File
SOURCE=..\backend\gb18030.c
# End Source File
# Begin Source File
SOURCE=..\backend\gb2312.c
# End Source File
# Begin Source File

View File

@ -2276,7 +2276,7 @@ under
development, so it is recommended it should not yet be used for a production
environment. The symbology is capable of encoding characters in the GB18030
character set (up to 4-byte characters) and is also able to support the ECI
mechanism. Han Xin does not support the encoding of GS-1 data.
mechanism. Support for the encoding of GS1 data has not yet been implemented.
The size of the symbol can be specified using the --ver= option or setting
option_2 to a value between 1 and 84 according to the following table.
@ -2383,10 +2383,6 @@ Mode | Recovery Capacity
4 | Approx 30%
--------------------------
It is not possible to select both symbol size and error correction capacity for
the same symbol. If both options are selected then the error correction
capacity selection will be ignored.
6.7 Other Barcode-Like Markings
-------------------------------
6.7.1. Facing Identification Mark (FIM)

View File

@ -319,6 +319,7 @@
<ClCompile Include="..\backend\dotcode.c" />
<ClCompile Include="..\backend\eci.c" />
<ClCompile Include="..\backend\emf.c" />
<ClCompile Include="..\backend\gb18030.c" />
<ClCompile Include="..\backend\gb2312.c" />
<ClCompile Include="..\backend\general_field.c" />
<ClCompile Include="..\backend\gif.c" />
@ -340,7 +341,6 @@
<ClCompile Include="..\backend\qr.c" />
<ClCompile Include="..\backend\raster.c" />
<ClCompile Include="..\backend\reedsol.c" />
<ClCompile Include="..\backend\render.c" />
<ClCompile Include="..\backend\rss.c" />
<ClCompile Include="..\backend\sjis.c" />
<ClCompile Include="..\backend\svg.c" />

View File

@ -152,6 +152,10 @@ SOURCE=..\..\backend\emf.c
# End Source File
# Begin Source File
SOURCE=..\..\backend\gb18030.c
# End Source File
# Begin Source File
SOURCE=..\..\backend\gb2312.c
# End Source File
# Begin Source File