2022-07-14 16:01:30 +01:00
|
|
|
/* eci.c - Extended Channel Interpretations */
|
|
|
|
/*
|
2016-08-16 12:43:41 +01:00
|
|
|
libzint - the open source barcode library
|
2023-06-12 01:25:55 +01:00
|
|
|
Copyright (C) 2009-2023 Robin Stuart <rstuart114@gmail.com>
|
2016-08-16 12:43:41 +01:00
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions
|
|
|
|
are met:
|
|
|
|
|
2017-10-23 21:37:52 +02:00
|
|
|
1. Redistributions of source code must retain the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
2016-08-16 12:43:41 +01:00
|
|
|
2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in the
|
2017-10-23 21:37:52 +02:00
|
|
|
documentation and/or other materials provided with the distribution.
|
2016-08-16 12:43:41 +01:00
|
|
|
3. Neither the name of the project nor the names of its contributors
|
|
|
|
may be used to endorse or promote products derived from this software
|
2017-10-23 21:37:52 +02:00
|
|
|
without specific prior written permission.
|
2016-08-16 12:43:41 +01:00
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
|
|
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
2017-10-23 21:37:52 +02:00
|
|
|
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
2016-08-16 12:43:41 +01:00
|
|
|
SUCH DAMAGE.
|
|
|
|
*/
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
/* SPDX-License-Identifier: BSD-3-Clause */
|
2016-08-16 12:43:41 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
#include <assert.h>
|
2021-01-11 18:11:41 +00:00
|
|
|
#include "common.h"
|
|
|
|
#include "eci.h"
|
|
|
|
#include "eci_sb.h"
|
|
|
|
#include "big5.h"
|
2022-06-02 20:32:25 +01:00
|
|
|
#include "gb18030.h"
|
2021-01-11 18:11:41 +00:00
|
|
|
#include "gb2312.h"
|
2022-06-02 20:32:25 +01:00
|
|
|
#include "gbk.h"
|
2021-01-11 18:11:41 +00:00
|
|
|
#include "ksx1001.h"
|
2022-06-02 20:32:25 +01:00
|
|
|
#include "sjis.h"
|
2016-08-16 12:43:41 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Single-byte stuff */
|
2021-01-11 18:11:41 +00:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Base ISO/IEC 8859 routine to convert Unicode codepoint `u` */
|
|
|
|
static int u_iso8859(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
|
|
|
|
const unsigned char *tab_sb, int e, unsigned char *dest) {
|
|
|
|
int s;
|
|
|
|
if (u < 0xA0) {
|
|
|
|
if (u >= 0x80) { /* U+0080-9F fail */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
*dest = (unsigned char) u;
|
|
|
|
return 1;
|
2016-08-16 12:43:41 +01:00
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
if (u <= 0xFF) {
|
|
|
|
const unsigned int u2 = u - 0xA0;
|
|
|
|
if (tab_s[u2 >> 4] & ((unsigned short) 1 << (u2 & 0xF))) {
|
|
|
|
*dest = (unsigned char) u; /* Straight-thru */
|
|
|
|
return 1;
|
|
|
|
}
|
2021-01-11 18:11:41 +00:00
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
s = 0;
|
|
|
|
while (s <= e) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (tab_u[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else if (tab_u[m] > u) {
|
|
|
|
e = m - 1;
|
|
|
|
} else {
|
|
|
|
*dest = tab_sb[m];
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Base Windows-125x routine to convert Unicode codepoint `u` */
|
|
|
|
static int u_cp125x(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
|
|
|
|
const unsigned char *tab_sb, int e, unsigned char *dest) {
|
|
|
|
int s;
|
|
|
|
if (u < 0x80) {
|
|
|
|
*dest = (unsigned char) u;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if (u <= 0xFF && u >= 0xA0) {
|
|
|
|
const unsigned int u2 = u - 0xA0;
|
|
|
|
if (tab_s[u2 >> 4] & ((unsigned short) 1 << (u2 & 0xF))) {
|
|
|
|
*dest = (unsigned char) u; /* Straight-thru */
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
s = 0;
|
|
|
|
while (s <= e) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (tab_u[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else if (tab_u[m] > u) {
|
|
|
|
e = m - 1;
|
|
|
|
} else {
|
|
|
|
*dest = tab_sb[m];
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
2021-01-11 18:11:41 +00:00
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2021-01-11 18:11:41 +00:00
|
|
|
/* ECI 27 ASCII (ISO/IEC 646:1991 IRV (US)) */
|
2022-06-02 20:32:25 +01:00
|
|
|
static int u_ascii(const unsigned int u, unsigned char *dest) {
|
|
|
|
if (u < 0x80) {
|
|
|
|
*dest = (unsigned char) u;
|
2021-01-11 18:11:41 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* ECI 170 ASCII subset (ISO/IEC 646:1991 Invariant), excludes 12 chars that historically had national variants,
|
|
|
|
namely "#$@[\]^`{|}~" */
|
|
|
|
static int u_ascii_inv(const unsigned int u, unsigned char *dest) {
|
|
|
|
if (u == 0x7F || (u <= 'z' && u != '#' && u != '$' && u != '@' && (u <= 'Z' || u == '_' || u >= 'a'))) {
|
|
|
|
*dest = (unsigned char) u;
|
2021-01-11 18:11:41 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* ECI 25 UTF-16 Big Endian (ISO/IEC 10646) - assumes valid Unicode */
|
|
|
|
static int u_utf16be(const unsigned int u, unsigned char *dest) {
|
|
|
|
unsigned int u2, v;
|
|
|
|
if (u < 0x10000) {
|
|
|
|
dest[0] = (unsigned char) (u >> 8);
|
|
|
|
dest[1] = (unsigned char) u;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
u2 = u - 0x10000;
|
|
|
|
v = u2 >> 10;
|
|
|
|
dest[0] = (unsigned char) (0xD8 + (v >> 8));
|
|
|
|
dest[1] = (unsigned char) v;
|
|
|
|
v = u2 & 0x3FF;
|
|
|
|
dest[2] = (unsigned char) (0xDC + (v >> 8));
|
|
|
|
dest[3] = (unsigned char) v;
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ECI 33 UTF-16 Little Endian (ISO/IEC 10646) - assumes valid Unicode */
|
|
|
|
static int u_utf16le(const unsigned int u, unsigned char *dest) {
|
|
|
|
unsigned int u2, v;
|
|
|
|
if (u < 0x10000) {
|
|
|
|
dest[0] = (unsigned char) u;
|
|
|
|
dest[1] = (unsigned char) (u >> 8);
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
u2 = u - 0x10000;
|
|
|
|
v = u2 >> 10;
|
|
|
|
dest[0] = (unsigned char) v;
|
|
|
|
dest[1] = (unsigned char) (0xD8 + (v >> 8));
|
|
|
|
v = u2 & 0x3FF;
|
|
|
|
dest[2] = (unsigned char) v;
|
|
|
|
dest[3] = (unsigned char) (0xDC + (v >> 8));
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ECI 34 UTF-32 Big Endian (ISO/IEC 10646) - assumes valid Unicode */
|
|
|
|
static int u_utf32be(const unsigned int u, unsigned char *dest) {
|
|
|
|
dest[0] = 0;
|
|
|
|
dest[1] = (unsigned char) (u >> 16);
|
|
|
|
dest[2] = (unsigned char) (u >> 8);
|
|
|
|
dest[3] = (unsigned char) u;
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ECI 35 UTF-32 Little Endian (ISO/IEC 10646) - assumes valid Unicode */
|
|
|
|
static int u_utf32le(const unsigned int u, unsigned char *dest) {
|
|
|
|
dest[0] = (unsigned char) u;
|
|
|
|
dest[1] = (unsigned char) (u >> 8);
|
|
|
|
dest[2] = (unsigned char) (u >> 16);
|
|
|
|
dest[3] = 0;
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Multibyte stuff */
|
2016-08-16 12:43:41 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Acknowledgements to Bruno Haible <bruno@clisp.org> for a no. of techniques used here */
|
|
|
|
|
|
|
|
/* Helper to lookup Unicode codepoint `u` in the URO (Unified Repertoire and Ordering) block (U+4E00-9FFF) */
|
|
|
|
static int eci_u_lookup_uro_int(const unsigned int u, const unsigned short *tab_u, const unsigned short *tab_mb_ind,
|
|
|
|
const unsigned short *tab_mb, unsigned int *d) {
|
|
|
|
unsigned int u2 = (u - 0x4E00) >> 4; /* Blocks of 16 */
|
|
|
|
unsigned int v = (unsigned int) 1 << (u & 0xF);
|
|
|
|
if ((tab_u[u2] & v) == 0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
v = tab_u[u2] & (v - 1); /* Mask to bits prior to this one */
|
|
|
|
/* Count bits set (http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel) */
|
|
|
|
v = v - ((v >> 1) & 0x55555555);
|
|
|
|
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
|
|
|
|
v = (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
|
|
|
|
*d = tab_mb[tab_mb_ind[u2] + v];
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Version of `eci_u_lookup_uro_int()` taking unsigned char destination */
|
|
|
|
static int eci_u_lookup_uro(const unsigned int u, const unsigned short *tab_u, const unsigned short *tab_mb_ind,
|
|
|
|
const unsigned short *tab_mb, unsigned char *dest) {
|
|
|
|
unsigned int d;
|
|
|
|
int ret = eci_u_lookup_uro_int(u, tab_u, tab_mb_ind, tab_mb, &d);
|
|
|
|
if (ret) {
|
|
|
|
dest[0] = (unsigned char) (d >> 8);
|
|
|
|
dest[1] = (unsigned char) d;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ECI 20 Shift JIS */
|
|
|
|
static int u_sjis_int(const unsigned int u, unsigned int *d) {
|
|
|
|
unsigned int u2, dv, md;
|
|
|
|
int s, e;
|
|
|
|
|
|
|
|
if (u < 0x80 && u != 0x5C && u != 0x7E) { /* Backslash & tilde re-mapped according to JIS X 0201 Roman */
|
|
|
|
*d = u;
|
2021-01-11 18:11:41 +00:00
|
|
|
return 1;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Special case URO block sequential mappings (considerably lessens size of `sjis_u[]` array) */
|
|
|
|
if (u >= 0x4E00 && u <= 0xDFFF) { /* 0xE000 next used value >= 0x4E00 */
|
|
|
|
if (u >= 0x9FB0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return eci_u_lookup_uro_int(u, sjis_uro_u, sjis_uro_mb_ind, sjis_mb, d);
|
|
|
|
}
|
|
|
|
/* PUA to user-defined (Table 4-86, Lunde, 2nd ed.) */
|
|
|
|
if (u >= 0xE000 && u <= 0xE757) {
|
|
|
|
u2 = u - 0xE000;
|
|
|
|
dv = u2 / 188;
|
|
|
|
md = u2 - dv * 188;
|
|
|
|
*d = ((dv + 0xF0) << 8) | (md + 0x40 + (md >= 0x3F));
|
2021-01-11 18:11:41 +00:00
|
|
|
return 2;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
if (u >= sjis_u[0] && u <= sjis_u[ARRAY_SIZE(sjis_u) - 1]) {
|
|
|
|
s = 0;
|
|
|
|
e = ARRAY_SIZE(sjis_u) - 1;
|
|
|
|
while (s <= e) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (sjis_u[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else if (sjis_u[m] > u) {
|
|
|
|
e = m - 1;
|
|
|
|
} else {
|
|
|
|
*d = sjis_mb[u >= 0x4E00 ? m + 6356 : m]; /* Adjust for URO block */
|
|
|
|
return 1 + (*d > 0xFF);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-01-11 18:11:41 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
|
|
INTERNAL int u_sjis_int_test(const unsigned int u, unsigned int *d) {
|
|
|
|
return u_sjis_int(u, d);
|
|
|
|
}
|
|
|
|
#endif
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Version of `u_sjis_int()` taking unsigned char destination, for use by `utf8_to_eci()` */
|
|
|
|
static int u_sjis(const unsigned int u, unsigned char *dest) {
|
|
|
|
unsigned int d;
|
|
|
|
int ret = u_sjis_int(u, &d);
|
|
|
|
if (ret) {
|
|
|
|
if (ret == 1) {
|
|
|
|
dest[0] = (unsigned char) d;
|
|
|
|
} else {
|
|
|
|
dest[0] = (unsigned char) (d >> 8);
|
|
|
|
dest[1] = (unsigned char) d;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ECI 28 Big5 Chinese (Taiwan) */
|
|
|
|
static int u_big5(const unsigned int u, unsigned char *dest) {
|
|
|
|
int s, e;
|
|
|
|
|
|
|
|
if (u < 0x80) {
|
|
|
|
*dest = (unsigned char) u;
|
2021-01-11 18:11:41 +00:00
|
|
|
return 1;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Special case URO block sequential mappings (considerably lessens size of `big5_u[]` array) */
|
|
|
|
if (u >= 0x4E00 && u <= 0xFA0B) { /* 0xFA0C next used value >= 0x4E00 */
|
|
|
|
if (u >= 0x9FB0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return eci_u_lookup_uro(u, big5_uro_u, big5_uro_mb_ind, big5_mb, dest);
|
|
|
|
}
|
|
|
|
if (u >= big5_u[0] && u <= big5_u[ARRAY_SIZE(big5_u) - 1]) {
|
|
|
|
s = 0;
|
|
|
|
e = ARRAY_SIZE(big5_u) - 1;
|
|
|
|
while (s <= e) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (big5_u[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else if (big5_u[m] > u) {
|
|
|
|
e = m - 1;
|
|
|
|
} else {
|
|
|
|
const unsigned short mb = big5_mb[u >= 0x4E00 ? m + 13061 : m]; /* Adjust for URO block */
|
|
|
|
dest[0] = (unsigned char) (mb >> 8);
|
|
|
|
dest[1] = (unsigned char) mb;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
}
|
2021-01-11 18:11:41 +00:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
|
|
INTERNAL int u_big5_test(const unsigned int u, unsigned char *dest) {
|
|
|
|
return u_big5(u, dest);
|
|
|
|
}
|
|
|
|
#endif
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* ECI 30 EUC-KR (KS X 1001, formerly KS C 5601) Korean */
|
|
|
|
static int u_ksx1001(const unsigned int u, unsigned char *dest) {
|
|
|
|
int s, e;
|
|
|
|
|
|
|
|
if (u < 0x80) {
|
|
|
|
*dest = (unsigned char) u;
|
2021-01-11 18:11:41 +00:00
|
|
|
return 1;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Special case URO block sequential mappings (considerably lessens size of `ksx1001_u[]` array) */
|
|
|
|
if (u >= 0x4E00 && u <= 0xABFF) { /* 0xAC00 next used value >= 0x4E00 */
|
|
|
|
if (u >= 0x9FA0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return eci_u_lookup_uro(u, ksx1001_uro_u, ksx1001_uro_mb_ind, ksx1001_mb, dest);
|
|
|
|
}
|
|
|
|
if (u >= ksx1001_u[0] && u <= ksx1001_u[ARRAY_SIZE(ksx1001_u) - 1]) {
|
|
|
|
s = ksx1001_u_ind[(u - ksx1001_u[0]) >> 8];
|
|
|
|
e = s + 0x100 > ARRAY_SIZE(ksx1001_u) ? ARRAY_SIZE(ksx1001_u) - 1 : s + 0x100 - 1;
|
|
|
|
while (s <= e) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (ksx1001_u[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else if (ksx1001_u[m] > u) {
|
|
|
|
e = m - 1;
|
|
|
|
} else {
|
|
|
|
const unsigned short mb = ksx1001_mb[u >= 0x4E00 ? m + 4620 : m]; /* Adjust for URO block */
|
|
|
|
dest[0] = (unsigned char) (mb >> 8);
|
|
|
|
dest[1] = (unsigned char) mb;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
}
|
2021-01-11 18:11:41 +00:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
|
|
INTERNAL int u_ksx1001_test(const unsigned int u, unsigned char *dest) {
|
|
|
|
return u_ksx1001(u, dest);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* ECI 29 GB 2312 Chinese (PRC) */
|
|
|
|
static int u_gb2312_int(const unsigned int u, unsigned int *d) {
|
|
|
|
int s, e;
|
|
|
|
|
|
|
|
if (u < 0x80) {
|
|
|
|
*d = u;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
/* Special case URO block sequential mappings (considerably lessens size of `gb2312_u[]` array) */
|
|
|
|
if (u >= 0x4E00 && u <= 0x9E1E) { /* 0x9E1F next used value >= 0x4E00 */
|
|
|
|
if (u >= 0x9CF0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return eci_u_lookup_uro_int(u, gb2312_uro_u, gb2312_uro_mb_ind, gb2312_mb, d);
|
|
|
|
}
|
|
|
|
if (u >= gb2312_u[0] && u <= gb2312_u[ARRAY_SIZE(gb2312_u) - 1]) {
|
|
|
|
s = gb2312_u_ind[(u - gb2312_u[0]) >> 8];
|
|
|
|
e = s + 0x100 > ARRAY_SIZE(gb2312_u) ? ARRAY_SIZE(gb2312_u) - 1 : s + 0x100 - 1;
|
|
|
|
while (s <= e) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (gb2312_u[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else if (gb2312_u[m] > u) {
|
|
|
|
e = m - 1;
|
|
|
|
} else {
|
|
|
|
*d = gb2312_mb[u > 0x4E00 ? m + 6627 : m]; /* Adjust for URO block */
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
|
|
INTERNAL int u_gb2312_int_test(const unsigned int u, unsigned int *d) {
|
|
|
|
return u_gb2312_int(u, d);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Version of `u_gb2312_int()` taking unsigned char destination, for use by `utf8_to_eci()` */
|
|
|
|
static int u_gb2312(const unsigned int u, unsigned char *dest) {
|
|
|
|
unsigned int d;
|
|
|
|
int ret = u_gb2312_int(u, &d);
|
|
|
|
if (ret) {
|
|
|
|
if (ret == 1) {
|
|
|
|
dest[0] = (unsigned char) d;
|
|
|
|
} else {
|
|
|
|
dest[0] = (unsigned char) (d >> 8);
|
|
|
|
dest[1] = (unsigned char) d;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-04-10 11:12:18 +01:00
|
|
|
/* ECI 31 GBK Chinese */
|
2022-06-02 20:32:25 +01:00
|
|
|
static int u_gbk_int(const unsigned int u, unsigned int *d) {
|
|
|
|
int s, e;
|
2022-04-10 11:12:18 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
if (u < 0x80) {
|
|
|
|
*d = u;
|
2022-04-10 11:12:18 +01:00
|
|
|
return 1;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
/* Check GB 2312 first */
|
|
|
|
if (u == 0x30FB) {
|
|
|
|
/* KATAKANA MIDDLE DOT, mapped by GB 2312 but not by GBK (U+00B7 MIDDLE DOT mapped to 0xA1A4 instead) */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (u == 0x2015) {
|
|
|
|
/* HORIZONTAL BAR, mapped to 0xA844 by GBK rather than 0xA1AA (U+2014 EM DASH mapped there instead) */
|
|
|
|
*d = 0xA844;
|
2022-04-10 11:12:18 +01:00
|
|
|
return 2;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
if (u_gb2312_int(u, d)) { /* Includes the 2 GB 6345.1-86 corrections given in Table 3-22, Lunde, 2nd ed. */
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Special case URO block sequential mappings (considerably lessens size of `gbk_u[]` array) */
|
|
|
|
if (u >= 0x4E00 && u <= 0xF92B) { /* 0xF92C next used value >= 0x4E00 */
|
|
|
|
if (u >= 0x9FB0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return eci_u_lookup_uro_int(u, gbk_uro_u, gbk_uro_mb_ind, gbk_mb, d);
|
|
|
|
}
|
|
|
|
if (u >= gbk_u[0] && u <= gbk_u[ARRAY_SIZE(gbk_u) - 1]) {
|
|
|
|
s = 0;
|
|
|
|
e = ARRAY_SIZE(gbk_u) - 1;
|
|
|
|
while (s <= e) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (gbk_u[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else if (gbk_u[m] > u) {
|
|
|
|
e = m - 1;
|
|
|
|
} else {
|
|
|
|
*d = gbk_mb[u >= 0x4E00 ? m + 14139 : m]; /* Adjust for URO block */
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-04-10 11:12:18 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
|
|
INTERNAL int u_gbk_int_test(const unsigned int u, unsigned int *d) {
|
|
|
|
return u_gbk_int(u, d);
|
|
|
|
}
|
|
|
|
#endif
|
2022-04-10 11:12:18 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Version of `u_gbk_int()` taking unsigned char destination, for use by `utf8_to_eci()` */
|
|
|
|
static int u_gbk(const unsigned int u, unsigned char *dest) {
|
|
|
|
unsigned int d;
|
|
|
|
int ret = u_gbk_int(u, &d);
|
|
|
|
if (ret) {
|
|
|
|
if (ret == 1) {
|
|
|
|
dest[0] = (unsigned char) d;
|
|
|
|
} else {
|
|
|
|
dest[0] = (unsigned char) (d >> 8);
|
|
|
|
dest[1] = (unsigned char) d;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Helper for `u_gb18030_int()` to output 4-byte sequential blocks */
|
|
|
|
static int u_gb18030_4_sequential_int(unsigned int u2, unsigned int mb_lead, unsigned int *d1, unsigned int *d2) {
|
|
|
|
unsigned int dv;
|
|
|
|
|
|
|
|
dv = u2 / 10;
|
|
|
|
*d2 = u2 - dv * 10 + 0x30;
|
|
|
|
u2 = dv;
|
|
|
|
dv = u2 / 126;
|
|
|
|
*d2 |= (u2 - dv * 126 + 0x81) << 8;
|
|
|
|
u2 = dv;
|
|
|
|
dv = u2 / 10;
|
|
|
|
*d1 = ((dv + mb_lead) << 8) | (u2 - dv * 10 + 0x30);
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ECI 32 GB 18030 Chinese - assumes valid Unicode */
|
|
|
|
static int u_gb18030_int(const unsigned int u, unsigned int *d1, unsigned int *d2) {
|
|
|
|
unsigned int u2, dv;
|
|
|
|
int s, e;
|
|
|
|
|
|
|
|
if (u < 0x80) {
|
|
|
|
*d1 = u;
|
2022-04-10 11:12:18 +01:00
|
|
|
return 1;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
/* Check GBK first */
|
|
|
|
if (u_gbk_int(u, d1)) {
|
2022-04-10 11:12:18 +01:00
|
|
|
return 2;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
if (u >= 0x10000) {
|
|
|
|
/* Non-PUA, non-BMP, see Table 3-37, Lunde, 2nd ed. */
|
|
|
|
if (u == 0x20087) {
|
|
|
|
*d1 = 0xFE51;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
if (u == 0x20089) {
|
|
|
|
*d1 = 0xFE52;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
if (u == 0x200CC) {
|
|
|
|
*d1 = 0xFE53;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
if (u == 0x215D7) {
|
|
|
|
*d1 = 0xFE6C;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
if (u == 0x2298F) {
|
|
|
|
*d1 = 0xFE76;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
if (u == 0x241FE) {
|
|
|
|
*d1 = 0xFE91;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
/* All other non-BMP U+10000-10FFFF */
|
|
|
|
return u_gb18030_4_sequential_int(u - 0x10000, 0x90, d1, d2);
|
|
|
|
}
|
|
|
|
if (u >= 0xE000 && u <= 0xE765) { /* PUA to user-defined */
|
|
|
|
if (u <= 0xE4C5) {
|
|
|
|
u2 = u - 0xE000;
|
|
|
|
dv = u2 / 94;
|
|
|
|
*d1 = ((dv + (dv < 6 ? 0xAA : 0xF2)) << 8) | (u2 - dv * 94 + 0xA1);
|
|
|
|
} else {
|
|
|
|
unsigned int md;
|
|
|
|
u2 = u - 0xE4C6;
|
|
|
|
dv = u2 / 96;
|
|
|
|
md = u2 - dv * 96;
|
|
|
|
*d1 = ((dv + 0xA1) << 8) | (md + 0x40 + (md >= 0x3F));
|
|
|
|
}
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
if (u >= gb18030_2_u[0] && u <= gb18030_2_u[ARRAY_SIZE(gb18030_2_u) - 1]) {
|
|
|
|
s = 0;
|
|
|
|
e = ARRAY_SIZE(gb18030_2_u) - 1;
|
|
|
|
while (s <= e) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (gb18030_2_u[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else if (gb18030_2_u[m] > u) {
|
|
|
|
e = m - 1;
|
|
|
|
} else {
|
|
|
|
*d1 = gb18030_2_mb[m];
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* All other BMP U+0080-FFFF */
|
|
|
|
if (u == 0xE7C7) { /* PUA change to non-PUA, see Table 3-39, Lunde, 2nd ed. */
|
|
|
|
*d1 = 0x8135;
|
|
|
|
*d2 = 0xF437;
|
2022-04-10 11:12:18 +01:00
|
|
|
return 4;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
s = 0;
|
|
|
|
e = ARRAY_SIZE(gb18030_4_u_e) - 1;
|
|
|
|
while (s < e) { /* Lower bound */
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
const int m = (s + e) >> 1;
|
2022-06-02 20:32:25 +01:00
|
|
|
if (gb18030_4_u_e[m] < u) {
|
|
|
|
s = m + 1;
|
|
|
|
} else {
|
|
|
|
e = m;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(s < ARRAY_SIZE(gb18030_4_u_e));
|
|
|
|
return u_gb18030_4_sequential_int(u - gb18030_4_mb_o[s] - 0x80, 0x81, d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
|
|
INTERNAL int u_gb18030_int_test(const unsigned int u, unsigned int *d1, unsigned int *d2) {
|
|
|
|
return u_gb18030_int(u, d1, d2);
|
2022-04-10 11:12:18 +01:00
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Version of `u_gb18030_int()` taking unsigned char destination, for use by `utf8_to_eci()` */
|
|
|
|
static int u_gb18030(const unsigned int u, unsigned char *dest) {
|
|
|
|
unsigned int d1, d2;
|
|
|
|
int ret = u_gb18030_int(u, &d1, &d2);
|
|
|
|
if (ret) {
|
|
|
|
if (ret == 1) {
|
|
|
|
dest[0] = (unsigned char) d1;
|
|
|
|
} else {
|
|
|
|
dest[0] = (unsigned char) (d1 >> 8);
|
|
|
|
dest[1] = (unsigned char) d1;
|
|
|
|
if (ret == 4) {
|
|
|
|
dest[2] = (unsigned char) (d2 >> 8);
|
|
|
|
dest[3] = (unsigned char) d2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Main ECI stuff */
|
2022-04-10 11:12:18 +01:00
|
|
|
|
2021-01-11 18:11:41 +00:00
|
|
|
/* Helper to count the number of chars in a string within a range */
|
|
|
|
static int chr_range_cnt(const unsigned char string[], const int length, const unsigned char c1,
|
|
|
|
const unsigned char c2) {
|
|
|
|
int count = 0;
|
|
|
|
int i;
|
|
|
|
if (c1) {
|
|
|
|
for (i = 0; i < length; i++) {
|
|
|
|
if (string[i] >= c1 && string[i] <= c2) {
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (i = 0; i < length; i++) {
|
|
|
|
if (string[i] <= c2) {
|
|
|
|
count++;
|
2016-08-16 12:43:41 +01:00
|
|
|
}
|
2021-01-11 18:11:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Is ECI convertible from UTF-8? */
|
|
|
|
INTERNAL int is_eci_convertible(const int eci) {
|
2022-04-10 11:12:18 +01:00
|
|
|
if (eci == 26 || (eci > 35 && eci != 170)) { /* Exclude ECI 170 - ASCII Invariant */
|
|
|
|
/* UTF-8 (26) or 8-bit binary data (899) or undefined (> 35 and < 899) or not character set (> 899) */
|
2021-01-11 18:11:41 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
Add multiple segments support for AZTEC, CODEONE, DATAMATRIX, DOTCODE,
GRIDMATRIX, HANXIN, MAXICODE, MICROPDF417, PDF417, QRCODE, RMQR, ULTRA
RMQR: fix ECI encoding (wrong bit length for indicator)
MICROQR: check versions M1 and M2 for allowed characters so as to give
better error messages
DOTCODE: some small optimizations
common.c: add is_chr(), segs_length(), segs_cpy()
CODEONE/CODE128/DOTCODE/GRIDMATRIX/HANXIN/MAXICODE/QRCODE/ULTRA: add
namespace prefixes to static funcs/data
includes: use Z_ prefix, unuse double underscore prefixes (guard defines)
manual.txt: compress some tables using double/treble column sets
2022-05-09 19:50:50 +01:00
|
|
|
/* Are any of the ECIs in the segments convertible from UTF-8?
|
|
|
|
Sets `convertible[]` for each, which must be at least `seg_count` in size */
|
|
|
|
INTERNAL int is_eci_convertible_segs(const struct zint_seg segs[], const int seg_count, int convertible[]) {
|
|
|
|
int ret = 0;
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
|
|
convertible[i] = is_eci_convertible(segs[i].eci);
|
|
|
|
ret |= convertible[i];
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-01-11 18:11:41 +00:00
|
|
|
/* Calculate length required to convert UTF-8 to (double-byte) encoding */
|
|
|
|
INTERNAL int get_eci_length(const int eci, const unsigned char source[], int length) {
|
|
|
|
if (eci == 20) { /* Shift JIS */
|
|
|
|
/* Only ASCII backslash (reverse solidus) exceeds UTF-8 length */
|
|
|
|
length += chr_cnt(source, length, '\\');
|
|
|
|
|
2022-04-10 11:12:18 +01:00
|
|
|
} else if (eci == 25 || eci == 33) { /* UTF-16 */
|
2021-01-11 18:11:41 +00:00
|
|
|
/* All ASCII chars take 2 bytes */
|
|
|
|
length += chr_range_cnt(source, length, 0, 0x7F);
|
2022-04-10 11:12:18 +01:00
|
|
|
/* Surrogate pairs are 4 UTF-8 bytes long so fit */
|
2021-01-11 18:11:41 +00:00
|
|
|
|
2022-04-10 11:12:18 +01:00
|
|
|
} else if (eci == 32) { /* GB 18030 */
|
|
|
|
/* Allow for GB 18030 4 byters */
|
2021-01-11 18:11:41 +00:00
|
|
|
length *= 2;
|
2022-04-10 11:12:18 +01:00
|
|
|
|
|
|
|
} else if (eci == 34 || eci == 35) { /* UTF-32 */
|
|
|
|
/* Quadruple-up ASCII and double-up non-ASCII */
|
|
|
|
length += chr_range_cnt(source, length, 0, 0x7F) * 2 + length;
|
2021-01-11 18:11:41 +00:00
|
|
|
}
|
|
|
|
|
2022-04-10 11:12:18 +01:00
|
|
|
/* Big5, GB 2312, EUC-KR and GBK fit in UTF-8 length */
|
2021-01-11 18:11:41 +00:00
|
|
|
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
Add multiple segments support for AZTEC, CODEONE, DATAMATRIX, DOTCODE,
GRIDMATRIX, HANXIN, MAXICODE, MICROPDF417, PDF417, QRCODE, RMQR, ULTRA
RMQR: fix ECI encoding (wrong bit length for indicator)
MICROQR: check versions M1 and M2 for allowed characters so as to give
better error messages
DOTCODE: some small optimizations
common.c: add is_chr(), segs_length(), segs_cpy()
CODEONE/CODE128/DOTCODE/GRIDMATRIX/HANXIN/MAXICODE/QRCODE/ULTRA: add
namespace prefixes to static funcs/data
includes: use Z_ prefix, unuse double underscore prefixes (guard defines)
manual.txt: compress some tables using double/treble column sets
2022-05-09 19:50:50 +01:00
|
|
|
/* Call `get_eci_length()` for each segment, returning total */
|
|
|
|
INTERNAL int get_eci_length_segs(const struct zint_seg segs[], const int seg_count) {
|
|
|
|
int length = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
|
|
length += get_eci_length(segs[i].eci, segs[i].source, segs[i].length);
|
|
|
|
}
|
|
|
|
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Convert UTF-8 to other character encodings */
|
2023-06-12 01:25:55 +01:00
|
|
|
typedef int (*eci_func_t)(const unsigned int u, unsigned char *dest);
|
2021-01-11 18:11:41 +00:00
|
|
|
INTERNAL int utf8_to_eci(const int eci, const unsigned char source[], unsigned char dest[], int *p_length) {
|
|
|
|
|
2022-04-10 11:12:18 +01:00
|
|
|
static const eci_func_t eci_funcs[36] = {
|
2022-06-02 20:32:25 +01:00
|
|
|
NULL, NULL, NULL, NULL, u_iso8859_2, /*0-4*/
|
|
|
|
u_iso8859_3, u_iso8859_4, u_iso8859_5, u_iso8859_6, u_iso8859_7, /*5-9*/
|
|
|
|
u_iso8859_8, u_iso8859_9, u_iso8859_10, u_iso8859_11, NULL, /*10-14*/
|
|
|
|
u_iso8859_13, u_iso8859_14, u_iso8859_15, u_iso8859_16, NULL, /*15-19*/
|
|
|
|
u_sjis, u_cp1250, u_cp1251, u_cp1252, u_cp1256, /*20-24*/
|
|
|
|
u_utf16be, NULL, u_ascii, u_big5, u_gb2312, /*25-29*/
|
|
|
|
u_ksx1001, u_gbk, u_gb18030, u_utf16le, u_utf32be, /*30-34*/
|
|
|
|
u_utf32le,
|
2021-01-11 18:11:41 +00:00
|
|
|
};
|
|
|
|
eci_func_t eci_func;
|
2022-06-02 20:32:25 +01:00
|
|
|
unsigned int codepoint, state = 0;
|
|
|
|
int in_posn = 0;
|
|
|
|
int out_posn = 0;
|
2021-01-11 18:11:41 +00:00
|
|
|
int length = *p_length;
|
|
|
|
|
|
|
|
/* Special case ISO/IEC 8859-1 */
|
|
|
|
if (eci == 0 || eci == 3) { /* Default ECI 0 to ISO/IEC 8859-1 */
|
|
|
|
while (in_posn < length) {
|
|
|
|
do {
|
|
|
|
decode_utf8(&state, &codepoint, source[in_posn++]);
|
|
|
|
} while (in_posn < length && state != 0 && state != 12);
|
|
|
|
if (state != 0) {
|
2016-08-16 12:43:41 +01:00
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
if (codepoint >= 0x80 && (codepoint < 0xA0 || codepoint >= 0x100)) {
|
2016-08-16 12:43:41 +01:00
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2021-01-11 18:11:41 +00:00
|
|
|
dest[out_posn++] = (unsigned char) codepoint;
|
2016-08-16 12:43:41 +01:00
|
|
|
}
|
2021-01-11 18:11:41 +00:00
|
|
|
dest[out_posn] = '\0';
|
|
|
|
*p_length = out_posn;
|
|
|
|
return 0;
|
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2021-01-11 18:11:41 +00:00
|
|
|
if (eci == 170) { /* ASCII Invariant (archaic subset) */
|
2022-06-02 20:32:25 +01:00
|
|
|
eci_func = u_ascii_inv;
|
2021-01-11 18:11:41 +00:00
|
|
|
} else {
|
|
|
|
eci_func = eci_funcs[eci];
|
|
|
|
if (eci_func == NULL) {
|
2016-08-16 12:43:41 +01:00
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2021-01-11 18:11:41 +00:00
|
|
|
}
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2021-01-11 18:11:41 +00:00
|
|
|
while (in_posn < length) {
|
|
|
|
int incr;
|
|
|
|
do {
|
|
|
|
decode_utf8(&state, &codepoint, source[in_posn++]);
|
|
|
|
} while (in_posn < length && state != 0 && state != 12);
|
|
|
|
if (state != 0) {
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
2016-08-16 12:43:41 +01:00
|
|
|
}
|
2022-06-02 20:32:25 +01:00
|
|
|
incr = (*eci_func)(codepoint, dest + out_posn);
|
2021-01-11 18:11:41 +00:00
|
|
|
if (incr == 0) {
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
|
|
|
out_posn += incr;
|
|
|
|
}
|
2016-08-16 12:43:41 +01:00
|
|
|
dest[out_posn] = '\0';
|
2021-01-11 18:11:41 +00:00
|
|
|
*p_length = out_posn;
|
2016-08-16 12:43:41 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-01-11 18:11:41 +00:00
|
|
|
/* Find the lowest single-byte ECI mode which will encode a given set of Unicode text */
|
|
|
|
INTERNAL int get_best_eci(const unsigned char source[], int length) {
|
2016-08-16 12:43:41 +01:00
|
|
|
int eci = 3;
|
2021-01-11 18:11:41 +00:00
|
|
|
/* Note: attempting single-byte conversions only, so get_eci_length() unnecessary */
|
2022-07-14 16:01:30 +01:00
|
|
|
unsigned char *local_source = (unsigned char *) z_alloca(length + 1);
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2016-08-16 12:43:41 +01:00
|
|
|
do {
|
2021-01-11 18:11:41 +00:00
|
|
|
if (eci == 14) { /* Reserved */
|
|
|
|
eci = 15;
|
|
|
|
} else if (eci == 19) { /* Reserved */
|
|
|
|
eci = 21; /* Skip 20 Shift JIS */
|
|
|
|
}
|
|
|
|
if (utf8_to_eci(eci, source, local_source, &length) == 0) {
|
2016-08-16 12:43:41 +01:00
|
|
|
return eci;
|
2016-09-05 22:06:50 +01:00
|
|
|
}
|
2016-08-16 12:43:41 +01:00
|
|
|
eci++;
|
|
|
|
} while (eci < 25);
|
2016-09-05 22:06:50 +01:00
|
|
|
|
2021-03-21 17:35:52 +00:00
|
|
|
if (!is_valid_utf8(source, length)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-07-14 16:01:30 +01:00
|
|
|
return 26; /* If all of these fail, use UTF-8! */
|
2016-08-16 12:43:41 +01:00
|
|
|
}
|
2022-04-10 11:12:18 +01:00
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* Call `get_best_eci()` for each segment. Returns 0 on failure, first ECI set on success */
|
Add multiple segments support for AZTEC, CODEONE, DATAMATRIX, DOTCODE,
GRIDMATRIX, HANXIN, MAXICODE, MICROPDF417, PDF417, QRCODE, RMQR, ULTRA
RMQR: fix ECI encoding (wrong bit length for indicator)
MICROQR: check versions M1 and M2 for allowed characters so as to give
better error messages
DOTCODE: some small optimizations
common.c: add is_chr(), segs_length(), segs_cpy()
CODEONE/CODE128/DOTCODE/GRIDMATRIX/HANXIN/MAXICODE/QRCODE/ULTRA: add
namespace prefixes to static funcs/data
includes: use Z_ prefix, unuse double underscore prefixes (guard defines)
manual.txt: compress some tables using double/treble column sets
2022-05-09 19:50:50 +01:00
|
|
|
INTERNAL int get_best_eci_segs(struct zint_symbol *symbol, struct zint_seg segs[], const int seg_count) {
|
2022-05-19 10:17:51 +01:00
|
|
|
const int default_eci = symbol->symbology == BARCODE_GRIDMATRIX ? 29 : symbol->symbology == BARCODE_UPNQR ? 4 : 3;
|
Add multiple segments support for AZTEC, CODEONE, DATAMATRIX, DOTCODE,
GRIDMATRIX, HANXIN, MAXICODE, MICROPDF417, PDF417, QRCODE, RMQR, ULTRA
RMQR: fix ECI encoding (wrong bit length for indicator)
MICROQR: check versions M1 and M2 for allowed characters so as to give
better error messages
DOTCODE: some small optimizations
common.c: add is_chr(), segs_length(), segs_cpy()
CODEONE/CODE128/DOTCODE/GRIDMATRIX/HANXIN/MAXICODE/QRCODE/ULTRA: add
namespace prefixes to static funcs/data
includes: use Z_ prefix, unuse double underscore prefixes (guard defines)
manual.txt: compress some tables using double/treble column sets
2022-05-09 19:50:50 +01:00
|
|
|
int first_eci_set = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
|
|
if (segs[i].eci == 0) {
|
|
|
|
int eci = get_best_eci(segs[i].source, segs[i].length);
|
|
|
|
if (eci == 0) {
|
|
|
|
return 0;
|
|
|
|
}
|
2022-05-19 10:17:51 +01:00
|
|
|
if (eci == default_eci) {
|
|
|
|
if (i != 0 && segs[i - 1].eci != 0 && segs[i - 1].eci != default_eci) {
|
Add multiple segments support for AZTEC, CODEONE, DATAMATRIX, DOTCODE,
GRIDMATRIX, HANXIN, MAXICODE, MICROPDF417, PDF417, QRCODE, RMQR, ULTRA
RMQR: fix ECI encoding (wrong bit length for indicator)
MICROQR: check versions M1 and M2 for allowed characters so as to give
better error messages
DOTCODE: some small optimizations
common.c: add is_chr(), segs_length(), segs_cpy()
CODEONE/CODE128/DOTCODE/GRIDMATRIX/HANXIN/MAXICODE/QRCODE/ULTRA: add
namespace prefixes to static funcs/data
includes: use Z_ prefix, unuse double underscore prefixes (guard defines)
manual.txt: compress some tables using double/treble column sets
2022-05-09 19:50:50 +01:00
|
|
|
segs[i].eci = eci;
|
|
|
|
if (first_eci_set == 0) {
|
|
|
|
first_eci_set = eci;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
segs[i].eci = eci;
|
|
|
|
if (first_eci_set == 0) {
|
|
|
|
first_eci_set = eci;
|
|
|
|
if (i == 0) {
|
|
|
|
symbol->eci = eci;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return first_eci_set;
|
|
|
|
}
|
|
|
|
|
2022-06-02 20:32:25 +01:00
|
|
|
/* QRCODE Shift JIS helpers */
|
|
|
|
|
|
|
|
/* Convert UTF-8 string to Shift JIS and place in array of ints */
|
|
|
|
INTERNAL int sjis_utf8(struct zint_symbol *symbol, const unsigned char source[], int *p_length,
|
|
|
|
unsigned int *ddata) {
|
|
|
|
int error_number;
|
|
|
|
unsigned int i, length;
|
2022-07-14 16:01:30 +01:00
|
|
|
unsigned int *utfdata = (unsigned int *) z_alloca(sizeof(unsigned int) * (*p_length + 1));
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
|
|
|
|
if (error_number != 0) {
|
|
|
|
return error_number;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
|
|
if (!u_sjis_int(utfdata[i], ddata + i)) {
|
|
|
|
strcpy(symbol->errtxt, "800: Invalid character in input data");
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If `full_multibyte` set, copy byte input stream to array of ints, putting double-bytes that match QR Kanji mode in
|
|
|
|
* a single entry. If `full_multibyte` not set, do a straight copy */
|
|
|
|
INTERNAL void sjis_cpy(const unsigned char source[], int *p_length, unsigned int *ddata, const int full_multibyte) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
unsigned int i, j, length;
|
|
|
|
unsigned char c1, c2;
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
if (full_multibyte) {
|
|
|
|
for (i = 0, j = 0, length = *p_length; i < length; i++, j++) {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
c1 = source[i];
|
|
|
|
/* Now using stricter interpretation of standard, and excluding certain trailing bytes */
|
|
|
|
if (((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEB)) && length - i >= 2) {
|
|
|
|
c2 = source[i + 1];
|
|
|
|
if ((c2 >= 0x40 && c2 <= 0xFC) && c2 != 0x7F && (c1 != 0xEB || c2 <= 0xBF)) {
|
2022-06-02 20:32:25 +01:00
|
|
|
/* This may or may not be valid Shift JIS, but don't care as long as it can be encoded in
|
|
|
|
* QR Kanji mode */
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
ddata[j] = (c1 << 8) | c2;
|
2022-06-02 20:32:25 +01:00
|
|
|
i++;
|
|
|
|
} else {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
ddata[j] = c1;
|
2022-06-02 20:32:25 +01:00
|
|
|
}
|
|
|
|
} else {
|
RMQR: update to ISO/IEC 23941:2022 - R13x77 numeric cclens change 8 -> 7
QRCODE: use stricter interpretation of ZINT_FULL_MULTIBYTE, excluding
certain trailing bytes
libzint: fix some confusing error messages introduced by segment stuff
general: new escape chars \U, \d and \o
backend_qt: fudge rendering of border rectangles due to scaling/translation
rounding errors TODO: better fudge
GUI: foreground/background colours -> text boxes and icon buttons, add swap
button, independently movable picker (NULL parent), preview colour changes,
preview Data Window changes, add clear data (del) buttons, add zap button
and Factory Reset menu option, various other fixes
libzint: remove STATIC_UNLESS_ZINT_TEST, use wrappers
CMake: add find package QtSvg, remove QtXml
manual: split symbology and general specs and sort, move DAFT to 4-state,
UPC/EAN -> EAN/UPC, DataBar -> GS1 DataBar always, expand MAILMARK info,
various other fiddlings
man page: options or -> |, expand MSI Plessey check digit options
README.linux: add packages info
license: add SPDX-License-Identifier to touched files
2022-06-09 21:52:02 +01:00
|
|
|
ddata[j] = c1;
|
2022-06-02 20:32:25 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
*p_length = j;
|
|
|
|
} else {
|
|
|
|
/* Straight copy */
|
|
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
|
|
ddata[i] = source[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Call `sjis_cpy()` for each segment */
|
|
|
|
INTERNAL void sjis_cpy_segs(struct zint_seg segs[], const int seg_count, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
int i;
|
|
|
|
unsigned int *dd = ddata;
|
|
|
|
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
|
|
sjis_cpy(segs[i].source, &segs[i].length, dd, full_multibyte);
|
|
|
|
dd += segs[i].length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert UTF-8 string to ECI and place in array of ints using `sjis_cpy()` */
|
|
|
|
INTERNAL int sjis_utf8_to_eci(const int eci, const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
|
|
|
|
if (is_eci_convertible(eci)) {
|
|
|
|
int error_number;
|
|
|
|
const int eci_length = get_eci_length(eci, source, *p_length);
|
2022-07-14 16:01:30 +01:00
|
|
|
unsigned char *converted = (unsigned char *) z_alloca(eci_length + 1);
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
error_number = utf8_to_eci(eci, source, converted, p_length);
|
|
|
|
if (error_number != 0) {
|
|
|
|
/* Note not setting `symbol->errtxt`, up to caller */
|
|
|
|
return error_number;
|
|
|
|
}
|
|
|
|
|
|
|
|
sjis_cpy(converted, p_length, ddata, full_multibyte || eci == 20);
|
|
|
|
} else {
|
|
|
|
sjis_cpy(source, p_length, ddata, full_multibyte);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* GRIDMATRIX GB 2312 helpers */
|
|
|
|
|
|
|
|
/* Convert UTF-8 string to GB 2312 (EUC-CN) and place in array of ints */
|
|
|
|
INTERNAL int gb2312_utf8(struct zint_symbol *symbol, const unsigned char source[], int *p_length,
|
|
|
|
unsigned int *ddata) {
|
|
|
|
int error_number;
|
|
|
|
unsigned int i, length;
|
2022-07-14 16:01:30 +01:00
|
|
|
unsigned int *utfdata = (unsigned int *) z_alloca(sizeof(unsigned int) * (*p_length + 1));
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
|
|
|
|
if (error_number != 0) {
|
|
|
|
return error_number;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
|
|
if (utfdata[i] < 0x80) {
|
|
|
|
ddata[i] = utfdata[i];
|
|
|
|
} else {
|
|
|
|
if (!u_gb2312_int(utfdata[i], ddata + i)) {
|
|
|
|
strcpy(symbol->errtxt, "810: Invalid character in input data");
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If `full_multibyte` set, copy byte input stream to array of ints, putting double-bytes that match GRIDMATRIX
|
|
|
|
* Chinese mode in a single entry. If `full_multibyte` not set, do a straight copy */
|
|
|
|
static void gb2312_cpy(const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
unsigned int i, j, length;
|
|
|
|
unsigned char c1, c2;
|
|
|
|
|
|
|
|
if (full_multibyte) {
|
|
|
|
for (i = 0, j = 0, length = *p_length; i < length; i++, j++) {
|
|
|
|
if (length - i >= 2) {
|
|
|
|
c1 = source[i];
|
|
|
|
c2 = source[i + 1];
|
|
|
|
if (((c1 >= 0xA1 && c1 <= 0xA9) || (c1 >= 0xB0 && c1 <= 0xF7)) && c2 >= 0xA1 && c2 <= 0xFE) {
|
|
|
|
/* This may or may not be valid GB 2312 (EUC-CN), but don't care as long as it can be encoded in
|
|
|
|
* GRIDMATRIX Chinese mode */
|
|
|
|
ddata[j] = (c1 << 8) | c2;
|
|
|
|
i++;
|
|
|
|
} else {
|
|
|
|
ddata[j] = c1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
ddata[j] = source[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*p_length = j;
|
|
|
|
} else {
|
|
|
|
/* Straight copy */
|
|
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
|
|
ddata[i] = source[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
|
|
INTERNAL void gb2312_cpy_test(const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
gb2312_cpy(source, p_length, ddata, full_multibyte);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Call `gb2312_cpy()` for each segment */
|
|
|
|
INTERNAL void gb2312_cpy_segs(struct zint_seg segs[], const int seg_count, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
int i;
|
|
|
|
unsigned int *dd = ddata;
|
|
|
|
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
|
|
gb2312_cpy(segs[i].source, &segs[i].length, dd, full_multibyte);
|
|
|
|
dd += segs[i].length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert UTF-8 string to ECI and place in array of ints using `gb2312_cpy()` */
|
|
|
|
INTERNAL int gb2312_utf8_to_eci(const int eci, const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
|
|
|
|
if (is_eci_convertible(eci)) {
|
|
|
|
int error_number;
|
|
|
|
const int eci_length = get_eci_length(eci, source, *p_length);
|
2022-07-14 16:01:30 +01:00
|
|
|
unsigned char *converted = (unsigned char *) z_alloca(eci_length + 1);
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
error_number = utf8_to_eci(eci, source, converted, p_length);
|
|
|
|
if (error_number != 0) {
|
|
|
|
/* Note not setting `symbol->errtxt`, up to caller */
|
|
|
|
return error_number;
|
|
|
|
}
|
|
|
|
|
|
|
|
gb2312_cpy(converted, p_length, ddata, full_multibyte || eci == 29);
|
|
|
|
} else {
|
|
|
|
gb2312_cpy(source, p_length, ddata, full_multibyte);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* HANXIN GB 18030 helpers */
|
|
|
|
|
|
|
|
/* Convert UTF-8 string to GB 18030 and place in array of ints */
|
|
|
|
INTERNAL int gb18030_utf8(struct zint_symbol *symbol, const unsigned char source[], int *p_length,
|
|
|
|
unsigned int *ddata) {
|
|
|
|
int error_number, ret;
|
|
|
|
unsigned int i, j, length;
|
2022-07-14 16:01:30 +01:00
|
|
|
unsigned int *utfdata = (unsigned int *) z_alloca(sizeof(unsigned int) * (*p_length + 1));
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 0 /*disallow_4byte*/);
|
|
|
|
if (error_number != 0) {
|
|
|
|
return error_number;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0, j = 0, length = *p_length; i < length; i++, j++) {
|
|
|
|
if (utfdata[i] < 0x80) {
|
|
|
|
ddata[j] = utfdata[i];
|
|
|
|
} else {
|
|
|
|
ret = u_gb18030_int(utfdata[i], ddata + j, ddata + j + 1);
|
|
|
|
if (ret == 0) { /* Should never happen, as GB 18030 is a UTF i.e. maps all Unicode codepoints */
|
|
|
|
strcpy(symbol->errtxt, "820: Invalid character in input data"); /* Not reached */
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
|
|
|
if (ret == 4) {
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*p_length = j;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If `full_multibyte` set, copy byte input stream to array of ints, putting double-bytes that match HANXIN
|
|
|
|
* Chinese mode in single entry, and quad-bytes in 2 entries. If `full_multibyte` not set, do a straight copy */
|
|
|
|
static void gb18030_cpy(const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
unsigned int i, j, length;
|
|
|
|
int done;
|
|
|
|
unsigned char c1, c2, c3, c4;
|
|
|
|
|
|
|
|
if (full_multibyte) {
|
|
|
|
for (i = 0, j = 0, length = *p_length; i < length; i++, j++) {
|
|
|
|
done = 0;
|
|
|
|
c1 = source[i];
|
|
|
|
if (length - i >= 2) {
|
|
|
|
if (c1 >= 0x81 && c1 <= 0xFE) {
|
|
|
|
c2 = source[i + 1];
|
|
|
|
if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0x80 && c2 <= 0xFE)) {
|
|
|
|
ddata[j] = (c1 << 8) | c2;
|
|
|
|
i++;
|
|
|
|
done = 1;
|
|
|
|
} else if (length - i >= 4 && (c2 >= 0x30 && c2 <= 0x39)) {
|
|
|
|
c3 = source[i + 2];
|
|
|
|
c4 = source[i + 3];
|
|
|
|
if ((c3 >= 0x81 && c3 <= 0xFE) && (c4 >= 0x30 && c4 <= 0x39)) {
|
|
|
|
ddata[j++] = (c1 << 8) | c2;
|
|
|
|
ddata[j] = (c3 << 8) | c4;
|
|
|
|
i += 3;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!done) {
|
|
|
|
ddata[j] = c1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*p_length = j;
|
|
|
|
} else {
|
|
|
|
/* Straight copy */
|
|
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
|
|
ddata[i] = source[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
|
|
INTERNAL void gb18030_cpy_test(const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
gb18030_cpy(source, p_length, ddata, full_multibyte);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Call `gb18030_cpy()` for each segment */
|
|
|
|
INTERNAL void gb18030_cpy_segs(struct zint_seg segs[], const int seg_count, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
int i;
|
|
|
|
unsigned int *dd = ddata;
|
|
|
|
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
|
|
gb18030_cpy(segs[i].source, &segs[i].length, dd, full_multibyte);
|
|
|
|
dd += segs[i].length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert UTF-8 string to ECI and place in array of ints using `gb18030_cpy()` */
|
|
|
|
INTERNAL int gb18030_utf8_to_eci(const int eci, const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
|
|
const int full_multibyte) {
|
|
|
|
|
|
|
|
if (is_eci_convertible(eci)) {
|
|
|
|
int error_number;
|
|
|
|
const int eci_length = get_eci_length(eci, source, *p_length);
|
2022-07-14 16:01:30 +01:00
|
|
|
unsigned char *converted = (unsigned char *) z_alloca(eci_length + 1);
|
2022-06-02 20:32:25 +01:00
|
|
|
|
|
|
|
error_number = utf8_to_eci(eci, source, converted, p_length);
|
|
|
|
if (error_number != 0) {
|
|
|
|
/* Note not setting `symbol->errtxt`, up to caller */
|
|
|
|
return error_number;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* GB 18030 (ECI 32) superset of GB 2312 (ECI 29) and GBK (ECI 31) */
|
|
|
|
gb18030_cpy(converted, p_length, ddata, full_multibyte || eci == 32 || eci == 29 || eci == 31);
|
|
|
|
} else {
|
|
|
|
gb18030_cpy(source, p_length, ddata, full_multibyte);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-04-10 11:12:18 +01:00
|
|
|
/* vim: set ts=4 sw=4 et : */
|