mirror of
https://github.com/zint/zint
synced 2024-11-16 20:57:25 +13:00
5e2044ff2e
encodation in certain cases (and no pessimizations found so far), props lyngklip (BWIPP); fix extended char latching when exactly 3 extended chars at end; count code set C (not digits) in loop deciding when to shift/latch to extended for better estimate AZTEC: return warning if ECC < 5% (due to bit-stuffing when version given); return error if > 22 layers (Zint 26) for Reader Initialisation symbol requested for better error message AZTEC/HANXIN/QRCODE: consolidate different ECC data size tables into one indexed by ECC DBAR_EXP: check for reduced length <= 77 up front for better error message HANXIN: use `malloc()` rather than `z_alloca()` for large binary array QRCODE: `ecc_level` now 0-based (not 1-based) MICROQR: consolidate different version end routines into one `microqr_end()` and use new `microqr_data` table to simplify code MICROPDF417: use table for max codewords per column library: centralize all error messages using new `errtxt()`, `errtxtf()`, `errtxt_adj()` funcs that protect `symbol->errtxt` from overflow, & try to make error messages more consistent thru-out, adding more feedback info to many, & use positional args "%n$" in prep for l10n (maybe); `is_sane/is_sane_lookup()` -> `not_sane/not_sane_lookup()`, returning 1-based position (zero on failure) instead of bool; `long` ints -> plain `int` (except those dealing with `ftell()`, `fread()` etc) as depend on int being 32-bits already GUI: in "grpDATF.ui" use "PlainText" rather than "RichText" for tracker ratio examples as height of text messing up sometimes manual: clarify Codablock-F length maximum & add examples docs: README: pandoc 3.5, Ubuntu 24.04 CMake: use "-Wpedantic" for Clang only as GNU complains about `errtxtf()` positional args "%n$"
1135 lines
37 KiB
C
1135 lines
37 KiB
C
/* eci.c - Extended Channel Interpretations */
|
|
/*
|
|
libzint - the open source barcode library
|
|
Copyright (C) 2009-2024 Robin Stuart <rstuart114@gmail.com>
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
|
|
1. Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
2. Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
3. Neither the name of the project nor the names of its contributors
|
|
may be used to endorse or promote products derived from this software
|
|
without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
SUCH DAMAGE.
|
|
*/
|
|
/* SPDX-License-Identifier: BSD-3-Clause */
|
|
|
|
#include <assert.h>
|
|
#include "common.h"
|
|
#include "eci.h"
|
|
#include "eci_sb.h"
|
|
#include "big5.h"
|
|
#include "gb18030.h"
|
|
#include "gb2312.h"
|
|
#include "gbk.h"
|
|
#include "ksx1001.h"
|
|
#include "sjis.h"
|
|
|
|
/* Single-byte stuff */
|
|
|
|
/* Base ISO/IEC 8859 routine to convert Unicode codepoint `u` */
|
|
static int u_iso8859(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
|
|
const unsigned char *tab_sb, int e, unsigned char *dest) {
|
|
int s;
|
|
if (u < 0xA0) {
|
|
if (u >= 0x80) { /* U+0080-9F fail */
|
|
return 0;
|
|
}
|
|
*dest = (unsigned char) u;
|
|
return 1;
|
|
}
|
|
if (u <= 0xFF) {
|
|
const unsigned int u2 = u - 0xA0;
|
|
if (tab_s[u2 >> 4] & ((unsigned short) 1 << (u2 & 0xF))) {
|
|
*dest = (unsigned char) u; /* Straight-thru */
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
s = 0;
|
|
while (s <= e) {
|
|
const int m = (s + e) >> 1;
|
|
if (tab_u[m] < u) {
|
|
s = m + 1;
|
|
} else if (tab_u[m] > u) {
|
|
e = m - 1;
|
|
} else {
|
|
*dest = tab_sb[m];
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Base Windows-125x routine to convert Unicode codepoint `u` */
|
|
static int u_cp125x(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
|
|
const unsigned char *tab_sb, int e, unsigned char *dest) {
|
|
int s;
|
|
if (u < 0x80) {
|
|
*dest = (unsigned char) u;
|
|
return 1;
|
|
}
|
|
if (u <= 0xFF && u >= 0xA0) {
|
|
const unsigned int u2 = u - 0xA0;
|
|
if (tab_s[u2 >> 4] & ((unsigned short) 1 << (u2 & 0xF))) {
|
|
*dest = (unsigned char) u; /* Straight-thru */
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
s = 0;
|
|
while (s <= e) {
|
|
const int m = (s + e) >> 1;
|
|
if (tab_u[m] < u) {
|
|
s = m + 1;
|
|
} else if (tab_u[m] > u) {
|
|
e = m - 1;
|
|
} else {
|
|
*dest = tab_sb[m];
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* ECI 27 ASCII (ISO/IEC 646:1991 IRV (US)) */
|
|
static int u_ascii(const unsigned int u, unsigned char *dest) {
|
|
if (u < 0x80) {
|
|
*dest = (unsigned char) u;
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* ECI 170 ASCII subset (ISO/IEC 646:1991 Invariant), excludes 12 chars that historically had national variants,
|
|
namely "#$@[\]^`{|}~" */
|
|
static int u_ascii_inv(const unsigned int u, unsigned char *dest) {
|
|
if (u == 0x7F || (u <= 'z' && u != '#' && u != '$' && u != '@' && (u <= 'Z' || u == '_' || u >= 'a'))) {
|
|
*dest = (unsigned char) u;
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* ECI 25 UTF-16 Big Endian (ISO/IEC 10646) - assumes valid Unicode */
|
|
static int u_utf16be(const unsigned int u, unsigned char *dest) {
|
|
unsigned int u2, v;
|
|
if (u < 0x10000) {
|
|
dest[0] = (unsigned char) (u >> 8);
|
|
dest[1] = (unsigned char) u;
|
|
return 2;
|
|
}
|
|
u2 = u - 0x10000;
|
|
v = u2 >> 10;
|
|
dest[0] = (unsigned char) (0xD8 + (v >> 8));
|
|
dest[1] = (unsigned char) v;
|
|
v = u2 & 0x3FF;
|
|
dest[2] = (unsigned char) (0xDC + (v >> 8));
|
|
dest[3] = (unsigned char) v;
|
|
return 4;
|
|
}
|
|
|
|
/* ECI 33 UTF-16 Little Endian (ISO/IEC 10646) - assumes valid Unicode */
|
|
static int u_utf16le(const unsigned int u, unsigned char *dest) {
|
|
unsigned int u2, v;
|
|
if (u < 0x10000) {
|
|
dest[0] = (unsigned char) u;
|
|
dest[1] = (unsigned char) (u >> 8);
|
|
return 2;
|
|
}
|
|
u2 = u - 0x10000;
|
|
v = u2 >> 10;
|
|
dest[0] = (unsigned char) v;
|
|
dest[1] = (unsigned char) (0xD8 + (v >> 8));
|
|
v = u2 & 0x3FF;
|
|
dest[2] = (unsigned char) v;
|
|
dest[3] = (unsigned char) (0xDC + (v >> 8));
|
|
return 4;
|
|
}
|
|
|
|
/* ECI 34 UTF-32 Big Endian (ISO/IEC 10646) - assumes valid Unicode */
|
|
static int u_utf32be(const unsigned int u, unsigned char *dest) {
|
|
dest[0] = 0;
|
|
dest[1] = (unsigned char) (u >> 16);
|
|
dest[2] = (unsigned char) (u >> 8);
|
|
dest[3] = (unsigned char) u;
|
|
return 4;
|
|
}
|
|
|
|
/* ECI 35 UTF-32 Little Endian (ISO/IEC 10646) - assumes valid Unicode */
|
|
static int u_utf32le(const unsigned int u, unsigned char *dest) {
|
|
dest[0] = (unsigned char) u;
|
|
dest[1] = (unsigned char) (u >> 8);
|
|
dest[2] = (unsigned char) (u >> 16);
|
|
dest[3] = 0;
|
|
return 4;
|
|
}
|
|
|
|
/* Multibyte stuff */
|
|
|
|
/* Acknowledgements to Bruno Haible <bruno@clisp.org> for a no. of techniques used here */
|
|
|
|
/* Helper to lookup Unicode codepoint `u` in the URO (Unified Repertoire and Ordering) block (U+4E00-9FFF) */
|
|
static int eci_u_lookup_uro_int(const unsigned int u, const unsigned short *tab_u, const unsigned short *tab_mb_ind,
|
|
const unsigned short *tab_mb, unsigned int *d) {
|
|
unsigned int u2 = (u - 0x4E00) >> 4; /* Blocks of 16 */
|
|
unsigned int v = (unsigned int) 1 << (u & 0xF);
|
|
if ((tab_u[u2] & v) == 0) {
|
|
return 0;
|
|
}
|
|
v = tab_u[u2] & (v - 1); /* Mask to bits prior to this one */
|
|
/* Count bits set (http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel) */
|
|
v = v - ((v >> 1) & 0x55555555);
|
|
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
|
|
v = (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
|
|
*d = tab_mb[tab_mb_ind[u2] + v];
|
|
return 2;
|
|
}
|
|
|
|
/* Version of `eci_u_lookup_uro_int()` taking unsigned char destination */
|
|
static int eci_u_lookup_uro(const unsigned int u, const unsigned short *tab_u, const unsigned short *tab_mb_ind,
|
|
const unsigned short *tab_mb, unsigned char *dest) {
|
|
unsigned int d;
|
|
int ret = eci_u_lookup_uro_int(u, tab_u, tab_mb_ind, tab_mb, &d);
|
|
if (ret) {
|
|
dest[0] = (unsigned char) (d >> 8);
|
|
dest[1] = (unsigned char) d;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* ECI 20 Shift JIS */
|
|
static int u_sjis_int(const unsigned int u, unsigned int *d) {
|
|
unsigned int u2, dv, md;
|
|
int s, e;
|
|
|
|
if (u < 0x80 && u != 0x5C && u != 0x7E) { /* Backslash & tilde re-mapped according to JIS X 0201 Roman */
|
|
*d = u;
|
|
return 1;
|
|
}
|
|
/* Special case URO block sequential mappings (considerably lessens size of `sjis_u[]` array) */
|
|
if (u >= 0x4E00 && u <= 0xDFFF) { /* 0xE000 next used value >= 0x4E00 */
|
|
if (u >= 0x9FB0) {
|
|
return 0;
|
|
}
|
|
return eci_u_lookup_uro_int(u, sjis_uro_u, sjis_uro_mb_ind, sjis_mb, d);
|
|
}
|
|
/* PUA to user-defined (Table 4-86, Lunde, 2nd ed.) */
|
|
if (u >= 0xE000 && u <= 0xE757) {
|
|
u2 = u - 0xE000;
|
|
dv = u2 / 188;
|
|
md = u2 - dv * 188;
|
|
*d = ((dv + 0xF0) << 8) | (md + 0x40 + (md >= 0x3F));
|
|
return 2;
|
|
}
|
|
if (u >= sjis_u[0] && u <= sjis_u[ARRAY_SIZE(sjis_u) - 1]) {
|
|
s = 0;
|
|
e = ARRAY_SIZE(sjis_u) - 1;
|
|
while (s <= e) {
|
|
const int m = (s + e) >> 1;
|
|
if (sjis_u[m] < u) {
|
|
s = m + 1;
|
|
} else if (sjis_u[m] > u) {
|
|
e = m - 1;
|
|
} else {
|
|
*d = sjis_mb[u >= 0x4E00 ? m + 6356 : m]; /* Adjust for URO block */
|
|
return 1 + (*d > 0xFF);
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
INTERNAL int u_sjis_int_test(const unsigned int u, unsigned int *d) {
|
|
return u_sjis_int(u, d);
|
|
}
|
|
#endif
|
|
|
|
/* Version of `u_sjis_int()` taking unsigned char destination, for use by `utf8_to_eci()` */
|
|
static int u_sjis(const unsigned int u, unsigned char *dest) {
|
|
unsigned int d;
|
|
int ret = u_sjis_int(u, &d);
|
|
if (ret) {
|
|
if (ret == 1) {
|
|
dest[0] = (unsigned char) d;
|
|
} else {
|
|
dest[0] = (unsigned char) (d >> 8);
|
|
dest[1] = (unsigned char) d;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* ECI 28 Big5 Chinese (Taiwan) */
|
|
static int u_big5(const unsigned int u, unsigned char *dest) {
|
|
int s, e;
|
|
|
|
if (u < 0x80) {
|
|
*dest = (unsigned char) u;
|
|
return 1;
|
|
}
|
|
/* Special case URO block sequential mappings (considerably lessens size of `big5_u[]` array) */
|
|
if (u >= 0x4E00 && u <= 0xFA0B) { /* 0xFA0C next used value >= 0x4E00 */
|
|
if (u >= 0x9FB0) {
|
|
return 0;
|
|
}
|
|
return eci_u_lookup_uro(u, big5_uro_u, big5_uro_mb_ind, big5_mb, dest);
|
|
}
|
|
if (u >= big5_u[0] && u <= big5_u[ARRAY_SIZE(big5_u) - 1]) {
|
|
s = 0;
|
|
e = ARRAY_SIZE(big5_u) - 1;
|
|
while (s <= e) {
|
|
const int m = (s + e) >> 1;
|
|
if (big5_u[m] < u) {
|
|
s = m + 1;
|
|
} else if (big5_u[m] > u) {
|
|
e = m - 1;
|
|
} else {
|
|
const unsigned short mb = big5_mb[u >= 0x4E00 ? m + 13061 : m]; /* Adjust for URO block */
|
|
dest[0] = (unsigned char) (mb >> 8);
|
|
dest[1] = (unsigned char) mb;
|
|
return 2;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
INTERNAL int u_big5_test(const unsigned int u, unsigned char *dest) {
|
|
return u_big5(u, dest);
|
|
}
|
|
#endif
|
|
|
|
/* ECI 30 EUC-KR (KS X 1001, formerly KS C 5601) Korean */
|
|
static int u_ksx1001(const unsigned int u, unsigned char *dest) {
|
|
int s, e;
|
|
|
|
if (u < 0x80) {
|
|
*dest = (unsigned char) u;
|
|
return 1;
|
|
}
|
|
/* Special case URO block sequential mappings (considerably lessens size of `ksx1001_u[]` array) */
|
|
if (u >= 0x4E00 && u <= 0xABFF) { /* 0xAC00 next used value >= 0x4E00 */
|
|
if (u >= 0x9FA0) {
|
|
return 0;
|
|
}
|
|
return eci_u_lookup_uro(u, ksx1001_uro_u, ksx1001_uro_mb_ind, ksx1001_mb, dest);
|
|
}
|
|
if (u >= ksx1001_u[0] && u <= ksx1001_u[ARRAY_SIZE(ksx1001_u) - 1]) {
|
|
s = ksx1001_u_ind[(u - ksx1001_u[0]) >> 8];
|
|
e = s + 0x100 > ARRAY_SIZE(ksx1001_u) ? ARRAY_SIZE(ksx1001_u) - 1 : s + 0x100 - 1;
|
|
while (s <= e) {
|
|
const int m = (s + e) >> 1;
|
|
if (ksx1001_u[m] < u) {
|
|
s = m + 1;
|
|
} else if (ksx1001_u[m] > u) {
|
|
e = m - 1;
|
|
} else {
|
|
const unsigned short mb = ksx1001_mb[u >= 0x4E00 ? m + 4620 : m]; /* Adjust for URO block */
|
|
dest[0] = (unsigned char) (mb >> 8);
|
|
dest[1] = (unsigned char) mb;
|
|
return 2;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
INTERNAL int u_ksx1001_test(const unsigned int u, unsigned char *dest) {
|
|
return u_ksx1001(u, dest);
|
|
}
|
|
#endif
|
|
|
|
/* ECI 29 GB 2312 Chinese (PRC) */
|
|
static int u_gb2312_int(const unsigned int u, unsigned int *d) {
|
|
int s, e;
|
|
|
|
if (u < 0x80) {
|
|
*d = u;
|
|
return 1;
|
|
}
|
|
/* Special case URO block sequential mappings (considerably lessens size of `gb2312_u[]` array) */
|
|
if (u >= 0x4E00 && u <= 0x9E1E) { /* 0x9E1F next used value >= 0x4E00 */
|
|
if (u >= 0x9CF0) {
|
|
return 0;
|
|
}
|
|
return eci_u_lookup_uro_int(u, gb2312_uro_u, gb2312_uro_mb_ind, gb2312_mb, d);
|
|
}
|
|
if (u >= gb2312_u[0] && u <= gb2312_u[ARRAY_SIZE(gb2312_u) - 1]) {
|
|
s = gb2312_u_ind[(u - gb2312_u[0]) >> 8];
|
|
e = s + 0x100 > ARRAY_SIZE(gb2312_u) ? ARRAY_SIZE(gb2312_u) - 1 : s + 0x100 - 1;
|
|
while (s <= e) {
|
|
const int m = (s + e) >> 1;
|
|
if (gb2312_u[m] < u) {
|
|
s = m + 1;
|
|
} else if (gb2312_u[m] > u) {
|
|
e = m - 1;
|
|
} else {
|
|
*d = gb2312_mb[u > 0x4E00 ? m + 6627 : m]; /* Adjust for URO block */
|
|
return 2;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
INTERNAL int u_gb2312_int_test(const unsigned int u, unsigned int *d) {
|
|
return u_gb2312_int(u, d);
|
|
}
|
|
#endif
|
|
|
|
/* Version of `u_gb2312_int()` taking unsigned char destination, for use by `utf8_to_eci()` */
|
|
static int u_gb2312(const unsigned int u, unsigned char *dest) {
|
|
unsigned int d;
|
|
int ret = u_gb2312_int(u, &d);
|
|
if (ret) {
|
|
if (ret == 1) {
|
|
dest[0] = (unsigned char) d;
|
|
} else {
|
|
dest[0] = (unsigned char) (d >> 8);
|
|
dest[1] = (unsigned char) d;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* ECI 31 GBK Chinese */
|
|
static int u_gbk_int(const unsigned int u, unsigned int *d) {
|
|
int s, e;
|
|
|
|
if (u < 0x80) {
|
|
*d = u;
|
|
return 1;
|
|
}
|
|
|
|
/* Check GB 2312 first */
|
|
if (u == 0x30FB) {
|
|
/* KATAKANA MIDDLE DOT, mapped by GB 2312 but not by GBK (U+00B7 MIDDLE DOT mapped to 0xA1A4 instead) */
|
|
return 0;
|
|
}
|
|
if (u == 0x2015) {
|
|
/* HORIZONTAL BAR, mapped to 0xA844 by GBK rather than 0xA1AA (U+2014 EM DASH mapped there instead) */
|
|
*d = 0xA844;
|
|
return 2;
|
|
}
|
|
if (u_gb2312_int(u, d)) { /* Includes the 2 GB 6345.1-86 corrections given in Table 3-22, Lunde, 2nd ed. */
|
|
return 2;
|
|
}
|
|
|
|
/* Special case URO block sequential mappings (considerably lessens size of `gbk_u[]` array) */
|
|
if (u >= 0x4E00 && u <= 0xF92B) { /* 0xF92C next used value >= 0x4E00 */
|
|
if (u >= 0x9FB0) {
|
|
return 0;
|
|
}
|
|
return eci_u_lookup_uro_int(u, gbk_uro_u, gbk_uro_mb_ind, gbk_mb, d);
|
|
}
|
|
if (u >= gbk_u[0] && u <= gbk_u[ARRAY_SIZE(gbk_u) - 1]) {
|
|
s = 0;
|
|
e = ARRAY_SIZE(gbk_u) - 1;
|
|
while (s <= e) {
|
|
const int m = (s + e) >> 1;
|
|
if (gbk_u[m] < u) {
|
|
s = m + 1;
|
|
} else if (gbk_u[m] > u) {
|
|
e = m - 1;
|
|
} else {
|
|
*d = gbk_mb[u >= 0x4E00 ? m + 14139 : m]; /* Adjust for URO block */
|
|
return 2;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
INTERNAL int u_gbk_int_test(const unsigned int u, unsigned int *d) {
|
|
return u_gbk_int(u, d);
|
|
}
|
|
#endif
|
|
|
|
/* Version of `u_gbk_int()` taking unsigned char destination, for use by `utf8_to_eci()` */
|
|
static int u_gbk(const unsigned int u, unsigned char *dest) {
|
|
unsigned int d;
|
|
int ret = u_gbk_int(u, &d);
|
|
if (ret) {
|
|
if (ret == 1) {
|
|
dest[0] = (unsigned char) d;
|
|
} else {
|
|
dest[0] = (unsigned char) (d >> 8);
|
|
dest[1] = (unsigned char) d;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* Helper for `u_gb18030_int()` to output 4-byte sequential blocks */
|
|
static int u_gb18030_4_sequential_int(unsigned int u2, unsigned int mb_lead, unsigned int *d1, unsigned int *d2) {
|
|
unsigned int dv;
|
|
|
|
dv = u2 / 10;
|
|
*d2 = u2 - dv * 10 + 0x30;
|
|
u2 = dv;
|
|
dv = u2 / 126;
|
|
*d2 |= (u2 - dv * 126 + 0x81) << 8;
|
|
u2 = dv;
|
|
dv = u2 / 10;
|
|
*d1 = ((dv + mb_lead) << 8) | (u2 - dv * 10 + 0x30);
|
|
return 4;
|
|
}
|
|
|
|
/* ECI 32 GB 18030 Chinese - assumes valid Unicode */
|
|
static int u_gb18030_int(const unsigned int u, unsigned int *d1, unsigned int *d2) {
|
|
unsigned int u2, dv;
|
|
int s, e;
|
|
|
|
if (u < 0x80) {
|
|
*d1 = u;
|
|
return 1;
|
|
}
|
|
|
|
/* Check GBK first */
|
|
if (u_gbk_int(u, d1)) {
|
|
return 2;
|
|
}
|
|
|
|
if (u >= 0x10000) {
|
|
/* Non-PUA, non-BMP, see Table 3-37, Lunde, 2nd ed. */
|
|
if (u == 0x20087) {
|
|
*d1 = 0xFE51;
|
|
return 2;
|
|
}
|
|
if (u == 0x20089) {
|
|
*d1 = 0xFE52;
|
|
return 2;
|
|
}
|
|
if (u == 0x200CC) {
|
|
*d1 = 0xFE53;
|
|
return 2;
|
|
}
|
|
if (u == 0x215D7) {
|
|
*d1 = 0xFE6C;
|
|
return 2;
|
|
}
|
|
if (u == 0x2298F) {
|
|
*d1 = 0xFE76;
|
|
return 2;
|
|
}
|
|
if (u == 0x241FE) {
|
|
*d1 = 0xFE91;
|
|
return 2;
|
|
}
|
|
/* All other non-BMP U+10000-10FFFF */
|
|
return u_gb18030_4_sequential_int(u - 0x10000, 0x90, d1, d2);
|
|
}
|
|
if (u >= 0xE000 && u <= 0xE765) { /* PUA to user-defined */
|
|
if (u <= 0xE4C5) {
|
|
u2 = u - 0xE000;
|
|
dv = u2 / 94;
|
|
*d1 = ((dv + (dv < 6 ? 0xAA : 0xF2)) << 8) | (u2 - dv * 94 + 0xA1);
|
|
} else {
|
|
unsigned int md;
|
|
u2 = u - 0xE4C6;
|
|
dv = u2 / 96;
|
|
md = u2 - dv * 96;
|
|
*d1 = ((dv + 0xA1) << 8) | (md + 0x40 + (md >= 0x3F));
|
|
}
|
|
return 2;
|
|
}
|
|
if (u >= gb18030_2_u[0] && u <= gb18030_2_u[ARRAY_SIZE(gb18030_2_u) - 1]) {
|
|
s = 0;
|
|
e = ARRAY_SIZE(gb18030_2_u) - 1;
|
|
while (s <= e) {
|
|
const int m = (s + e) >> 1;
|
|
if (gb18030_2_u[m] < u) {
|
|
s = m + 1;
|
|
} else if (gb18030_2_u[m] > u) {
|
|
e = m - 1;
|
|
} else {
|
|
*d1 = gb18030_2_mb[m];
|
|
return 2;
|
|
}
|
|
}
|
|
}
|
|
/* All other BMP U+0080-FFFF */
|
|
if (u == 0xE7C7) { /* PUA change to non-PUA, see Table 3-39, Lunde, 2nd ed. */
|
|
*d1 = 0x8135;
|
|
*d2 = 0xF437;
|
|
return 4;
|
|
}
|
|
s = 0;
|
|
e = ARRAY_SIZE(gb18030_4_u_e) - 1;
|
|
while (s < e) { /* Lower bound */
|
|
const int m = (s + e) >> 1;
|
|
if (gb18030_4_u_e[m] < u) {
|
|
s = m + 1;
|
|
} else {
|
|
e = m;
|
|
}
|
|
}
|
|
assert(s < ARRAY_SIZE(gb18030_4_u_e));
|
|
return u_gb18030_4_sequential_int(u - gb18030_4_mb_o[s] - 0x80, 0x81, d1, d2);
|
|
}
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
INTERNAL int u_gb18030_int_test(const unsigned int u, unsigned int *d1, unsigned int *d2) {
|
|
return u_gb18030_int(u, d1, d2);
|
|
}
|
|
#endif
|
|
|
|
/* Version of `u_gb18030_int()` taking unsigned char destination, for use by `utf8_to_eci()` */
|
|
static int u_gb18030(const unsigned int u, unsigned char *dest) {
|
|
unsigned int d1, d2;
|
|
int ret = u_gb18030_int(u, &d1, &d2);
|
|
if (ret) {
|
|
if (ret == 1) {
|
|
dest[0] = (unsigned char) d1;
|
|
} else {
|
|
dest[0] = (unsigned char) (d1 >> 8);
|
|
dest[1] = (unsigned char) d1;
|
|
if (ret == 4) {
|
|
dest[2] = (unsigned char) (d2 >> 8);
|
|
dest[3] = (unsigned char) d2;
|
|
}
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* Main ECI stuff */
|
|
|
|
/* Helper to count the number of chars in a string within a range */
|
|
static int chr_range_cnt(const unsigned char string[], const int length, const unsigned char c1,
|
|
const unsigned char c2) {
|
|
int count = 0;
|
|
int i;
|
|
if (c1) {
|
|
for (i = 0; i < length; i++) {
|
|
if (string[i] >= c1 && string[i] <= c2) {
|
|
count++;
|
|
}
|
|
}
|
|
} else {
|
|
for (i = 0; i < length; i++) {
|
|
if (string[i] <= c2) {
|
|
count++;
|
|
}
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/* Is ECI convertible from UTF-8? */
|
|
INTERNAL int is_eci_convertible(const int eci) {
|
|
if (eci == 26 || (eci > 35 && eci != 170)) { /* Exclude ECI 170 - ASCII Invariant */
|
|
/* UTF-8 (26) or 8-bit binary data (899) or undefined (> 35 and < 899) or not character set (> 899) */
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
/* Are any of the ECIs in the segments convertible from UTF-8?
|
|
Sets `convertible[]` for each, which must be at least `seg_count` in size */
|
|
INTERNAL int is_eci_convertible_segs(const struct zint_seg segs[], const int seg_count, int convertible[]) {
|
|
int ret = 0;
|
|
int i;
|
|
for (i = 0; i < seg_count; i++) {
|
|
convertible[i] = is_eci_convertible(segs[i].eci);
|
|
ret |= convertible[i];
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* Calculate length required to convert UTF-8 to (double-byte) encoding */
|
|
INTERNAL int get_eci_length(const int eci, const unsigned char source[], int length) {
|
|
if (eci == 20) { /* Shift JIS */
|
|
/* Only ASCII backslash (reverse solidus) exceeds UTF-8 length */
|
|
length += chr_cnt(source, length, '\\');
|
|
|
|
} else if (eci == 25 || eci == 33) { /* UTF-16 */
|
|
/* All ASCII chars take 2 bytes */
|
|
length += chr_range_cnt(source, length, 0, 0x7F);
|
|
/* Surrogate pairs are 4 UTF-8 bytes long so fit */
|
|
|
|
} else if (eci == 32) { /* GB 18030 */
|
|
/* Allow for GB 18030 4 byters */
|
|
length *= 2;
|
|
|
|
} else if (eci == 34 || eci == 35) { /* UTF-32 */
|
|
/* Quadruple-up ASCII and double-up non-ASCII */
|
|
length += chr_range_cnt(source, length, 0, 0x7F) * 2 + length;
|
|
}
|
|
|
|
/* Big5, GB 2312, EUC-KR and GBK fit in UTF-8 length */
|
|
|
|
return length;
|
|
}
|
|
|
|
/* Call `get_eci_length()` for each segment, returning total */
|
|
INTERNAL int get_eci_length_segs(const struct zint_seg segs[], const int seg_count) {
|
|
int length = 0;
|
|
int i;
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
length += get_eci_length(segs[i].eci, segs[i].source, segs[i].length);
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
/* Convert UTF-8 to other character encodings */
|
|
typedef int (*eci_func_t)(const unsigned int u, unsigned char *dest);
|
|
INTERNAL int utf8_to_eci(const int eci, const unsigned char source[], unsigned char dest[], int *p_length) {
|
|
|
|
static const eci_func_t eci_funcs[36] = {
|
|
NULL, NULL, NULL, NULL, u_iso8859_2, /*0-4*/
|
|
u_iso8859_3, u_iso8859_4, u_iso8859_5, u_iso8859_6, u_iso8859_7, /*5-9*/
|
|
u_iso8859_8, u_iso8859_9, u_iso8859_10, u_iso8859_11, NULL, /*10-14*/
|
|
u_iso8859_13, u_iso8859_14, u_iso8859_15, u_iso8859_16, NULL, /*15-19*/
|
|
u_sjis, u_cp1250, u_cp1251, u_cp1252, u_cp1256, /*20-24*/
|
|
u_utf16be, NULL, u_ascii, u_big5, u_gb2312, /*25-29*/
|
|
u_ksx1001, u_gbk, u_gb18030, u_utf16le, u_utf32be, /*30-34*/
|
|
u_utf32le,
|
|
};
|
|
eci_func_t eci_func;
|
|
unsigned int codepoint, state = 0;
|
|
int in_posn = 0;
|
|
int out_posn = 0;
|
|
int length = *p_length;
|
|
|
|
/* Special case ISO/IEC 8859-1 */
|
|
if (eci == 0 || eci == 3) { /* Default ECI 0 to ISO/IEC 8859-1 */
|
|
while (in_posn < length) {
|
|
do {
|
|
decode_utf8(&state, &codepoint, source[in_posn++]);
|
|
} while (in_posn < length && state != 0 && state != 12);
|
|
if (state != 0) {
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
}
|
|
if (codepoint >= 0x80 && (codepoint < 0xA0 || codepoint >= 0x100)) {
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
}
|
|
dest[out_posn++] = (unsigned char) codepoint;
|
|
}
|
|
dest[out_posn] = '\0';
|
|
*p_length = out_posn;
|
|
return 0;
|
|
}
|
|
|
|
if (eci == 170) { /* ASCII Invariant (archaic subset) */
|
|
eci_func = u_ascii_inv;
|
|
} else {
|
|
eci_func = eci_funcs[eci];
|
|
if (eci_func == NULL) {
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
}
|
|
}
|
|
|
|
while (in_posn < length) {
|
|
int incr;
|
|
do {
|
|
decode_utf8(&state, &codepoint, source[in_posn++]);
|
|
} while (in_posn < length && state != 0 && state != 12);
|
|
if (state != 0) {
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
}
|
|
incr = (*eci_func)(codepoint, dest + out_posn);
|
|
if (incr == 0) {
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
}
|
|
out_posn += incr;
|
|
}
|
|
dest[out_posn] = '\0';
|
|
*p_length = out_posn;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Find the lowest single-byte ECI mode which will encode a given set of Unicode text, assuming valid UTF-8 */
|
|
INTERNAL int get_best_eci(const unsigned char source[], int length) {
|
|
int eci = 3;
|
|
/* Note: attempting single-byte conversions only, so get_eci_length() unnecessary */
|
|
unsigned char *local_source = (unsigned char *) z_alloca(length + 1);
|
|
|
|
do {
|
|
if (eci == 14) { /* Reserved */
|
|
eci = 15;
|
|
} else if (eci == 19) { /* Reserved */
|
|
eci = 21; /* Skip 20 Shift JIS */
|
|
}
|
|
if (utf8_to_eci(eci, source, local_source, &length) == 0) {
|
|
return eci;
|
|
}
|
|
eci++;
|
|
} while (eci < 25);
|
|
|
|
assert(is_valid_utf8(source, length));
|
|
|
|
return 26; /* If all of these fail, use UTF-8! */
|
|
}
|
|
|
|
/* Call `get_best_eci()` for each segment, assuming valid UTF-8. Returns 0 on failure, first ECI set on success */
|
|
INTERNAL int get_best_eci_segs(struct zint_symbol *symbol, struct zint_seg segs[], const int seg_count) {
|
|
const int default_eci = symbol->symbology == BARCODE_GRIDMATRIX ? 29 : symbol->symbology == BARCODE_UPNQR ? 4 : 3;
|
|
int first_eci_set = 0;
|
|
int i;
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
if (segs[i].eci == 0) {
|
|
const int eci = get_best_eci(segs[i].source, segs[i].length);
|
|
if (eci == default_eci) {
|
|
if (i != 0 && segs[i - 1].eci != 0 && segs[i - 1].eci != default_eci) {
|
|
segs[i].eci = eci;
|
|
if (first_eci_set == 0) {
|
|
first_eci_set = eci;
|
|
}
|
|
}
|
|
} else {
|
|
segs[i].eci = eci;
|
|
if (first_eci_set == 0) {
|
|
first_eci_set = eci;
|
|
if (i == 0) {
|
|
symbol->eci = eci;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return first_eci_set;
|
|
}
|
|
|
|
/* QRCODE Shift JIS helpers */
|
|
|
|
/* Convert UTF-8 string to Shift JIS and place in array of ints */
|
|
INTERNAL int sjis_utf8(struct zint_symbol *symbol, const unsigned char source[], int *p_length,
|
|
unsigned int *ddata) {
|
|
int error_number;
|
|
unsigned int i, length;
|
|
unsigned int *utfdata = (unsigned int *) z_alloca(sizeof(unsigned int) * (*p_length + 1));
|
|
|
|
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
|
|
if (error_number != 0) {
|
|
return error_number;
|
|
}
|
|
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
if (!u_sjis_int(utfdata[i], ddata + i)) {
|
|
return errtxt(ZINT_ERROR_INVALID_DATA, symbol, 800, "Invalid character in input");
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* If `full_multibyte` set, copy byte input stream to array of ints, putting double-bytes that match QR Kanji mode in
|
|
* a single entry. If `full_multibyte` not set, do a straight copy */
|
|
INTERNAL void sjis_cpy(const unsigned char source[], int *p_length, unsigned int *ddata, const int full_multibyte) {
|
|
unsigned int i, j, length;
|
|
unsigned char c1, c2;
|
|
|
|
if (full_multibyte) {
|
|
for (i = 0, j = 0, length = *p_length; i < length; i++, j++) {
|
|
c1 = source[i];
|
|
/* Now using stricter interpretation of standard, and excluding certain trailing bytes */
|
|
if (((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEB)) && length - i >= 2) {
|
|
c2 = source[i + 1];
|
|
if ((c2 >= 0x40 && c2 <= 0xFC) && c2 != 0x7F && (c1 != 0xEB || c2 <= 0xBF)) {
|
|
/* This may or may not be valid Shift JIS, but don't care as long as it can be encoded in
|
|
* QR Kanji mode */
|
|
ddata[j] = (c1 << 8) | c2;
|
|
i++;
|
|
} else {
|
|
ddata[j] = c1;
|
|
}
|
|
} else {
|
|
ddata[j] = c1;
|
|
}
|
|
}
|
|
*p_length = j;
|
|
} else {
|
|
/* Straight copy */
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
ddata[i] = source[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Call `sjis_cpy()` for each segment */
|
|
INTERNAL void sjis_cpy_segs(struct zint_seg segs[], const int seg_count, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
int i;
|
|
unsigned int *dd = ddata;
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
sjis_cpy(segs[i].source, &segs[i].length, dd, full_multibyte);
|
|
dd += segs[i].length;
|
|
}
|
|
}
|
|
|
|
/* Convert UTF-8 string to ECI and place in array of ints using `sjis_cpy()` */
|
|
INTERNAL int sjis_utf8_to_eci(const int eci, const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
|
|
if (is_eci_convertible(eci)) {
|
|
int error_number;
|
|
const int eci_length = get_eci_length(eci, source, *p_length);
|
|
unsigned char *converted = (unsigned char *) z_alloca(eci_length + 1);
|
|
|
|
error_number = utf8_to_eci(eci, source, converted, p_length);
|
|
if (error_number != 0) {
|
|
/* Note not setting `symbol->errtxt`, up to caller */
|
|
return error_number;
|
|
}
|
|
|
|
sjis_cpy(converted, p_length, ddata, full_multibyte || eci == 20);
|
|
} else {
|
|
sjis_cpy(source, p_length, ddata, full_multibyte);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* GRIDMATRIX GB 2312 helpers */
|
|
|
|
/* Convert UTF-8 string to GB 2312 (EUC-CN) and place in array of ints */
|
|
INTERNAL int gb2312_utf8(struct zint_symbol *symbol, const unsigned char source[], int *p_length,
|
|
unsigned int *ddata) {
|
|
int error_number;
|
|
unsigned int i, length;
|
|
unsigned int *utfdata = (unsigned int *) z_alloca(sizeof(unsigned int) * (*p_length + 1));
|
|
|
|
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
|
|
if (error_number != 0) {
|
|
return error_number;
|
|
}
|
|
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
if (utfdata[i] < 0x80) {
|
|
ddata[i] = utfdata[i];
|
|
} else {
|
|
if (!u_gb2312_int(utfdata[i], ddata + i)) {
|
|
return errtxt(ZINT_ERROR_INVALID_DATA, symbol, 810, "Invalid character in input");
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* If `full_multibyte` set, copy byte input stream to array of ints, putting double-bytes that match GRIDMATRIX
|
|
* Chinese mode in a single entry. If `full_multibyte` not set, do a straight copy */
|
|
static void gb2312_cpy(const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
unsigned int i, j, length;
|
|
unsigned char c1, c2;
|
|
|
|
if (full_multibyte) {
|
|
for (i = 0, j = 0, length = *p_length; i < length; i++, j++) {
|
|
if (length - i >= 2) {
|
|
c1 = source[i];
|
|
c2 = source[i + 1];
|
|
if (((c1 >= 0xA1 && c1 <= 0xA9) || (c1 >= 0xB0 && c1 <= 0xF7)) && c2 >= 0xA1 && c2 <= 0xFE) {
|
|
/* This may or may not be valid GB 2312 (EUC-CN), but don't care as long as it can be encoded in
|
|
* GRIDMATRIX Chinese mode */
|
|
ddata[j] = (c1 << 8) | c2;
|
|
i++;
|
|
} else {
|
|
ddata[j] = c1;
|
|
}
|
|
} else {
|
|
ddata[j] = source[i];
|
|
}
|
|
}
|
|
*p_length = j;
|
|
} else {
|
|
/* Straight copy */
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
ddata[i] = source[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
INTERNAL void gb2312_cpy_test(const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
gb2312_cpy(source, p_length, ddata, full_multibyte);
|
|
}
|
|
#endif
|
|
|
|
/* Call `gb2312_cpy()` for each segment */
|
|
INTERNAL void gb2312_cpy_segs(struct zint_seg segs[], const int seg_count, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
int i;
|
|
unsigned int *dd = ddata;
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
gb2312_cpy(segs[i].source, &segs[i].length, dd, full_multibyte);
|
|
dd += segs[i].length;
|
|
}
|
|
}
|
|
|
|
/* Convert UTF-8 string to ECI and place in array of ints using `gb2312_cpy()` */
|
|
INTERNAL int gb2312_utf8_to_eci(const int eci, const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
|
|
if (is_eci_convertible(eci)) {
|
|
int error_number;
|
|
const int eci_length = get_eci_length(eci, source, *p_length);
|
|
unsigned char *converted = (unsigned char *) z_alloca(eci_length + 1);
|
|
|
|
error_number = utf8_to_eci(eci, source, converted, p_length);
|
|
if (error_number != 0) {
|
|
/* Note not setting `symbol->errtxt`, up to caller */
|
|
return error_number;
|
|
}
|
|
|
|
gb2312_cpy(converted, p_length, ddata, full_multibyte || eci == 29);
|
|
} else {
|
|
gb2312_cpy(source, p_length, ddata, full_multibyte);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* HANXIN GB 18030 helpers */
|
|
|
|
/* Convert UTF-8 string to GB 18030 and place in array of ints */
|
|
INTERNAL int gb18030_utf8(struct zint_symbol *symbol, const unsigned char source[], int *p_length,
|
|
unsigned int *ddata) {
|
|
int error_number, ret;
|
|
unsigned int i, j, length;
|
|
unsigned int *utfdata = (unsigned int *) z_alloca(sizeof(unsigned int) * (*p_length + 1));
|
|
|
|
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 0 /*disallow_4byte*/);
|
|
if (error_number != 0) {
|
|
return error_number;
|
|
}
|
|
|
|
for (i = 0, j = 0, length = *p_length; i < length; i++, j++) {
|
|
if (utfdata[i] < 0x80) {
|
|
ddata[j] = utfdata[i];
|
|
} else {
|
|
ret = u_gb18030_int(utfdata[i], ddata + j, ddata + j + 1);
|
|
if (ret == 0) { /* Should never happen, as GB 18030 is a UTF i.e. maps all Unicode codepoints */
|
|
return errtxt(ZINT_ERROR_INVALID_DATA, symbol, 820, "Invalid character in input"); /* Not reached */
|
|
}
|
|
if (ret == 4) {
|
|
j++;
|
|
}
|
|
}
|
|
}
|
|
|
|
*p_length = j;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* If `full_multibyte` set, copy byte input stream to array of ints, putting double-bytes that match HANXIN
|
|
* Chinese mode in single entry, and quad-bytes in 2 entries. If `full_multibyte` not set, do a straight copy */
|
|
static void gb18030_cpy(const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
unsigned int i, j, length;
|
|
int done;
|
|
unsigned char c1, c2, c3, c4;
|
|
|
|
if (full_multibyte) {
|
|
for (i = 0, j = 0, length = *p_length; i < length; i++, j++) {
|
|
done = 0;
|
|
c1 = source[i];
|
|
if (length - i >= 2) {
|
|
if (c1 >= 0x81 && c1 <= 0xFE) {
|
|
c2 = source[i + 1];
|
|
if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0x80 && c2 <= 0xFE)) {
|
|
ddata[j] = (c1 << 8) | c2;
|
|
i++;
|
|
done = 1;
|
|
} else if (length - i >= 4 && (c2 >= 0x30 && c2 <= 0x39)) {
|
|
c3 = source[i + 2];
|
|
c4 = source[i + 3];
|
|
if ((c3 >= 0x81 && c3 <= 0xFE) && (c4 >= 0x30 && c4 <= 0x39)) {
|
|
ddata[j++] = (c1 << 8) | c2;
|
|
ddata[j] = (c3 << 8) | c4;
|
|
i += 3;
|
|
done = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (!done) {
|
|
ddata[j] = c1;
|
|
}
|
|
}
|
|
*p_length = j;
|
|
} else {
|
|
/* Straight copy */
|
|
for (i = 0, length = *p_length; i < length; i++) {
|
|
ddata[i] = source[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef ZINT_TEST /* Wrapper for direct testing */
|
|
INTERNAL void gb18030_cpy_test(const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
gb18030_cpy(source, p_length, ddata, full_multibyte);
|
|
}
|
|
#endif
|
|
|
|
/* Call `gb18030_cpy()` for each segment */
|
|
INTERNAL void gb18030_cpy_segs(struct zint_seg segs[], const int seg_count, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
int i;
|
|
unsigned int *dd = ddata;
|
|
|
|
for (i = 0; i < seg_count; i++) {
|
|
gb18030_cpy(segs[i].source, &segs[i].length, dd, full_multibyte);
|
|
dd += segs[i].length;
|
|
}
|
|
}
|
|
|
|
/* Convert UTF-8 string to ECI and place in array of ints using `gb18030_cpy()` */
|
|
INTERNAL int gb18030_utf8_to_eci(const int eci, const unsigned char source[], int *p_length, unsigned int *ddata,
|
|
const int full_multibyte) {
|
|
|
|
if (is_eci_convertible(eci)) {
|
|
int error_number;
|
|
const int eci_length = get_eci_length(eci, source, *p_length);
|
|
unsigned char *converted = (unsigned char *) z_alloca(eci_length + 1);
|
|
|
|
error_number = utf8_to_eci(eci, source, converted, p_length);
|
|
if (error_number != 0) {
|
|
/* Note not setting `symbol->errtxt`, up to caller */
|
|
return error_number;
|
|
}
|
|
|
|
/* GB 18030 (ECI 32) superset of GB 2312 (ECI 29) and GBK (ECI 31) */
|
|
gb18030_cpy(converted, p_length, ddata, full_multibyte || eci == 32 || eci == 29 || eci == 31);
|
|
} else {
|
|
gb18030_cpy(source, p_length, ddata, full_multibyte);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* vim: set ts=4 sw=4 et : */
|