HANXIN ECI conversion, GB 18030 LIBICONV port; some codeword fixes; optimized encoding modes

2024-11-16 20:57:25 +13:00 · 2019-12-08 16:15:34 +00:00 · 2019-12-08 16:15:34 +00:00 · 889e786d95
commit 889e786d95
parent ce8aa92846
35 changed files with 67955 additions and 23877 deletions
--- a/backend/CMakeLists.txt
+++ b/backend/CMakeLists.txt
@ -4,7 +4,7 @@ project(zint)

 find_package(PNG)

-set(zint_COMMON_SRCS common.c library.c large.c reedsol.c gs1.c eci.c general_field.c sjis.c gb2312.c)
+set(zint_COMMON_SRCS common.c library.c large.c reedsol.c gs1.c eci.c general_field.c sjis.c gb2312.c gb18030.c)
 set(zint_ONEDIM_SRCS code.c code128.c 2of5.c upcean.c telepen.c medical.c plessey.c rss.c)
 set(zint_POSTAL_SRCS postal.c auspost.c imail.c mailmark.c)
 set(zint_TWODIM_SRCS code16k.c codablock.c dmatrix.c pdf417.c qr.c maxicode.c composite.c aztec.c code49.c code1.c gridmtx.c hanxin.c dotcode.c ultra.c)
--- a/backend/common.c
+++ b/backend/common.c
@ -310,8 +310,9 @@ unsigned int decode_utf8(unsigned int* state, unsigned int* codep, const unsigne
    return *state;
 }

-/* Convert UTF-8 to UTF-16 for codepoints <= U+FFFF (ie four-byte sequences (requiring UTF-16 surrogates) not allowed) */
-int utf8toutf16(struct zint_symbol *symbol, const unsigned char source[], int vals[], size_t *length) {
+/* Convert UTF-8 to Unicode. If `disallow_4byte` unset, allow all values (UTF-32).
+ * If `disallow_4byte` set, only allow codepoints <= U+FFFF (ie four-byte sequences not allowed) (UTF-16, no surrogates) */
+int utf8_to_unicode(struct zint_symbol *symbol, const unsigned char source[], unsigned int vals[], size_t *length, int disallow_4byte) {
    size_t bpos;
    int    jpos;
    unsigned int codepoint, state = 0;
@ -328,7 +329,7 @@ int utf8toutf16(struct zint_symbol *symbol, const unsigned char source[], int va
            strcpy(symbol->errtxt, "240: Corrupt Unicode data");
            return ZINT_ERROR_INVALID_DATA;
        }
-        if (codepoint > 0xffff) {
+        if (disallow_4byte && codepoint > 0xffff) {
            strcpy(symbol->errtxt, "242: Unicode sequences of more than 3 bytes not supported");
            return ZINT_ERROR_INVALID_DATA;
        }
@ -342,9 +343,8 @@ int utf8toutf16(struct zint_symbol *symbol, const unsigned char source[], int va
    return 0;
 }

-
+/* Enforce minimum permissible height of rows */
 void set_minimum_height(struct zint_symbol *symbol, const int min_height) {
-    /* Enforce minimum permissable height of rows */
    int fixed_height = 0;
    int zero_count = 0;
    int i;
@ -368,3 +368,94 @@ void set_minimum_height(struct zint_symbol *symbol, const int min_height) {
    }
 }

+/* Calculate optimized encoding modes. Adapted from Project Nayuki */
+void pn_define_mode(char* mode, const unsigned int data[], const size_t length, const int debug,
+        unsigned int state[], const char mode_types[], const int num_modes, pn_head_costs head_costs, pn_switch_cost switch_cost, pn_eod_cost eod_cost, pn_cur_cost cur_cost) {
+    /*
+     * Copyright (c) Project Nayuki. (MIT License)
+     * https://www.nayuki.io/page/qr-code-generator-library
+     *
+     * Permission is hereby granted, free of charge, to any person obtaining a copy of
+     * this software and associated documentation files (the "Software"), to deal in
+     * the Software without restriction, including without limitation the rights to
+     * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+     * the Software, and to permit persons to whom the Software is furnished to do so,
+     * subject to the following conditions:
+     * - The above copyright notice and this permission notice shall be included in
+     *   all copies or substantial portions of the Software.
+     */
+    int i, j, k, cm_i;
+    unsigned int min_cost;
+    char cur_mode;
+#ifndef _MSC_VER
+    unsigned int prev_costs[num_modes];
+    char char_modes[length * num_modes];
+    unsigned int cur_costs[num_modes];
+#else
+    unsigned int* prev_costs;
+    char* char_modes;
+    unsigned int* cur_costs;
+    prev_costs = (unsigned int*) _alloca(num_modes * sizeof(unsigned int));
+    char_modes = (char*) _alloca(length * num_modes);
+    cur_costs = (unsigned int*) _alloca(num_modes * sizeof(unsigned int));
+#endif
+
+    /* char_modes[i * num_modes + j] represents the mode to encode the code point at index i such that the final segment ends in
+     * mode_types[j] and the total number of bits is minimized over all possible choices */
+    memset(char_modes, 0, length * num_modes);
+
+    /* At the beginning of each iteration of the loop below, prev_costs[j] is the minimum cost, in the caller's cost units
+     * (e.g. 1/6 (1/GM_MULT) bits for Grid Matrix), needed to encode the entire string prefix of length i, and end in mode_types[j] */
+    memcpy(prev_costs, (*head_costs)(state), num_modes * sizeof(unsigned int));
+
+    /* Calculate costs using dynamic programming */
+    for (i = 0, cm_i = 0; i < length; i++, cm_i += num_modes) {
+        memset(cur_costs, 0, num_modes * sizeof(unsigned int));
+
+        (*cur_cost)(state, data, length, i, char_modes, prev_costs, cur_costs);
+
+        if (eod_cost && i == length - 1) { /* Add end of data costs if last character */
+            for (j = 0; j < num_modes; j++) {
+                if (char_modes[cm_i + j]) {
+                    cur_costs[j] += (*eod_cost)(state, j);
+                }
+            }
+        }
+
+        /* Start new segment at the end to switch modes */
+        for (j = 0; j < num_modes; j++) { /* To mode */
+            for (k = 0; k < num_modes; k++) { /* From mode */
+                if (j != k && char_modes[cm_i + k]) {
+                    unsigned int new_cost = cur_costs[k] + (*switch_cost)(state, k, j);
+                    if (!char_modes[cm_i + j] || new_cost < cur_costs[j]) {
+                        cur_costs[j] = new_cost;
+                        char_modes[cm_i + j] = mode_types[k];
+                    }
+                }
+            }
+        }
+
+        memcpy(prev_costs, cur_costs, num_modes * sizeof(unsigned int));
+    }
+
+    /* Find optimal ending mode */
+    min_cost = prev_costs[0];
+    cur_mode = mode_types[0];
+    for (i = 1; i < num_modes; i++) {
+        if (prev_costs[i] < min_cost) {
+            min_cost = prev_costs[i];
+            cur_mode = mode_types[i];
+        }
+    }
+
+    /* Get optimal mode for each code point by tracing backwards */
+    for (i = length - 1, cm_i = i * num_modes; i >= 0; i--, cm_i -= num_modes) {
+        j = strchr(mode_types, cur_mode) - mode_types;
+        cur_mode = char_modes[cm_i + j];
+        mode[i] = cur_mode;
+    }
+
+    if (debug & ZINT_DEBUG_PRINT) {
+        printf("  Mode: %.*s\n", (int)length, mode);
+    }
+}
--- a/backend/common.h
+++ b/backend/common.h
@ -75,8 +75,15 @@ extern "C" {
    extern int is_extendable(const int symbology);
    extern int is_composite(const int symbology);
    extern unsigned int decode_utf8(unsigned int* state, unsigned int* codep, const unsigned char byte);
-    extern int utf8toutf16(struct zint_symbol *symbol, const unsigned char source[], int vals[], size_t *length);
+    extern int utf8_to_unicode(struct zint_symbol *symbol, const unsigned char source[], unsigned int vals[], size_t *length, int disallow_4byte);
    extern void set_minimum_height(struct zint_symbol *symbol, const int min_height);
+
+    typedef unsigned int* (*pn_head_costs)(unsigned int state[]);
+    typedef unsigned int (*pn_switch_cost)(unsigned int state[], const int j, const int k);
+    typedef unsigned int (*pn_eod_cost)(unsigned int state[], const int k);
+    typedef void (*pn_cur_cost)(unsigned int state[], const unsigned int data[], const size_t length, const int i, char* char_modes, unsigned int prev_costs[], unsigned int cur_costs[]);
+    extern void pn_define_mode(char* mode, const unsigned int data[], const size_t length, const int debug,
+                    unsigned int state[], const char mode_types[], const int num_modes, pn_head_costs head_costs, pn_switch_cost switch_cost, pn_eod_cost eod_cost, pn_cur_cost cur_cost);
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
--- a/backend/gb18030.c
+++ b/backend/gb18030.c
--- a/backend/gb18030.h
+++ b/backend/gb18030.h
--- a/backend/gb2312.c
+++ b/backend/gb2312.c
@ -1499,23 +1499,17 @@ static const Summary16 gb2312_uni2indx_pageff[15] = {
  { 7441, 0x0000 }, { 7441, 0x0000 }, { 7441, 0x002b },
 };

-int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
+int gb2312_wctomb_zint(unsigned int* r, unsigned int wc) {
    const Summary16 *summary = NULL;
    if (wc >= 0x0000 && wc < 0x0460) {
        if (wc == 0x00b7) { /* ZINT: Patched to duplicate map to 0xA1A4 */
-            if (n < 2) {
-                return -1;
-            }
-            r[0] = 0xA1; r[1] = 0xA4;
+            *r = 0xA1A4;
            return 2;
        }
        summary = &gb2312_uni2indx_page00[(wc>>4)];
    } else if (wc >= 0x2000 && wc < 0x2650) {
        if (wc == 0x2014) { /* ZINT: Patched to duplicate map to 0xA1AA */
-            if (n < 2) {
-                return -1;
-            }
-            r[0] = 0xA1; r[1] = 0xAA;
+            *r = 0xA1AA;
            return 2;
        }
        summary = &gb2312_uni2indx_page20[(wc>>4)-0x200];
@ -1532,10 +1526,6 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
        unsigned short used = summary->used;
        unsigned int i = wc & 0x0f;
        if (used & ((unsigned short) 1 << i)) {
-            unsigned short c;
-            if (n < 2) {
-                return -1;
-            }
            /* Keep in 'used' only the bits 0..i-1. */
            used &= ((unsigned short) 1 << i) - 1;
            /* Add 'summary->indx' and the number of bits set in 'used'. */
@ -1543,8 +1533,7 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
            used = (used & 0x3333) + ((used & 0xcccc) >> 2);
            used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);
            used = (used & 0x00ff) + (used >> 8);
-            c = gb2312_2charset[summary->indx + used];
-            r[0] = (c >> 8); r[1] = (c & 0xff);
+            *r = gb2312_2charset[summary->indx + used];
            return 2;
        }
    }
@ -1553,16 +1542,15 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {

 /* Convert UTF-8 string to GB 2312 (EUC-CN) and place in array of ints */
 int gb2312_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* gbdata) {
-    int i, error_number, ret;
+    int i, error_number;
    unsigned int length;
-    unsigned char buf[2];
 #ifndef _MSC_VER
-    int utfdata[*p_length + 1]; /* Leave signed for the moment until `utf8toutf16()` signature changed */
+    unsigned int utfdata[*p_length + 1];
 #else
-    int* utfdata = (int*) _alloca((*p_length + 1) * sizeof(int));
+    unsigned int* utfdata = (unsigned int*) _alloca((*p_length + 1) * sizeof(unsigned int));
 #endif

-    error_number = utf8toutf16(symbol, source, utfdata, p_length);
+    error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
    if (error_number != 0) {
        return error_number;
    }
@ -1571,12 +1559,10 @@ int gb2312_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], si
        if (utfdata[i] < 0x80) {
            gbdata[i] = utfdata[i];
        } else {
-            ret = gb2312_wctomb_zint(buf, utfdata[i], 2);
-            if (ret != 2) {
+            if (!gb2312_wctomb_zint(gbdata + i, utfdata[i])) {
                strcpy(symbol->errtxt, "810: Invalid character in input data");
                return ZINT_ERROR_INVALID_DATA;
            }
-            gbdata[i] = (buf[0] << 8) | buf[1];
        }
    }

--- a/backend/gb2312.h
+++ b/backend/gb2312.h
@ -37,7 +37,7 @@
 extern "C" {
 #endif /* __cplusplus */

-int gb2312_wctomb_zint(unsigned char* r, unsigned int wc, size_t n);
+int gb2312_wctomb_zint(unsigned int* r, unsigned int wc);
 int gb2312_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* gbdata);
 int gb2312_utf8tosb(int eci, const unsigned char source[], size_t* p_length, unsigned int* gbdata);
 void gb2312_cpy(const unsigned char source[], size_t* p_length, unsigned int* gbdata);
--- a/backend/gridmtx.c
+++ b/backend/gridmtx.c
@ -30,7 +30,7 @@
 */
 /* vim: set ts=4 sw=4 et : */

-/* This file impliments Grid Matrix as specified in
+/* This file implements Grid Matrix as specified in
   AIM Global Document Number AIMD014 Rev. 1.63 Revised 9 Dec 2008 */

 #include <stdio.h>
@ -44,13 +44,15 @@
 #include "gridmtx.h"
 #include "gb2312.h"

+/* define_mode() stuff */
+
 /* Bits multiplied by this for costs, so as to be whole integer divisible by 2 and 3 */
 #define GM_MULT 6

 static char numeral_nondigits[] = " +-.,"; /* Non-digit numeral set, excluding EOL (carriage return/linefeed) */

 /* Whether in numeral or not. If in numeral, *p_numeral_end is set to position after numeral, and *p_numeral_cost is set to per-numeral cost */
-static int numeral_lat(unsigned int gbdata[], const size_t length, const int posn, int* p_numeral_end, int* p_numeral_cost) {
+static int in_numeral(const unsigned int gbdata[], const size_t length, const int posn, unsigned int* p_numeral_end, unsigned int* p_numeral_cost) {
    int i, nondigit, nondigit_posn, digit_cnt;

    if (posn < *p_numeral_end) {
@ -82,7 +84,7 @@ static int numeral_lat(unsigned int gbdata[], const size_t length, const int pos
        }
    }
    if (digit_cnt == 0) { /* Must have at least one digit */
-        *p_numeral_end = -1;
+        *p_numeral_end = 0;
        return 0;
    }
    if (nondigit && nondigit_posn == i - 1) { /* Non-digit can't be at end */
@ -119,168 +121,109 @@ static int numeral_lat(unsigned int gbdata[], const size_t length, const int pos

 #define GM_NUM_MODES 6

-/* Calculate optimized encoding modes. Adapted from Project Nayuki */
-/*
- * Copyright (c) Project Nayuki. (MIT License)
- * https://www.nayuki.io/page/qr-code-generator-library
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of
- * this software and associated documentation files (the "Software"), to deal in
- * the Software without restriction, including without limitation the rights to
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
- * the Software, and to permit persons to whom the Software is furnished to do so,
- * subject to the following conditions:
- * - The above copyright notice and this permission notice shall be included in
- *   all copies or substantial portions of the Software.
- */
-static void define_mode(unsigned int gbdata[], const size_t length, char* mode, int debug) {
-    static char mode_types[] = { GM_CHINESE, GM_NUMBER, GM_LOWER, GM_UPPER, GM_MIXED, GM_BYTE }; /* Must be in same order as GM_H etc */
+/* Initial mode costs */
+static unsigned int head_costs[GM_NUM_MODES] = {
+/*  H            N (+pad prefix)    L            U            M            B (+byte count) */
+    4 * GM_MULT, (4 + 2) * GM_MULT, 4 * GM_MULT, 4 * GM_MULT, 4 * GM_MULT, (4 + 9) * GM_MULT
+};

-    /* Initial mode costs */
-    static unsigned int head_costs[GM_NUM_MODES] = {
-    /*  H            N (+pad prefix)    L            U            M            B (+byte count) */
-        4 * GM_MULT, (4 + 2) * GM_MULT, 4 * GM_MULT, 4 * GM_MULT, 4 * GM_MULT, (4 + 9) * GM_MULT
-    };
+static unsigned int* gm_head_costs(unsigned int state[]) {
+    return head_costs;
+}

-    /* Costs of switching modes - see AIMD014 Rev. 1.63 Table 9 – Type conversion codes */
+/* Cost of switching modes - see AIMD014 Rev. 1.63 Table 9 – Type conversion codes */
+static unsigned int gm_switch_cost(unsigned int state[], const int j, const int k) {
    static unsigned int switch_costs[GM_NUM_MODES][GM_NUM_MODES] = {
-        /*      H               N                      L             U             M             B  */
-        /*H*/ {                 0, (13 + 2) * GM_MULT, 13 * GM_MULT, 13 * GM_MULT, 13 * GM_MULT, (13 + 9) * GM_MULT },
-        /*N*/ { 10 * GM_MULT,                       0, 10 * GM_MULT, 10 * GM_MULT, 10 * GM_MULT, (10 + 9) * GM_MULT },
-        /*L*/ {  5 * GM_MULT,  (5 + 2) * GM_MULT,                 0,  5 * GM_MULT,  7 * GM_MULT,  (7 + 9) * GM_MULT },
-        /*U*/ {  5 * GM_MULT,  (5 + 2) * GM_MULT,  5 * GM_MULT,                 0,  7 * GM_MULT,  (7 + 9) * GM_MULT },
-        /*M*/ { 10 * GM_MULT, (10 + 2) * GM_MULT, 10 * GM_MULT, 10 * GM_MULT,                 0, (10 + 9) * GM_MULT },
-        /*B*/ {  4 * GM_MULT,  (4 + 2) * GM_MULT,  4 * GM_MULT,  4 * GM_MULT,  4 * GM_MULT,                       0 },
+        /*      H             N                   L             U             M             B  */
+        /*H*/ {            0, (13 + 2) * GM_MULT, 13 * GM_MULT, 13 * GM_MULT, 13 * GM_MULT, (13 + 9) * GM_MULT },
+        /*N*/ { 10 * GM_MULT,                  0, 10 * GM_MULT, 10 * GM_MULT, 10 * GM_MULT, (10 + 9) * GM_MULT },
+        /*L*/ {  5 * GM_MULT,  (5 + 2) * GM_MULT,            0,  5 * GM_MULT,  7 * GM_MULT,  (7 + 9) * GM_MULT },
+        /*U*/ {  5 * GM_MULT,  (5 + 2) * GM_MULT,  5 * GM_MULT,            0,  7 * GM_MULT,  (7 + 9) * GM_MULT },
+        /*M*/ { 10 * GM_MULT, (10 + 2) * GM_MULT, 10 * GM_MULT, 10 * GM_MULT,            0, (10 + 9) * GM_MULT },
+        /*B*/ {  4 * GM_MULT,  (4 + 2) * GM_MULT,  4 * GM_MULT,  4 * GM_MULT,  4 * GM_MULT,                  0 },
    };

-    /* Final end-of-data costs - see AIMD014 Rev. 1.63 Table 9 – Type conversion codes */
+    return switch_costs[j][k];
+}
+
+/* Final end-of-data cost - see AIMD014 Rev. 1.63 Table 9 – Type conversion codes */
+static unsigned int gm_eod_cost(unsigned int state[], const int k) {
    static unsigned int eod_costs[GM_NUM_MODES] = {
    /*  H             N             L            U            M             B  */
        13 * GM_MULT, 10 * GM_MULT, 5 * GM_MULT, 5 * GM_MULT, 10 * GM_MULT, 4 * GM_MULT
    };

-    unsigned int prev_costs[GM_NUM_MODES];
-    int i, j, k;
-    int byte_count = 0;
-    int numeral_end = -1, numeral_cost;
-    int cur_mode_index;
-    unsigned int min_cost;
+    return eod_costs[k];
+}

-    /* char_modes[i][j] represents the mode to encode the code point at index i such that the final segment ends in mode_types[j] and the
-     * total number of bits is minimized over all possible choices */
-#ifndef _MSC_VER
-    char char_modes[length][GM_NUM_MODES];
-#else
-    char** char_modes = (char**) _alloca(length * GM_NUM_MODES);
-#endif
-    memset(char_modes, 0, length * GM_NUM_MODES);
+/* Calculate cost of encoding current character */
+static void gm_cur_cost(unsigned int state[], const unsigned int gbdata[], const size_t length, const int i, char* char_modes, unsigned int prev_costs[], unsigned int cur_costs[]) {
+    int cm_row = i * GM_NUM_MODES;
+    int double_byte, space, numeric, lower, upper, control, double_digit, eol;
+    unsigned int* p_numeral_end = &state[0];
+    unsigned int* p_numeral_cost = &state[1];
+    unsigned int* p_byte_count = &state[2];

-    /* At the beginning of each iteration of the loop below, prev_costs[j] is the minimum number of 1/6 (1/GM_MULT) bits needed
-     * to encode the entire string prefix of length i, and end in mode_types[j] */
-    memcpy(prev_costs, head_costs, sizeof(head_costs));
+    double_byte = gbdata[i] > 0xFF;
+    space = gbdata[i] == ' ';
+    numeric = gbdata[i] >= '0' && gbdata[i] <= '9';
+    lower = gbdata[i] >= 'a' && gbdata[i] <= 'z';
+    upper = gbdata[i] >= 'A' && gbdata[i] <= 'Z';
+    control = !space && !numeric && !lower && !upper && gbdata[i] < 0x7F; /* Exclude DEL */
+    double_digit = i < length - 1 && numeric && gbdata[i + 1] >= '0' && gbdata[i + 1] <= '9';
+    eol = i < length - 1 && gbdata[i] == 13 && gbdata[i + 1] == 10;

-    /* Calculate costs using dynamic programming */
-    for (i = 0; i < length; i++) {
-        int double_byte, space, numeric, lower, upper, control, double_digit, eol;
-        unsigned int cur_costs[GM_NUM_MODES] = { 0, 0, 0, 0, 0, 0 };
+    /* Hanzi mode can encode anything */
+    cur_costs[GM_H] = prev_costs[GM_H] + (double_digit || eol ? 39 : 78); /* (6.5 : 13) * GM_MULT */
+    char_modes[cm_row + GM_H] = GM_CHINESE;

-        double_byte = gbdata[i] > 0xFF;
-        space = gbdata[i] == ' ';
-        numeric = gbdata[i] >= '0' && gbdata[i] <= '9';
-        lower = gbdata[i] >= 'a' && gbdata[i] <= 'z';
-        upper = gbdata[i] >= 'A' && gbdata[i] <= 'Z';
-        control = !space && !numeric && !lower && !upper && gbdata[i] < 0x7F; /* Exclude DEL */
-        double_digit = i < length - 1 && numeric && gbdata[i + 1] >= '0' && gbdata[i + 1] <= '9';
-        eol = i < length - 1 && gbdata[i] == 13 && gbdata[i + 1] == 10;
-
-        /* Hanzi mode can encode anything */
-        cur_costs[GM_H] = prev_costs[GM_H] + (double_digit || eol ? 39 : 78); /* (6.5 : 13) * GM_MULT */
-        char_modes[i][GM_H] = 'H';
-
-        /* Byte mode can encode anything */
-        if (byte_count == 512 || (double_byte && byte_count == 511)) {
-            cur_costs[GM_B] = head_costs[GM_B];
-            if (double_byte && byte_count == 511) {
-                cur_costs[GM_B] += 48; /* 8 * GM_MULT */
-                double_byte = 0; /* Splitting double-byte so mark as single */
-            }
-            byte_count = 0;
+    /* Byte mode can encode anything */
+    if (*p_byte_count == 512 || (double_byte && *p_byte_count == 511)) {
+        cur_costs[GM_B] = head_costs[GM_B];
+        if (double_byte && *p_byte_count == 511) {
+            cur_costs[GM_B] += 48; /* 8 * GM_MULT */
+            double_byte = 0; /* Splitting double-byte so mark as single */
        }
-        cur_costs[GM_B] += prev_costs[GM_B] + (double_byte ? 96 : 48); /* (16 : 8) * GM_MULT */
-        char_modes[i][GM_B] = 'B';
-        byte_count += double_byte ? 2 : 1;
+        *p_byte_count = 0;
+    }
+    cur_costs[GM_B] += prev_costs[GM_B] + (double_byte ? 96 : 48); /* (16 : 8) * GM_MULT */
+    char_modes[cm_row + GM_B] = GM_BYTE;
+    *p_byte_count += double_byte ? 2 : 1;

-        if (numeral_lat(gbdata, length, i, &numeral_end, &numeral_cost)) {
-            cur_costs[GM_N] = prev_costs[GM_N] + numeral_cost;
-            char_modes[i][GM_N] = 'N';
-        }
-
-        if (control) {
-            cur_costs[GM_L] = prev_costs[GM_L] + 78; /* (7 + 6) * GM_MULT */
-            char_modes[i][GM_L] = 'L';
-            cur_costs[GM_U] = prev_costs[GM_U] + 78; /* (7 + 6) * GM_MULT */
-            char_modes[i][GM_U] = 'U';
-            cur_costs[GM_M] = prev_costs[GM_M] + 96; /* (10 + 6) * GM_MULT */
-            char_modes[i][GM_M] = 'M';
-        } else {
-            if (lower || space) {
-                cur_costs[GM_L] = prev_costs[GM_L] + 30; /* 5 * GM_MULT */
-                char_modes[i][GM_L] = 'L';
-            }
-            if (upper || space) {
-                cur_costs[GM_U] = prev_costs[GM_U] + 30; /* 5 * GM_MULT */
-                char_modes[i][GM_U] = 'U';
-            }
-            if (numeric || lower || upper || space) {
-                cur_costs[GM_M] = prev_costs[GM_M] + 36; /* 6 * GM_MULT */
-                char_modes[i][GM_M] = 'M';
-            }
-        }
-
-        if (i == length - 1) { /* Add end of data costs if last character */
-            for (j = 0; j < GM_NUM_MODES; j++) {
-                if (char_modes[i][j]) {
-                    cur_costs[j] += eod_costs[j];
-                }
-            }
-        }
-
-        /* Start new segment at the end to switch modes */
-        for (j = 0; j < GM_NUM_MODES; j++) { /* To mode */
-            for (k = 0; k < GM_NUM_MODES; k++) { /* From mode */
-                if (j != k && char_modes[i][k]) {
-                    unsigned int new_cost = cur_costs[k] + switch_costs[k][j];
-                    if (!char_modes[i][j] || new_cost < cur_costs[j]) {
-                        cur_costs[j] = new_cost;
-                        char_modes[i][j] = mode_types[k];
-                    }
-                }
-            }
-        }
-
-        memcpy(prev_costs, cur_costs, sizeof(cur_costs));
+    if (in_numeral(gbdata, length, i, p_numeral_end, p_numeral_cost)) {
+        cur_costs[GM_N] = prev_costs[GM_N] + *p_numeral_cost;
+        char_modes[cm_row + GM_N] = GM_NUMBER;
    }

-    /* Find optimal ending mode */
-    cur_mode_index = 0;
-    min_cost = prev_costs[0];
-    for (i = 1; i < GM_NUM_MODES; i++) {
-        if (prev_costs[i] < min_cost) {
-            min_cost = prev_costs[i];
-            cur_mode_index = i;
+    if (control) {
+        cur_costs[GM_L] = prev_costs[GM_L] + 78; /* (7 + 6) * GM_MULT */
+        char_modes[cm_row + GM_L] = GM_LOWER;
+        cur_costs[GM_U] = prev_costs[GM_U] + 78; /* (7 + 6) * GM_MULT */
+        char_modes[cm_row + GM_U] = GM_UPPER;
+        cur_costs[GM_M] = prev_costs[GM_M] + 96; /* (10 + 6) * GM_MULT */
+        char_modes[cm_row + GM_M] = GM_MIXED;
+    } else {
+        if (lower || space) {
+            cur_costs[GM_L] = prev_costs[GM_L] + 30; /* 5 * GM_MULT */
+            char_modes[cm_row + GM_L] = GM_LOWER;
+        }
+        if (upper || space) {
+            cur_costs[GM_U] = prev_costs[GM_U] + 30; /* 5 * GM_MULT */
+            char_modes[cm_row + GM_U] = GM_UPPER;
+        }
+        if (numeric || lower || upper || space) {
+            cur_costs[GM_M] = prev_costs[GM_M] + 36; /* 6 * GM_MULT */
+            char_modes[cm_row + GM_M] = GM_MIXED;
        }
    }
+}

-    /* Get optimal mode for each code point by tracing backwards */
-    for (i = length - 1; i >= 0; i--) {
-        char cur_mode = char_modes[i][cur_mode_index];
-        cur_mode_index = strchr(mode_types, cur_mode) - mode_types;
-        mode[i] = cur_mode;
-    }
+/* Calculate optimized encoding modes */
+static void define_mode(char* mode, const unsigned int gbdata[], const size_t length, const int debug) {
+    static char mode_types[] = { GM_CHINESE, GM_NUMBER, GM_LOWER, GM_UPPER, GM_MIXED, GM_BYTE }; /* Must be in same order as GM_H etc */
+    unsigned int state[3] = { 0 /*numeral_end*/, 0 /*numeral_cost*/, 0 /*byte_count*/ };

-    if (debug & ZINT_DEBUG_PRINT) {
-        printf("  Mode: %.*s\n", (int)length, mode);
-    }
+    pn_define_mode(mode, gbdata, length, debug, state, mode_types, GM_NUM_MODES, gm_head_costs, gm_switch_cost, gm_eod_cost, gm_cur_cost);
 }

 /* Add the length indicator for byte encoded blocks */
@ -352,7 +295,7 @@ static int gm_encode(unsigned int gbdata[], const size_t length, char binary[],
        }
    }

-    define_mode(gbdata, length, mode, debug);
+    define_mode(mode, gbdata, length, debug);

    do {
        int next_mode = mode[sp];
@ -774,14 +717,11 @@ static int gm_encode(unsigned int gbdata[], const size_t length, char binary[],
 }

 static void gm_test_codeword_dump(struct zint_symbol *symbol, int* codewords, int length) {
-    int i, max, cnt_len;
-    if (length >= 33) {
+    int i, max = length, cnt_len = 0;
+    if (length > 33) {
        sprintf(symbol->errtxt, "(%d) ", length); /* Place the number of codewords at the front */
        cnt_len = strlen(symbol->errtxt);
        max = 33 - (cnt_len + 2) / 3;
-    } else {
-        max = length > 33 ? 33 : length;
-        cnt_len = 0;
    }
    for (i = 0; i < max; i++) { /* 33*3 < errtxt 100 chars */
        sprintf(symbol->errtxt + cnt_len + i * 3, "%02X ", codewords[i]);
--- a/backend/hanxin.c
+++ b/backend/hanxin.c
--- a/backend/reedsol.c
+++ b/backend/reedsol.c
@ -51,6 +51,8 @@
 #include <stdio.h>		// only needed for debug (main)
 #ifdef _MSC_VER
 #include <malloc.h>
+#else
+#include <stdlib.h>
 #endif
 #include "reedsol.h"
 static int logmod; // 2**symsize - 1
--- a/backend/sjis.c
+++ b/backend/sjis.c
@ -62,32 +62,20 @@ extern int utf_to_eci(const int eci, const unsigned char source[], unsigned char
 * JISX0201.1976-0 (libiconv-1.16/lib/jisx0201.h)
 */

-static int jisx0201_wctomb(unsigned char* r, unsigned int wc, size_t n) {
+static int jisx0201_wctomb(unsigned int* r, unsigned int wc) {
    if (wc < 0x0080 && !(wc == 0x005c || wc == 0x007e)) {
-        if (n < 1) {
-            return -1;
-        }
        *r = wc;
        return 1;
    }
    if (wc == 0x00a5) {
-        if (n < 1) {
-            return -1;
-        }
        *r = 0x5c;
        return 1;
    }
    if (wc == 0x203e) {
-        if (n < 1) {
-            return -1;
-        }
        *r = 0x7e;
        return 1;
    }
    if (wc >= 0xff61 && wc < 0xffa0) {
-        if (n < 1) {
-            return -1;
-        }
        *r = wc - 0xfec0;
        return 1;
    }
@ -1453,7 +1441,7 @@ static const Summary16 jisx0208_uni2indx_pageff[15] = {
  { 6877, 0x0000 }, { 6877, 0x0000 }, { 6877, 0x0028 },
 };

-static int jisx0208_wctomb(unsigned char* r, unsigned int wc, size_t n) {
+static int jisx0208_wctomb(unsigned int* r, unsigned int wc) {
    const Summary16 *summary = NULL;
    if (wc >= 0x0000 && wc < 0x0100) {
        summary = &jisx0208_uni2indx_page00[(wc>>4)];
@ -1474,10 +1462,6 @@ static int jisx0208_wctomb(unsigned char* r, unsigned int wc, size_t n) {
        unsigned short used = summary->used;
        unsigned int i = wc & 0x0f;
        if (used & ((unsigned short) 1 << i)) {
-            unsigned short c;
-            if (n < 2) {
-                return -1;
-            }
            /* Keep in 'used' only the bits 0..i-1. */
            used &= ((unsigned short) 1 << i) - 1;
            /* Add 'summary->indx' and the number of bits set in 'used'. */
@ -1485,8 +1469,7 @@ static int jisx0208_wctomb(unsigned char* r, unsigned int wc, size_t n) {
            used = (used & 0x3333) + ((used & 0xcccc) >> 2);
            used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);
            used = (used & 0x00ff) + (used >> 8);
-            c = jisx0208_2charset[summary->indx + used];
-            r[0] = (c >> 8); r[1] = (c & 0xff);
+            *r = jisx0208_2charset[summary->indx + used];
            return 2;
        }
    }
@ -1497,19 +1480,19 @@ static int jisx0208_wctomb(unsigned char* r, unsigned int wc, size_t n) {
 * SHIFT_JIS (libiconv-1.16/lib/sjis.h)
 */

-/* Returns 1 or 2 on success, 0 if no mapping, -1 if buffer too small */
-int sjis_wctomb_zint(unsigned char* r, unsigned int wc, size_t n) {
+/* Returns 1 or 2 on success, 0 if no mapping */
+int sjis_wctomb_zint(unsigned int* r, unsigned int wc) {
    int ret;

    /* Try JIS X 0201-1976. */
-    ret = jisx0201_wctomb(r, wc, n);
+    ret = jisx0201_wctomb(r, wc);
    if (ret) {
        return ret;
    }

    /* Try JIS X 0208-1990. */
    /* ZINT: Note leaving mapping of full-width reverse solidus U+FF3C to 0x815F (duplicate of patched U+005C) to avoid having to regen tables */
-    ret = jisx0208_wctomb(r, wc, n);
+    ret = jisx0208_wctomb(r, wc);
    if (ret) {
        return ret;
    }
@ -1519,13 +1502,9 @@ int sjis_wctomb_zint(unsigned char* r, unsigned int wc, size_t n) {
    /* ZINT: https://file.allitebooks.com/20160708/CJKV%20Information%20Processing.pdf (table 4-86, p. 286, 2nd ed.) */
    if (wc >= 0xe000 && wc < 0xe758) {
        unsigned char c1, c2;
-        if (n < 2) {
-            return -1;
-        }
        c1 = (unsigned int) (wc - 0xe000) / 188;
        c2 = (unsigned int) (wc - 0xe000) % 188;
-        r[0] = c1 + 0xf0;
-        r[1] = c2 < 0x3f ? c2 + 0x40 : c2 + 0x41;
+        *r = ((c1 + 0xf0) << 8) | (c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
        return 2;
    }

@ -1534,31 +1513,24 @@ int sjis_wctomb_zint(unsigned char* r, unsigned int wc, size_t n) {

 /* Convert UTF-8 string to Shift JIS and place in array of ints */
 int sjis_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* jisdata) {
-    int i, error_number, ret;
+    int i, error_number;
    unsigned int length;
-    unsigned char buf[2];
 #ifndef _MSC_VER
-    int utfdata[*p_length + 1]; /* Leave signed for the moment until `utf8toutf16()` signature changed */
+    unsigned int utfdata[*p_length + 1];
 #else
-    int* utfdata = (int*) _alloca((*p_length + 1) * sizeof(int));
+    unsigned int* utfdata = (unsigned int*) _alloca((*p_length + 1) * sizeof(unsigned int));
 #endif

-    error_number = utf8toutf16(symbol, source, utfdata, p_length);
+    error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
    if (error_number != 0) {
        return error_number;
    }

    for (i = 0, length = *p_length; i < length; i++) {
-        ret = sjis_wctomb_zint(buf, utfdata[i], 2);
-        if (ret <= 0) {
+        if (!sjis_wctomb_zint(jisdata + i, utfdata[i])) {
            strcpy(symbol->errtxt, "800: Invalid character in input data");
            return ZINT_ERROR_INVALID_DATA;
        }
-        if (ret == 1) {
-            jisdata[i] = buf[0];
-        } else {
-            jisdata[i] = (buf[0] << 8) | buf[1];
-        }
    }

    return 0;
--- a/backend/sjis.h
+++ b/backend/sjis.h
@ -37,7 +37,7 @@
 extern "C" {
 #endif /* __cplusplus */

-int sjis_wctomb_zint(unsigned char* r, unsigned int wc, size_t n);
+int sjis_wctomb_zint(unsigned int* r, unsigned int wc);
 int sjis_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* jisdata);
 int sjis_utf8tosb(int eci, const unsigned char source[], size_t* p_length, unsigned int* jisdata);
 void sjis_cpy(const unsigned char source[], size_t* p_length, unsigned int* jisdata);
--- a/backend/tests/CMakeLists.txt
+++ b/backend/tests/CMakeLists.txt
@ -51,9 +51,11 @@ zint_add_test(common, test_common)
 zint_add_test(composite, test_composite)
 zint_add_test(dmatrix, test_dmatrix)
 zint_add_test(eci, test_eci)
+zint_add_test(gb18030, test_gb18030)
 zint_add_test(gb2312, test_gb2312)
 zint_add_test(gridmtx, test_gridmtx)
 zint_add_test(gs1, test_gs1)
+zint_add_test(hanxin, test_hanxin)
 zint_add_test(imail, test_imail)
 zint_add_test(library, test_library)
 zint_add_test(mailmark, test_mailmark)
--- a/backend/tests/test_common.c
+++ b/backend/tests/test_common.c
@ -31,7 +31,7 @@

 #include "testcommon.h"

-static void test_utf8toutf16(void)
+static void test_utf8_to_unicode(void)
 {
    testStart("");

@ -39,6 +39,7 @@ static void test_utf8toutf16(void)
    struct item {
        unsigned char* data;
        int length;
+        int disallow_4byte;
        int ret;
        size_t ret_length;
        int expected_vals[20];
@ -46,11 +47,11 @@ static void test_utf8toutf16(void)
    };
    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
    struct item data[] = {
-        /*  0*/ { "", -1, 0, 0, {}, "" },
-        /*  1*/ { "\000a\302\200\340\240\200", 7, 0, 4, { 0, 'a', 0x80, 0x800 }, "NUL a C280 E0A080" },
-        /*  2*/ { "\357\277\277", -1, 0, 1, { 0xFFFF }, "EFBFBF" },
-        /*  3*/ { "\360\220\200\200", -1, ZINT_ERROR_INVALID_DATA, -1, {}, "Four-byte F0908080" },
-        /*  4*/ { "a\200b", -1, ZINT_ERROR_INVALID_DATA, -1, {}, "Orphan continuation 0x80" },
+        /*  0*/ { "", -1, 1, 0, 0, {}, "" },
+        /*  1*/ { "\000a\302\200\340\240\200", 7, 1, 0, 4, { 0, 'a', 0x80, 0x800 }, "NUL a C280 E0A080" },
+        /*  2*/ { "\357\277\277", -1, 1, 0, 1, { 0xFFFF }, "EFBFBF" },
+        /*  3*/ { "\360\220\200\200", -1, 1, ZINT_ERROR_INVALID_DATA, -1, {}, "Four-byte F0908080" },
+        /*  4*/ { "a\200b", -1, 1, ZINT_ERROR_INVALID_DATA, -1, {}, "Orphan continuation 0x80" },
    };
    int data_size = sizeof(data) / sizeof(struct item);

@ -62,7 +63,7 @@ static void test_utf8toutf16(void)
        int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
        size_t ret_length = length;

-        ret = utf8toutf16(&symbol, data[i].data, vals, &ret_length);
+        ret = utf8_to_unicode(&symbol, data[i].data, vals, &ret_length, data[i].disallow_4byte);
        assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret);
        if (ret == 0) {
            assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %ld != %ld\n", i, ret_length, data[i].ret_length);
@ -77,7 +78,7 @@ static void test_utf8toutf16(void)

 int main()
 {
-    test_utf8toutf16();
+    test_utf8_to_unicode();

    testReport();

--- a/backend/tests/test_gb18030.c
+++ b/backend/tests/test_gb18030.c
@ -0,0 +1,317 @@
+/*
+    libzint - the open source barcode library
+    Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+    3. Neither the name of the project nor the names of its contributors
+       may be used to endorse or promote products derived from this software
+       without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+    SUCH DAMAGE.
+ */
+/* vim: set ts=4 sw=4 et : */
+
+#include "testcommon.h"
+#include "test_gb18030_tab.h"
+#include "../gb18030.h"
+
+// As control convert to GB 18030 using table generated from GB18030.TXT plus simple processing.
+// The version of GB18030.TXT is libiconv-1.11/GB18030.TXT taken from https://haible.de/bruno/charsets/conversion-tables/GB18030.html
+// The generated file backend/tests/test_gb18030_tab.h does not include U+10000..10FFFF codepoints to save space.
+// See also backend/tests/tools/data/GB18030.TXT.README and backend/tests/tools/gen_test_tab.php.
+//
+// Returns the GB 18030 length in bytes for `wc`: 1 or 2 (value placed in `*r1`), 4 (leading two bytes in `*r1`,
+// trailing two bytes in `*r2`), or 0 if there is no mapping. `*r2` is only written for 4-byte results.
+static int gb18030_wctomb_zint2(unsigned int* r1, unsigned int* r2, unsigned int wc)
+{
+    unsigned int c;
+    // GB18030 two-byte extension (libiconv-1.16/lib/gb18030ext.h)
+    if (wc == 0x1E3F) { // GB 18030-2005 change, was PUA U+E7C7 below, see Table 3-39, p.111, Lunde 2nd ed.
+        *r1 = 0xA8BC;
+        return 2;
+    }
+    // GB18030 four-byte extension (libiconv-1.16/lib/gb18030uni.h)
+    if (wc == 0xE7C7) { // PUA
+        *r1 = 0x8135;
+        *r2 = 0xF437;
+        return 4;
+    }
+    // GB18030 two-byte extension (libiconv-1.16/lib/gb18030ext.h)
+    if (wc >= 0x9FB4 && wc <= 0x9FBB) { // GB 18030-2005 change, were PUA, see Table 3-37, p.108, Lunde 2nd ed.
+        if (wc == 0x9FB4) {
+            *r1 = 0xFE59;
+        } else if (wc == 0x9FB5) {
+            *r1 = 0xFE61;
+        } else if (wc == 0x9FB6 || wc == 0x9FB7) {
+            *r1 = 0xFE66 + (wc - 0x9FB6);
+        } else if (wc == 0x9FB8) {
+            *r1 = 0xFE6D;
+        } else if (wc == 0x9FB9) {
+            *r1 = 0xFE7E;
+        } else if (wc == 0x9FBA) {
+            *r1 = 0xFE90;
+        } else {
+            *r1 = 0xFEA0;
+        }
+        return 2;
+    }
+    // GB18030 two-byte extension (libiconv-1.16/lib/gb18030ext.h)
+    if (wc >= 0xFE10 && wc <= 0xFE19) { // GB 18030-2005 change, were PUA, see Table 3-37, p.108, Lunde 2nd ed.
+        if (wc == 0xFE10) {
+            *r1 = 0xA6D9;
+        } else if (wc == 0xFE11) {
+            *r1 = 0xA6DB;
+        } else if (wc == 0xFE12) {
+            *r1 = 0xA6DA;
+        } else if (wc >= 0xFE13 && wc <= 0xFE16) {
+            *r1 = 0xA6DC + (wc - 0xFE13);
+        } else if (wc == 0xFE17 || wc == 0xFE18) {
+            *r1 = 0xA6EC + (wc - 0xFE17);
+        } else {
+            *r1 = 0xA6F3;
+        }
+        return 2;
+    }
+    // GB18030 four-byte extension (libiconv-1.16/lib/gb18030uni.h)
+    if (wc >= 0xFE1A && wc <= 0xFE2F) { // These are Vertical Forms (U+FE1A..FE1F unassigned) and Combining Half Marks (U+FE20..FE2F)
+        if (wc >= 0xFE1A && wc <= 0xFE1D) {
+            c = 0x84318336 + (wc - 0xFE1A);
+        } else if (wc >= 0xFE1E && wc <= 0xFE27) {
+            c = 0x84318430 + (wc - 0xFE1E);
+        } else {
+            c = 0x84318530 + (wc - 0xFE28);
+        }
+        *r1 = c >> 16;
+        *r2 = c & 0xFFFF;
+        return 4;
+    }
+    // GB18030 (libiconv-1.16/lib/gb18030.h)
+    if (wc >= 0x10000) { // Code set 3 (Unicode U+10000..U+10FFFF)
+        if (wc > 0x10FFFF) { // Not a Unicode codepoint; must not reach the BMP-only table lookup below
+            return 0;
+        }
+        // Linear four-byte form starting at 0x90308130: fourth byte 0x30..39, third byte 0x81..FE,
+        // second byte 0x30..39, first byte from 0x90 (so U+10FFFF is 0xE3329A35)
+        c = wc - 0x10000;
+        *r2 = ((0x81 + (c / 10) % 126) << 8) | (0x30 + c % 10);
+        c /= 1260; // 10 * 126 codepoints per (first byte, second byte) pair
+        *r1 = ((0x90 + c / 10) << 8) | (0x30 + c % 10);
+        return 4;
+    }
+    // Everything else comes from the generated table of (GB 18030 value, Unicode codepoint) pairs,
+    // starting the scan at the index entry for the codepoint's top 4 bits
+    int tab_length = sizeof(test_gb18030_tab) / sizeof(unsigned int);
+    for (int i = test_gb18030_tab_ind[wc >> 12]; i < tab_length; i += 2) {
+        if (test_gb18030_tab[i + 1] == wc) {
+            c = test_gb18030_tab[i];
+            if (c <= 0xFFFF) {
+                *r1 = c;
+                return c <= 0xFF ? 1 : 2;
+            }
+            *r1 = c >> 16;
+            *r2 = c & 0xFFFF;
+            return 4;
+        }
+    }
+    return 0;
+}
+
+// Checks gb18030_wctomb_zint() against the control gb18030_wctomb_zint2() for every codepoint
+// U+0000..U+103FF (UTF-16 surrogates skipped): the return values must agree and, when the control
+// reports a mapping, both output values must agree too.
+static void test_gb18030_wctomb_zint(void)
+{
+    testStart("");
+
+    int ret, ret2;
+    unsigned int val1_1, val1_2, val2_1, val2_2;
+
+    for (unsigned int i = 0; i < 0x10400; i++) { // Don't bother with U+10400..U+10FFFF, programmatically filled
+        if (i >= 0xD800 && i <= 0xDFFF) { // UTF-16 surrogates
+            continue;
+        }
+        // Zero all outputs first so that values a function leaves unwritten still compare equal
+        val1_1 = val1_2 = val2_1 = val2_2 = 0;
+        ret = gb18030_wctomb_zint(&val1_1, &val1_2, i);
+        ret2 = gb18030_wctomb_zint2(&val2_1, &val2_2, i);
+        assert_equal(ret, ret2, "i:%d 0x%04X ret %d != ret2 %d, val1_1 0x%04X, val2_1 0x%04X, val1_2 0x%04X, val2_2 0x%04X\n", i, i, ret, ret2, val1_1, val2_1, val1_2, val2_2);
+        if (ret2) {
+            assert_equal(val1_1, val2_1, "i:%d 0x%04X val1_1 0x%04X != val2_1 0x%04X\n", i, i, val1_1, val2_1);
+            assert_equal(val1_2, val2_2, "i:%d 0x%04X val1_2 0x%04X != val2_2 0x%04X\n", i, i, val1_2, val2_2);
+        }
+    }
+
+    testFinish();
+}
+
+// Checks gb18030_utf8tomb(): each UTF-8 input must yield the expected return code and, on success,
+// the expected number of output values and the expected values themselves. As the table shows,
+// a 4-byte GB 18030 sequence occupies two consecutive output values (e.g. 0x8130, 0x8436).
+static void test_gb18030_utf8tomb(void)
+{
+    testStart("");
+
+    int ret;
+    struct item {
+        unsigned char* data;
+        int length;
+        int ret;
+        size_t ret_length;
+        unsigned int expected_gbdata[20];
+        char* comment;
+    };
+    // é U+00E9 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 18030 0xA8A6, UTF-8 C3A9
+    // β U+03B2 in ISO 8859-7 Greek (but not other ISO 8859 or Win page), in GB 18030 0xA6C2, UTF-8 CEB2
+    // ¤ U+00A4 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 18030 0xA1E8, UTF-8 C2A4
+    // ¥ U+00A5 in ISO 8859-1 0xA5, in GB 18030 4-byte 0x81308436, UTF-8 C2A5
+    // ・ U+30FB katakana middle dot, not in any ISO or Win page, in GB 18030 4-byte 0x8139A739 (not 0xA1A4, which is the mapping of U+00B7), UTF-8 E383BB
+    // · U+00B7 middle dot in ISO 8859-1 0xB7, in GB 18030 "GB 18030 subset" 0xA1A4, UTF-8 C2B7
+    // ― U+2015 horizontal bar in ISO 8859-7 Greek and ISO 8859-10 Nordic, not in any Win page, in GB 18030 "GB18030.TXT" 0xA844 (not 0xA1AA, which is the mapping of U+2014), UTF-8 E28095
+    // — U+2014 em dash, not in any ISO, in Win 1250 and other Win, in GB 18030 "GB 18030 subset" 0xA1AA, UTF-8 E28094
+    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
+    struct item data[] = {
+        /*  0*/ { "é", -1, 0, 1, { 0xA8A6 }, "" },
+        /*  1*/ { "β", -1, 0, 1, { 0xA6C2 }, "" },
+        /*  2*/ { "¤", -1, 0, 1, { 0xA1E8 }, "" },
+        /*  3*/ { "¥", -1, 0, 2, { 0x8130, 0x8436 }, "0x81308436" },
+        /*  4*/ { "・", -1, 0, 2, { 0x8139, 0xA739 }, "" },
+        /*  5*/ { "·", -1, 0, 1, { 0xA1A4 }, "GB 18030 subset mapping" },
+        /*  6*/ { "―", -1, 0, 1, { 0xA844 }, "GB18030.TXT mapping" },
+        /*  7*/ { "—", -1, 0, 1, { 0xA1AA }, "GB 18030 subset mapping" },
+        /*  8*/ { "aβc・·—é—Z", -1, 0, 10, { 'a', 0xA6C2, 'c', 0x8139, 0xA739, 0xA1A4, 0xA1AA, 0xA8A6, 0xA1AA, 'Z' }, "" },
+    };
+
+    int data_size = sizeof(data) / sizeof(struct item);
+
+    struct zint_symbol symbol;
+    unsigned int gbdata[20];
+
+    for (int i = 0; i < data_size; i++) {
+
+        // A length of -1 in the table means "use strlen() of the data"
+        int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
+        size_t ret_length = length;
+
+        ret = gb18030_utf8tomb(&symbol, data[i].data, &ret_length, gbdata);
+        assert_equal(ret, data[i].ret, "i:%d ret %d != %d (%s)\n", i, ret, data[i].ret, symbol.errtxt);
+        if (ret == 0) {
+            assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
+            for (int j = 0; j < ret_length; j++) {
+                assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] 0x%04X != 0x%04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
+            }
+        }
+    }
+
+    testFinish();
+}
+
+// Checks gb18030_utf8tosb(): each UTF-8 input is converted for the given ECI and must yield the
+// expected return code and, on success, the expected output values. Judging by the expected data,
+// the resulting single bytes are paired into one 16-bit value wherever they fall in the double-byte
+// or quad-byte ranges described in the comments below, and are otherwise left as single values.
+static void test_gb18030_utf8tosb(void)
+{
+    testStart("");
+
+    int ret;
+    struct item {
+        int eci;
+        unsigned char* data;
+        int length;
+        int ret;
+        size_t ret_length;
+        unsigned int expected_gbdata[20];
+        char* comment;
+    };
+    // é U+00E9 in ISO 8859-1 0xE9, Win 1250 plus other Win, in HANXIN Chinese mode first byte range 0x81..FE
+    // β U+03B2 in ISO 8859-7 Greek 0xE2 (but not other ISO 8859 or Win page)
+    // ¥ U+00A5 in ISO 8859-1 0xA5, in first byte range 0x81..FE
+    // ÿ U+00FF in ISO 8859-1 0xFF, outside first byte and second/third/fourth byte ranges
+    // @ U+0040 in ASCII 0x40, outside first byte range, in double-byte second byte range, outside quad-byte second/third/fourth byte ranges
+    // 9 U+0039 in ASCII 0x39, outside first byte range, outside double-byte second byte range and quad-byte third byte range, in quad-byte second/fourth byte ranges
+    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
+    struct item data[] = {
+        /*  0*/ { 3, "é", -1, 0, 1, { 0xE9 }, "First byte in range but only one byte" },
+        /*  1*/ { 3, "β", -1, ZINT_ERROR_INVALID_DATA, -1, {}, "Not in ECI 3 (ISO 8859-1)" },
+        /*  2*/ { 9, "β", -1, 0, 1, { 0xE2 }, "In ECI 9 (ISO 8859-7)" },
+        /*  3*/ { 3, "¥", -1, 0, 1, { 0xA5 }, "First byte in range but only one byte" },
+        /*  4*/ { 3, "¥é", -1, 0, 1, { 0xA5E9 }, "In double-byte range" },
+        /*  5*/ { 3, "¥ÿ", -1, 0, 2, { 0xA5, 0xFF }, "First byte in range but not second" },
+        /*  6*/ { 3, "¥9é9", -1, 0, 2, { 0xA539, 0xE939 }, "In quad-byte range" },
+        /*  7*/ { 3, "¥9", -1, 0, 2, { 0xA5, 0x39 }, "In quad-byte first/second range but only 2 bytes, not in double-byte range" },
+        /*  8*/ { 3, "¥9é", -1, 0, 3, { 0xA5, 0x39, 0xE9 }, "In quad-byte first/second/third range but only 3 bytes, no bytes in double-byte range" },
+        /*  9*/ { 3, "¥9é@", -1, 0, 3, { 0xA5, 0x39, 0xE940 }, "In quad-byte first/second/third range but not fourth, second 2 bytes in double-byte range" },
+        /* 10*/ { 3, "¥@é9", -1, 0, 3, { 0xA540, 0xE9, 0x39 }, "In quad-byte first/third/fourth range but not second, first 2 bytes in double-byte range" },
+        /* 11*/ { 3, "¥9@9", -1, 0, 4, { 0xA5, 0x39, 0x40, 0x39 }, "In quad-byte first/second/fourth range but not third, no bytes in double-byte range" },
+        /* 12*/ { 3, "é9éé¥9é@¥9é9¥9é0é@@¥¥é0é1", -1, 0, 15, { 0xE9, 0x39, 0xE9E9, 0xA5, 0x39, 0xE940, 0xA539, 0xE939, 0xA539, 0xE930, 0xE940, 0x40, 0xA5A5, 0xE930, 0xE931 }, "" },
+    };
+
+    int data_size = sizeof(data) / sizeof(struct item);
+
+    unsigned int gbdata[20];
+
+    for (int i = 0; i < data_size; i++) {
+
+        // A length of -1 in the table means "use strlen() of the data"
+        int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
+        size_t ret_length = length;
+
+        ret = gb18030_utf8tosb(data[i].eci, data[i].data, &ret_length, gbdata);
+        assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret);
+        if (ret == 0) {
+            assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
+            for (int j = 0; j < ret_length; j++) {
+                assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] %04X != %04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
+            }
+        }
+    }
+
+    testFinish();
+}
+
+// Checks gb18030_cpy(): raw input bytes are copied into an array of unsigned ints and the resulting
+// count and values must match the table. Judging by the expected data, byte sequences that fit the
+// HANXIN double-byte or quad-byte ranges are packed two bytes per value (e.g. 0x81 '2' 0x81 '3'
+// becomes 0x8132, 0x8133), while other bytes are copied singly.
+static void test_gb18030_cpy(void)
+{
+    testStart("");
+
+    struct item {
+        unsigned char* data;
+        int length;
+        int ret; // Unused by this test (gb18030_cpy() returns nothing); kept so rows match the sibling tables
+        size_t ret_length;
+        unsigned int expected_jisdata[20];
+        char* comment;
+    };
+    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
+    struct item data[] = {
+        /*  0*/ { "\351", -1, 0, 1, { 0xE9 }, "In HANXIN Chinese mode first-byte range but only one byte" },
+        /*  1*/ { "\351\241", -1, 0, 1, { 0xE9A1 }, "In HANXIN Chinese range" },
+        /*  2*/ { "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" },
+        /*  3*/ { "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" },
+        /*  4*/ { "\241\240\241\376\367\376\367\377\2012\2013", -1, 0, 7, { 0xA1A0, 0xA1FE, 0xF7FE, 0xF7, 0xFF, 0x8132, 0x8133 }, "" },
+    };
+
+    int data_size = sizeof(data) / sizeof(struct item);
+
+    unsigned int jisdata[40];
+
+    for (int i = 0; i < data_size; i++) {
+
+        // A length of -1 in the table means "use strlen() of the data"
+        int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
+        size_t ret_length = length;
+
+        gb18030_cpy(data[i].data, &ret_length, jisdata);
+        assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %zu != %zu\n", i, ret_length, data[i].ret_length);
+        for (int j = 0; j < ret_length; j++) {
+            assert_equal(jisdata[j], data[i].expected_jisdata[j], "i:%d jisdata[%d] %04X != %04X\n", i, j, jisdata[j], data[i].expected_jisdata[j]);
+        }
+    }
+
+    testFinish();
+}
+
+// Runs each GB 18030 test in turn, then prints the report via testReport()
+int main()
+{
+    test_gb18030_wctomb_zint();
+    test_gb18030_utf8tomb();
+    test_gb18030_utf8tosb();
+    test_gb18030_cpy();
+
+    testReport();
+
+    return 0;
+}
--- a/backend/tests/test_gb18030_tab.h
+++ b/backend/tests/test_gb18030_tab.h
--- a/backend/tests/test_gb2312.c
+++ b/backend/tests/test_gb2312.c
@ -35,18 +35,16 @@

 // As control convert to GB 2312 using simple table generated from unicode.org GB2312.TXT plus simple processing
 // GB2312.TXT no longer on unicode.org site but available from https://haible.de/bruno/charsets/conversion-tables/GB2312.html
-static int gb2312_wctomb_zint2(unsigned char* r, unsigned int wc, size_t n)
+static int gb2312_wctomb_zint2(unsigned int* r, unsigned int wc)
 {
    // Shortcut
    if ((wc > 0x0451 && wc < 0x2015) || (wc > 0x3229 && wc < 0x4E00) || (wc > 0x9FA0 && wc < 0xFF01) || wc > 0xFFE5) {
        return 0;
    }
-    int tab_length = sizeof(test_gb2312_tab) / sizeof(unsigned short);
-    for (int i = 0; i < tab_length; i += 2) {
+    int tab_length = sizeof(test_gb2312_tab) / sizeof(unsigned int);
+    for (int i = test_gb2312_tab_ind[wc >> 12]; i < tab_length; i += 2) {
        if (test_gb2312_tab[i + 1] == wc) {
-            unsigned short c = test_gb2312_tab[i] + 0x8080; // Table in GB 2312 not EUC-CN
-            r[0] = (c >> 8);
-            r[1] = c & 0xFF;
+            *r = test_gb2312_tab[i] + 0x8080; // Table in GB 2312 not EUC-CN
            return 2;
        }
    }
@ -68,11 +66,9 @@ static void test_gb2312_wctomb_zint(void)
        if (i >= 0xD800 && i <= 0xDFFF) { // UTF-16 surrogates
            continue;
        }
-        buf[0] = buf[1] = buf2[0] = buf2[1] = 0;
-        ret = gb2312_wctomb_zint(buf, i, 2);
-        val = ret == 1 ? buf[0] : (buf[0] << 8) | buf[1];
-        ret2 = gb2312_wctomb_zint2(buf2, i, 2);
-        val2 = ret2 == 1 ? buf2[0] : (buf2[0] << 8) | buf2[1];
+        val = val2 = 0;
+        ret = gb2312_wctomb_zint(&val, i);
+        ret2 = gb2312_wctomb_zint2(&val2, i);
        if (i == 0xB7) { // Extra mapping middle dot U+00B7 to 0xA1A4, duplicate of U+30FB (Katakana middle dot)
            assert_equal(ret, 2, "i:%d 0x%04X ret %d != 2, val 0x%04X\n", i, i, ret, val);
            assert_equal(val, 0xA1A4, "i:%d 0x%04X val 0x%04X != 0xA1A4\n", i, i, val);
@ -227,9 +223,9 @@ static void test_gb2312_cpy(void)
    struct item data[] = {
        /*  0*/ { "\351", -1, 0, 1, { 0xE9 }, "In GRIDMATRIX Chinese mode first-byte range but only one byte" },
        /*  1*/ { "\351\241", -1, 0, 1, { 0xE9A1 }, "In GRIDMATRIX Chinese range" },
-        /*  0*/ { "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" },
-        /*  0*/ { "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" },
-        /*  0*/ { "\241\240\241\376\367\376\367\377", -1, 0, 6, { 0xA1, 0xA0, 0xA1FE, 0xF7FE, 0xF7, 0xFF }, "" },
+        /*  2*/ { "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" },
+        /*  3*/ { "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" },
+        /*  4*/ { "\241\240\241\376\367\376\367\377", -1, 0, 6, { 0xA1, 0xA0, 0xA1FE, 0xF7FE, 0xF7, 0xFF }, "" },
    };

    int data_size = sizeof(data) / sizeof(struct item);
--- a/backend/tests/test_gb2312_tab.h
+++ b/backend/tests/test_gb2312_tab.h
@ -1,5 +1,5 @@
 /* Generated by gen_test_tab.php from GB2312.TXT */
-static const unsigned short test_gb2312_tab[] = {
+static const unsigned int test_gb2312_tab[] = {
    0x2168, 0x00A4,
    0x216C, 0x00A7,
    0x2127, 0x00A8,
@ -7446,3 +7446,22 @@ static const unsigned short test_gb2312_tab[] = {
    0x237E, 0xFFE3,
    0x2324, 0xFFE5,
 };
+
+static const unsigned int test_gb2312_tab_ind[] = {
+0,
+298,
+298,
+694,
+1168,
+1708,
+4686,
+7508,
+9962,
+12638,
+14694,
+14694,
+14694,
+14694,
+14694,
+14694,
+};
--- a/backend/tests/test_gridmtx.c
+++ b/backend/tests/test_gridmtx.c
@ -180,7 +180,7 @@ static void test_input(void)

        #ifdef TEST_INPUT_GENERATE_EXPECTED
        printf("        /*%3d*/ { %s, %d, \"%s\", %s, %d, \"%s\", \"%s\" },\n",
-                i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
+                i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
                ret < 5 ? symbol->eci : -1, symbol->errtxt, data[i].comment);
        #else
        if (ret < 5) {
--- a/backend/tests/test_hanxin.c
+++ b/backend/tests/test_hanxin.c
@ -0,0 +1,409 @@
+/*
+    libzint - the open source barcode library
+    Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+    3. Neither the name of the project nor the names of its contributors
+       may be used to endorse or promote products derived from this software
+       without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+    SUCH DAMAGE.
+ */
+/* vim: set ts=4 sw=4 et : */
+
+#include "testcommon.h"
+
+// When TEST_INPUT_GENERATE_EXPECTED is defined, test_input() prints regenerated table rows and
+// compiles out its assertions, so it must stay commented out in committed code.
+//#define TEST_INPUT_GENERATE_EXPECTED 1
+//#define TEST_ENCODE_GENERATE_EXPECTED 1
+
+// Checks how BARCODE_HANXIN handles `option_1` and `option_2` (per the row comments below: ECC level
+// and version respectively; -1 in the table means "leave the symbol's default"). Each row gives the
+// expected ZBarcode_Encode() result and, unless `ret_vector` is -1, the expected
+// ZBarcode_Buffer_Vector() result and the expected symbol size (width and rows are both compared
+// against `expected_size`).
+static void test_options(void)
+{
+    testStart("");
+
+    int ret;
+    struct item {
+        unsigned char* data;
+        int option_1;
+        int option_2;
+        int ret_encode;
+        int ret_vector;
+        int expected_size;
+    };
+    // s/\/\*[ 0-9]*\*\//\=printf("\/*%3d*\/", line(".") - line("'<"))
+    struct item data[] = {
+        /*  0*/ { "12345", -1, -1, 0, 0, 23 }, // Default version 1, ECC auto-set to 4
+        /*  1*/ { "12345", 1, -1, 0, 0, 23 },
+        /*  2*/ { "12345", -1, 2, 0, 0, 25 },
+        /*  3*/ { "12345", -1, 85, 0, 0, 23 }, // Version 85 > max version 84 so ignored
+        /*  4*/ { "12345", -1, 84, 0, 0, 189 },
+        /*  5*/ { "1234567890123456789012345678901234567890123", 1, 1, ZINT_ERROR_TOO_LONG, -1, -1 },
+        /*  6*/ { "1234567890123456", 4, 1, ZINT_ERROR_TOO_LONG, -1, -1 },
+        /*  7*/ { "12345678901234567", 4, 2, 0, 0, 25 },
+        /*  8*/ { "12345678901234567", 4, -1, 0, 0, 25 }, // Version auto-set to 2
+        /*  9*/ { "12345678901234567", -1, -1, 0, 0, 23 }, // Version auto-set to 1, ECC auto-set to 3
+        /* 10*/ { "12345678901234567", 5, -1, 0, 0, 23 }, // ECC > max ECC 4 so ignored and auto-settings version 1, ECC 3 used
+        /* 11*/ { "1234567890123456789012345678901234567890123", -1, -1, 0, 0, 25 }, // Version auto-set to 2, ECC auto-set to 2
+    };
+    int data_size = sizeof(data) / sizeof(struct item);
+
+    for (int i = 0; i < data_size; i++) {
+
+        struct zint_symbol* symbol = ZBarcode_Create();
+        assert_nonnull(symbol, "Symbol not created\n");
+
+        symbol->symbology = BARCODE_HANXIN;
+        // Only override the options the row specifies; -1 keeps whatever ZBarcode_Create() set
+        if (data[i].option_1 != -1) {
+            symbol->option_1 = data[i].option_1;
+        }
+        if (data[i].option_2 != -1) {
+            symbol->option_2 = data[i].option_2;
+        }
+
+        int length = strlen(data[i].data);
+
+        ret = ZBarcode_Encode(symbol, data[i].data, length);
+        assert_equal(ret, data[i].ret_encode, "i:%d ZBarcode_Encode ret %d != %d (%s)\n", i, ret, data[i].ret_encode, symbol->errtxt);
+
+        // Rows that expect an encode error set ret_vector to -1 and skip the size checks
+        if (data[i].ret_vector != -1) {
+            ret = ZBarcode_Buffer_Vector(symbol, 0);
+            assert_equal(ret, data[i].ret_vector, "i:%d ZBarcode_Buffer_Vector ret %d != %d\n", i, ret, data[i].ret_vector);
+            assert_equal(symbol->width, data[i].expected_size, "i:%d symbol->width %d != %d\n", i, symbol->width, data[i].expected_size);
+            assert_equal(symbol->rows, data[i].expected_size, "i:%d symbol->rows %d != %d\n", i, symbol->rows, data[i].expected_size);
+        }
+
+        ZBarcode_Delete(symbol);
+    }
+
+    testFinish();
+}
+
+// Checks how BARCODE_HANXIN encodes various inputs: for each row the symbol is encoded with the
+// given input mode and ECI, with ZINT_DEBUG_TEST set so that the codeword dump is left in
+// `symbol->errtxt`. Unless TEST_INPUT_GENERATE_EXPECTED is defined (in which case regenerated table
+// rows are printed instead), the resulting ECI and the dump are compared against the table.
+// The `comment` column names the expected mode sequence.
+static void test_input(void)
+{
+    testStart("");
+
+    int ret;
+    struct item {
+        int input_mode;
+        int eci;
+        unsigned char* data;
+        int length;
+        int ret;
+        int expected_eci;
+        char* expected;
+        char* comment;
+    };
+    // é U+00E9 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 18030 0xA8A6, UTF-8 C3A9
+    // β U+03B2 in ISO 8859-7 Greek 0xE2 (but not other ISO 8859 or Win page), in GB 18030 0xA6C2, UTF-8 CEB2
+    // ÿ U+00FF in ISO 8859-1 0xFF, not in GB 18030, outside first byte and second byte range, UTF-8 C3BF
+    // 啊 U+554A GB 18030 Region One 0xB0A1, UTF-8 E5958A
+    // 亍 U+4E8D GB 18030 Region Two 0xD8A1, UTF-8 E4BA8D
+    // 齄 U+9F44 GB 18030 Region Two 0xF7FE, UTF-8 E9BD84
+    // 丂 U+4E02 GB 18030 2-byte Region 0x8140, UTF-8 E4B882
+    // PAD U+0080 GB 18030 4-byte Region 0x81308130, UTF-8 C280 (\302\200)
+    // REPLACEMENT CHARACTER U+FFFD GB 18030 4-byte Region 0x8431A437, UTF-8 EFBFBD (\357\277\275)
+    struct item data[] = {
+        /*  0*/ { UNICODE_MODE, 0, "é", -1, 0, 0, "30 00 F4 80 00 00 00 00 00", "B1 (ISO 8859-1)" },
+        /*  1*/ { UNICODE_MODE, 3, "é", -1, 0, 3, "80 33 00 0F 48 00 00 00 00", "ECI-3 B1 (ISO 8859-1)" },
+        /*  2*/ { UNICODE_MODE, 29, "é", -1, 0, 29, "81 D4 FC FF FF 00 00 00 00", "ECI-29 H(1)1 (GB 18030) (Region One)" },
+        /*  3*/ { UNICODE_MODE, 26, "é", -1, 0, 26, "81 A4 70 2F FF 00 00 00 00", "ECI-26 H(1)1 (UTF-8) (Region One)" },
+        /*  4*/ { DATA_MODE, 0, "é", -1, 0, 0, "47 02 FF F0 00 00 00 00 00", "H(1)1 (UTF-8) (Region One)" },
+        /*  5*/ { DATA_MODE, 0, "\351", -1, 0, 0, "30 00 F4 80 00 00 00 00 00", "B1 (ISO 8859-1) (0xE9)" },
+        /*  6*/ { UNICODE_MODE, 0, "β", -1, 0, 0, "30 01 53 61 00 00 00 00 00", "B2 (GB 18030) (2-byte Region)" },
+        /*  7*/ { UNICODE_MODE, 9, "β", -1, 0, 9, "80 93 00 0F 10 00 00 00 00", "ECI-9 B1 (ISO 8859-7)" },
+        /*  8*/ { UNICODE_MODE, 29, "β", -1, 0, 29, "81 D3 00 15 36 10 00 00 00", "ECI-29 B2 (GB 18030) (2-byte Region)" },
+        /*  9*/ { UNICODE_MODE, 26, "β", -1, 0, 26, "81 A4 B1 5F FF 00 00 00 00", "ECI-26 H(1)1 (UTF-8) (Region One)" },
+        /* 10*/ { DATA_MODE, 0, "β", -1, 0, 0, "4B 15 FF F0 00 00 00 00 00", "H(1)1 (UTF-8) (Region One)" },
+        /* 11*/ { UNICODE_MODE, 0, "ÿ", -1, 0, 0, "30 00 FF 80 00 00 00 00 00", "B1 (ISO 8859-1)" },
+        /* 12*/ { UNICODE_MODE, 0, "ÿÿÿ", -1, 0, 0, "30 01 FF FF FF 80 00 00 00", "B3 (ISO 8859-1)" },
+        /* 13*/ { UNICODE_MODE, 0, "\302\200", -1, 0, 0, "70 00 00 00 00 00 00 00 00", "H(f)1 (GB 18030) (4-byte Region)" },
+        /* 14*/ { UNICODE_MODE, 0, "\302\200\357\277\275", -1, 0, 0, "70 00 00 38 26 7E 40 00 00", "H(f)2 (GB 18030) (both 4-byte Region)" },
+        /* 15*/ { UNICODE_MODE, 0, "啊亍齄丂\302\200", -1, 0, 0, "64 68 50 3C AC 28 80 00 FF FE E0 00 00 00 00 00 00", "H(d)4 H(f)1 (GB 18030)" },
+        /* 16*/ { DATA_MODE, 0, "\177\177", -1, 0, 0, "2F BD F7 F0 00 00 00 00 00", "T2 (ASCII)" },
+        /* 17*/ { DATA_MODE, 0, "\177\177\177", -1, 0, 0, "2F BD F7 DF C0 00 00 00 00", "T3 (ASCII)" },
+        /* 18*/ { UNICODE_MODE, 0, "123", -1, 0, 0, "11 EF FF 00 00 00 00 00 00", "N3 (ASCII)" },
+        /* 19*/ { UNICODE_MODE, 0, "12345", -1, 0, 0, "11 EC 2D FF 80 00 00 00 00", "N5 (ASCII)" },
+        /* 20*/ { UNICODE_MODE, 0, "Aa%$Bb9", -1, 0, 0, "22 A4 FA 18 3E 2E 52 7F 00", "T7 (ASCII)" },
+        /* 21*/ { UNICODE_MODE, 0, "Summer Palace Ticket for 6 June 2015 13:00;2015年6月6日夜01時00分PM頤和園のチケット;2015년6월6일13시오후여름궁전티켓.2015年6月6号下午13:00的颐和园门票;", -1, 0, 0, "(189) 27 38 C3 0A 35 F9 CF 99 92 F9 26 A3 E7 3E 76 C9 AE A3 7F CC 08 04 0C CD EE 44 06 C4 09 4E AD", "T20 B64 N4 H(f)1 T1 H(f)1 T1 H(f)1 T2 H(f)9 B35 (GB 18030)" },
+        /* 22*/ { UNICODE_MODE, 0, "\000\014\033 #/059:<@AMZ", 15, 0, 0, "2F 80 31 B7 1F AF E0 05 27 EB 2E CB E2 96 8F F0 00", "T15 (ASCII)" },
+        /* 23*/ { UNICODE_MODE, 0, "Z[\\`alz{~\177", -1, 0, 0, "28 FE CF 4E 3E 92 FF 7E E7 CF 7F 00 00", "T10 (ASCII)" },
+    };
+    int data_size = sizeof(data) / sizeof(struct item);
+
+    char escaped[1024];
+
+    for (int i = 0; i < data_size; i++) {
+
+        struct zint_symbol* symbol = ZBarcode_Create();
+        assert_nonnull(symbol, "Symbol not created\n");
+
+        symbol->symbology = BARCODE_HANXIN;
+        symbol->input_mode = data[i].input_mode;
+        symbol->eci = data[i].eci;
+        symbol->debug = ZINT_DEBUG_TEST; // Needed to get codeword dump in errtxt
+
+        // A length of -1 in the table means "use strlen() of the data" (row 22 has an embedded NUL so gives its length)
+        int length = data[i].length == -1 ? strlen(data[i].data) : data[i].length;
+
+        ret = ZBarcode_Encode(symbol, data[i].data, length);
+        assert_equal(ret, data[i].ret, "i:%d ZBarcode_Encode ret %d != %d (%s)\n", i, ret, data[i].ret, symbol->errtxt);
+
+        #ifdef TEST_INPUT_GENERATE_EXPECTED
+        printf("        /*%3d*/ { %s, %d, \"%s\", %s, %d, \"%s\", \"%s\" },\n",
+                i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
+                ret < 5 ? symbol->eci : -1, symbol->errtxt, data[i].comment);
+        #else
+        if (ret < 5) {
+
+            assert_equal(symbol->eci, data[i].expected_eci, "i:%d eci %d != %d\n", i, symbol->eci, data[i].expected_eci);
+            assert_zero(strcmp(symbol->errtxt, data[i].expected), "i:%d strcmp(%s, %s) != 0\n", i, symbol->errtxt, data[i].expected);
+        }
+        #endif
+
+        ZBarcode_Delete(symbol);
+    }
+
+    testFinish();
+}
+/* Encodes each entry of the table below as a Han Xin symbol (BARCODE_HANXIN)
+ * and checks the return code, the symbol's rows and width, and the generated
+ * modules (via testUtilModulesCmp) against the expected '0'/'1' string.
+ * Building with TEST_ENCODE_GENERATE_EXPECTED defined prints each entry in
+ * the table's own source layout instead of checking it. */
+static void test_encode(void)
+{
+    testStart("");
+
+    int ret;
+    struct item {
+        unsigned char* data; /* NUL-terminated input; its length is taken with strlen() */
+        int input_mode; /* copied to symbol->input_mode */
+        int option_1; /* copied to symbol->option_1 unless -1 */
+        int option_2; /* copied to symbol->option_2 unless -1 */
+        int ret; /* expected return value of ZBarcode_Encode() */
+        /* Expected symbol dimensions, free-text note and module pattern */
+        int expected_rows;
+        int expected_width;
+        char* comment; /* only echoed when generating expected output */
+        char* expected; /* one '0'/'1' character per module, rows concatenated */
+    };
+    struct item data[] = {
+        /*  0*/ { "1234", UNICODE_MODE, -1, -1, 0, 23, 23, "",
+                    "11111110101000101111111"
+                    "10000000010001000000001"
+                    "10111110101110001111101"
+                    "10100000101011000000101"
+                    "10101110010011101110101"
+                    "10101110101111101110101"
+                    "10101110011010001110101"
+                    "00000000110001100000000"
+                    "00010101111011001010101"
+                    "01001010101101000111110"
+                    "10101101001010101010100"
+                    "11011101010100100010010"
+                    "10101010010011101010100"
+                    "01010100111110000011110"
+                    "10101010001011110101000"
+                    "00000000110111100000000"
+                    "11111110011001001110101"
+                    "00000010110000101110101"
+                    "11111010101110001110101"
+                    "00001010010110100000101"
+                    "11101010001100101111101"
+                    "11101010000011000000001"
+                    "11101010101010101111111"
+               },
+        /*  1*/ { "1234567890", UNICODE_MODE, 1, 1, 0, 23, 23, "**NOT SAME** as ISO 20830 Draft K.1 Example 1, uses masking pattern 2 instead of pattern 1; however matches pattern 2 example (excluding Function Info)",
+                    "11111110001011101111111"
+                    "10000000100000000000001"
+                    "10111110011111001111101"
+                    "10100000000111000000101"
+                    "10101110011110101110101"
+                    "10101110011111101110101"
+                    "10101110101111101110101"
+                    "00000000001100000000000"
+                    "00010101011111010010101"
+                    "01010001100001001001101"
+                    "00100100100100101011100"
+                    "11111000010111111111111"
+                    "01001001001001001001000"
+                    "11110010100100110010000"
+                    "10101001011111010101000"
+                    "00000000001000000000000"
+                    "11111110110000101110101"
+                    "00000010111111001110101"
+                    "11111010101001001110101"
+                    "00001010000011000000101"
+                    "11101010010111001111101"
+                    "11101010001001100000001"
+                    "11101010100100001111111"
+               },
+        /*  2*/ { "1234567890ABCDEFGabcdefg,Han Xin Code", UNICODE_MODE, 3, 10, 0, 41, 41, "Same as ISO 20830 Draft K.2 Example 2 (happens to use same mask pattern, 2)",
+                    "11111110001011000000100000010101101111111"
+                    "10000000001011110010000000011010100000001"
+                    "10111110111111111010111011101111101111101"
+                    "10100000101001001001001001001001100000101"
+                    "10101110000100100101010010010111001110101"
+                    "10101110010111110111111011101100101110101"
+                    "10101110101111001001011101110011001110101"
+                    "00000000011001100100100100100100100000000"
+                    "00011110111111111111111001101111110010101"
+                    "10110011011100110010001001100000001001001"
+                    "11001100100100100100100100100100100100100"
+                    "11111111001110101101011011110011011110110"
+                    "10100001001001000001100001010010001001001"
+                    "10100100100100100100100101100100001110011"
+                    "10011111100010110000001111111111101000001"
+                    "10110000001001001001001001001001001001001"
+                    "10101011011110000011001101111001100100100"
+                    "11111111000110010101111011111111111111111"
+                    "11001001001001000110011001110111100000010"
+                    "10111110100100100100100110001110011011000"
+                    "11111111111111111111111111111111111111101"
+                    "00000000000000000000101001001001001011000"
+                    "01110110101001001010101000010110100100100"
+                    "11111111101110010010100011111111111111111"
+                    "01001001001001001100110001101111011101011"
+                    "00101111100100100100100100001101001101110"
+                    "11111111111111111110111111111111111001111"
+                    "10010110100000101110111010011001001001001"
+                    "00101001101100100010100100100100100100100"
+                    "11111111111110110000111000110000110000101"
+                    "00000001001001001000101011011100000100101"
+                    "00100100100100100100100100100100101011111"
+                    "10101001100101110100111011100010101111000"
+                    "00000000110001001000101001001111000000000"
+                    "11111110001011100100100100100100101110101"
+                    "00000010111111111110111111111111001110101"
+                    "11111010001001000000111111111011001110101"
+                    "00001010100101111000101010100100100000101"
+                    "11101010111111111100101100001111101111101"
+                    "11101010101001001000101001001001000000001"
+                    "11101010100100100100111111111111001111111"
+               },
+        /*  3*/ { "Summer Palace Ticket for 6 June 2015 13:00;2015年6月6日夜01時00分PM頤和園のチケット;2015년6월6일13시오후여름궁전티켓.2015年6月6号下午13:00的颐和园门票;", UNICODE_MODE, 2, 17, 0, 55, 55, "**NOT SAME** as ISO 20830 Draft K.3 Example 3, different encoding modes; if same encoding modes forced, uses masking pattern 1 instead of pattern 2, but matches pattern 1 example (excluding Function Info)",
+                    "1111111001111111111011100100110101101010101100101111111"
+                    "1000000000000000001100011000011001000010101111100000001"
+                    "1011111011110010101110010110100000111010101101101111101"
+                    "1010000001010100001101011100001101100100010100000000101"
+                    "1010111000011011001111001000010010110010101010001110101"
+                    "1010111011010101001101010100001010011001000110001110101"
+                    "1010111001101001001001110010001001100100001001001110101"
+                    "0000000011100111101101111010001010001100110011000000000"
+                    "0010010101010100001100111100101010101111010001101010101"
+                    "1111111011101110101000110010100010000101010101010101010"
+                    "1010100111011011001101110110100101100011101000111110110"
+                    "0011100111010001101001111011100001001111110010000011001"
+                    "0011000100100010101011000001101101010000001010011010000"
+                    "1100111101010101001101010101010100010100001110110101000"
+                    "0000000100111001001010101100101100000001011111001110100"
+                    "1101000010010001001101110001010101101100101110001110111"
+                    "0101010101011100001110010001111110101010101010101010101"
+                    "0001011000101000101011010011111000010010000011110101100"
+                    "1001000100000110001111111111111111111000011101001110001"
+                    "0010110011110110100000000000000000001101011101001100000"
+                    "1111111100000100100000111011111001001111011001011100101"
+                    "1101010101010101010101010100111011001110101010100111101"
+                    "1101010011001001100110000001001000101000001011111110000"
+                    "1000111001010111001010111111101100101101010000111001101"
+                    "1110101100000011001001001011001010101010101010101010101"
+                    "1101001110000100101100101011001100001001110111011001000"
+                    "1001101010000000001000010010101100001011101001110010101"
+                    "1101001100101100100011100010110000101101110100110010110"
+                    "1000001010101010101010101010111101001010101100011001100"
+                    "1101000101010110010101100101011000101001001000001000001"
+                    "1010011101101101010001000111011011101011111010101111001"
+                    "1101101100100000010010000011011001001101010101010101010"
+                    "1010111000110111100101100011101010001010001001101110011"
+                    "1100110000001101010011010000001011101011110011010101001"
+                    "1011101111011001101010110111101100101100110001101100101"
+                    "1000110111010101010101010101010100001101011110111010101"
+                    "1111111111111111111110001011001010101111111111111111111"
+                    "0000000000000000001110000010010000000000000000000000001"
+                    "0100011110001011001010000111010001001101001001010010101"
+                    "0101011111000100101000110110001100101111110100110101001"
+                    "0100011010101010101010100110110101100111011001011000101"
+                    "1000110011110001101000011011111101100010100001110000101"
+                    "1111001110101101101000100011011010001010011000010000001"
+                    "0001110000001011001100000011111101010101010101010101001"
+                    "1010011101001101001101011101000010010100010000001110101"
+                    "1101100101011001101001000100010000001101111001000111001"
+                    "1010101010110101101010010110010011001111101011010100100"
+                    "0000000000000100001001100011100100010101010101100000000"
+                    "1111111000101010101010101110111100111011011111001110101"
+                    "0000001000101011001010000100010101001110001111101110101"
+                    "1111101000100001101101000001100001001011101001001110101"
+                    "0000101001000100101010110010010101010110011001000000101"
+                    "1110101010111000101011111101011101111100001110101111101"
+                    "1110101011010101001101010101010101000000001010000000001"
+                    "1110101011010001001111111111111111101000001110001111111"
+               },
+    };
+    int data_size = sizeof(data) / sizeof(struct item);
+
+    for (int i = 0; i < data_size; i++) {
+
+        struct zint_symbol* symbol = ZBarcode_Create();
+        assert_nonnull(symbol, "Symbol not created\n");
+
+        symbol->symbology = BARCODE_HANXIN;
+        symbol->input_mode = data[i].input_mode;
+        if (data[i].option_1 != -1) { /* -1 leaves whatever ZBarcode_Create() set */
+            symbol->option_1 = data[i].option_1;
+        }
+        if (data[i].option_2 != -1) { /* -1 leaves whatever ZBarcode_Create() set */
+            symbol->option_2 = data[i].option_2;
+        }
+
+        int length = strlen(data[i].data);
+
+        ret = ZBarcode_Encode(symbol, data[i].data, length);
+        assert_equal(ret, data[i].ret, "i:%d ZBarcode_Encode ret %d != %d (%s)\n", i, ret, data[i].ret, symbol->errtxt);
+        /* Generate mode: print this entry as table source rather than checking it */
+        #ifdef TEST_ENCODE_GENERATE_EXPECTED
+        printf("        /*%3d*/ { \"%s\", %s, %d, %d, %s, %d, %d, \"%s\",\n",
+                i, data[i].data, testUtilInputModeName(data[i].input_mode), data[i].option_1, data[i].option_2, testUtilErrorName(data[i].ret),
+                symbol->rows, symbol->width, data[i].comment);
+        testUtilModulesDump(symbol, "                    ", "\n");
+        printf("               },\n");
+        #else
+        if (ret < 5) { /* NOTE(review): presumably codes below 5 are warnings rather than errors -- confirm against zint.h */
+            assert_equal(symbol->rows, data[i].expected_rows, "i:%d symbol->rows %d != %d (%s)\n", i, symbol->rows, data[i].expected_rows, data[i].data);
+            assert_equal(symbol->width, data[i].expected_width, "i:%d symbol->width %d != %d (%s)\n", i, symbol->width, data[i].expected_width, data[i].data);
+            /* Modules are only compared when encoding returned exactly 0 */
+            if (ret == 0) {
+                int width, row;
+                ret = testUtilModulesCmp(symbol, data[i].expected, &width, &row); /* ret is reused for the comparison result */
+                assert_zero(ret, "i:%d testUtilModulesCmp ret %d != 0 width %d row %d (%s)\n", i, ret, width, row, data[i].data);
+            }
+        }
+        #endif
+
+        ZBarcode_Delete(symbol);
+    }
+
+    testFinish();
+}
+
+/* Test program entry point: runs the Han Xin option, input and encode test
+ * groups in that order, then calls testReport(). Always returns 0. */
+int main(void)
+{
+    test_options();
+    test_input();
+    test_encode();
+
+    testReport();
+
+    return 0;
+}
--- a/backend/tests/test_maxicode.c
+++ b/backend/tests/test_maxicode.c
@ -107,7 +107,7 @@ static void test_best_supported_set(void)

        #ifdef TEST_GENERATE_EXPECTED
        printf("        /*%2d*/ { %s, \"%s\", %d, %.0f, %.0f, %d, %d, %d, \"%s\",\n",
-                i, testUtilBarcodeName(data[i].symbology), testUtilEscape(data[i].data, escaped_data, sizeof(escaped_data)), ret,
+                i, testUtilBarcodeName(data[i].symbology), testUtilEscape(data[i].data, length, escaped_data, sizeof(escaped_data)), ret,
                data[i].w, data[i].h, data[i].ret_vector, symbol->rows, symbol->width, data[i].comment);
        testUtilModulesDump(symbol, "                    ",  "\n");
        printf("                },\n");
--- a/backend/tests/test_qr.c
+++ b/backend/tests/test_qr.c
@ -255,7 +255,7 @@ static void test_qr_input(void)

        #ifdef TEST_QR_INPUT_GENERATE_EXPECTED
        printf("        /*%3d*/ { %s, %d, \"%s\", %s, %d, \"%s\", \"%s\" },\n",
-                i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
+                i, testUtilInputModeName(data[i].input_mode), data[i].eci, testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
                ret < 5 ? symbol->eci : -1, symbol->errtxt, data[i].comment);
        #else
        if (ret < 5) {
@ -348,7 +348,7 @@ static void test_microqr_input(void)

        #ifdef TEST_MICROQR_INPUT_GENERATE_EXPECTED
        printf("        /*%3d*/ { %s, \"%s\", %s, \"%s\", \"%s\" },\n",
-                i, testUtilInputModeName(data[i].input_mode), testUtilEscape(data[i].data, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
+                i, testUtilInputModeName(data[i].input_mode), testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
                symbol->errtxt, data[i].comment);
        #else
        if (ret < 5) {
@ -404,7 +404,7 @@ static void test_upnqr_input(void)

        #ifdef TEST_UPNQR_INPUT_GENERATE_EXPECTED
        printf("        /*%3d*/ { %s, \"%s\", %s, \"%s\", \"%s\" },\n",
-                i, testUtilInputModeName(data[i].input_mode), testUtilEscape(data[i].data, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
+                i, testUtilInputModeName(data[i].input_mode), testUtilEscape(data[i].data, length, escaped, sizeof(escaped)), testUtilErrorName(data[i].ret),
                symbol->errtxt, data[i].comment);
        #else
        if (ret < 5) {
--- a/backend/tests/test_sjis.c
+++ b/backend/tests/test_sjis.c
@ -34,10 +34,10 @@
 #include "../sjis.h"

 // As control convert to Shift JIS using simple table generated from https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT plus simple processing
-static int sjis_wctomb_zint2(unsigned char* r, unsigned int wc, size_t n)
+static int sjis_wctomb_zint2(unsigned int* r, unsigned int wc)
 {
    if (wc < 0x20 || wc == 0x7F) {
-        r[0] = wc;
+        *r = wc;
        return 1;
    }
    // Shortcut
@ -45,43 +45,34 @@ static int sjis_wctomb_zint2(unsigned char* r, unsigned int wc, size_t n)
        return 0;
    }
    if (wc >= 0xE000 && wc <= 0xE757) { // PUA mappings, not in SHIFTJIS.TXT
-        unsigned short c;
        if (wc <= 0xE0BB) {
-            c = wc - 0xE000 + 0xF040 + (wc >= 0xE000 + 0x3F);
+            *r = wc - 0xE000 + 0xF040 + (wc >= 0xE000 + 0x3F);
        } else if (wc <= 0xE177) {
-            c = wc - 0xE0BC + 0xF140 + (wc >= 0xE0BC + 0x3F);
+            *r = wc - 0xE0BC + 0xF140 + (wc >= 0xE0BC + 0x3F);
        } else if (wc <= 0xE233) {
-            c = wc - 0xE178 + 0xF240 + (wc >= 0xE178 + 0x3F);
+            *r = wc - 0xE178 + 0xF240 + (wc >= 0xE178 + 0x3F);
        } else if (wc <= 0xE2EF) {
-            c = wc - 0xE234 + 0xF340 + (wc >= 0xE234 + 0x3F);
+            *r = wc - 0xE234 + 0xF340 + (wc >= 0xE234 + 0x3F);
        } else if (wc <= 0xE3AB) {
-            c = wc - 0xE2F0 + 0xF440 + (wc >= 0xE2F0 + 0x3F);
+            *r = wc - 0xE2F0 + 0xF440 + (wc >= 0xE2F0 + 0x3F);
        } else if (wc <= 0xE467) {
-            c = wc - 0xE3AC + 0xF540 + (wc >= 0xE3AC + 0x3F);
+            *r = wc - 0xE3AC + 0xF540 + (wc >= 0xE3AC + 0x3F);
        } else if (wc <= 0xE523) {
-            c = wc - 0xE468 + 0xF640 + (wc >= 0xE468 + 0x3F);
+            *r = wc - 0xE468 + 0xF640 + (wc >= 0xE468 + 0x3F);
        } else if (wc <= 0xE5DF) {
-            c = wc - 0xE524 + 0xF740 + (wc >= 0xE524 + 0x3F);
+            *r = wc - 0xE524 + 0xF740 + (wc >= 0xE524 + 0x3F);
        } else if (wc <= 0xE69B) {
-            c = wc - 0xE5E0 + 0xF840 + (wc >= 0xE5E0 + 0x3F);
+            *r = wc - 0xE5E0 + 0xF840 + (wc >= 0xE5E0 + 0x3F);
        } else {
-            c = wc - 0xE69C + 0xF940 + (wc >= 0xE69C + 0x3F);
+            *r = wc - 0xE69C + 0xF940 + (wc >= 0xE69C + 0x3F);
        }
-        r[0] = (c >> 8);
-        r[1] = c & 0xFF;
        return 2;
    }
-    int tab_length = sizeof(test_sjis_tab) / sizeof(unsigned short);
-    for (int i = 0; i < tab_length; i += 2) {
+    int tab_length = sizeof(test_sjis_tab) / sizeof(unsigned int);
+    for (int i = test_sjis_tab_ind[wc >> 12]; i < tab_length; i += 2) {
        if (test_sjis_tab[i + 1] == wc) {
-            unsigned short c = test_sjis_tab[i];
-            if (c < 0xFF) {
-                r[0] = c;
-                return 1;
-            }
-            r[0] = (c >> 8);
-            r[1] = c & 0xFF;
-            return 2;
+            *r = test_sjis_tab[i];
+            return *r > 0xFF ? 2 : 1;
        }
    }
    return 0;
@ -92,18 +83,15 @@ static void test_sjis_wctomb_zint(void)
    testStart("");

    int ret, ret2;
-    unsigned char buf[2], buf2[2];
    unsigned int val, val2;

    for (unsigned int i = 0; i < 0xFFFE; i++) {
        if (i >= 0xD800 && i <= 0xDFFF) { // UTF-16 surrogates
            continue;
        }
-        buf[0] = buf[1] = buf2[0] = buf2[1] = 0;
-        ret = sjis_wctomb_zint(buf, i, 2);
-        val = ret == 1 ? buf[0] : (buf[0] << 8) | buf[1];
-        ret2 = sjis_wctomb_zint2(buf2, i, 2);
-        val2 = ret2 == 1 ? buf2[0] : (buf2[0] << 8) | buf2[1];
+        val = val2 = 0;
+        ret = sjis_wctomb_zint(&val, i);
+        ret2 = sjis_wctomb_zint2(&val2, i);
        if (i == 0xFF3C) { // Extra mapping full-width reverse solidus U+FF3C to 0x815F, duplicate of U+005C (backslash)
            assert_equal(ret, 2, "i:%d 0x%04X ret %d != 2, val 0x%04X\n", i, i, ret, val);
            assert_equal(val, 0x815F, "i:%d 0x%04X val 0x%04X != 0x815F\n", i, i, val);
--- a/backend/tests/test_sjis_tab.h
+++ b/backend/tests/test_sjis_tab.h
@ -1,5 +1,5 @@
 /* Generated by gen_test_tab.php from SHIFTJIS.TXT */
-static const unsigned short test_sjis_tab[] = {
+static const unsigned int test_sjis_tab[] = {
    0x0020, 0x0020,
    0x0021, 0x0021,
    0x0022, 0x0022,
@ -7038,3 +7038,22 @@ static const unsigned short test_sjis_tab[] = {
    0x8150, 0xFFE3,
    0x818F, 0xFFE5,
 };
+
+static const unsigned int test_sjis_tab_ind[] = {
+0,
+440,
+440,
+656,
+1054,
+1466,
+4228,
+7042,
+9404,
+11812,
+13766,
+13766,
+13766,
+13766,
+13766,
+13766,
+};
--- a/backend/tests/testcommon.c
+++ b/backend/tests/testcommon.c
@ -253,6 +253,7 @@ char* testUtilBarcodeName(int symbology) {
        { BARCODE_GRIDMATRIX, "BARCODE_GRIDMATRIX", 142 },
        { BARCODE_UPNQR, "BARCODE_UPNQR", 143 },
        { BARCODE_ULTRA, "BARCODE_ULTRA", 144 },
+        { BARCODE_RMQR, "BARCODE_RMQR", 145 },
    };
    int data_size = sizeof(data) / sizeof(struct item);

@ -365,13 +366,14 @@ int testUtilIsValidUTF8(const unsigned char str[], const size_t length) {
    return state == 0;
 }

-char* testUtilEscape(char* buffer, char* escaped, int escaped_size)
+char* testUtilEscape(char* buffer, int length, char* escaped, int escaped_size)
 {
    int i;
    unsigned char* b = buffer;
-    int non_utf8 = !testUtilIsValidUTF8(buffer, strlen(buffer));
+    unsigned char* be = buffer + length;
+    int non_utf8 = !testUtilIsValidUTF8(buffer, length);

-    for (i = 0; i < escaped_size && *b; b++) {
+    for (i = 0; b < be && i < escaped_size; b++) {
        if (non_utf8 || *b < ' ' || *b == '\177') {
            if (i < escaped_size - 4) {
                sprintf(escaped + i, "\\%.3o", *b);
@ -383,6 +385,12 @@ char* testUtilEscape(char* buffer, char* escaped, int escaped_size)
                escaped[i + 1] = *b;
            }
            i += 2;
+        } else if (b + 1 < be && *b == 0xC2 && *(b + 1) < 0xA0) {
+            if (i < escaped_size - 8) {
+                sprintf(escaped + i, "\\%.3o\\%.3o", *b, *(b + 1));
+            }
+            i += 8;
+            b++;
        } else {
            escaped[i++] = *b;
        }
--- a/backend/tests/testcommon.h
+++ b/backend/tests/testcommon.h
@ -70,7 +70,7 @@ char* testUtilBarcodeName(int symbology);
 char* testUtilErrorName(int error_number);
 char* testUtilInputModeName(int input_mode);
 int testUtilDAFTConvert(const struct zint_symbol* symbol, char* buffer, int buffer_size);
-char* testUtilEscape(char* buffer, char* escaped, int escaped_size);
+char* testUtilEscape(char* buffer, int length, char* escaped, int escaped_size);
 char* testUtilReadCSVField(char* buffer, char* field, int field_size);
 int testUtilSymbolCmp(const struct zint_symbol* a, const struct zint_symbol* b);
 struct zint_vector* testUtilVectorCpy(const struct zint_vector* in);
--- a/backend/tests/tools/data/GB18030.TXT.README
+++ b/backend/tests/tools/data/GB18030.TXT.README
@ -0,0 +1,3 @@
+# GB18030.TXT is not included as it is 21MB in size. It can be downloaded from
+# https://haible.de/bruno/charsets/conversion-tables/GB18030.html
+# The version used is libiconv-1.11/GB18030.TXT
--- a/backend/tests/tools/gen_test_tab.php
+++ b/backend/tests/tools/gen_test_tab.php
@ -4,6 +4,20 @@
    libzint - the open source barcode library
    Copyright (C) 2008-2019 Robin Stuart <rstuart114@gmail.com>
 */
+/* To create backend/tests/test_sjis_tab.h (from backend/tests/build directory):
+ *
+ *   php ../tools/gen_test_tab.php
+ *
+ * To create backend/tests/test_gb2312_tab.h:
+ *
+ *   php ../tools/gen_test_tab.php -f GB2312.TXT -s gb2312_tab
+ *
+ * To create backend/tests/test_gb18030_tab.h (note that backend/tests/tools/data/GB18030.TXT
+ * will have to be downloaded first from https://haible.de/bruno/charsets/conversion-tables/GB18030.html
+ * using the version libiconv-1.11/GB18030.TXT):
+ *
+ *   php ../tools/gen_test_tab.php -f GB18030.TXT -s gb18030_tab
+ */
 /* vim: set ts=4 sw=4 et : */

 $basename = basename(__FILE__);
@ -35,7 +49,10 @@ foreach ($lines as $line) {
    if ($line === '' || strncmp($line, '0x', 2) !== 0) {
        continue;
    }
-    $tab_lines[] = preg_replace_callback('/^0x([0-9A-F]{2,4})[ \t]+0x([0-9A-F]{4}).*$/', function ($matches) {
+    if (preg_match('/^0x([0-9A-F]{2,8})[ \t]+0x([0-9A-F]{5})/', $line)) { // Exclude U+10000..10FFFF to save space
+        continue;
+    }
+    $tab_lines[] = preg_replace_callback('/^0x([0-9A-F]{2,8})[ \t]+0x([0-9A-F]{4}).*$/', function ($matches) {
        global $sort;
        $mb = hexdec($matches[1]);
        $unicode = hexdec($matches[2]);
@ -50,8 +67,20 @@ array_multisort($sort, $tab_lines);

 $out = array();
 $out[] = '/* Generated by ' . $basename . ' from ' . $file_name . ' */';
-$out[] = 'static const unsigned short test_' . $suffix_name . '[] = {';
+$out[] = 'static const unsigned int test_' . $suffix_name . '[] = {';
 $out = array_merge($out, $tab_lines);
 $out[] = '};';

+$out[] = '';
+$out[] = 'static const unsigned int test_' . $suffix_name . '_ind[] = {';
+$first = 0;
+foreach ($sort as $ind => $unicode) {
+    $div = (int)($unicode / 0x1000);
+    while ($div >= $first) {
+        $out[] = ($ind * 2) . ',';
+        $first++;
+    }
+}
+$out[] = '};';
+
 file_put_contents($out_dirname . '/test_' . $suffix_name . '.h', implode("\n", $out) . "\n");
--- a/backend_qt/backend_qt.pro
+++ b/backend_qt/backend_qt.pro
@ -60,6 +60,7 @@ HEADERS +=  ../backend/aztec.h \
            ../backend/dmatrix.h \
            ../backend/eci.h \
            ../backend/font.h \
+            ../backend/gb18030.h \
            ../backend/gb2312.h \
            ../backend/gridmtx.h \
            ../backend/gs1.h \
@ -90,6 +91,7 @@ SOURCES += ../backend/2of5.c \
           ../backend/dotcode.c \
           ../backend/eci.c \
           ../backend/emf.c \
+           ../backend/gb18030.c \
           ../backend/gb2312.c \
           ../backend/general_field.c \
           ../backend/gif.c \
--- a/backend_qt/backend_vc8.pro
+++ b/backend_qt/backend_vc8.pro
@ -24,6 +24,7 @@ HEADERS +=  ../backend/aztec.h \
            ../backend/composite.h \
            ../backend/dmatrix.h \
            ../backend/font.h \
+            ../backend/gb18030.h \
            ../backend/gb2312.h \
            ../backend/gridmtx.h \
            ../backend/gs1.h \
@ -50,6 +51,8 @@ SOURCES += ../backend/2of5.c \
           ../backend/common.c \
           ../backend/composite.c \
           ../backend/dmatrix.c \
+           ../backend/gb18030.c \
+           ../backend/gb2312.c \
           ../backend/gridmtx.c \
           ../backend/gs1.c \
           ../backend/imail.c \
@ -63,7 +66,6 @@ SOURCES += ../backend/2of5.c \
           ../backend/ps.c \
           ../backend/qr.c \
           ../backend/reedsol.c \
-           ../backend/render.c \
           ../backend/rss.c \
           ../backend/svg.c \
           ../backend/telepen.c \
--- a/backend_tcl/configure.in
+++ b/backend_tcl/configure.in
@ -84,6 +84,7 @@ TEA_ADD_SOURCES([
 	../backend/dmatrix.c
 	../backend/dotcode.c
 	../backend/eci.c
+    ../backend/gb18030.c
    ../backend/gb2312.c
    ../backend/general_field.c
 	../backend/gif.c
--- a/backend_tcl/zint_tcl.dsp
+++ b/backend_tcl/zint_tcl.dsp
@ -156,6 +156,10 @@ SOURCE=..\backend\emf.c
 # End Source File
 # Begin Source File

+SOURCE=..\backend\gb18030.c
+# End Source File
+# Begin Source File
+
 SOURCE=..\backend\gb2312.c
 # End Source File
 # Begin Source File
--- a/docs/manual.txt
+++ b/docs/manual.txt
@ -2276,7 +2276,7 @@ under
 development, so it is recommended it should not yet be used for a production
 environment. The symbology is capable of encoding characters in the GB18030
 character set (up to 4-byte characters) and is also able to support the ECI
-mechanism. Han Xin does not support the encoding of GS-1 data.
+mechanism. Support for the encoding of GS-1 data has not yet been implemented.

 The size of the symbol can be specified using the --ver= option or setting
 option_2 to a value between 1 and 84 according to the following table.
@ -2383,10 +2383,6 @@ Mode  |  Recovery Capacity
 4     |  Approx 30%
 --------------------------

-It is not possible to select both symbol size and error correction capacity for
-the same symbol. If both options are selected then the error correction
-capacity selection will be ignored.
-
 6.7 Other Barcode-Like Markings
 -------------------------------
 6.7.1. Facing Identification Mark (FIM)
--- a/win32/libzint.vcxproj
+++ b/win32/libzint.vcxproj
@ -319,6 +319,7 @@
    <ClCompile Include="..\backend\dotcode.c" />
    <ClCompile Include="..\backend\eci.c" />
    <ClCompile Include="..\backend\emf.c" />
+    <ClCompile Include="..\backend\gb18030.c" />
    <ClCompile Include="..\backend\gb2312.c" />
    <ClCompile Include="..\backend\general_field.c" />
    <ClCompile Include="..\backend\gif.c" />
@ -340,7 +341,6 @@
    <ClCompile Include="..\backend\qr.c" />
    <ClCompile Include="..\backend\raster.c" />
    <ClCompile Include="..\backend\reedsol.c" />
-    <ClCompile Include="..\backend\render.c" />
    <ClCompile Include="..\backend\rss.c" />
    <ClCompile Include="..\backend\sjis.c" />
    <ClCompile Include="..\backend\svg.c" />
--- a/win32/zint_cmdline_vc6/zint_cmdline_vc6.dsp
+++ b/win32/zint_cmdline_vc6/zint_cmdline_vc6.dsp
@ -152,6 +152,10 @@ SOURCE=..\..\backend\emf.c
 # End Source File
 # Begin Source File

+SOURCE=..\..\backend\gb18030.c
+# End Source File
+# Begin Source File
+
 SOURCE=..\..\backend\gb2312.c
 # End Source File
 # Begin Source File