2016-08-16 23:43:41 +12:00
|
|
|
/* eci.c - Extended Channel Interpretations
|
|
|
|
|
|
|
|
libzint - the open source barcode library
|
2020-04-05 03:53:29 +12:00
|
|
|
Copyright (C) 2009 - 2020 Robin Stuart <rstuart114@gmail.com>
|
2016-08-16 23:43:41 +12:00
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions
|
|
|
|
are met:
|
|
|
|
|
2017-10-24 08:37:52 +13:00
|
|
|
1. Redistributions of source code must retain the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
2016-08-16 23:43:41 +12:00
|
|
|
2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in the
|
2017-10-24 08:37:52 +13:00
|
|
|
documentation and/or other materials provided with the distribution.
|
2016-08-16 23:43:41 +12:00
|
|
|
3. Neither the name of the project nor the names of its contributors
|
|
|
|
may be used to endorse or promote products derived from this software
|
2017-10-24 08:37:52 +13:00
|
|
|
without specific prior written permission.
|
2016-08-16 23:43:41 +12:00
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
|
|
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
2017-10-24 08:37:52 +13:00
|
|
|
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
2016-08-16 23:43:41 +12:00
|
|
|
SUCH DAMAGE.
|
|
|
|
*/
|
2019-12-19 13:37:55 +13:00
|
|
|
/* vim: set ts=4 sw=4 et : */
|
2016-08-16 23:43:41 +12:00
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include "eci.h"
|
2019-12-19 13:37:55 +13:00
|
|
|
#include "common.h"
|
2016-09-06 09:06:50 +12:00
|
|
|
#ifdef _MSC_VER
|
|
|
|
#include <malloc.h>
|
|
|
|
#endif
|
2016-08-16 23:43:41 +12:00
|
|
|
|
|
|
|
/* Convert Unicode to other character encodings */
|
2019-12-19 13:37:55 +13:00
|
|
|
INTERNAL int utf_to_eci(const int eci, const unsigned char source[], unsigned char dest[], size_t *length) {
|
2016-08-16 23:43:41 +12:00
|
|
|
int in_posn;
|
|
|
|
int out_posn;
|
|
|
|
int ext;
|
|
|
|
int done;
|
2018-01-22 00:50:49 +13:00
|
|
|
|
2020-10-01 00:19:12 +13:00
|
|
|
if (eci == 26 || eci == 899) {
|
|
|
|
/* Unicode or 8-bit binary data, do not process - just copy data across */
|
2019-09-02 07:23:15 +12:00
|
|
|
memcpy(dest, source, *length);
|
|
|
|
dest[*length] = '\0';
|
2016-08-16 23:43:41 +12:00
|
|
|
return 0;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
in_posn = 0;
|
|
|
|
out_posn = 0;
|
|
|
|
do {
|
|
|
|
/* Single byte (ASCII) character */
|
2017-09-11 03:03:09 +12:00
|
|
|
int bytelen = 1;
|
|
|
|
int glyph = (int) source[in_posn];
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
if ((source[in_posn] >= 0x80) && (source[in_posn] < 0xc0)) {
|
|
|
|
/* Something has gone wrong, abort */
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
if ((source[in_posn] >= 0xc0) && (source[in_posn] < 0xe0)) {
|
|
|
|
/* Two-byte character */
|
|
|
|
bytelen = 2;
|
|
|
|
glyph = (source[in_posn] & 0x1f) << 6;
|
|
|
|
|
2020-04-05 03:53:29 +12:00
|
|
|
if ((int) *length < (in_posn + 2)) {
|
2016-08-16 23:43:41 +12:00
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
if (source[in_posn + 1] > 0xc0) {
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
glyph += (source[in_posn + 1] & 0x3f);
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
if ((source[in_posn] >= 0xe0) && (source[in_posn] < 0xf0)) {
|
|
|
|
/* Three-byte character */
|
|
|
|
bytelen = 3;
|
|
|
|
glyph = (source[in_posn] & 0x0f) << 12;
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2020-04-05 03:53:29 +12:00
|
|
|
if ((int) *length < (in_posn + 2)) {
|
2016-08-16 23:43:41 +12:00
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2020-04-05 03:53:29 +12:00
|
|
|
if ((int) *length < (in_posn + 3)) {
|
2016-08-16 23:43:41 +12:00
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
if (source[in_posn + 1] > 0xc0) {
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
if (source[in_posn + 2] > 0xc0) {
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
glyph += (source[in_posn + 1] & 0x3f) << 6;
|
|
|
|
glyph += (source[in_posn + 2] & 0x3f);
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2019-09-02 07:23:15 +12:00
|
|
|
if (source[in_posn] >= 0xf0 || glyph > 0x2122) {
|
|
|
|
/* Not in any ISO 8859 or Windows page */
|
2016-08-16 23:43:41 +12:00
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
if (glyph < 128) {
|
|
|
|
dest[out_posn] = glyph;
|
|
|
|
} else {
|
|
|
|
done = 0;
|
|
|
|
for (ext = 0; ext < 128; ext++) {
|
|
|
|
switch (eci) {
|
|
|
|
case 3: // Latin-1
|
|
|
|
if (glyph == iso_8859_1[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 4: // Latin-2
|
|
|
|
if (glyph == iso_8859_2[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 5: // Latin-3
|
|
|
|
if (glyph == iso_8859_3[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 6: // Latin-4
|
|
|
|
if (glyph == iso_8859_4[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 7: // Latin/Cyrillic
|
|
|
|
if (glyph == iso_8859_5[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 8: // Latin/Arabic
|
|
|
|
if (glyph == iso_8859_6[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 9: // Latin/Greek
|
|
|
|
if (glyph == iso_8859_7[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 10: // Latin/Hebrew
|
|
|
|
if (glyph == iso_8859_8[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 11: // Latin-5
|
|
|
|
if (glyph == iso_8859_9[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 12: // Latin-6
|
|
|
|
if (glyph == iso_8859_10[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 13: // Latin/Thai
|
|
|
|
if (glyph == iso_8859_11[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 15: // Latin-7
|
|
|
|
if (glyph == iso_8859_13[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 16: // Latin-8
|
|
|
|
if (glyph == iso_8859_14[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 17: // Latin-9
|
|
|
|
if (glyph == iso_8859_15[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 18: // Latin-10
|
|
|
|
if (glyph == iso_8859_16[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 21: // Windows-1250
|
|
|
|
if (glyph == windows_1250[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 22: // Windows-1251
|
|
|
|
if (glyph == windows_1251[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 23: // Windows-1252
|
|
|
|
if (glyph == windows_1252[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 24: // Windows-1256
|
|
|
|
if (glyph == windows_1256[ext]) {
|
|
|
|
dest[out_posn] = ext + 128;
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2019-09-02 07:23:15 +12:00
|
|
|
if (done) {
|
|
|
|
break;
|
|
|
|
}
|
2016-08-16 23:43:41 +12:00
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
if (!(done)) {
|
|
|
|
return ZINT_ERROR_INVALID_DATA;
|
|
|
|
}
|
|
|
|
}
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
in_posn += bytelen;
|
|
|
|
out_posn++;
|
2020-04-05 03:53:29 +12:00
|
|
|
} while (in_posn < (int) *length);
|
2016-08-16 23:43:41 +12:00
|
|
|
dest[out_posn] = '\0';
|
|
|
|
*length = out_posn;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Find the lowest ECI mode which will encode a given set of Unicode text */
|
2019-12-19 13:37:55 +13:00
|
|
|
INTERNAL int get_best_eci(unsigned char source[], size_t length) {
|
2016-08-16 23:43:41 +12:00
|
|
|
int eci = 3;
|
|
|
|
|
|
|
|
#ifndef _MSC_VER
|
|
|
|
unsigned char local_source[length + 1];
|
|
|
|
#else
|
2016-08-23 04:58:32 +12:00
|
|
|
unsigned char *local_source = (unsigned char*) _alloca(length + 1);
|
2016-08-16 23:43:41 +12:00
|
|
|
#endif
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
do {
|
|
|
|
if (utf_to_eci(eci, source, local_source, &length) == 0) {
|
|
|
|
return eci;
|
2016-09-06 09:06:50 +12:00
|
|
|
}
|
2016-08-16 23:43:41 +12:00
|
|
|
eci++;
|
|
|
|
} while (eci < 25);
|
2016-09-06 09:06:50 +12:00
|
|
|
|
2016-08-16 23:43:41 +12:00
|
|
|
return 26; // If all of these fail, use Unicode!
|
|
|
|
}
|