From d0465375bb21e6624b3302848f8057f38ad82342 Mon Sep 17 00:00:00 2001 From: gitlost Date: Sun, 2 Mar 2025 20:50:55 +0000 Subject: [PATCH] Add convenience API funcs `ZBarcode_UTF8_To_ECI()` and `ZBarcode_Dest_Len_ECI()`, primarily for ZXingC++ but also useful in general --- ChangeLog | 6 +- backend/eci.c | 42 ++++- backend/eci_sb.h | 47 ++++- backend/library.c | 63 ++++++- backend/tests/test_eci.c | 18 +- backend/tests/test_ksx1001_tab.h | 33 ++++ backend/tests/test_library.c | 111 ++++++++++++ backend/tools/gen_eci_sb_h.php | 100 ++++++++++- backend/zint.h | 16 +- docs/manual.html | 295 +++++++++++++++++-------------- docs/manual.pmd | 43 ++++- docs/manual.txt | 40 ++++- 12 files changed, 647 insertions(+), 167 deletions(-) diff --git a/ChangeLog b/ChangeLog index ad625556..b1a429e7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -Version 2.15.0.9 (dev) not released yet (2025-02-28) +Version 2.15.0.9 (dev) not released yet (2025-03-02) ==================================================== Changes @@ -7,6 +7,10 @@ Changes - Add new `BARCODE_RAW_TEXT` option for `output_options` and new warning `ZINT_WARN_HRT_RAW_TEXT` for when set when outputting HRT +Changes +------- +- Add API funcs `ZBarcode_UTF8_To_ECI()` and `ZBarcode_Dest_Len_ECI()` + Bugs ---- - AZTEC: fix GS1 mode with Structured Append (wasn't outputting initial FNC1) diff --git a/backend/eci.c b/backend/eci.c index 16c77fde..582660f3 100644 --- a/backend/eci.c +++ b/backend/eci.c @@ -1,7 +1,7 @@ /* eci.c - Extended Channel Interpretations */ /* libzint - the open source barcode library - Copyright (C) 2009-2024 Robin Stuart + Copyright (C) 2009-2025 Robin Stuart Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,6 +43,30 @@ /* Single-byte stuff */ +/* ECI 2 (bottom half ASCII, top half CP437), included for libzueci compatibility - assumes valid Unicode */ +static int u_cp437(const unsigned int u, unsigned char 
*dest) { + int s, e; + if (u < 0x80) { + *dest = (unsigned char) u; + return 1; + } + + s = 0; + e = ARRAY_SIZE(cp437_u) - 1; + while (s <= e) { + const int m = (s + e) >> 1; + if (cp437_u[m] < u) { + s = m + 1; + } else if (cp437_u[m] > u) { + e = m - 1; + } else { + *dest = cp437_sb[m]; + return 1; + } + } + return 0; +} + /* Base ISO/IEC 8859 routine to convert Unicode codepoint `u` */ static int u_iso8859(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u, const unsigned char *tab_sb, int e, unsigned char *dest) { @@ -181,6 +205,15 @@ static int u_utf32le(const unsigned int u, unsigned char *dest) { return 4; } +/* ECI 899 Binary, included for libzueci compatibility - assumes valid Unicode */ +static int u_binary(const unsigned int u, unsigned char *dest) { + if (u <= 0xFF) { + *dest = (unsigned char) u; + return 1; + } + return 0; +} + /* Multibyte stuff */ /* Acknowledgements to Bruno Haible for a no. of techniques used here */ @@ -701,7 +734,7 @@ typedef int (*eci_func_t)(const unsigned int u, unsigned char *dest); INTERNAL int utf8_to_eci(const int eci, const unsigned char source[], unsigned char dest[], int *p_length) { static const eci_func_t eci_funcs[36] = { - NULL, NULL, NULL, NULL, u_iso8859_2, /*0-4*/ + NULL, NULL, u_cp437, NULL, u_iso8859_2, /*0-4*/ u_iso8859_3, u_iso8859_4, u_iso8859_5, u_iso8859_6, u_iso8859_7, /*5-9*/ u_iso8859_8, u_iso8859_9, u_iso8859_10, u_iso8859_11, NULL, /*10-14*/ u_iso8859_13, u_iso8859_14, u_iso8859_15, u_iso8859_16, NULL, /*15-19*/ @@ -717,7 +750,8 @@ INTERNAL int utf8_to_eci(const int eci, const unsigned char source[], unsigned c int length = *p_length; /* Special case ISO/IEC 8859-1 */ - if (eci == 0 || eci == 3) { /* Default ECI 0 to ISO/IEC 8859-1 */ + /* Default ECI 0 to ISO/IEC 8859-1 (and ECI 1 for libzueci compatibility) */ + if (eci == 0 || eci == 3 || eci == 1) { while (in_posn < length) { do { decode_utf8(&state, &codepoint, source[in_posn++]); @@ -737,6 +771,8 @@ INTERNAL int 
utf8_to_eci(const int eci, const unsigned char source[], unsigned c if (eci == 170) { /* ASCII Invariant (archaic subset) */ eci_func = u_ascii_inv; + } else if (eci == 899) { /* Binary, for libzueci compatibility */ + eci_func = u_binary; } else { eci_func = eci_funcs[eci]; if (eci_func == NULL) { diff --git a/backend/eci_sb.h b/backend/eci_sb.h index b5995ec4..7dcdc7c7 100644 --- a/backend/eci_sb.h +++ b/backend/eci_sb.h @@ -1,9 +1,10 @@ -/* eci_sb.h - Extended Channel Interpretations single-byte, generated by "backend/tools/gen_eci_sb_h.php" - from "https://unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT" - and "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP125*.TXT" */ +/* eci_sb.h - Extended Channel Interpretations single-byte, generated by "backend/tools/gen_eci_sb_h.php" from + "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT" (for libzueci compatibility) and + "https://unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT" and + "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP125*.TXT" */ /* libzint - the open source barcode library - Copyright (C) 2021-2022 Robin Stuart + Copyright (C) 2021-2025 Robin Stuart Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -35,6 +36,44 @@ #ifndef Z_ECI_SB_H #define Z_ECI_SB_H +/* Tables for ECI 2 CP437 (for libzueci compatibility) */ +static const unsigned short cp437_u[128] = { /* Unicode codepoints sorted */ + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A5, 0x00AA, 0x00AB, 0x00AC, + 0x00B0, 0x00B1, 0x00B2, 0x00B5, 0x00B7, 0x00BA, 0x00BB, 0x00BC, + 0x00BD, 0x00BF, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C9, 0x00D1, + 0x00D6, 0x00DC, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E4, 0x00E5, + 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, + 0x00EE, 0x00EF, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F6, 0x00F7, + 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FF, 0x0192, 0x0393, 0x0398, + 0x03A3, 0x03A6, 0x03A9, 0x03B1, 
0x03B4, 0x03B5, 0x03C0, 0x03C3, + 0x03C4, 0x03C6, 0x207F, 0x20A7, 0x2219, 0x221A, 0x221E, 0x2229, + 0x2248, 0x2261, 0x2264, 0x2265, 0x2310, 0x2320, 0x2321, 0x2500, + 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, + 0x2534, 0x253C, 0x2550, 0x2551, 0x2552, 0x2553, 0x2554, 0x2555, + 0x2556, 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, + 0x255E, 0x255F, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, + 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x2580, + 0x2584, 0x2588, 0x258C, 0x2590, 0x2591, 0x2592, 0x2593, 0x25A0, +}; +static const unsigned char cp437_sb[128] = { /* Single-byte in Unicode order */ + 0xFF, 0xAD, 0x9B, 0x9C, 0x9D, 0xA6, 0xAE, 0xAA, + 0xF8, 0xF1, 0xFD, 0xE6, 0xFA, 0xA7, 0xAF, 0xAC, + 0xAB, 0xA8, 0x8E, 0x8F, 0x92, 0x80, 0x90, 0xA5, + 0x99, 0x9A, 0xE1, 0x85, 0xA0, 0x83, 0x84, 0x86, + 0x91, 0x87, 0x8A, 0x82, 0x88, 0x89, 0x8D, 0xA1, + 0x8C, 0x8B, 0xA4, 0x95, 0xA2, 0x93, 0x94, 0xF6, + 0x97, 0xA3, 0x96, 0x81, 0x98, 0x9F, 0xE2, 0xE9, + 0xE4, 0xE8, 0xEA, 0xE0, 0xEB, 0xEE, 0xE3, 0xE5, + 0xE7, 0xED, 0xFC, 0x9E, 0xF9, 0xFB, 0xEC, 0xEF, + 0xF7, 0xF0, 0xF3, 0xF2, 0xA9, 0xF4, 0xF5, 0xC4, + 0xB3, 0xDA, 0xBF, 0xC0, 0xD9, 0xC3, 0xB4, 0xC2, + 0xC1, 0xC5, 0xCD, 0xBA, 0xD5, 0xD6, 0xC9, 0xB8, + 0xB7, 0xBB, 0xD4, 0xD3, 0xC8, 0xBE, 0xBD, 0xBC, + 0xC6, 0xC7, 0xCC, 0xB5, 0xB6, 0xB9, 0xD1, 0xD2, + 0xCB, 0xCF, 0xD0, 0xCA, 0xD8, 0xD7, 0xCE, 0xDF, + 0xDC, 0xDB, 0xDD, 0xDE, 0xB0, 0xB1, 0xB2, 0xFE, +}; + /* Forward reference to base ISO/IEC 8859 routine - see "eci.c" */ static int u_iso8859(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u, const unsigned char *tab_sb, int e, unsigned char *dest); diff --git a/backend/library.c b/backend/library.c index 1218c43a..163abdae 100644 --- a/backend/library.c +++ b/backend/library.c @@ -979,7 +979,7 @@ static int map_invalid_symbology(struct zint_symbol *symbol) { return warn_number; } -/* Encode a barcode. 
If `length` is 0, `source` must be NUL-terminated */ +/* Encode a barcode. If `length` is 0 or negative, `source` must be NUL-terminated */ int ZBarcode_Encode(struct zint_symbol *symbol, const unsigned char *source, int length) { struct zint_seg segs[1]; @@ -2070,6 +2070,67 @@ float ZBarcode_XdimDp_From_Scale(int symbol_id, float scale, float xdim_mm_or_dp return xdim_mm_or_dpmm; } +/* Whether `eci` is valid character set ECI */ +static int is_valid_char_set_eci(const int eci) { + /* Allowing ECI 1 and ECI 2 for libzueci compatibility (and ECI 0, which is mapped to ECI 2) */ + return (eci <= 35 && eci >= 0 && eci != 14 && eci != 19) || eci == 170 || eci == 899; +} + +/* Convert UTF-8 `source` of length `length` to `eci`-encoded `dest`, setting `p_dest_length` to length of `dest` + on output. If `length` is 0 or negative, `source` must be NUL-terminated. Returns 0 on success, else + ZINT_ERROR_INVALID_OPTION or ZINT_ERROR_INVALID_DATA. Compatible with libzueci `zueci_utf8_to_eci()` */ +int ZBarcode_UTF8_To_ECI(int eci, const unsigned char *source, int length, unsigned char dest[], int *p_dest_length) { + int error_number; + + /* Map ECI 0 to ECI 2 (CP437) for libzueci compatibility */ + if (eci == 0) { + eci = 2; + } + if (!is_valid_char_set_eci(eci) || !source || !dest || !p_dest_length) { + return ZINT_ERROR_INVALID_OPTION; + } + if (length <= 0) { + length = (int) ustrlen(source); /* Note `zueci_utf8_to_eci()` doesn't do this */ + } + if (!is_valid_utf8(source, length)) { + return ZINT_ERROR_INVALID_DATA; + } + + if (eci == 26) { /* UTF-8 - no change */ + memcpy(dest, source, length); + *p_dest_length = length; + return 0; + } + + /* Only set `p_dest_length` on success, for libzueci compatibility */ + if ((error_number = utf8_to_eci(eci, source, dest, &length)) == 0) { + *p_dest_length = length; + } + return error_number; /* 0 or ZINT_ERROR_INVALID_DATA */ +} + +/* Calculate sufficient length needed to convert UTF-8 `source` of length `length` from UTF-8 to `eci`, and 
place + in `p_dest_length`. If `length` is 0 or negative, `source` must be NUL-terminated. Returns 0 on success, else + ZINT_ERROR_INVALID_OPTION or ZINT_ERROR_INVALID_DATA. Compatible with libzueci `zueci_dest_len_eci()` */ +int ZBarcode_Dest_Len_ECI(int eci, const unsigned char *source, int length, int *p_dest_length) { + /* Map ECI 0 to ECI 2 (CP437) for libzueci compatibility */ + if (eci == 0) { + eci = 2; + } + if (!is_valid_char_set_eci(eci) || !source || !p_dest_length) { + return ZINT_ERROR_INVALID_OPTION; + } + if (length <= 0) { + length = (int) ustrlen(source); /* Note `zueci_dest_len_eci()` doesn't do this */ + } + if (!is_valid_utf8(source, length)) { + return ZINT_ERROR_INVALID_DATA; + } + *p_dest_length = get_eci_length(eci, source, length); + + return 0; +} + /* Whether Zint built without PNG support */ int ZBarcode_NoPng(void) { #ifdef ZINT_NO_PNG diff --git a/backend/tests/test_eci.c b/backend/tests/test_eci.c index a852c08f..e8cb2125 100644 --- a/backend/tests/test_eci.c +++ b/backend/tests/test_eci.c @@ -647,6 +647,19 @@ static const unsigned short int windows_1256[] = { 0x064b, 0x064c, 0x064d, 0x064e, 0x00f4, 0x064f, 0x0650, 0x00f7, 0x0651, 0x00f9, 0x0652, 0x00fb, 0x00fc, 0x200e, 0x200f, 0x06d2 }; +/* Taken from https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT */ + +static const unsigned short int cp437[] = { + 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, + 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, + 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 
0x2550, 0x256c, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, + 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0 +}; + static void test_utf8_to_eci_sb(const testCtx *const p_ctx) { struct item { @@ -674,6 +687,7 @@ static void test_utf8_to_eci_sb(const testCtx *const p_ctx) { /* 16*/ { 22, windows_1251 }, /* 17*/ { 23, windows_1252 }, /* 18*/ { 24, windows_1256 }, + /* 19*/ { 2, cp437 }, }; int data_size = ARRAY_SIZE(data); int i, length, ret; @@ -749,8 +763,8 @@ static void test_utf8_to_eci_ascii(const testCtx *const p_ctx) { /* 16*/ { 170, "~", -1, ZINT_ERROR_INVALID_DATA }, /* 17*/ { 170, "\302\200", -1, ZINT_ERROR_INVALID_DATA }, /* 18*/ { 170, "~", -1, ZINT_ERROR_INVALID_DATA }, - /* 19*/ { 1, "A", -1, ZINT_ERROR_INVALID_DATA }, - /* 20*/ { 2, "A", -1, ZINT_ERROR_INVALID_DATA }, + /* 19*/ { 1, "A", -1, 0 }, /* Now succeeds (maps to ISO/IEC 8859-1 for libzueci compatibility) */ + /* 20*/ { 2, "A", -1, 0 }, /* Now succeeds (maps to CP437 for libzueci compatibility) */ /* 21*/ { 14, "A", -1, ZINT_ERROR_INVALID_DATA }, /* 22*/ { 19, "A", -1, ZINT_ERROR_INVALID_DATA }, /* 23*/ { 26, "A", -1, ZINT_ERROR_INVALID_DATA }, diff --git a/backend/tests/test_ksx1001_tab.h b/backend/tests/test_ksx1001_tab.h index 5462aa1e..387aade4 100644 --- a/backend/tests/test_ksx1001_tab.h +++ b/backend/tests/test_ksx1001_tab.h @@ -1,3 +1,34 @@ +/* + libzint - the open source barcode library + Copyright (C) 2021-2025 Robin Stuart + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the project nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. 
+ */ +/* SPDX-License-Identifier: BSD-3-Clause */ + /* Generated by gen_test_tab.php from KSX1001.TXT */ static const unsigned int test_ksx1001_tab[] = { 0x222E, 0x00A1, @@ -8292,3 +8323,5 @@ static const unsigned int test_ksx1001_tab_ind[] = { 15712, 16248, }; + +/* vim: set ts=4 sw=4 et : */ diff --git a/backend/tests/test_library.c b/backend/tests/test_library.c index 378dc294..b4e9d052 100644 --- a/backend/tests/test_library.c +++ b/backend/tests/test_library.c @@ -2177,6 +2177,116 @@ static void test_xdimdp_from_scale(const testCtx *const p_ctx) { testFinish(); } +static void test_utf8_to_eci(const testCtx *const p_ctx) { + + struct item { + int eci; + const char *data; + int length; + int ret_dest; + int expected_dest_length; + int ret; + const char *expected; + int expected_length; + }; + /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */ + static const struct item data[] = { + /* 0*/ { 3, "1234", -1, 0, 4, 0, "1234", -1 }, + /* 1*/ { 3, "1234", 0, 0, 4, 0, "1234", -1 }, /* Zero length allowed */ + /* 2*/ { 3, "1234", -2, 0, 4, 0, "1234", -1 }, /* Negative length allowed */ + /* 3*/ { 3, "", -1, 0, 0, 0, "", 0 }, /* Empty allowed */ + /* 4*/ { 3, NULL, -1, ZINT_ERROR_INVALID_OPTION, 0, -1, "", -1 }, + /* 5*/ { -1, "1234", -1, ZINT_ERROR_INVALID_OPTION, 0, -1, "", -1 }, + /* 6*/ { 0, "1234", -1, 0, 4, 0, "1234", -1 }, + /* 7*/ { 1, "1234", -1, 0, 4, 0, "1234", -1 }, + /* 8*/ { 2, "1234", -1, 0, 4, 0, "1234", -1 }, + /* 9*/ { 0, "1234é", -1, 0, 6, 0, "1234\202", 5 }, /* CP437 */ + /* 10*/ { 1, "1234é", -1, 0, 6, 0, "1234\351", 5 }, /* Same as ISO/IEC 8859-1 */ + /* 11*/ { 2, "1234é", -1, 0, 6, 0, "1234\202", 5 }, /* CP437 */ + /* 12*/ { 3, "1234é", -1, 0, 6, 0, "1234\351", 5 }, + /* 13*/ { 4, "1234˘", -1, 0, 6, 0, "1234\242", 5 }, /* ISO/IEC 8859-2 */ + /* 14*/ { 5, "1234Ħ", -1, 0, 6, 0, "1234\241", 5 }, /* ISO/IEC 8859-3 */ + /* 15*/ { 6, "1234ĸ", -1, 0, 6, 0, "1234\242", 5 }, /* ISO/IEC 8859-4 */ + /* 16*/ { 7, "1234Ё", -1, 0, 6, 0, 
"1234\241", 5 }, /* ISO/IEC 8859-5 */ + /* 17*/ { 8, "1234ء", -1, 0, 6, 0, "1234\301", 5 }, /* ISO/IEC 8859-6 */ + /* 18*/ { 9, "1234π", -1, 0, 6, 0, "1234\360", 5 }, /* ISO/IEC 8859-7 */ + /* 19*/ { 11, "1234ğ", -1, 0, 6, 0, "1234\360", 5 }, /* ISO/IEC 8859-9 */ + /* 20*/ { 12, "1234Ŋ", -1, 0, 6, 0, "1234\257", 5 }, /* ISO/IEC 8859-10 */ + /* 21*/ { 13, "1234๐", -1, 0, 7, 0, "1234\360", 5 }, /* ISO/IEC 8859-11 */ + /* 22*/ { 14, "1234", -1, ZINT_ERROR_INVALID_OPTION, 0, -1, "", -1 }, + /* 23*/ { 15, "1234š", -1, 0, 6, 0, "1234\360", 5 }, /* ISO/IEC 8859-13 */ + /* 24*/ { 16, "1234ŵ", -1, 0, 6, 0, "1234\360", 5 }, /* ISO/IEC 8859-14 */ + /* 25*/ { 17, "1234œ", -1, 0, 6, 0, "1234\275", 5 }, /* ISO/IEC 8859-15 */ + /* 26*/ { 18, "1234Ł", -1, 0, 6, 0, "1234\243", 5 }, /* ISO/IEC 8859-16 */ + /* 27*/ { 19, "1234", -1, ZINT_ERROR_INVALID_OPTION, 0, -1, "", -1 }, + /* 28*/ { 20, "1234点", -1, 0, 7, 0, "1234\223\137", 6 }, /* Shift JIS */ + /* 29*/ { 20, "1234¥", -1, 0, 6, 0, "1234\\", 5 }, /* Shift JIS - Yen sign -> backslash */ + /* 30*/ { 20, "1234~", -1, 0, 5, ZINT_ERROR_INVALID_DATA, "", -1 }, /* Shift JIS - no mapping for tilde */ + /* 31*/ { 20, "1234\\", -1, 0, 6, 0, "1234\201\137", -1 }, /* Shift JIS - backslash -> full-width reverse solidus */ + /* 32*/ { 21, "1234Ą", -1, 0, 6, 0, "1234\245", 5 }, /* Windows-1250 */ + /* 33*/ { 22, "1234ѓ", -1, 0, 6, 0, "1234\203", 5 }, /* Windows-1251 */ + /* 34*/ { 23, "1234ƒ", -1, 0, 6, 0, "1234\203", 5 }, /* Windows-1252 */ + /* 35*/ { 24, "1234پ", -1, 0, 6, 0, "1234\201", 5 }, /* Windows-1256 */ + /* 36*/ { 25, "1234é", -1, 0, 10, 0, "\0001\0002\0003\0004\000\351", 10 }, /* UTF-16BE */ + /* 37*/ { 26, "1234é", -1, 0, 6, 0, "1234é", 6 }, /* UTF-8 */ + /* 38*/ { 27, "1234é", -1, 0, 6, ZINT_ERROR_INVALID_DATA, "", -1 }, /* ASCII */ + /* 39*/ { 27, "1234", -1, 0, 4, 0, "1234", -1 }, /* ASCII */ + /* 40*/ { 28, "1234_", -1, 0, 7, 0, "1234\241\304", 6 }, /* Big5 */ + /* 41*/ { 29, "1234崂", -1, 0, 7, 0, "1234\341\300", 6 }, /* GB 
2312 */ + /* 42*/ { 30, "1234가", -1, 0, 7, 0, "1234\260\241", 6 }, /* EUC-KR */ + /* 43*/ { 31, "1234郎", -1, 0, 7, 0, "1234\375\234", 6 }, /* GBK */ + /* 44*/ { 32, "1234崂", -1, 0, 14, 0, "1234\341\300", 6 }, /* GB 18030 */ + /* 45*/ { 33, "1234é", -1, 0, 10, 0, "1\0002\0003\0004\000\351\000", 10 }, /* UTF-16LE */ + /* 46*/ { 34, "1234é", -1, 0, 20, 0, "\000\000\0001\000\000\0002\000\000\0003\000\000\0004\000\000\000\351", 20 }, /* UTF-32BE */ + /* 47*/ { 35, "1234é", -1, 0, 20, 0, "1\000\000\0002\000\000\0003\000\000\0004\000\000\000\351\000\000\000", 20 }, /* UTF-32LE */ + /* 48*/ { 170, "1234", -1, 0, 4, 0, "1234", 4 }, /* ISO 646 Invariant */ + /* 49*/ { 170, "1234#", -1, 0, 5, ZINT_ERROR_INVALID_DATA, "", -1 }, /* ISO 646 Invariant */ + /* 50*/ { 899, "1234\000\127\302\200ÿ", 10, 0, 10, 0, "1234\000\127\200\377", 8 }, /* Binary */ + }; + const int data_size = ARRAY_SIZE(data); + int i, length, ret; + int expected_length; + + testStart("test_utf8_to_eci"); + + for (i = 0; i < data_size; i++) { + int ret_dest; + unsigned char dest[1024]; + int dest_length; + + if (testContinue(p_ctx, i)) continue; + + length = data[i].length == -1 && data[i].data ? (int) strlen(data[i].data) : data[i].length; + + ret_dest = ZBarcode_Dest_Len_ECI(data[i].eci, TCU(data[i].data), length, &dest_length); + assert_equal(ret_dest, data[i].ret_dest, "i:%d ZBarcode_Dest_Len_ECI(%d, %s) ret_dest %d != %d\n", + i, data[i].eci, data[i].data, ret_dest, data[i].ret_dest); + + if (ret_dest < ZINT_ERROR) { + assert_equal(dest_length, data[i].expected_dest_length, + "i:%d ZBarcode_Dest_Len_ECI dest_length %d != expected_dest_length %d\n", + i, dest_length, data[i].expected_dest_length); + + expected_length = data[i].expected_length == -1 ? 
(int) strlen(data[i].expected) : data[i].expected_length; + ret = ZBarcode_UTF8_To_ECI(data[i].eci, TCU(data[i].data), length, dest, &dest_length); + assert_equal(ret, data[i].ret, "i:%d ZBarcode_UTF8_To_ECI(%d, %s) ret %d != %d\n", + i, data[i].eci, data[i].data, ret, data[i].ret); + if (ret < ZINT_ERROR) { + assert_equal(dest_length, expected_length, + "i:%d ZBarcode_UTF8_To_ECI dest_length %d != expected_length %d\n", + i, dest_length, expected_length); + #if 0 + printf("dest_length %d\n", dest_length); debug_print_escape(TCU(dest), dest_length, NULL); printf("\n"); + #endif + assert_zero(memcmp(dest, data[i].expected, expected_length), "i:%d memcmp(\"%s\", \"%s\", %d) != 0\n", + i, dest, data[i].expected, expected_length); + } + } + } + + testFinish(); +} + int main(int argc, char *argv[]) { testFunction funcs[] = { /* name, func */ @@ -2205,6 +2315,7 @@ int main(int argc, char *argv[]) { { "test_reset", test_reset }, { "test_scale_from_xdimdp", test_scale_from_xdimdp }, { "test_xdimdp_from_scale", test_xdimdp_from_scale }, + { "test_utf8_to_eci", test_utf8_to_eci }, }; testRun(argc, argv, funcs, ARRAY_SIZE(funcs)); diff --git a/backend/tools/gen_eci_sb_h.php b/backend/tools/gen_eci_sb_h.php index 5a3cc794..f7c24a12 100644 --- a/backend/tools/gen_eci_sb_h.php +++ b/backend/tools/gen_eci_sb_h.php @@ -2,7 +2,7 @@ /* Generate ECI single-byte tables & routines from unicode.org mapping files */ /* libzint - the open source barcode library - Copyright (C) 2022-2023 Robin Stuart + Copyright (C) 2022-2025 Robin Stuart */ /* SPDX-License-Identifier: BSD-3-Clause */ /* @@ -20,12 +20,13 @@ $out_dirname = isset($opts['o']) ? $opts['o'] : ($dirname . 
'/..'); // Where to $out = array(); $head = <<<'EOD' -/* eci_sb.h - Extended Channel Interpretations single-byte, generated by "backend/tools/gen_eci_sb_h.php" - from "https://unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT" - and "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP125*.TXT" */ +/* eci_sb.h - Extended Channel Interpretations single-byte, generated by "backend/tools/gen_eci_sb_h.php" from + "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT" (for libzueci compatibility) and + "https://unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT" and + "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP125*.TXT" */ /* libzint - the open source barcode library - Copyright (C) 2021-2022 Robin Stuart + Copyright (C) 2021-2025 Robin Stuart Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -60,6 +61,95 @@ EOD; $out = explode("\n", $head); +// Read the CP437 file + +$tot_cp437 = 0; + +//$file = $data_dirname . '/' . 'CP437.TXT'; +$file = 'https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT'; + +if (($get = file_get_contents($file)) === false) { + error_log($error = "$basename: ERROR: Could not read mapping file \"$file\""); + exit($error . PHP_EOL); +} + +$lines = explode("\n", $get); + +// Parse the file. + +$sort = array(); +$sb = array(); +foreach ($lines as $line) { + $line = trim($line); + if ($line === '' || strncmp($line, '0x', 2) !== 0 || strpos($line, "*** NO MAPPING ***") !== false) { + continue; + } + $matches = array(); + if (preg_match('/^0x([0-9a-f]{2})[ \t]+0x([0-9a-f]{4})[ \t].*$/', $line, $matches)) { + $mb = hexdec($matches[1]); + $unicode = hexdec($matches[2]); + if ($unicode >= 0x80) { + $sort[] = $unicode; + $sb[] = $mb; + } + } +} + +array_multisort($sort, $sb); + +// Output. 
+ +$out[] = ''; +$out[] = '/* Tables for ECI 2 CP437 (for libzueci compatibility) */'; +$cnt = count($sort); +$out[] = 'static const unsigned short cp437_u[' . $cnt . '] = { /* Unicode codepoints sorted */'; +$line = ' '; +for ($i = 0; $i < $cnt; $i++) { + if ($i && $i % 8 === 0) { + $out[] = $line; + $line = ' '; + } + $line .= sprintf(' 0x%04X,', $sort[$i]); +} +if ($line !== ' ') { + $out[] = $line; +} +$out[] = '};'; +$tot_cp437 += $cnt * 2; + +$cnt = count($sb); +$out[] = 'static const unsigned char cp437_sb[' . $cnt . '] = { /* Single-byte in Unicode order */'; +$line = ' '; +for ($i = 0; $i < $cnt; $i++) { + if ($i && $i % 8 === 0) { + $out[] = $line; + $line = ' '; + } + $line .= sprintf(' 0x%02X,', $sb[$i]); +} +if ($line !== ' ') { + $out[] = $line; +} +$out[] = '};'; +$tot_cp437 += $cnt; + +$u_sb = array_flip($sb); +$b = 0x80; +$cnt = 256 - $b; +$max_idx = -1; +for ($i = 0; $i < $cnt; $i++) { + if (isset($u_sb[$i + $b])) { + $max_idx = $i; + } +} +$cnt = $max_idx + 1; +$tot_cp437 += $cnt; + +if (0) { + $out[] = ''; + $out[] = '/* Total CP437 bytes: ' . $tot_cp437 . ' */'; +} + $u_iso8859 = <<<'EOD' /* Forward reference to base ISO/IEC 8859 routine - see "eci.c" */ diff --git a/backend/zint.h b/backend/zint.h index dce15dc5..52a7d988 100644 --- a/backend/zint.h +++ b/backend/zint.h @@ -141,7 +141,7 @@ extern "C" { /* Segment for use with `ZBarcode_Encode_Segs()` below */ struct zint_seg { unsigned char *source; /* Data to encode */ - int length; /* Length of `source`. If 0, `source` must be NUL-terminated */ + int length; /* Length of `source`. If 0 or negative, `source` must be NUL-terminated */ int eci; /* Extended Channel Interpretation */ }; @@ -405,7 +405,7 @@ extern "C" { ZINT_EXTERN void ZBarcode_Delete(struct zint_symbol *symbol); - /* Encode a barcode. If `length` is 0, `source` must be NUL-terminated */ + /* Encode a barcode. 
If `length` is 0 or negative, `source` must be NUL-terminated */ ZINT_EXTERN int ZBarcode_Encode(struct zint_symbol *symbol, const unsigned char *source, int length); /* Encode a barcode with multiple ECI segments */ @@ -488,6 +488,18 @@ extern "C" { const char *filetype); + /* Convert UTF-8 `source` of length `length` to `eci`-encoded `dest`, setting `p_dest_length` to length of `dest` + on output. If `length` is 0 or negative, `source` must be NUL-terminated. Returns 0 on success, else + ZINT_ERROR_INVALID_OPTION or ZINT_ERROR_INVALID_DATA. Compatible with libzueci `zueci_utf8_to_eci()` */ + ZINT_EXTERN int ZBarcode_UTF8_To_ECI(int eci, const unsigned char *source, int length, unsigned char dest[], + int *p_dest_length); + + /* Calculate sufficient length needed to convert UTF-8 `source` of length `length` from UTF-8 to `eci`, and place + in `p_dest_length`. If `length` is 0 or negative, `source` must be NUL-terminated. Returns 0 on success, else + ZINT_ERROR_INVALID_OPTION or ZINT_ERROR_INVALID_DATA. Compatible with libzueci `zueci_dest_len_eci()` */ + ZINT_EXTERN int ZBarcode_Dest_Len_ECI(int eci, const unsigned char *source, int length, int *p_dest_length); + + /* Whether Zint built without PNG support */ ZINT_EXTERN int ZBarcode_NoPng(void); diff --git a/docs/manual.html b/docs/manual.html index 635ad652..91281848 100644 --- a/docs/manual.html +++ b/docs/manual.html @@ -488,7 +488,10 @@ Availability
  • 5.15 Checking Symbology Capabilities
  • -
  • 5.16 Zint +
  • 5.16 UTF-8 to ECI +convenience functions
  • +
  • 5.17 Zint Version
  • 6. Types @@ -4105,8 +4108,8 @@ segments and seg_count being the number of elements it contains. The zint_seg structure is of the form:

    struct zint_seg {
         unsigned char *source; /* Data to encode */
    -    int length;            /* Length of `source`. If 0, `source` must be
    -                              NUL-terminated */
    +    int length;            /* Length of `source`. If 0 or negative, `source`
    +                              must be NUL-terminated */
         int eci;               /* Extended Channel Interpretation */
     };

    The symbology must support ECIs (see Table } else { printf("PDF417 does not support ECI\n"); } -

    5.16 Zint Version

    +

    5.16 UTF-8 to ECI +convenience functions

    +

    As a convenience the conversion done by Zint from UTF-8 to ECIs is +exposed in two helper functions (compatible with the +libzueci[18] functions +zueci_utf8_to_eci() and +zueci_dest_len_eci()):

    +
    int ZBarcode_UTF8_To_ECI(int eci, const unsigned char *source, int length,
    +    unsigned char dest[], int *p_dest_length);
    +
    +int ZBarcode_Dest_Len_ECI(int eci, const unsigned char *source, int length,
    +    int *p_dest_length);
    +

    Call ZBarcode_Dest_Len_ECI() to get the size of buffer +sufficient to accommodate the conversion, then call +ZBarcode_UTF8_To_ECI() with an appropriately sized buffer +to do the conversion. The final destination length, returned in +p_dest_length, may be smaller than the estimate given by +ZBarcode_Dest_Len_ECI(). If length is zero or +less, source must be NUL-terminated. The destination buffer +is not NUL-terminated. The obsolete ECIs 0, 1 and 2 are supported.

    +

    5.17 Zint Version

    Whether the Zint library linked to was built with PNG support may be determined with:

    -
    int ZBarcode_NoPng();
    +
    int ZBarcode_NoPng();

    which returns 1 if no PNG support is available, else zero.

    Lastly, the version of the Zint library linked to is returned by:

    -
    int ZBarcode_Version();
    +
    int ZBarcode_Version();

    The version parts are separated by hundreds. For instance, version "2.9.1" is returned as "20901".

    6. Types of Symbology

    @@ -4453,12 +4477,12 @@ calculated by Zint. In addition EAN-2 and EAN-5 add-on symbols can be added using the + character. For example, to draw a UPC-A symbol with the data 72527270270 with an EAN-5 add-on showing the data 12345 use the command:

    -
    zint -b UPCA -d "72527270270+12345"
    +
    zint -b UPCA -d "72527270270+12345"

    or using the API encode a data string with the + character included:

    -
    my_symbol->symbology = BARCODE_UPCA;
    -error = ZBarcode_Encode_and_Print(my_symbol, "72527270270+12345", 0, 0);
    +
    my_symbol->symbology = BARCODE_UPCA;
    +error = ZBarcode_Encode_and_Print(my_symbol, "72527270270+12345", 0, 0);
    zint -b UPCA --compliantheight -d "72527270270+12345" @@ -4472,12 +4496,12 @@ input and validates the check digit before encoding.

    --guardwhitespace (API output_options |= EANUPC_GUARD_WHITESPACE). For UPC, this is only relevant when there is add-on:

    -
    zint -b UPCA -d "72527270270+12345" --guardwhitespace
    +
    zint -b UPCA -d "72527270270+12345" --guardwhitespace

    or using the API:

    -
    my_symbol->symbology = BARCODE_UPCA;
    -my_symbol->output_options |= EANUPC_GUARD_WHITESPACE;
    -error = ZBarcode_Encode_and_Print(my_symbol, "72527270270+12345", 0, 0);
    +
    my_symbol->symbology = BARCODE_UPCA;
    +my_symbol->output_options |= EANUPC_GUARD_WHITESPACE;
    +error = ZBarcode_Encode_and_Print(my_symbol, "72527270270+12345", 0, 0);
    zint -b UPCA --compliantheight -d "72527270270+12345" --guardwhitespace @@ -4504,19 +4528,19 @@ check digit is calculated by Zint. EAN-2 and EAN-5 add-on symbols can be added using the + character as with UPC-A. In addition Zint also supports Number System 1 encoding by entering a 7-digit article number starting with the digit 1. For example:

    -
    zint -b UPCE -d "1123456"
    +
    zint -b UPCE -d "1123456"

    or

    -
    my_symbol->symbology = BARCODE_UPCE;
    -error = ZBarcode_Encode_and_Print(my_symbol, "1123456", 0, 0);
    +
    my_symbol->symbology = BARCODE_UPCE;
    +error = ZBarcode_Encode_and_Print(my_symbol, "1123456", 0, 0);

    If your input data already includes the check digit, symbology BARCODE_UPCE_CHK (38) can be used, which takes a 7 or 8-digit input and validates the check digit before encoding.

    As with UPC-A, a quiet zone indicator can be added when there is an add-on by setting --guardwhitespace (API output_options |= EANUPC_GUARD_WHITESPACE):

    -
    zint -b UPCE -d "1123456+12" --guardwhitespace
    +
    zint -b UPCE -d "1123456+12" --guardwhitespace
    zint -b UPCE --compliantheight -d "1123456+12" --guardwhitespace @@ -4546,8 +4570,8 @@ numbers respectively. Zint will decide which symbology to use depending on the length of the input data. In addition EAN-2 and EAN-5 add-on symbols can be added to EAN-8 and EAN-13 symbols using the + character as with UPC symbols. For example:

    -
    zint -b EANX -d "54321"
    +
    zint -b EANX -d "54321"
    zint -b EANX --compliantheight -d "54321" @@ -4555,15 +4579,15 @@ alt="zint -b EANX --compliantheight -d "54321"" /> aria-hidden="true">zint -b EANX --compliantheight -d "54321"

    will encode a stand-alone EAN-5, whereas

    -
    zint -b EANX -d "7432365+54321"
    +
    zint -b EANX -d "7432365+54321"

    will encode an EAN-8 symbol with an EAN-5 add-on. As before these results can be achieved using the API:

    -
    my_symbol->symbology = BARCODE_EANX;
    -
    -error = ZBarcode_Encode_and_Print(my_symbol, "54321", 0, 0);
    -
    -error = ZBarcode_Encode_and_Print(my_symbol, "7432365+54321", 0, 0);
    +
    my_symbol->symbology = BARCODE_EANX;
    +
    +error = ZBarcode_Encode_and_Print(my_symbol, "54321", 0, 0);
    +
    +error = ZBarcode_Encode_and_Print(my_symbol, "7432365+54321", 0, 0);
    zint -b EANX --compliantheight -d "7432365+54321" @@ -4579,8 +4603,8 @@ and validates the check digit before encoding.

    Options to add quiet zone indicators and to adjust the add-on gap and the guard bar descent height are the same as for 6.1.3.2 UPC Version E. For instance:

    -
    zint -b EANX_CHK -d "74323654" --guardwhitespace
    +
    zint -b EANX_CHK -d "74323654" --guardwhitespace
    zint -b EANX_CHK --compliantheight -d "74323654" --guardwhitespace @@ -4872,21 +4896,21 @@ escapes \^A, \^B, \^C and \^@ (the latter turns off manual Code Set selection). For instance the following will force switching to Code Set B for the data "5678" (normally Code Set C would be used throughout):

    -
    zint -b CODE128 -d "1234\^B5678" --extraesc
    +
    zint -b CODE128 -d "1234\^B5678" --extraesc

    The manually selected Code Set will apply until the next Code Set escape sequence or until a \^@, with the exception that data that cannot be represented in that Code Set will be switched as appropriate. If the data contains an extra escape sequence, it can be escaped by doubling the caret (^). For instance

    -
    zint -b CODE128 -d "\^AABC\^^BDEF" --extraesc
    +
    zint -b CODE128 -d "\^AABC\^^BDEF" --extraesc

    will encode the data "ABC\^BDEF" in Code Set A.

    There is also the extra escape \^1, which will encode a special Function Code 1 character (FNC1) anywhere you choose in the data, for instance

    -
    zint -b CODE128 -d "A\^1BC\^1DEF" --extraesc
    +
    zint -b CODE128 -d "A\^1BC\^1DEF" --extraesc

    Zint can encode a maximum of 102 symbol characters, which allows for e.g. 202 all-numeric or 101 all-uppercase characters. Sizes above 120 digits (60 alphanumerics) are not recommended.

    @@ -4900,8 +4924,8 @@ aria-hidden="true">zint -b CODE128AB -d "130170X178"

    It is sometimes advantageous to stop Code 128 from using Code Set C which compresses numerical data. The BARCODE_CODE128AB18 variant (symbology 60) suppresses +href="#fn19" class="footnote-ref" id="fnref19" +role="doc-noteref">19 variant (symbology 60) suppresses Code Set C in favour of Code Sets A and B.

    Note that the special extra escapes mentioned above are not available for this variant (nor for any other).

    @@ -4927,11 +4951,11 @@ correct encoding. GS1-128 does not support extended ASCII (ISO/IEC 8859-1) characters. Check digits for GTIN data AI (01) are not generated and need to be included in the input data. The following is an example of a valid GS1-128 input:

    -
    zint -b 16 -d "[01]98898765432106[3202]012345[15]991231"
    -

    or using the --gs1parens option:

    zint -b 16 --gs1parens -d "(01)98898765432106(3202)012345(15)991231"
    +class="sourceCode bash">zint -b 16 -d "[01]98898765432106[3202]012345[15]991231" +

    or using the --gs1parens option:

    +
    zint -b 16 --gs1parens -d "(01)98898765432106(3202)012345(15)991231"

    6.1.10.4 EAN-14

    6.1.10.3 GS1-128.

    not calculated by Zint when this symbology is encoded. Fixed length data should be entered at the appropriate length for correct encoding. The following is an example of a valid GS1 DataBar Expanded input:

    -
    zint -b 31 -d "[01]98898765432106[3202]012345[15]991231"
    +
    zint -b 31 -d "[01]98898765432106[3202]012345[15]991231"

    6.1.12 Korea Post Barcode

    primarily in the vehicle industry, is to simply stack one-dimensional codes on top of each other. This can be achieved at the command prompt by giving more than one set of input data. For example

    -
    zint -d "This" -d "That"
    +
    zint -d "This" -d "That"

    will draw two Code 128 symbols, one on top of the other. The same result can be achieved using the API by executing the ZBarcode_Encode() function more than once on a symbol. For example:

    -
    my_symbol->symbology = BARCODE_CODE128;
    -
    -error = ZBarcode_Encode(my_symbol, "This", 0);
    -
    -error = ZBarcode_Encode(my_symbol, "That", 0);
    -
    -error = ZBarcode_Print(my_symbol);
    +
    my_symbol->symbology = BARCODE_CODE128;
    +
    +error = ZBarcode_Encode(my_symbol, "This", 0);
    +
    +error = ZBarcode_Encode(my_symbol, "That", 0);
    +
    +error = ZBarcode_Print(my_symbol);
    zint -d "This" -d "That" @@ -5222,8 +5246,8 @@ specifying --bind (API separator bars in integral multiples of the X-dimension (minimum and default 1, maximum 4) can be set by --separator (API option_3):

    -
    zint --bind --notext --separator=2 -d "This" -d "That"
    +
    zint --bind --notext --separator=2 -d "This" -d "That"
    zint --notext --bind --separator=2 -d "This" -d "That" @@ -5501,21 +5525,21 @@ should be entered into a primary string with the data for the 2D component being entered in the normal way. To do this at the command prompt use the --primary switch (API primary). For example:

    -
    zint -b EANX_CC --mode=1 --primary=331234567890 -d "[99]1234-abcd"
    +
    zint -b EANX_CC --mode=1 --primary=331234567890 -d "[99]1234-abcd"

    This creates an EAN-13 linear component with the data "331234567890" and a 2D CC-A (see below) component with the data "(99)1234-abcd". The same results can be achieved using the API as shown below:

    -
    my_symbol->symbology = BARCODE_EANX_CC;
    -
    -my_symbol->option_1 = 1;
    -
    -strcpy(my_symbol->primary, "331234567890");
    -
    -ZBarcode_Encode_and_Print(my_symbol, "[99]1234-abcd", 0, 0);
    +
    my_symbol->symbology = BARCODE_EANX_CC;
    +
    +my_symbol->option_1 = 1;
    +
    +strcpy(my_symbol->primary, "331234567890");
    +
    +ZBarcode_Encode_and_Print(my_symbol, "[99]1234-abcd", 0, 0);

    EAN-2 and EAN-5 add-on data can be used with EAN and UPC symbols using the + symbol as described in sections 6.1.3 UPC (Universal @@ -5641,8 +5665,8 @@ the 6-digit version the first and last digit are ignored, leaving a 2047. The second format "NNN-NN" represents the DX Extract as two numbers separated by a dash (-), the first number being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range -0 to 15).19

    +0 to 15).20

    The optional frame number is a number in the range 0 to 63, and may have a half frame indicator "A" appended. Special character sequences (with or without a half frame indicator appended) may also be @@ -5907,13 +5931,13 @@ size to full height can be given in thousandths (permille) using the --vers option (API option_2). The default value is 250 (25%).

    For example the following

    -
    zint -b DAFT -d AAFDTTDAFADTFTTFFFDATFTADTTFFTDAFAFDTF --height=8.494 --vers=256
    +
    zint -b DAFT -d AAFDTTDAFADTFTTFFFDATFTADTTFFTDAFAFDTF --height=8.494 --vers=256

    produces the same barcode (see 6.5.3 Royal Mail 4-State Customer Code (RM4SCC)) as

    -
    zint -b RM4SCC --compliantheight -d "W1J0TR01"
    +
    zint -b RM4SCC --compliantheight -d "W1J0TR01"

    6.6 Matrix Symbols

    6.6.1 Data Matrix (ISO 16022)

    @@ -6554,8 +6578,8 @@ be manually specified by using the --mask switch with values 0-7, or in the API by setting option_3 = (N + 1) << 8 where N is 0-7. To use with ZINT_FULL_MULTIBYTE set

    -
    option_3 = ZINT_FULL_MULTIBYTE | (N + 1) << 8
    +
    option_3 = ZINT_FULL_MULTIBYTE | (N + 1) << 8

    The --fast option (API input_mode |= FAST_MODE) may be used when leaving Zint to automatically select a mask to reduce the number of masks to try to four @@ -6691,8 +6715,8 @@ be manually specified by using the --mask switch with values 0-3, or in the API by setting option_3 = (N + 1) << 8 where N is 0-3. To use with ZINT_FULL_MULTIBYTE set

    -
    option_3 = ZINT_FULL_MULTIBYTE | (N + 1) << 8
    +
    option_3 = ZINT_FULL_MULTIBYTE | (N + 1) << 8

    6.6.5 Rectangular Micro QR Code (rMQR) (ISO 23941)

    @@ -6959,8 +6983,8 @@ Latin-2 formatted use the --binary switch (API input_mode = DATA_MODE).

    The following example creates a symbol from data saved as a Latin-2 file:

    -
    zint -o upnqr.png -b 143 --scale=3 --binary -i upn.txt
    +
    zint -o upnqr.png -b 143 --scale=3 --binary -i upn.txt

    A mask may be manually specified or the --fast option used as with QRCODE.

    6.6.7 MaxiCode (ISO 16023)

    @@ -7019,9 +7043,9 @@ your parcel courier.

    The primary message can be set at the command prompt using the --primary switch (API primary). The secondary message uses the normal data entry method. For example:

    -
    zint -o test.eps -b 57 --primary="999999999840012" \
    -    -d "Secondary Message Here"
    +
    zint -o test.eps -b 57 --primary="999999999840012" \
    +    -d "Secondary Message Here"

    When using the API the primary message must be placed in the primary string. The secondary is entered in the same way as described in 5.2 Encoding and @@ -7034,9 +7058,9 @@ to be prefixed by the ISO/IEC 15434 Format "01" vv is a 2-digit version, by using the --scmvv switch (API option_2 = vv + 1). For example to use the common version "96" (ASC MH10/SC 8):

    -
    zint -b 57 --primary="152382802840001" --scmvv=96 --esc -d \
    -  "1Z00004951\GUPSN\G06X610\G159\G1234567\G1/1\G\GY\G1 MAIN ST\GNY\GNY\R\E"
    +
    zint -b 57 --primary="152382802840001" --scmvv=96 --esc -d \
    +  "1Z00004951\GUPSN\G06X610\G159\G1234567\G1/1\G\GY\G1 MAIN ST\GNY\GNY\R\E"

    will prefix "[)>\R01\G96" to the secondary message. (\R, \G and \E are the escape sequences for Record Separator, Group Separator and End of Transmission @@ -7045,8 +7069,8 @@ Sequences.)

    Modes 4 to 6 can be accessed using the --mode switch (API option_1). Modes 4 to 6 do not have a primary message. For example:

    -
    zint -o test.eps -b 57 --mode=4 -d "A MaxiCode Message in Mode 4"
    +
    zint -o test.eps -b 57 --mode=4 -d "A MaxiCode Message in Mode 4"

    Mode 6 is reserved for the maintenance of scanner hardware and should not be used to encode user data.

    This symbology uses Latin-1 character encoding by default but also @@ -7970,8 +7994,8 @@ be manually specified by using the --mask switch with values 0-3, or in the API by setting option_3 = (N + 1) << 8 where N is 0-3. To use with ZINT_FULL_MULTIBYTE set

    -
    option_3 = ZINT_FULL_MULTIBYTE | (N + 1) << 8
    +
    option_3 = ZINT_FULL_MULTIBYTE | (N + 1) << 8

    6.6.14 Ultracode

    Zint does not currently implement data compression by default, but this can be initiated through the API by setting

    -
    symbol->option_3 = ULTRA_COMPRESSION;
    +
    symbol->option_3 = ULTRA_COMPRESSION;

    With compression, up to 504 digits, 375 alphanumerics or 252 bytes can be encoded.

    Revision 2 of Ultracode (2023) may be specified using @@ -8117,12 +8141,13 @@ not include a check digit.

    7. Legal and Version Information

    7.1 License

    -

    Zint, libzint and Zint Barcode Studio are Copyright © 2025 Robin -Stuart. All historical versions are distributed under the GNU General -Public License version 3 or later. Versions 2.5 and later are released -under a dual license: the encoding library is released under the BSD (3 -clause) license whereas the GUI, Zint Barcode Studio, and the CLI are -released under the GNU General Public License version 3 or later.

    +

    Zint, libzint and Zint Barcode Studio are Copyright © +2025 Robin Stuart. All historical versions are distributed under the GNU +General Public License version 3 or later. Versions 2.5 and later are +released under a dual license: the encoding library is released under +the BSD (3 clause) license whereas the GUI, Zint Barcode Studio, and the +CLI are released under the GNU General Public License version 3 or +later.

    Telepen is a trademark of SB Electronic Systems Ltd.

    QR Code is a registered trademark of Denso Wave Incorporated.

    Mailmark is a registered trademark of Royal Mail Group Ltd.

    @@ -8685,28 +8710,28 @@ properties that correspond to the zint_symbol structure method render() which takes a Qt QPainter to paint with, and a QRectF rectangular area specifying where to paint into:

    -
    /* Encode and display barcode in `paintRect` using `painter`.
    -   Note: legacy argument `mode` is not used */
    -void render(QPainter& painter, const QRectF& paintRect,
    -            AspectRatioMode mode = IgnoreAspectRatio);
    +
    /* Encode and display barcode in `paintRect` using `painter`.
    +   Note: legacy argument `mode` is not used */
    +void render(QPainter& painter, const QRectF& paintRect,
    +            AspectRatioMode mode = IgnoreAspectRatio);

    render() will emit one of two Qt signals - encoded on successful encoding and drawing, or errored on failure. The client can connect and act appropriately, for instance:

    -
    connect(qzint, SIGNAL(encoded()), SLOT(on_encoded()));
    -connect(qzint, SIGNAL(errored()), SLOT(on_errored()));
    +
    connect(qzint, SIGNAL(encoded()), SLOT(on_encoded()));
    +connect(qzint, SIGNAL(errored()), SLOT(on_errored()));

    where qzint is an instance of Zint::QZint and on_encoded() and on_error() are Qt slot methods provided by the caller. On error, the error value and message can be retrieved by the methods getError() and lastError() respectively.

    The other main method is save_to_file():

    -
    /* Encode and print barcode to file `filename`.
    -   Only sets `getError()` on error, not on warning */
    -bool save_to_file(const QString& filename); // `ZBarcode_Print()`
    +
    /* Encode and print barcode to file `filename`.
    +   Only sets `getError()` on error, not on warning */
    +bool save_to_file(const QString& filename); // `ZBarcode_Print()`

    which takes a filename to output to. It too will emit an errored signal on failure, returning false (but nothing on success, which just returns true). Note @@ -8721,12 +8746,12 @@ symbology capabilities, and utility methods such as

    Annex C. Tcl Backend Binding

    A Tcl binding is available in the "backend_tcl" sub-directory. To make on Unix:

    -
    cd backend_tcl
    -autoconf
    -./configure
    -make
    -sudo make install
    +
    cd backend_tcl
    +autoconf
    +./configure
    +make
    +sudo make install

    For Windows, a Microsoft Visual C++ project file is available at "backend_tcl\zint_tcl.vcxproj". Note that this assumes that Tcl/Tk is available in "C:\Tcl" and that the libraries are @@ -8737,21 +8762,21 @@ to match your setup. There is also a Visual Studio makefile available at "backend_tcl\win\README.txt".

    Once built and installed, invoke the Tcl/Tk CLI "wish":

    -
    wish
    +
    wish

    and ignoring the Tk window click back to the command prompt "%" and type:

    -
    package require zint
    -zint help
    +
    package require zint
    +zint help

    which will show the usage message, with options very similar to the Zint CLI. (One notable difference is that boolean options such as -bold take a 1 or 0 as an argument.)

    A demonstration Tcl/Tk program which is also useful in itself is available at "backend_tcl/demo/demo.tcl". To run type:

    -
    wish demo/demo.tcl
    +
    wish demo/demo.tcl

    which will display the following window.

    --werror given

    EXAMPLES

    Create “out.png” (or “out.gif” if zint built without PNG support) in the current directory, as a Code 128 symbol.

    -
    zint -d 'This Text'
    -

    Create “qr.svg” in the current directory, as a QR Code symbol.

    zint -b QRCode -d 'This Text' -o 'qr.svg'
    +class="sourceCode bash">zint -d 'This Text' +

    Create “qr.svg” in the current directory, as a QR Code symbol.

    +
    zint -b QRCode -d 'This Text' -o 'qr.svg'

    Use batch mode to read from an input file “ean13nos.txt” containing a list of 13-digit GTINs, each on a separate line, to create a series of EAN-13 barcodes, formatting the output filenames to “ean001.gif”, “ean002.gif” etc. using the special character “~”.

    -
    zint -b EANX --batch -i 'ean13nos.txt' -o 'ean~~~.gif'
    +
    zint -b EANX --batch -i 'ean13nos.txt' -o 'ean~~~.gif'

    BUGS

    Please send bug reports to https://sourceforge.net/p/zint/tickets/.

    @@ -9590,13 +9615,17 @@ class="footnote-back" role="doc-backlink">↩︎

  • ZINT_CAP_EANUPC was previously named ZINT_CAP_EXTENDABLE, which is still recognised.↩︎

  • -
  • BARCODE_CODE128AB previously used the name -BARCODE_CODE128B, which is still recognised.

    The library libzueci, which can convert +both to and from UTF-8 and ECI, is available at https://sourceforge.net/projects/libzueci/.↩︎

  • -
  • The DX number may be looked up in The (Modified) Big +

  • BARCODE_CODE128AB previously used the name +BARCODE_CODE128B, which is still recognised.↩︎

  • +
  • The DX number may be looked up in The (Modified) Big Film Database at https://thebigfilmdatabase.merinorus.com.↩︎

  • +href="#fnref20" class="footnote-back" role="doc-backlink">↩︎

    diff --git a/docs/manual.pmd b/docs/manual.pmd index 96073ff7..8f2b4187 100644 --- a/docs/manual.pmd +++ b/docs/manual.pmd @@ -2482,8 +2482,8 @@ being the number of elements it contains. The zint_seg structure is of the form: ```c struct zint_seg { unsigned char *source; /* Data to encode */ - int length; /* Length of `source`. If 0, `source` must be - NUL-terminated */ + int length; /* Length of `source`. If 0 or negative, `source` + must be NUL-terminated */ int eci; /* Extended Channel Interpretation */ }; ``` @@ -2669,7 +2669,34 @@ if (cap & ZINT_CAP_ECI) { } ``` -## 5.16 Zint Version +## 5.16 UTF-8 to ECI convenience functions + +As a convenience the conversion done by Zint from UTF-8 to ECIs is exposed in +two helper functions (compatible with the `libzueci`[^18] functions +`zueci_utf8_to_eci()` and `zueci_dest_len_eci()`): + + +```c +int ZBarcode_UTF8_To_ECI(int eci, const unsigned char *source, int length, + unsigned char dest[], int *p_dest_length); + +int ZBarcode_Dest_Len_ECI(int eci, const unsigned char *source, int length, + int *p_dest_length); +``` + +Call `ZBarcode_Dest_Len_ECI()` to get the size of buffer sufficient to +accommodate the conversion, then call `ZBarcode_UTF8_To_ECI()` with an +appropriately sized buffer to do the conversion. The final destination length, +returned in `p_dest_length`, may be smaller than the estimate given by +`ZBarcode_Dest_Len_ECI()`. If `length` is zero or less, `source` must be +NUL-terminated. The destination buffer is not NUL-terminated. The obsolete ECIs +0, 1 and 2 are supported. + +[^18]: The library `libzueci`, which can convert both to and from UTF-8 and ECI, +is available at [https://sourceforge.net/projects/libzueci/]( +https://sourceforge.net/projects/libzueci/). + +## 5.17 Zint Version Whether the Zint library linked to was built with PNG support may be determined with: @@ -3192,13 +3219,13 @@ alphanumerics) are not recommended. 
![`zint -b CODE128AB -d "130170X178"`](images/code128ab.svg){.lin} It is sometimes advantageous to stop Code 128 from using Code Set C which -compresses numerical data. The `BARCODE_CODE128AB`[^18] variant (symbology 60) +compresses numerical data. The `BARCODE_CODE128AB`[^19] variant (symbology 60) suppresses Code Set C in favour of Code Sets A and B. Note that the special extra escapes mentioned above are not available for this variant (nor for any other). -[^18]: `BARCODE_CODE128AB` previously used the name `BARCODE_CODE128B`, which is +[^19]: `BARCODE_CODE128AB` previously used the name `BARCODE_CODE128B`, which is still recognised. #### 6.1.10.3 GS1-128 @@ -3790,7 +3817,7 @@ first and last digit are ignored, leaving a 4-digit DX Extract number in any case, which must be in the range 16 to 2047. The second format `"NNN-NN"` represents the DX Extract as two numbers separated by a dash (`-`), the first number being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range -0 to 15).[^19] +0 to 15).[^20] The optional frame number is a number in the range 0 to 63, and may have a half frame indicator `"A"` appended. Special character sequences (with or without a @@ -3800,7 +3827,7 @@ number 62, `"K"` or `"00"` means frame number 63, and `"F"` means frame number A parity bit is automatically added by Zint. -[^19]: The DX number may be looked up in The (Modified) Big Film Database at +[^20]: The DX number may be looked up in The (Modified) Big Film Database at [https://thebigfilmdatabase.merinorus.com]( https://thebigfilmdatabase.merinorus.com). @@ -4796,7 +4823,7 @@ maximum of 128 digits and does not include a check digit. ## 7.1 License -Zint, libzint and Zint Barcode Studio are Copyright © 2025 Robin Stuart. All +Zint, `libzint` and Zint Barcode Studio are Copyright © 2025 Robin Stuart. All historical versions are distributed under the GNU General Public License version 3 or later. 
Versions 2.5 and later are released under a dual license: the encoding library is released under the BSD (3 clause) license whereas the GUI, diff --git a/docs/manual.txt b/docs/manual.txt index 6d4d3948..1e6776d2 100644 --- a/docs/manual.txt +++ b/docs/manual.txt @@ -72,7 +72,8 @@ February 2025 - 5.13 Scaling Helpers - 5.14 Verifying Symbology Availability - 5.15 Checking Symbology Capabilities - - 5.16 Zint Version + - 5.16 UTF-8 to ECI convenience functions + - 5.17 Zint Version - 6. Types of Symbology - 6.1 One-Dimensional Symbols - 6.1.1 Code 11 @@ -2434,8 +2435,8 @@ number of elements it contains. The zint_seg structure is of the form: struct zint_seg { unsigned char *source; /* Data to encode */ - int length; /* Length of `source`. If 0, `source` must be - NUL-terminated */ + int length; /* Length of `source`. If 0 or negative, `source` + must be NUL-terminated */ int eci; /* Extended Channel Interpretation */ }; @@ -2599,7 +2600,27 @@ For example: printf("PDF417 does not support ECI\n"); } -5.16 Zint Version +5.16 UTF-8 to ECI convenience functions + +As a convenience the conversion done by Zint from UTF-8 to ECIs is exposed in +two helper functions (compatible with the libzueci[18] functions +zueci_utf8_to_eci() and zueci_dest_len_eci()): + + int ZBarcode_UTF8_To_ECI(int eci, const unsigned char *source, int length, + unsigned char dest[], int *p_dest_length); + + int ZBarcode_Dest_Len_ECI(int eci, const unsigned char *source, int length, + int *p_dest_length); + +Call ZBarcode_Dest_Len_ECI() to get the size of buffer sufficient to accommodate +the conversion, then call ZBarcode_UTF8_To_ECI() with an appropriately sized +buffer to do the conversion. The final destination length, returned in +p_dest_length, may be smaller than the estimate given by +ZBarcode_Dest_Len_ECI(). If length is zero or less, source must be +NUL-terminated. The destination buffer is not NUL-terminated. The obsolete ECIs +0, 1 and 2 are supported. 
+ +5.17 Zint Version Whether the Zint library linked to was built with PNG support may be determined with: @@ -3069,7 +3090,7 @@ alphanumerics) are not recommended. [zint -b CODE128AB -d "130170X178"] It is sometimes advantageous to stop Code 128 from using Code Set C which -compresses numerical data. The BARCODE_CODE128AB[18] variant (symbology 60) +compresses numerical data. The BARCODE_CODE128AB[19] variant (symbology 60) suppresses Code Set C in favour of Code Sets A and B. Note that the special extra escapes mentioned above are not available for this @@ -3621,7 +3642,7 @@ first and last digit are ignored, leaving a 4-digit DX Extract number in any case, which must be in the range 16 to 2047. The second format "NNN-NN" represents the DX Extract as two numbers separated by a dash (-), the first number being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range -0 to 15).[19] +0 to 15).[20] The optional frame number is a number in the range 0 to 63, and may have a half frame indicator "A" appended. Special character sequences (with or without a @@ -5656,8 +5677,11 @@ representable in HRT. [17] ZINT_CAP_EANUPC was previously named ZINT_CAP_EXTENDABLE, which is still recognised. -[18] BARCODE_CODE128AB previously used the name BARCODE_CODE128B, which is still +[18] The library libzueci, which can convert both to and from UTF-8 and ECI, is +available at https://sourceforge.net/projects/libzueci/. + +[19] BARCODE_CODE128AB previously used the name BARCODE_CODE128B, which is still recognised. -[19] The DX number may be looked up in The (Modified) Big Film Database at +[20] The DX number may be looked up in The (Modified) Big Film Database at https://thebigfilmdatabase.merinorus.com.