From 543696cb0674a5e574e36e2df74394d1cb707a40 Mon Sep 17 00:00:00 2001 From: gitlost Date: Thu, 16 Oct 2025 18:23:48 +0100 Subject: [PATCH] ECI: ECI 899 binary in `UNICODE_MODE` now converted from UTF-8, not treated literally as it was before, which was inconsistent (literal interpretation now requires `DATA_MODE`) --- ChangeLog | 6 +- backend/eci.c | 8 +- backend/tests/test_dotcode.c | 40 +++++----- backend/tests/test_eci.c | 150 +++++++++++++++++++++++++---------- backend/tests/test_ultra.c | 28 +++---- docs/manual.html | 109 +++++++++++++------------ docs/manual.pmd | 54 +++++++------ docs/manual.txt | 56 +++++++------ 8 files changed, 267 insertions(+), 184 deletions(-) diff --git a/ChangeLog b/ChangeLog index 29a2a944..01da6d05 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,10 @@ -Version 2.15.0.9 (dev) not released yet (2025-09-16) +Version 2.15.0.9 (dev) not released yet (2025-10-16) ==================================================== **Incompatible changes** ------------------------ +- In `UNICODE_MODE`, ECI 899 Binary input now interpreted as UTF-8 (previously + treated as-is, i.e. as binary bytes - this now requires `DATA_MODE`) - Buffer length of member `errtxt` in `zint_symbol` extended 100 -> 160 (client buffers may need checking/extending) - New `raw_segs` & `raw_seg_count` fields in `zint_symbol` for use with new @@ -64,6 +66,8 @@ Bugs - CODABLOCKF: fix misencodation of extended ASCII 0xB0-0xB9 when followed by digit (ignore 2nd byte of FNC4 when categorizing Code C characters) - AZTEC: fix GS1 mode with Structured Append (wasn't outputting initial FNC1) +- ECI: ECI 899 in UNICODE_MODE wasn't being converted from UTF-8, which was + inconsistent - set_height: fix non-compliance false positives by using epsilon in checks - UPU_S10: fix Service Indicator warning re "H" (ticket #331, props Milton Neal) - CLI: fix `separator` check to use new `ZINT_CAP_BINDABLE` instead of diff --git a/backend/eci.c b/backend/eci.c index 65d7fd9e..f9a071a2 100644 --- a/backend/eci.c +++ b/backend/eci.c @@ -211,7 +211,7 @@ static int u_utf32le(const unsigned int u, unsigned char *dest) { /* NOLINTEND(clang-analyzer-security.ArrayBound) */ -/* ECI 899 Binary, included for libzueci compatibility - assumes valid Unicode */ +/* ECI 899 Binary - assumes valid Unicode */ static int u_binary(const unsigned int u, unsigned char *dest) { if (u <= 0xFF) { *dest = (unsigned char) u; @@ -681,8 +681,8 @@ static int chr_range_cnt(const unsigned char string[], const int length, const u /* Is ECI convertible from UTF-8? */ INTERNAL int zint_is_eci_convertible(const int eci) { - if (eci == 26 || (eci > 35 && eci != 170)) { /* Exclude ECI 170 - ASCII Invariant */ - /* UTF-8 (26) or 8-bit binary data (899) or undefined (> 35 and < 899) or not character set (> 899) */ + if (eci == 26 || (eci > 35 && eci != 170 && eci != 899)) { /* Exclude ECI 170 ASCII Invariant & ECI 899 Binary */ + /* UTF-8 (26) or undefined (> 35 and < 899) or not character set (> 899) */ return 0; } return 1; @@ -779,7 +779,7 @@ INTERNAL int zint_utf8_to_eci(const int eci, const unsigned char source[], unsig if (eci == 170) { /* ASCII Invariant (archaic subset) */ eci_func = u_ascii_inv; - } else if (eci == 899) { /* Binary, for libzueci compatibility */ + } else if (eci == 899) { /* Binary */ eci_func = u_binary; } else { eci_func = eci_funcs[eci]; diff --git a/backend/tests/test_dotcode.c b/backend/tests/test_dotcode.c index f27b28c1..cd8b60ce 100644 --- a/backend/tests/test_dotcode.c +++ b/backend/tests/test_dotcode.c @@ -1391,27 +1391,25 @@ static void test_encode_segs(const testCtx *const p_ctx) { "01010000010100010001010000010000010101" "10000010100000001010100000100010001010" }, - /* 7*/ { UNICODE_MODE, 29, -1, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 899 }, { TU(""), 0, 0 } }, 0, 20, 29, 1, 0, "BIN_LATCH ECI > 0xFF; ZXing-C++ test can't handle UTF-8 binary", - "10001010001010101000000010001" - "01000001000100010100010101010" - "10000000100000100000000010101" - "00010100010001000101010000000" - "10001000000000001010101010101" - "00000100010100010000010101000" - "10001000101000001000100000000" - "01010101000101010101000101010" - "10101010101010000010001010000" - "00000001000001010101010001010" - "10001000000010100000101010101" - "01000001010100010101010100000" - "00000010101000101000000000101" - "01000101000001000100000000000" - "00100010000010100010100000101" - "01010000010101000101010100010" - "10100000100010000010001000001" - "01010101000000000100000001010" - "10101000101010000010001010001" - "01010101010100010001010001010" + /* 7*/ { UNICODE_MODE, 29, -1, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 899 }, { TU(""), 0, 0 } }, 0, 18, 29, 1, 1, "BIN_LATCH ECI > 0xFF", + "10100000000000100010101000101" + "01010100010101010101000100000" + "10101000101010000000001010100" + "01010001000100010101010000010" + "10001010101010101010000000101" + "00010000010000000000010001010" + "10001000100000001000100000000" + "00000001010001000001010101000" + "10001010100010100010101010101" + "01010101000100000101000101000" + "10000010001010100000000000001" + "00000000010101000000010001000" + "00101010100000101010100010100" + "01010100000000010100000100010" + "00001000101010001000101010101" + "01000101010101010001000100010" + "10001000000010100000001000101" + "01000100010000010101010100010" }, /* 8*/ { UNICODE_MODE, 29, -1, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 65536 }, { TU(""), 0, 0 } }, 0, 22, 29, 1, 0, "BIN_LATCH ECI > 0xFFFF; ZXing-C++ test can't handle UTF-8 binary", "10101000100000101000001010001" diff --git a/backend/tests/test_eci.c b/backend/tests/test_eci.c index dd344e49..1fba1ece 100644 --- a/backend/tests/test_eci.c +++ b/backend/tests/test_eci.c @@ -792,8 +792,7 @@ static void test_utf8_to_eci_ascii(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - ret = zint_utf8_to_eci(data[i].eci, (const unsigned char *) data[i].data, (unsigned char *) dest, - &out_length); + ret = zint_utf8_to_eci(data[i].eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(length, out_length, "i:%d length %d != %d\n", i, length, out_length); @@ -845,10 +844,11 @@ static void test_utf8_to_eci_utf16be(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -913,10 +913,11 @@ static void test_utf8_to_eci_utf16le(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -978,10 +979,11 @@ static void test_utf8_to_eci_utf32be(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -1045,10 +1047,11 @@ static void test_utf8_to_eci_utf32le(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -1116,10 +1119,11 @@ static void test_utf8_to_eci_sjis(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -1164,10 +1168,11 @@ static void test_utf8_to_eci_big5(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -1212,10 +1217,11 @@ static void test_utf8_to_eci_gb2312(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -1260,10 +1266,11 @@ static void test_utf8_to_eci_euc_kr(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -1308,10 +1315,11 @@ static void test_utf8_to_eci_gbk(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -1356,10 +1364,60 @@ static void test_utf8_to_eci_gb18030(const testCtx *const p_ctx) { length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); out_length = length; - eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length); + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); - assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length); - ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length); + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); + assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); + if (ret == 0) { + assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", + i, out_length, data[i].expected_length); + assert_nonzero(out_length <= eci_length, "i:%d out_length %d > eci_length %d\n", + i, out_length, eci_length); + } + } + + testFinish(); +} + +static void test_utf8_to_eci_binary(const testCtx *const p_ctx) { + + struct item { + const char *data; + int length; + int ret; + int expected_length; + }; + /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */ + struct item data[] = { + /* 0*/ { "\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037", 32, 0, 32 }, + /* 1*/ { " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177", 96, 0, 96 }, + /* 2*/ { "\302\200\302\201\302\202\302\203\302\204\302\205\302\206\302\207\302\210\302\211\302\212\302\213\302\214\302\215\302\216\302\217", 32, 0, 16 }, + /* 3*/ { "\302\220\302\221\302\222\302\223\302\224\302\225\302\226\302\227\302\230\302\231\302\232\302\233\302\234\302\235\302\236\302\237", 32, 0, 16 }, + /* 4*/ { "\303\200\303\201\303\202\303\203\303\204\303\205\303\206\303\207\303\210\303\211\303\212\303\213\303\214\303\215\303\216\303\217", 32, 0, 16 }, + /* 5*/ { "\303\220\303\221\303\222\303\223\303\224\303\225\303\226\303\227\303\230\303\231\303\232\303\233\303\234\303\235\303\236\303\237", 32, 0, 16 }, + }; + int data_size = ARRAY_SIZE(data); + int i, length, ret; + const int eci = 899; + + char dest[128] = {0}; /* Suppress clang -fsanitize=memory false positive */ + + testStart(p_ctx->func_name); + + for (i = 0; i < data_size; i++) { + int out_length, eci_length; + + if (testContinue(p_ctx, i)) continue; + + length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data); + out_length = length; + eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length); + + assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length, + ARRAY_SIZE(dest)); + ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length); assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret); if (ret == 0) { assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n", @@ -1385,15 +1443,22 @@ static void test_is_eci_convertible_segs(const testCtx *const p_ctx) { /* 1*/ { { { TU("A"), -1, 26 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 0, { 0, -1, -1 } }, /* 2*/ { { { TU("A"), -1, 36 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 0, { 0, -1, -1 } }, /* 3*/ { { { TU("A"), -1, 170 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 1, { 1, -1, -1 } }, - /* 4*/ { { { TU("A"), -1, 899 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 0, { 0, -1, -1 } }, - /* 5*/ { { { TU("A"), -1, 3 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 1, { 1, -1, -1 } }, - /* 6*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 0 }, { TU(""), 0, 0 } }, 1, { 0, 1, -1 } }, - /* 7*/ { { { TU("A"), -1, 0 }, { TU("A"), -1, 899 }, { TU(""), 0, 0 } }, 1, { 1, 0, -1 } }, - /* 8*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 4 }, { TU("A"), -1, 35 } }, 1, { 1, 1, 1 } }, - /* 9*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 899 }, { TU("A"), -1, 0 } }, 1, { 1, 0, 1 } }, - /* 10*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 899 }, { TU("A"), -1, 0 } }, 1, { 0, 0, 1 } }, - /* 11*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 0 }, { TU("A"), -1, 899 } }, 1, { 0, 1, 0 } }, - /* 12*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 899 }, { TU("A"), -1, 899 } }, 0, { 0, 0, 0 } }, + /* 4*/ { { { TU("A"), -1, 899 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 1, { 1, -1, -1 } }, + /* 5*/ { { { TU("A"), -1, 900 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 0, { 0, -1, -1 } }, + /* 6*/ { { { TU("A"), -1, 3 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 1, { 1, -1, -1 } }, + /* 7*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 0 }, { TU(""), 0, 0 } }, 1, { 1, 1, -1 } }, + /* 8*/ { { { TU("A"), -1, 900 }, { TU("A"), -1, 0 }, { TU(""), 0, 0 } }, 1, { 0, 1, -1 } }, + /* 9*/ { { { TU("A"), -1, 0 }, { TU("A"), -1, 899 }, { TU(""), 0, 0 } }, 1, { 1, 1, -1 } }, + /* 10*/ { { { TU("A"), -1, 0 }, { TU("A"), -1, 900 }, { TU(""), 0, 0 } }, 1, { 1, 0, -1 } }, + /* 11*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 4 }, { TU("A"), -1, 35 } }, 1, { 1, 1, 1 } }, + /* 12*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 899 }, { TU("A"), -1, 0 } }, 1, { 1, 1, 1 } }, + /* 13*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 900 }, { TU("A"), -1, 0 } }, 1, { 1, 0, 1 } }, + /* 14*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 899 }, { TU("A"), -1, 0 } }, 1, { 1, 1, 1 } }, + /* 15*/ { { { TU("A"), -1, 900 }, { TU("A"), -1, 900 }, { TU("A"), -1, 0 } }, 1, { 0, 0, 1 } }, + /* 16*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 0 }, { TU("A"), -1, 899 } }, 1, { 1, 1, 1 } }, + /* 17*/ { { { TU("A"), -1, 900 }, { TU("A"), -1, 0 }, { TU("A"), -1, 900 } }, 1, { 0, 1, 0 } }, + /* 18*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 899 }, { TU("A"), -1, 899 } }, 1, { 1, 1, 1 } }, + /* 19*/ { { { TU("A"), -1, 900 }, { TU("A"), -1, 900 }, { TU("A"), -1, 900 } }, 0, { 0, 0, 0 } }, }; int data_size = ARRAY_SIZE(data); int i, j, seg_count, ret; @@ -1449,7 +1514,7 @@ static void test_get_best_eci(const testCtx *const p_ctx) { length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length; - ret = zint_get_best_eci((const unsigned char *) data[i].data, length); + ret = zint_get_best_eci(ZCUCP(data[i].data), length); assert_equal(ret, data[i].ret, "i:%d zint_get_best_eci ret %d != %d\n", i, ret, data[i].ret); } @@ -1522,6 +1587,7 @@ int main(int argc, char *argv[]) { { "test_utf8_to_eci_euc_kr", test_utf8_to_eci_euc_kr }, { "test_utf8_to_eci_gbk", test_utf8_to_eci_gbk }, { "test_utf8_to_eci_gb18030", test_utf8_to_eci_gb18030 }, + { "test_utf8_to_eci_binary", test_utf8_to_eci_binary }, { "test_is_eci_convertible_segs", test_is_eci_convertible_segs }, { "test_get_best_eci", test_get_best_eci }, { "test_get_best_eci_segs", test_get_best_eci_segs }, diff --git a/backend/tests/test_ultra.c b/backend/tests/test_ultra.c index 93fee1ca..0926ddb6 100644 --- a/backend/tests/test_ultra.c +++ b/backend/tests/test_ultra.c @@ -1031,20 +1031,20 @@ static void test_encode_segs(const testCtx *const p_ctx) { "78178133151513333118657" "77777777777777777777777" }, - /* 8*/ { UNICODE_MODE, -1, -1, ULTRA_COMPRESSION, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 899 }, { TU("òóô"), -1, 10000 } }, 0, 13, 27, 0, "ECIs >= 899; BWIPP no ECI support for Ultracode", - "777777777777777777777777777" - "785786353555666665585335557" - "771783161616113513373663337" - "783786335335661355686335667" - "771785511666353666171656117" - "786781655535111113385163357" - "778787878787878787878787877" - "783781151511666355586355517" - "771785616353113666675666637" - "783781363635661511183311157" - "775786615561353366676566637" - "781785551653535633383633317" - "777777777777777777777777777" + /* 8*/ { UNICODE_MODE, -1, -1, ULTRA_COMPRESSION, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 899 }, { TU("òóô"), -1, 10000 } }, 0, 13, 25, 0, "ECIs >= 899; BWIPP no ECI support for Ultracode", + "7777777777777777777777777" + "7857861351566666663855557" + "7767836515611351356766617" + "7857863163566135133811157" + "7717856531635366665766617" + "7837813613511111536833357" + "7787878787878787878787877" + "7867811531166636633855517" + "7757853615311363366733337" + "7837811163166151533856657" + "7757836335635336165761117" + "7817863151353563316833557" + "7777777777777777777777777" }, }; const int data_size = ARRAY_SIZE(data); diff --git a/docs/manual.html b/docs/manual.html index 53a47acf..ffcc0b86 100644 --- a/docs/manual.html +++ b/docs/manual.html @@ -2789,7 +2789,8 @@ role="doc-noteref">8 899 -8-bit binary data +8-bit binary data9 @@ -3365,8 +3366,8 @@ member memfile instead of to the output file }

will print the SVG output to stdout (the file "mem.svg" is not created). This is particularly useful for -the textual formats EPS and SVG,9 allowing the output to +the textual formats EPS and SVG,10 allowing the output to be manipulated and processed by the client.

5.7 Setting Options

So far our application is not very useful unless we plan to only make @@ -3405,8 +3406,8 @@ href="#specifying-a-symbology">5.9 Specifying a Symbology. height float Symbol height in X-dimensions, excluding -fixed width-to-height symbols.10 +fixed width-to-height symbols.11 Symbol dependent @@ -3481,8 +3482,8 @@ resulting barcode symbol to. Must end in .png, .gif, .bmp, .emf, .eps, .pcx, .svg, .tif or .txt followed by a terminating -NUL.11 +NUL.12 "out.png" @@ -3892,15 +3893,15 @@ value:

BARCODE_BIND_TOP Boundary bar above the symbol only.12 +href="#fn13" class="footnote-ref" id="fnref13" +role="doc-noteref">13 BARCODE_BIND Boundary bars above and below the symbol -and between rows if stacking multiple symbols.13 +and between rows if stacking multiple symbols.14 BARCODE_BOX @@ -3950,8 +3951,8 @@ Memory (raster). BARCODE_QUIET_ZONES Add compliant quiet zones (additional to -any specified whitespace).14 +any specified whitespace).15 BARCODE_NO_QUIET_ZONES @@ -4257,8 +4258,8 @@ stacked symbologies are not stackable. ZINT_CAP_EANUPC15 +href="#fn16" class="footnote-ref" id="fnref16" +role="doc-noteref">16 Is the symbology EAN/UPC? @@ -4343,8 +4344,8 @@ etc.) the option_1, option_2 and option_3 members will be set to the values used by Zint to create the barcode. This is useful for feedback if the values were left as defaults or were overridden by Zint.

-

In particular for symbologies that have masks,16 +

In particular for symbologies that have masks,17 option_3 will contain the mask used as (N + 1) << 8, N being the mask. Also Aztec Code will return the actual ECC percentage used in option_1 as @@ -4368,8 +4369,8 @@ members of zint_seg will be set accordingly - the unconverted data in source, the data length in length, and the character set the data was converted to in eci. Any check characters encoded will be included,17 and for GS1 data any +href="#fn18" class="footnote-ref" id="fnref18" +role="doc-noteref">18 and for GS1 data any FNC1 separators will be represented as GS (ASCII 29) characters. UPC-A and UPC-E data will be expanded to EAN-13, as will EAN-8 but only if it has an add-on (otherwise it will remain at @@ -4386,8 +4387,8 @@ member using the two helper functions discussed next.

convenience functions

As a convenience the conversion done by Zint from UTF-8 to ECIs is exposed in two helper functions (compatible with the -libzueci18 functions +libzueci19 functions zueci_utf8_to_eci() and zueci_dest_len_eci()):

int ZBarcode_UTF8_To_ECI(int eci, const unsigned char *source, int length,
@@ -5061,8 +5062,8 @@ digits (60 alphanumerics) are not recommended.

Code 128 Suppress Code Set C (Code Sets A and B only)

It is sometimes advantageous to stop Code 128 from using Code Set C which compresses numerical data. The BARCODE_CODE128AB19 variant (symbology 60) suppresses +href="#fn20" class="footnote-ref" id="fnref20" +role="doc-noteref">20 variant (symbology 60) suppresses Code Set C in favour of Code Sets A and B.

"NNN-NN" represents the DX Extract as two numbers separated by a dash (-), the first number being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range -0 to 15).20

+0 to 15).21

The optional frame number is a number in the range 0 to 63, and may have a half frame indicator "A" appended. Special character sequences (with or without a half frame indicator appended) may also be @@ -9731,50 +9732,56 @@ characters undefined: #, $, @, `, {, |, }, ~ (tilde).↩︎

-
  • BARCODE_MEMORY_FILE textual formats EPS and SVG will -have Unix newlines (LF) on both Windows and Unix, i.e. not CR+LF on -Windows.

    Note that unless the --binary switch is +used, 8-bit binary data for ECI 899 must be given as UTF-8, e.g. a byte +"\x80" must be represented as the 2 bytes +"\xC2\x80"; similarly "\xC0" as +"\xC3\x80", etc.↩︎

  • -
  • The height value is ignored for Aztec +

  • BARCODE_MEMORY_FILE textual formats EPS and SVG will +have Unix newlines (LF) on both Windows and Unix, i.e. not CR+LF on +Windows.↩︎

  • +
  • The height value is ignored for Aztec (including HIBC and Aztec Rune), Code One, Data Matrix (including HIBC), DotCode, Grid Matrix, Han Xin, MaxiCode, QR Code (including HIBC, Micro QR, rMQR and UPNQR), and Ultracode - all of which have a fixed width-to-height ratio (or, in the case of Code One, a fixed height).↩︎

  • -
  • For Windows, outfile is assumed to be -UTF-8 encoded.↩︎

  • +
  • For Windows, outfile is assumed to be +UTF-8 encoded.↩︎

  • -
  • The BARCODE_BIND_TOP flag is set by +

  • The BARCODE_BIND_TOP flag is set by default for DPD - see 6.1.10.7 DPD Code.↩︎

  • -
  • The BARCODE_BIND flag is always set for +href="#fnref13" class="footnote-back" role="doc-backlink">↩︎

  • +
  • The BARCODE_BIND flag is always set for Codablock-F, Code 16K and Code 49. Special considerations apply to -ITF-14 - see 6.1.2.6 ITF-14.6.1.2.6 ITF-14.↩︎

  • -
  • Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC +

  • Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN, ITF-14, UPC-A and UPC-E have compliant quiet zones added -by default.↩︎

  • -
  • ZINT_CAP_EANUPC was previously named +

  • ZINT_CAP_EANUPC was previously named ZINT_CAP_EXTENDABLE, which is still recognised.↩︎

  • -
  • DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have -variable masks. Rectangular Micro QR Code has a fixed mask (4).↩︎

  • -
  • Except for Japanese Postal Code, whose check character -is not truly representable in the encoded data.

    DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have +variable masks. Rectangular Micro QR Code has a fixed mask (4).↩︎

  • +
  • Except for Japanese Postal Code, whose check character +is not truly representable in the encoded data.↩︎

  • -
  • The library libzueci, which can convert +

  • The library libzueci, which can convert both to and from UTF-8 and ECI, is available at https://sourceforge.net/projects/libzueci/.↩︎

  • -
  • BARCODE_CODE128AB previously used the name -BARCODE_CODE128B, which is still recognised.↩︎

  • -
  • The DX number may be looked up in The (Modified) Big +

  • BARCODE_CODE128AB previously used the name +BARCODE_CODE128B, which is still recognised.↩︎

  • +
  • The DX number may be looked up in The (Modified) Big Film Database at https://thebigfilmdatabase.merinorus.com.↩︎

  • +href="#fnref21" class="footnote-back" role="doc-backlink">↩︎

    diff --git a/docs/manual.pmd b/docs/manual.pmd index 3356e38a..56a90513 100644 --- a/docs/manual.pmd +++ b/docs/manual.pmd @@ -1366,13 +1366,17 @@ ECI Code Character Encoding Scheme (ISO/IEC 8859 schemes include ASCII) 34 UTF-32BE (High order bytes first) 35 UTF-32LE (Low order bytes first) 170 ISO/IEC 646 Invariant[^8] -899 8-bit binary data +899 8-bit binary data[^9] Table: ECI Codes {#tbl:eci_codes} [^8]: ISO/IEC 646 Invariant is a subset of ASCII with 12 characters undefined: `#`, `$`, `@`, `[`, `\`, `]`, `^`, `` ` ``, `{`, `|`, `}`, `~` (tilde). +[^9]: Note that unless the `--binary` switch is used, 8-bit binary data for ECI +899 must be given as UTF-8, e.g. a byte `"\x80"` must be represented as the 2 +bytes `"\xC2\x80"`; similarly `"\xC0"` as `"\xC3\x80"`, etc. + An ECI value of 0 does not encode any ECI information in the code symbol (unless the data contains non-default character set characters). In this case, the default character set applies (see [#tbl:default_character_sets] above). @@ -1928,10 +1932,10 @@ int main(int argc, char **argv) ``` will print the SVG output to `stdout` (the file `"mem.svg"` is not created). -This is particularly useful for the textual formats EPS and SVG,[^9] allowing +This is particularly useful for the textual formats EPS and SVG,[^10] allowing the output to be manipulated and processed by the client. -[^9]: BARCODE_MEMORY_FILE textual formats EPS and SVG will have Unix newlines +[^10]: BARCODE_MEMORY_FILE textual formats EPS and SVG will have Unix newlines (LF) on both Windows and Unix, i.e. not CR+LF on Windows. ## 5.7 Setting Options @@ -1952,7 +1956,7 @@ Member Name Type Meaning Default Value `height` float Symbol height in Symbol dependent X-dimensions, excluding fixed width-to-height - symbols.[^10] + symbols.[^11] `scale` float Scale factor for 1.0 adjusting size of image @@ -2002,7 +2006,7 @@ Member Name Type Meaning Default Value `.eps`, `.pcx`, `.svg`, `.tif` or `.txt` followed by a terminating - `NUL`.[^11] + `NUL`.[^12] `primary` character Primary message data for `""` (empty) string more complex symbols, @@ -2129,13 +2133,13 @@ Member Name Type Meaning Default Value Table: API Structure `zint_symbol` {#tbl:api_structure_zint_symbol} -[^10]: The `height` value is ignored for Aztec (including HIBC and Aztec Rune), +[^11]: The `height` value is ignored for Aztec (including HIBC and Aztec Rune), Code One, Data Matrix (including HIBC), DotCode, Grid Matrix, Han Xin, MaxiCode, QR Code (including HIBC, Micro QR, rMQR and UPNQR), and Ultracode - all of which have a fixed width-to-height ratio (or, in the case of Code One, a fixed height). -[^11]: For Windows, `outfile` is assumed to be UTF-8 encoded. +[^12]: For Windows, `outfile` is assumed to be UTF-8 encoded. To alter these values use the syntax shown in the example below. This code has the same result as the previous example except the output is now taller and @@ -2301,10 +2305,10 @@ Value Effect ------------------------- --------------------------------------------------- 0 No options selected. -`BARCODE_BIND_TOP` Boundary bar above the symbol only.[^12] +`BARCODE_BIND_TOP` Boundary bar above the symbol only.[^13] `BARCODE_BIND` Boundary bars above and below the symbol and - between rows if stacking multiple symbols.[^13] + between rows if stacking multiple symbols.[^14] `BARCODE_BOX` Add a box surrounding the symbol and whitespace. @@ -2331,7 +2335,7 @@ Value Effect Symbols in Memory (raster)]. `BARCODE_QUIET_ZONES` Add compliant quiet zones (additional to any - specified whitespace).[^14] + specified whitespace).[^15] `BARCODE_NO_QUIET_ZONES` Disable quiet zones, notably those with defaults. @@ -2353,13 +2357,13 @@ Value Effect Table: API `output_options` Values {#tbl:api_output_options} -[^12]: The `BARCODE_BIND_TOP` flag is set by default for DPD - see [6.1.10.7 DPD +[^13]: The `BARCODE_BIND_TOP` flag is set by default for DPD - see [6.1.10.7 DPD Code]. -[^13]: The `BARCODE_BIND` flag is always set for Codablock-F, Code 16K and Code +[^14]: The `BARCODE_BIND` flag is always set for Codablock-F, Code 16K and Code 49. Special considerations apply to ITF-14 - see [6.1.2.6 ITF-14]. -[^14]: Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN, +[^15]: Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN, ITF-14, UPC-A and UPC-E have compliant quiet zones added by default. ## 5.11 Setting the Input Mode @@ -2625,7 +2629,7 @@ Value Meaning `ZINT_CAP_STACKABLE` Is the symbology stackable? Note that stacked symbologies are not stackable. -`ZINT_CAP_EANUPC`[^15] Is the symbology EAN/UPC? +`ZINT_CAP_EANUPC`[^16] Is the symbology EAN/UPC? `ZINT_CAP_COMPOSITE` Does the symbology support composite data? (see [6.3 GS1 Composite Symbols (ISO 24723)] below) @@ -2661,7 +2665,7 @@ Value Meaning Table: API Capability Flags {#tbl:api_cap} -[^15]: `ZINT_CAP_EANUPC` was previously named `ZINT_CAP_EXTENDABLE`, which is +[^16]: `ZINT_CAP_EANUPC` was previously named `ZINT_CAP_EXTENDABLE`, which is still recognised. For example: @@ -2688,7 +2692,7 @@ On successful encodation (after using `ZBarcode_Encode()` etc.) the `option_1`, create the barcode. This is useful for feedback if the values were left as defaults or were overridden by Zint. -In particular for symbologies that have masks,[^16] `option_3` will contain the +In particular for symbologies that have masks,[^17] `option_3` will contain the mask used as `(N + 1) << 8`, N being the mask. Also Aztec Code will return the actual ECC percentage used in `option_1` as `P << 8`, where P is the integer percentage, the low byte containing the values given in [#tbl:aztec_eccs] (with @@ -2705,7 +2709,7 @@ being set in `raw_seg_count` - which will always be at least one. The `source`, `length` and `eci` members of `zint_seg` will be set accordingly - the unconverted data in `source`, the data length in `length`, and the character set the data was converted to in `eci`. Any check characters encoded will be -included,[^17] and for GS1 data any `FNC1` separators will be represented as +included,[^18] and for GS1 data any `FNC1` separators will be represented as `GS` (ASCII 29) characters. UPC-A and UPC-E data will be expanded to EAN-13, as will EAN-8 but only if it has an add-on (otherwise it will remain at 8 digits), and any add-ons will follow the 13 digits directly (no separator). GS1 Composite @@ -2717,16 +2721,16 @@ is `DATA_MODE`, it remains in binary; otherwise it will be in UTF-8. The UTF-8 source may be converted to the character set of the corresponding `eci` member using the two helper functions discussed next. -[^16]: DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have variable masks. +[^17]: DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have variable masks. Rectangular Micro QR Code has a fixed mask (4). -[^17]: Except for Japanese Postal Code, whose check character is not truly +[^18]: Except for Japanese Postal Code, whose check character is not truly representable in the encoded data. ## 5.17 UTF-8 to ECI convenience functions As a convenience the conversion done by Zint from UTF-8 to ECIs is exposed in -two helper functions (compatible with the `libzueci`[^18] functions +two helper functions (compatible with the `libzueci`[^19] functions `zueci_utf8_to_eci()` and `zueci_dest_len_eci()`): @@ -2746,7 +2750,7 @@ returned in `p_dest_length`, may be smaller than the estimate given by NUL-terminated. The destination buffer is not NUL-terminated. The obsolete ECIs 0, 1 and 2 are supported. -[^18]: The library `libzueci`, which can convert both to and from UTF-8 and ECI, +[^19]: The library `libzueci`, which can convert both to and from UTF-8 and ECI, is available at [https://sourceforge.net/projects/libzueci/]( https://sourceforge.net/projects/libzueci/). @@ -3351,7 +3355,7 @@ alphanumerics) are not recommended. #### 6.1.10.2 Code 128 Suppress Code Set C (Code Sets A and B only) It is sometimes advantageous to stop Code 128 from using Code Set C which -compresses numerical data. The `BARCODE_CODE128AB`[^19] variant (symbology 60) +compresses numerical data. The `BARCODE_CODE128AB`[^20] variant (symbology 60) suppresses Code Set C in favour of Code Sets A and B. ![`zint -b CODE128AB -d "130170X178"`](images/code128ab.svg){.lin} @@ -3359,7 +3363,7 @@ suppresses Code Set C in favour of Code Sets A and B. Note that the special extra escapes mentioned above are not available for this variant (nor for any other). -[^19]: `BARCODE_CODE128AB` previously used the name `BARCODE_CODE128B`, which is +[^20]: `BARCODE_CODE128AB` previously used the name `BARCODE_CODE128B`, which is still recognised. #### 6.1.10.3 GS1-128 @@ -3965,7 +3969,7 @@ first and last digit are ignored, leaving a 4-digit DX Extract number in any case, which must be in the range 16 to 2047. The second format `"NNN-NN"` represents the DX Extract as two numbers separated by a dash (`-`), the first number being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range -0 to 15).[^20] +0 to 15).[^21] The optional frame number is a number in the range 0 to 63, and may have a half frame indicator `"A"` appended. Special character sequences (with or without a @@ -3975,7 +3979,7 @@ number 62, `"K"` or `"00"` means frame number 63, and `"F"` means frame number A parity bit is automatically added by Zint. -[^20]: The DX number may be looked up in The (Modified) Big Film Database at +[^21]: The DX number may be looked up in The (Modified) Big Film Database at [https://thebigfilmdatabase.merinorus.com]( https://thebigfilmdatabase.merinorus.com). diff --git a/docs/manual.txt b/docs/manual.txt index 86381bf5..05ef6251 100644 --- a/docs/manual.txt +++ b/docs/manual.txt @@ -1444,7 +1444,7 @@ formatted. Zint automatically translates the data into the target encoding. 34 UTF-32BE (High order bytes first) 35 UTF-32LE (Low order bytes first) 170 ISO/IEC 646 Invariant[8] - 899 8-bit binary data + 899 8-bit binary data[9] Table 8: ECI Codes @@ -1950,7 +1950,7 @@ the buffer is given in memfile_size. For instance: } will print the SVG output to stdout (the file "mem.svg" is not created). This is -particularly useful for the textual formats EPS and SVG,[9] allowing the output +particularly useful for the textual formats EPS and SVG,[10] allowing the output to be manipulated and processed by the client. 5.7 Setting Options @@ -1971,7 +1971,7 @@ the following members: height float Symbol height in Symbol dependent X-dimensions, excluding fixed width-to-height - symbols.[10] + symbols.[11] scale float Scale factor for adjusting 1.0 size of image (sets @@ -2018,7 +2018,7 @@ the following members: end in .png, .gif, .bmp, .emf, .eps, .pcx, .svg, .tif or .txt followed by a - terminating NUL.[11] + terminating NUL.[12] primary character Primary message data for "" (empty) string more complex symbols, with @@ -2293,10 +2293,10 @@ together when adjusting this value: -------------------------- --------------------------------------------------- 0 No options selected. - BARCODE_BIND_TOP Boundary bar above the symbol only.[12] + BARCODE_BIND_TOP Boundary bar above the symbol only.[13] BARCODE_BIND Boundary bars above and below the symbol and - between rows if stacking multiple symbols.[13] + between rows if stacking multiple symbols.[14] BARCODE_BOX Add a box surrounding the symbol and whitespace. @@ -2323,7 +2323,7 @@ together when adjusting this value: Symbols in Memory (raster). BARCODE_QUIET_ZONES Add compliant quiet zones (additional to any - specified whitespace).[14] + specified whitespace).[15] BARCODE_NO_QUIET_ZONES Disable quiet zones, notably those with defaults. @@ -2581,7 +2581,7 @@ see which are set. ZINT_CAP_STACKABLE Is the symbology stackable? Note that stacked symbologies are not stackable. - ZINT_CAP_EANUPC[15] Is the symbology EAN/UPC? + ZINT_CAP_EANUPC[16] Is the symbology EAN/UPC? ZINT_CAP_COMPOSITE Does the symbology support composite data? (see 6.3 GS1 Composite Symbols (ISO 24723) below) @@ -2639,7 +2639,7 @@ option_2 and option_3 members will be set to the values used by Zint to create the barcode. This is useful for feedback if the values were left as defaults or were overridden by Zint. -In particular for symbologies that have masks,[16] option_3 will contain the +In particular for symbologies that have masks,[17] option_3 will contain the mask used as (N + 1) << 8, N being the mask. Also Aztec Code will return the actual ECC percentage used in option_1 as P << 8, where P is the integer percentage, the low byte containing the values given in Table 39: Aztec Code @@ -2655,7 +2655,7 @@ in raw_seg_count - which will always be at least one. The source, length and eci members of zint_seg will be set accordingly - the unconverted data in source, the data length in length, and the character set the -data was converted to in eci. Any check characters encoded will be included,[17] +data was converted to in eci. Any check characters encoded will be included,[18] and for GS1 data any FNC1 separators will be represented as GS (ASCII 29) characters. UPC-A and UPC-E data will be expanded to EAN-13, as will EAN-8 but only if it has an add-on (otherwise it will remain at 8 digits), and any add-ons @@ -2671,7 +2671,7 @@ two helper functions discussed next. 5.17 UTF-8 to ECI convenience functions As a convenience the conversion done by Zint from UTF-8 to ECIs is exposed in -two helper functions (compatible with the libzueci[18] functions +two helper functions (compatible with the libzueci[19] functions zueci_utf8_to_eci() and zueci_dest_len_eci()): int ZBarcode_UTF8_To_ECI(int eci, const unsigned char *source, int length, @@ -3230,7 +3230,7 @@ alphanumerics) are not recommended. 6.1.10.2 Code 128 Suppress Code Set C (Code Sets A and B only) It is sometimes advantageous to stop Code 128 from using Code Set C which -compresses numerical data. The BARCODE_CODE128AB[19] variant (symbology 60) +compresses numerical data. The BARCODE_CODE128AB[20] variant (symbology 60) suppresses Code Set C in favour of Code Sets A and B. [zint -b CODE128AB -d "130170X178"] @@ -3803,7 +3803,7 @@ first and last digit are ignored, leaving a 4-digit DX Extract number in any case, which must be in the range 16 to 2047. The second format "NNN-NN" represents the DX Extract as two numbers separated by a dash (-), the first number being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range -0 to 15).[20] +0 to 15).[21] The optional frame number is a number in the range 0 to 63, and may have a half frame indicator "A" appended. Special character sequences (with or without a @@ -5851,40 +5851,44 @@ the yen sign (¥), and tilde (~) to overline (U+203E). [8] ISO/IEC 646 Invariant is a subset of ASCII with 12 characters undefined: #, $, @, [, \, ], ^, `, {, |, }, ~ (tilde). -[9] BARCODE_MEMORY_FILE textual formats EPS and SVG will have Unix newlines (LF) -on both Windows and Unix, i.e. not CR+LF on Windows. +[9] Note that unless the --binary switch is used, 8-bit binary data for ECI 899 +must be given as UTF-8, e.g. a byte "\x80" must be represented as the 2 bytes +"\xC2\x80"; similarly "\xC0" as "\xC3\x80", etc. -[10] The height value is ignored for Aztec (including HIBC and Aztec Rune), Code +[10] BARCODE_MEMORY_FILE textual formats EPS and SVG will have Unix newlines +(LF) on both Windows and Unix, i.e. not CR+LF on Windows. + +[11] The height value is ignored for Aztec (including HIBC and Aztec Rune), Code One, Data Matrix (including HIBC), DotCode, Grid Matrix, Han Xin, MaxiCode, QR Code (including HIBC, Micro QR, rMQR and UPNQR), and Ultracode - all of which have a fixed width-to-height ratio (or, in the case of Code One, a fixed height). -[11] For Windows, outfile is assumed to be UTF-8 encoded. +[12] For Windows, outfile is assumed to be UTF-8 encoded. -[12] The BARCODE_BIND_TOP flag is set by default for DPD - see 6.1.10.7 DPD +[13] The BARCODE_BIND_TOP flag is set by default for DPD - see 6.1.10.7 DPD Code. -[13] The BARCODE_BIND flag is always set for Codablock-F, Code 16K and Code 49. +[14] The BARCODE_BIND flag is always set for Codablock-F, Code 16K and Code 49. Special considerations apply to ITF-14 - see 6.1.2.6 ITF-14. -[14] Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN, +[15] Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN, ITF-14, UPC-A and UPC-E have compliant quiet zones added by default. -[15] ZINT_CAP_EANUPC was previously named ZINT_CAP_EXTENDABLE, which is still +[16] ZINT_CAP_EANUPC was previously named ZINT_CAP_EXTENDABLE, which is still recognised. -[16] DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have variable masks. +[17] DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have variable masks. Rectangular Micro QR Code has a fixed mask (4). -[17] Except for Japanese Postal Code, whose check character is not truly +[18] Except for Japanese Postal Code, whose check character is not truly representable in the encoded data. -[18] The library libzueci, which can convert both to and from UTF-8 and ECI, is +[19] The library libzueci, which can convert both to and from UTF-8 and ECI, is available at https://sourceforge.net/projects/libzueci/. -[19] BARCODE_CODE128AB previously used the name BARCODE_CODE128B, which is still +[20] BARCODE_CODE128AB previously used the name BARCODE_CODE128B, which is still recognised. -[20] The DX number may be looked up in The (Modified) Big Film Database at +[21] The DX number may be looked up in The (Modified) Big Film Database at https://thebigfilmdatabase.merinorus.com.