From 543696cb0674a5e574e36e2df74394d1cb707a40 Mon Sep 17 00:00:00 2001
From: gitlost <burmartke@gmail.com>
Date: Thu, 16 Oct 2025 18:23:48 +0100
Subject: [PATCH] ECI: ECI 899 binary in `UNICODE_MODE` now converted from
 UTF-8, not treated literally as it was before, which was inconsistent
 (literal interpretation now requires `DATA_MODE`)

---
 ChangeLog                    |   6 +-
 backend/eci.c                |   8 +-
 backend/tests/test_dotcode.c |  40 +++++-----
 backend/tests/test_eci.c     | 150 +++++++++++++++++++++++++----------
 backend/tests/test_ultra.c   |  28 +++----
 docs/manual.html             | 109 +++++++++++++------------
 docs/manual.pmd              |  54 +++++++------
 docs/manual.txt              |  56 +++++++------
 8 files changed, 267 insertions(+), 184 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 29a2a944..01da6d05 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,10 @@
-Version 2.15.0.9 (dev) not released yet (2025-09-16)
+Version 2.15.0.9 (dev) not released yet (2025-10-16)
 ====================================================
 
 **Incompatible changes**
 ------------------------
+- In `UNICODE_MODE`, ECI 899 Binary input now interpreted as UTF-8 (previously
+  treated as-is, i.e. as binary bytes - this now requires `DATA_MODE`)
 - Buffer length of member `errtxt` in `zint_symbol` extended 100 -> 160
   (client buffers may need checking/extending)
 - New `raw_segs` & `raw_seg_count` fields in `zint_symbol` for use with new
@@ -64,6 +66,8 @@ Bugs
 - CODABLOCKF: fix misencodation of extended ASCII 0xB0-0xB9 when followed by
   digit (ignore 2nd byte of FNC4 when categorizing Code C characters)
 - AZTEC: fix GS1 mode with Structured Append (wasn't outputting initial FNC1)
+- ECI: ECI 899 in `UNICODE_MODE` wasn't being converted from UTF-8, which was
+  inconsistent
 - set_height: fix non-compliance false positives by using epsilon in checks
 - UPU_S10: fix Service Indicator warning re "H" (ticket #331, props Milton Neal)
 - CLI: fix `separator` check to use new `ZINT_CAP_BINDABLE` instead of
diff --git a/backend/eci.c b/backend/eci.c
index 65d7fd9e..f9a071a2 100644
--- a/backend/eci.c
+++ b/backend/eci.c
@@ -211,7 +211,7 @@ static int u_utf32le(const unsigned int u, unsigned char *dest) {
 
 /* NOLINTEND(clang-analyzer-security.ArrayBound) */
 
-/* ECI 899 Binary, included for libzueci compatibility - assumes valid Unicode */
+/* ECI 899 Binary - assumes valid Unicode */
 static int u_binary(const unsigned int u, unsigned char *dest) {
     if (u <= 0xFF) {
         *dest = (unsigned char) u;
@@ -681,8 +681,8 @@ static int chr_range_cnt(const unsigned char string[], const int length, const u
 
 /* Is ECI convertible from UTF-8? */
 INTERNAL int zint_is_eci_convertible(const int eci) {
-    if (eci == 26 || (eci > 35 && eci != 170)) { /* Exclude ECI 170 - ASCII Invariant */
-        /* UTF-8 (26) or 8-bit binary data (899) or undefined (> 35 and < 899) or not character set (> 899) */
+    if (eci == 26 || (eci > 35 && eci != 170 && eci != 899)) { /* Exclude ECI 170 ASCII Invariant & ECI 899 Binary */
+        /* UTF-8 (26) or undefined (> 35 and < 899) or not character set (> 899) */
         return 0;
     }
     return 1;
@@ -779,7 +779,7 @@ INTERNAL int zint_utf8_to_eci(const int eci, const unsigned char source[], unsig
 
     if (eci == 170) { /* ASCII Invariant (archaic subset) */
         eci_func = u_ascii_inv;
-    } else if (eci == 899) { /* Binary, for libzueci compatibility */
+    } else if (eci == 899) { /* Binary */
         eci_func = u_binary;
     } else {
         eci_func = eci_funcs[eci];
diff --git a/backend/tests/test_dotcode.c b/backend/tests/test_dotcode.c
index f27b28c1..cd8b60ce 100644
--- a/backend/tests/test_dotcode.c
+++ b/backend/tests/test_dotcode.c
@@ -1391,27 +1391,25 @@ static void test_encode_segs(const testCtx *const p_ctx) {
                     "01010000010100010001010000010000010101"
                     "10000010100000001010100000100010001010"
                 },
-        /*  7*/ { UNICODE_MODE, 29, -1, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 899 }, { TU(""), 0, 0 } }, 0, 20, 29, 1, 0, "BIN_LATCH ECI > 0xFF; ZXing-C++ test can't handle UTF-8 binary",
-                    "10001010001010101000000010001"
-                    "01000001000100010100010101010"
-                    "10000000100000100000000010101"
-                    "00010100010001000101010000000"
-                    "10001000000000001010101010101"
-                    "00000100010100010000010101000"
-                    "10001000101000001000100000000"
-                    "01010101000101010101000101010"
-                    "10101010101010000010001010000"
-                    "00000001000001010101010001010"
-                    "10001000000010100000101010101"
-                    "01000001010100010101010100000"
-                    "00000010101000101000000000101"
-                    "01000101000001000100000000000"
-                    "00100010000010100010100000101"
-                    "01010000010101000101010100010"
-                    "10100000100010000010001000001"
-                    "01010101000000000100000001010"
-                    "10101000101010000010001010001"
-                    "01010101010100010001010001010"
+        /*  7*/ { UNICODE_MODE, 29, -1, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 899 }, { TU(""), 0, 0 } }, 0, 18, 29, 1, 1, "BIN_LATCH ECI > 0xFF",
+                    "10100000000000100010101000101"
+                    "01010100010101010101000100000"
+                    "10101000101010000000001010100"
+                    "01010001000100010101010000010"
+                    "10001010101010101010000000101"
+                    "00010000010000000000010001010"
+                    "10001000100000001000100000000"
+                    "00000001010001000001010101000"
+                    "10001010100010100010101010101"
+                    "01010101000100000101000101000"
+                    "10000010001010100000000000001"
+                    "00000000010101000000010001000"
+                    "00101010100000101010100010100"
+                    "01010100000000010100000100010"
+                    "00001000101010001000101010101"
+                    "01000101010101010001000100010"
+                    "10001000000010100000001000101"
+                    "01000100010000010101010100010"
                 },
         /*  8*/ { UNICODE_MODE, 29, -1, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 65536 }, { TU(""), 0, 0 } }, 0, 22, 29, 1, 0, "BIN_LATCH ECI > 0xFFFF; ZXing-C++ test can't handle UTF-8 binary",
                     "10101000100000101000001010001"
diff --git a/backend/tests/test_eci.c b/backend/tests/test_eci.c
index dd344e49..1fba1ece 100644
--- a/backend/tests/test_eci.c
+++ b/backend/tests/test_eci.c
@@ -792,8 +792,7 @@ static void test_utf8_to_eci_ascii(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        ret = zint_utf8_to_eci(data[i].eci, (const unsigned char *) data[i].data, (unsigned char *) dest,
-                                &out_length);
+        ret = zint_utf8_to_eci(data[i].eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(length, out_length, "i:%d length %d != %d\n", i, length, out_length);
@@ -845,10 +844,11 @@ static void test_utf8_to_eci_utf16be(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -913,10 +913,11 @@ static void test_utf8_to_eci_utf16le(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -978,10 +979,11 @@ static void test_utf8_to_eci_utf32be(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -1045,10 +1047,11 @@ static void test_utf8_to_eci_utf32le(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -1116,10 +1119,11 @@ static void test_utf8_to_eci_sjis(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -1164,10 +1168,11 @@ static void test_utf8_to_eci_big5(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -1212,10 +1217,11 @@ static void test_utf8_to_eci_gb2312(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -1260,10 +1266,11 @@ static void test_utf8_to_eci_euc_kr(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -1308,10 +1315,11 @@ static void test_utf8_to_eci_gbk(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -1356,10 +1364,60 @@ static void test_utf8_to_eci_gb18030(const testCtx *const p_ctx) {
 
         length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
         out_length = length;
-        eci_length = zint_get_eci_length(eci, (const unsigned char *) data[i].data, length);
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
 
-        assert_nonzero(eci_length + 1 <= 1024, "i:%d eci_length %d + 1 > 1024\n", i, eci_length);
-        ret = zint_utf8_to_eci(eci, (const unsigned char *) data[i].data, (unsigned char *) dest, &out_length);
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
+        assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
+        if (ret == 0) {
+            assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
+                        i, out_length, data[i].expected_length);
+            assert_nonzero(out_length <= eci_length, "i:%d out_length %d > eci_length %d\n",
+                        i, out_length, eci_length);
+        }
+    }
+
+    testFinish();
+}
+
+static void test_utf8_to_eci_binary(const testCtx *const p_ctx) {
+
+    struct item {
+        const char *data;
+        int length;
+        int ret;
+        int expected_length;
+    };
+    /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */
+    struct item data[] = {
+        /*  0*/ { "\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037", 32, 0, 32 },
+        /*  1*/ { " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177", 96, 0, 96 },
+        /*  2*/ { "\302\200\302\201\302\202\302\203\302\204\302\205\302\206\302\207\302\210\302\211\302\212\302\213\302\214\302\215\302\216\302\217", 32, 0, 16 },
+        /*  3*/ { "\302\220\302\221\302\222\302\223\302\224\302\225\302\226\302\227\302\230\302\231\302\232\302\233\302\234\302\235\302\236\302\237", 32, 0, 16 },
+        /*  4*/ { "\303\200\303\201\303\202\303\203\303\204\303\205\303\206\303\207\303\210\303\211\303\212\303\213\303\214\303\215\303\216\303\217", 32, 0, 16 },
+        /*  5*/ { "\303\220\303\221\303\222\303\223\303\224\303\225\303\226\303\227\303\230\303\231\303\232\303\233\303\234\303\235\303\236\303\237", 32, 0, 16 },
+    };
+    int data_size = ARRAY_SIZE(data);
+    int i, length, ret;
+    const int eci = 899;
+
+    char dest[128] = {0}; /* Suppress clang -fsanitize=memory false positive */
+
+    testStart(p_ctx->func_name);
+
+    for (i = 0; i < data_size; i++) {
+        int out_length, eci_length;
+
+        if (testContinue(p_ctx, i)) continue;
+
+        length = data[i].length != -1 ? data[i].length : (int) strlen(data[i].data);
+        out_length = length;
+        eci_length = zint_get_eci_length(eci, ZCUCP(data[i].data), length);
+
+        assert_nonzero(eci_length + 1 <= ARRAY_SIZE(dest), "i:%d eci_length %d + 1 > %d\n", i, eci_length,
+                        ARRAY_SIZE(dest));
+        ret = zint_utf8_to_eci(eci, ZCUCP(data[i].data), ZUCP(dest), &out_length);
         assert_equal(ret, data[i].ret, "i:%d zint_utf8_to_eci ret %d != %d\n", i, ret, data[i].ret);
         if (ret == 0) {
             assert_equal(out_length, data[i].expected_length, "i:%d length %d != %d\n",
@@ -1385,15 +1443,22 @@ static void test_is_eci_convertible_segs(const testCtx *const p_ctx) {
         /*  1*/ { { { TU("A"), -1, 26 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 0, { 0, -1, -1 } },
         /*  2*/ { { { TU("A"), -1, 36 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 0, { 0, -1, -1 } },
         /*  3*/ { { { TU("A"), -1, 170 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 1, { 1, -1, -1 } },
-        /*  4*/ { { { TU("A"), -1, 899 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 0, { 0, -1, -1 } },
-        /*  5*/ { { { TU("A"), -1, 3 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 1, { 1, -1, -1 } },
-        /*  6*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 0 }, { TU(""), 0, 0 } }, 1, { 0, 1, -1 } },
-        /*  7*/ { { { TU("A"), -1, 0 }, { TU("A"), -1, 899 }, { TU(""), 0, 0 } }, 1, { 1, 0, -1 } },
-        /*  8*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 4 }, { TU("A"), -1, 35 } }, 1, { 1, 1, 1 } },
-        /*  9*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 899 }, { TU("A"), -1, 0 } }, 1, { 1, 0, 1 } },
-        /* 10*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 899 }, { TU("A"), -1, 0 } }, 1, { 0, 0, 1 } },
-        /* 11*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 0 }, { TU("A"), -1, 899 } }, 1, { 0, 1, 0 } },
-        /* 12*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 899 }, { TU("A"), -1, 899 } }, 0, { 0, 0, 0 } },
+        /*  4*/ { { { TU("A"), -1, 899 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 1, { 1, -1, -1 } },
+        /*  5*/ { { { TU("A"), -1, 900 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 0, { 0, -1, -1 } },
+        /*  6*/ { { { TU("A"), -1, 3 }, { TU(""), 0, 0 }, { TU(""), 0, 0 } }, 1, { 1, -1, -1 } },
+        /*  7*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 0 }, { TU(""), 0, 0 } }, 1, { 1, 1, -1 } },
+        /*  8*/ { { { TU("A"), -1, 900 }, { TU("A"), -1, 0 }, { TU(""), 0, 0 } }, 1, { 0, 1, -1 } },
+        /*  9*/ { { { TU("A"), -1, 0 }, { TU("A"), -1, 899 }, { TU(""), 0, 0 } }, 1, { 1, 1, -1 } },
+        /* 10*/ { { { TU("A"), -1, 0 }, { TU("A"), -1, 900 }, { TU(""), 0, 0 } }, 1, { 1, 0, -1 } },
+        /* 11*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 4 }, { TU("A"), -1, 35 } }, 1, { 1, 1, 1 } },
+        /* 12*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 899 }, { TU("A"), -1, 0 } }, 1, { 1, 1, 1 } },
+        /* 13*/ { { { TU("A"), -1, 3 }, { TU("A"), -1, 900 }, { TU("A"), -1, 0 } }, 1, { 1, 0, 1 } },
+        /* 14*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 899 }, { TU("A"), -1, 0 } }, 1, { 1, 1, 1 } },
+        /* 15*/ { { { TU("A"), -1, 900 }, { TU("A"), -1, 900 }, { TU("A"), -1, 0 } }, 1, { 0, 0, 1 } },
+        /* 16*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 0 }, { TU("A"), -1, 899 } }, 1, { 1, 1, 1 } },
+        /* 17*/ { { { TU("A"), -1, 900 }, { TU("A"), -1, 0 }, { TU("A"), -1, 900 } }, 1, { 0, 1, 0 } },
+        /* 18*/ { { { TU("A"), -1, 899 }, { TU("A"), -1, 899 }, { TU("A"), -1, 899 } }, 1, { 1, 1, 1 } },
+        /* 19*/ { { { TU("A"), -1, 900 }, { TU("A"), -1, 900 }, { TU("A"), -1, 900 } }, 0, { 0, 0, 0 } },
     };
     int data_size = ARRAY_SIZE(data);
     int i, j, seg_count, ret;
@@ -1449,7 +1514,7 @@ static void test_get_best_eci(const testCtx *const p_ctx) {
 
         length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length;
 
-        ret = zint_get_best_eci((const unsigned char *) data[i].data, length);
+        ret = zint_get_best_eci(ZCUCP(data[i].data), length);
         assert_equal(ret, data[i].ret, "i:%d zint_get_best_eci ret %d != %d\n", i, ret, data[i].ret);
     }
 
@@ -1522,6 +1587,7 @@ int main(int argc, char *argv[]) {
         { "test_utf8_to_eci_euc_kr", test_utf8_to_eci_euc_kr },
         { "test_utf8_to_eci_gbk", test_utf8_to_eci_gbk },
         { "test_utf8_to_eci_gb18030", test_utf8_to_eci_gb18030 },
+        { "test_utf8_to_eci_binary", test_utf8_to_eci_binary },
         { "test_is_eci_convertible_segs", test_is_eci_convertible_segs },
         { "test_get_best_eci", test_get_best_eci },
         { "test_get_best_eci_segs", test_get_best_eci_segs },
diff --git a/backend/tests/test_ultra.c b/backend/tests/test_ultra.c
index 93fee1ca..0926ddb6 100644
--- a/backend/tests/test_ultra.c
+++ b/backend/tests/test_ultra.c
@@ -1031,20 +1031,20 @@ static void test_encode_segs(const testCtx *const p_ctx) {
                     "78178133151513333118657"
                     "77777777777777777777777"
                 },
-        /*  8*/ { UNICODE_MODE, -1, -1, ULTRA_COMPRESSION, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 899 }, { TU("òóô"), -1, 10000 } }, 0, 13, 27, 0, "ECIs >= 899; BWIPP no ECI support for Ultracode",
-                    "777777777777777777777777777"
-                    "785786353555666665585335557"
-                    "771783161616113513373663337"
-                    "783786335335661355686335667"
-                    "771785511666353666171656117"
-                    "786781655535111113385163357"
-                    "778787878787878787878787877"
-                    "783781151511666355586355517"
-                    "771785616353113666675666637"
-                    "783781363635661511183311157"
-                    "775786615561353366676566637"
-                    "781785551653535633383633317"
-                    "777777777777777777777777777"
+        /*  8*/ { UNICODE_MODE, -1, -1, ULTRA_COMPRESSION, { 0, 0, "" }, { { TU("çèéêëì"), -1, 0 }, { TU("òóô"), -1, 899 }, { TU("òóô"), -1, 10000 } }, 0, 13, 25, 0, "ECIs >= 899; BWIPP no ECI support for Ultracode",
+                    "7777777777777777777777777"
+                    "7857861351566666663855557"
+                    "7767836515611351356766617"
+                    "7857863163566135133811157"
+                    "7717856531635366665766617"
+                    "7837813613511111536833357"
+                    "7787878787878787878787877"
+                    "7867811531166636633855517"
+                    "7757853615311363366733337"
+                    "7837811163166151533856657"
+                    "7757836335635336165761117"
+                    "7817863151353563316833557"
+                    "7777777777777777777777777"
                 },
     };
     const int data_size = ARRAY_SIZE(data);
diff --git a/docs/manual.html b/docs/manual.html
index 53a47acf..ffcc0b86 100644
--- a/docs/manual.html
+++ b/docs/manual.html
@@ -2789,7 +2789,8 @@ role="doc-noteref"><sup>8</sup></a></td>
 </tr>
 <tr>
 <td>899</td>
-<td>8-bit binary data</td>
+<td>8-bit binary data<a href="#fn9" class="footnote-ref" id="fnref9"
+role="doc-noteref"><sup>9</sup></a></td>
 </tr>
 </tbody>
 </table>
@@ -3365,8 +3366,8 @@ member <code>memfile</code> instead of to the output file
 <span id="cb63-16"><a href="#cb63-16" aria-hidden="true" tabindex="-1"></a><span class="op">}</span></span></code></pre></div>
 <p>will print the SVG output to <code>stdout</code> (the file
 <code>"mem.svg"</code> is not created). This is particularly useful for
-the textual formats EPS and SVG,<a href="#fn9" class="footnote-ref"
-id="fnref9" role="doc-noteref"><sup>9</sup></a> allowing the output to
+the textual formats EPS and SVG,<a href="#fn10" class="footnote-ref"
+id="fnref10" role="doc-noteref"><sup>10</sup></a> allowing the output to
 be manipulated and processed by the client.</p>
 <h2 id="setting-options">5.7 Setting Options</h2>
 <p>So far our application is not very useful unless we plan to only make
@@ -3405,8 +3406,8 @@ href="#specifying-a-symbology">5.9 Specifying a Symbology</a>.</td>
 <td style="text-align: left;"><code>height</code></td>
 <td style="text-align: left;">float</td>
 <td style="text-align: left;">Symbol height in X-dimensions, excluding
-fixed width-to-height symbols.<a href="#fn10" class="footnote-ref"
-id="fnref10" role="doc-noteref"><sup>10</sup></a></td>
+fixed width-to-height symbols.<a href="#fn11" class="footnote-ref"
+id="fnref11" role="doc-noteref"><sup>11</sup></a></td>
 <td style="text-align: left;">Symbol dependent</td>
 </tr>
 <tr>
@@ -3481,8 +3482,8 @@ resulting barcode symbol to. Must end in <code>.png</code>,
 <code>.gif</code>, <code>.bmp</code>, <code>.emf</code>,
 <code>.eps</code>, <code>.pcx</code>, <code>.svg</code>,
 <code>.tif</code> or <code>.txt</code> followed by a terminating
-<code>NUL</code>.<a href="#fn11" class="footnote-ref" id="fnref11"
-role="doc-noteref"><sup>11</sup></a></td>
+<code>NUL</code>.<a href="#fn12" class="footnote-ref" id="fnref12"
+role="doc-noteref"><sup>12</sup></a></td>
 <td style="text-align: left;"><code>"out.png"</code></td>
 </tr>
 <tr>
@@ -3892,15 +3893,15 @@ value:</p>
 <tr>
 <td style="text-align: left;"><code>BARCODE_BIND_TOP</code></td>
 <td style="text-align: left;">Boundary bar above the symbol only.<a
-href="#fn12" class="footnote-ref" id="fnref12"
-role="doc-noteref"><sup>12</sup></a></td>
+href="#fn13" class="footnote-ref" id="fnref13"
+role="doc-noteref"><sup>13</sup></a></td>
 </tr>
 <tr>
 <td style="text-align: left;"><code>BARCODE_BIND</code></td>
 <td style="text-align: left;">Boundary bars above and below the symbol
-and between rows if stacking multiple symbols.<a href="#fn13"
-class="footnote-ref" id="fnref13"
-role="doc-noteref"><sup>13</sup></a></td>
+and between rows if stacking multiple symbols.<a href="#fn14"
+class="footnote-ref" id="fnref14"
+role="doc-noteref"><sup>14</sup></a></td>
 </tr>
 <tr>
 <td style="text-align: left;"><code>BARCODE_BOX</code></td>
@@ -3950,8 +3951,8 @@ Memory (raster)</a>.</td>
 <tr>
 <td style="text-align: left;"><code>BARCODE_QUIET_ZONES</code></td>
 <td style="text-align: left;">Add compliant quiet zones (additional to
-any specified whitespace).<a href="#fn14" class="footnote-ref"
-id="fnref14" role="doc-noteref"><sup>14</sup></a></td>
+any specified whitespace).<a href="#fn15" class="footnote-ref"
+id="fnref15" role="doc-noteref"><sup>15</sup></a></td>
 </tr>
 <tr>
 <td style="text-align: left;"><code>BARCODE_NO_QUIET_ZONES</code></td>
@@ -4257,8 +4258,8 @@ stacked symbologies are not stackable.</td>
 </tr>
 <tr>
 <td style="text-align: left;"><code>ZINT_CAP_EANUPC</code><a
-href="#fn15" class="footnote-ref" id="fnref15"
-role="doc-noteref"><sup>15</sup></a></td>
+href="#fn16" class="footnote-ref" id="fnref16"
+role="doc-noteref"><sup>16</sup></a></td>
 <td style="text-align: left;">Is the symbology EAN/UPC?</td>
 </tr>
 <tr>
@@ -4343,8 +4344,8 @@ etc.) the <code>option_1</code>, <code>option_2</code> and
 <code>option_3</code> members will be set to the values used by Zint to
 create the barcode. This is useful for feedback if the values were left
 as defaults or were overridden by Zint.</p>
-<p>In particular for symbologies that have masks,<a href="#fn16"
-class="footnote-ref" id="fnref16" role="doc-noteref"><sup>16</sup></a>
+<p>In particular for symbologies that have masks,<a href="#fn17"
+class="footnote-ref" id="fnref17" role="doc-noteref"><sup>17</sup></a>
 <code>option_3</code> will contain the mask used as
 <code>(N + 1) &lt;&lt; 8</code>, N being the mask. Also Aztec Code will
 return the actual ECC percentage used in <code>option_1</code> as
@@ -4368,8 +4369,8 @@ members of <code>zint_seg</code> will be set accordingly - the
 unconverted data in <code>source</code>, the data length in
 <code>length</code>, and the character set the data was converted to in
 <code>eci</code>. Any check characters encoded will be included,<a
-href="#fn17" class="footnote-ref" id="fnref17"
-role="doc-noteref"><sup>17</sup></a> and for GS1 data any
+href="#fn18" class="footnote-ref" id="fnref18"
+role="doc-noteref"><sup>18</sup></a> and for GS1 data any
 <code>FNC1</code> separators will be represented as <code>GS</code>
 (ASCII 29) characters. UPC-A and UPC-E data will be expanded to EAN-13,
 as will EAN-8 but only if it has an add-on (otherwise it will remain at
@@ -4386,8 +4387,8 @@ member using the two helper functions discussed next.</p>
 convenience functions</h2>
 <p>As a convenience the conversion done by Zint from UTF-8 to ECIs is
 exposed in two helper functions (compatible with the
-<code>libzueci</code><a href="#fn18" class="footnote-ref" id="fnref18"
-role="doc-noteref"><sup>18</sup></a> functions
+<code>libzueci</code><a href="#fn19" class="footnote-ref" id="fnref19"
+role="doc-noteref"><sup>19</sup></a> functions
 <code>zueci_utf8_to_eci()</code> and
 <code>zueci_dest_len_eci()</code>):</p>
 <div class="sourceCode" id="cb84"><pre class="sourceCode c"><code class="sourceCode c"><span id="cb84-1"><a href="#cb84-1" aria-hidden="true" tabindex="-1"></a><span class="dt">int</span> ZBarcode_UTF8_To_ECI<span class="op">(</span><span class="dt">int</span> eci<span class="op">,</span> <span class="dt">const</span> <span class="dt">unsigned</span> <span class="dt">char</span> <span class="op">*</span>source<span class="op">,</span> <span class="dt">int</span> length<span class="op">,</span></span>
@@ -5061,8 +5062,8 @@ digits (60 alphanumerics) are not recommended.</p>
 Code 128 Suppress Code Set C (Code Sets A and B only)</h4>
 <p>It is sometimes advantageous to stop Code 128 from using Code Set C
 which compresses numerical data. The <code>BARCODE_CODE128AB</code><a
-href="#fn19" class="footnote-ref" id="fnref19"
-role="doc-noteref"><sup>19</sup></a> variant (symbology 60) suppresses
+href="#fn20" class="footnote-ref" id="fnref20"
+role="doc-noteref"><sup>20</sup></a> variant (symbology 60) suppresses
 Code Set C in favour of Code Sets A and B.</p>
 <figure>
 <img src="images/code128ab.svg" class="lin"
@@ -5820,8 +5821,8 @@ the 6-digit version the first and last digit are ignored, leaving a
 2047. The second format <code>"NNN-NN"</code> represents the DX Extract
 as two numbers separated by a dash (<code>-</code>), the first number
 being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range
-0 to 15).<a href="#fn20" class="footnote-ref" id="fnref20"
-role="doc-noteref"><sup>20</sup></a></p>
+0 to 15).<a href="#fn21" class="footnote-ref" id="fnref21"
+role="doc-noteref"><sup>21</sup></a></p>
 <p>The optional frame number is a number in the range 0 to 63, and may
 have a half frame indicator <code>"A"</code> appended. Special character
 sequences (with or without a half frame indicator appended) may also be
@@ -9731,50 +9732,56 @@ characters undefined: <code>#</code>, <code>$</code>, <code>@</code>,
 <code>`</code>, <code>{</code>, <code>|</code>, <code>}</code>,
 <code>~</code> (tilde).<a href="#fnref8" class="footnote-back"
 role="doc-backlink">↩︎</a></p></li>
-<li id="fn9"><p>BARCODE_MEMORY_FILE textual formats EPS and SVG will
-have Unix newlines (LF) on both Windows and Unix, i.e. not CR+LF on
-Windows.<a href="#fnref9" class="footnote-back"
+<li id="fn9"><p>Note that unless the <code>--binary</code> switch is
+used, 8-bit binary data for ECI 899 must be given as UTF-8, e.g. a byte
+<code>"\x80"</code> must be represented as the 2 bytes
+<code>"\xC2\x80"</code>; similarly <code>"\xC0"</code> as
+<code>"\xC3\x80"</code>, etc.<a href="#fnref9" class="footnote-back"
 role="doc-backlink">↩︎</a></p></li>
-<li id="fn10"><p>The <code>height</code> value is ignored for Aztec
+<li id="fn10"><p>BARCODE_MEMORY_FILE textual formats EPS and SVG will
+have Unix newlines (LF) on both Windows and Unix, i.e. not CR+LF on
+Windows.<a href="#fnref10" class="footnote-back"
+role="doc-backlink">↩︎</a></p></li>
+<li id="fn11"><p>The <code>height</code> value is ignored for Aztec
 (including HIBC and Aztec Rune), Code One, Data Matrix (including HIBC),
 DotCode, Grid Matrix, Han Xin, MaxiCode, QR Code (including HIBC, Micro
 QR, rMQR and UPNQR), and Ultracode - all of which have a fixed
 width-to-height ratio (or, in the case of Code One, a fixed height).<a
-href="#fnref10" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
-<li id="fn11"><p>For Windows, <code>outfile</code> is assumed to be
-UTF-8 encoded.<a href="#fnref11" class="footnote-back"
+href="#fnref11" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
+<li id="fn12"><p>For Windows, <code>outfile</code> is assumed to be
+UTF-8 encoded.<a href="#fnref12" class="footnote-back"
 role="doc-backlink">↩︎</a></p></li>
-<li id="fn12"><p>The <code>BARCODE_BIND_TOP</code> flag is set by
+<li id="fn13"><p>The <code>BARCODE_BIND_TOP</code> flag is set by
 default for DPD - see <a href="#dpd-code">6.1.10.7 DPD Code</a>.<a
-href="#fnref12" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
-<li id="fn13"><p>The <code>BARCODE_BIND</code> flag is always set for
+href="#fnref13" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
+<li id="fn14"><p>The <code>BARCODE_BIND</code> flag is always set for
 Codablock-F, Code 16K and Code 49. Special considerations apply to
-ITF-14 - see <a href="#itf-14">6.1.2.6 ITF-14</a>.<a href="#fnref13"
+ITF-14 - see <a href="#itf-14">6.1.2.6 ITF-14</a>.<a href="#fnref14"
 class="footnote-back" role="doc-backlink">↩︎</a></p></li>
-<li id="fn14"><p>Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC
+<li id="fn15"><p>Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC
 add-ons, ISBN, ITF-14, UPC-A and UPC-E have compliant quiet zones added
-by default.<a href="#fnref14" class="footnote-back"
+by default.<a href="#fnref15" class="footnote-back"
 role="doc-backlink">↩︎</a></p></li>
-<li id="fn15"><p><code>ZINT_CAP_EANUPC</code> was previously named
+<li id="fn16"><p><code>ZINT_CAP_EANUPC</code> was previously named
 <code>ZINT_CAP_EXTENDABLE</code>, which is still recognised.<a
-href="#fnref15" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
-<li id="fn16"><p>DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have
-variable masks. Rectangular Micro QR Code has a fixed mask (4).<a
 href="#fnref16" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
-<li id="fn17"><p>Except for Japanese Postal Code, whose check character
-is not truly representable in the encoded data.<a href="#fnref17"
+<li id="fn17"><p>DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have
+variable masks. Rectangular Micro QR Code has a fixed mask (4).<a
+href="#fnref17" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
+<li id="fn18"><p>Except for Japanese Postal Code, whose check character
+is not truly representable in the encoded data.<a href="#fnref18"
 class="footnote-back" role="doc-backlink">↩︎</a></p></li>
-<li id="fn18"><p>The library <code>libzueci</code>, which can convert
+<li id="fn19"><p>The library <code>libzueci</code>, which can convert
 both to and from UTF-8 and ECI, is available at <a
 href="https://sourceforge.net/projects/libzueci/">https://sourceforge.net/projects/libzueci/</a>.<a
-href="#fnref18" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
-<li id="fn19"><p><code>BARCODE_CODE128AB</code> previously used the name
-<code>BARCODE_CODE128B</code>, which is still recognised.<a
 href="#fnref19" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
-<li id="fn20"><p>The DX number may be looked up in The (Modified) Big
+<li id="fn20"><p><code>BARCODE_CODE128AB</code> previously used the name
+<code>BARCODE_CODE128B</code>, which is still recognised.<a
+href="#fnref20" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
+<li id="fn21"><p>The DX number may be looked up in The (Modified) Big
 Film Database at <a
 href="https://thebigfilmdatabase.merinorus.com">https://thebigfilmdatabase.merinorus.com</a>.<a
-href="#fnref20" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
+href="#fnref21" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
 </ol>
 </section>
 </body>
diff --git a/docs/manual.pmd b/docs/manual.pmd
index 3356e38a..56a90513 100644
--- a/docs/manual.pmd
+++ b/docs/manual.pmd
@@ -1366,13 +1366,17 @@ ECI Code  Character Encoding Scheme (ISO/IEC 8859 schemes include ASCII)
 34        UTF-32BE (High order bytes first)
 35        UTF-32LE (Low order bytes first)
 170       ISO/IEC 646 Invariant[^8]
-899       8-bit binary data
+899       8-bit binary data[^9]
 
 Table: ECI Codes {#tbl:eci_codes}
 
 [^8]: ISO/IEC 646 Invariant is a subset of ASCII with 12 characters undefined:
 `#`, `$`, `@`, `[`, `\`, `]`, `^`, `` ` ``, `{`, `|`, `}`, `~` (tilde).
 
+[^9]: Note that unless the `--binary` switch is used, 8-bit binary data for ECI
+899 must be given as UTF-8, e.g. a byte `"\x80"` must be represented as the 2
+bytes `"\xC2\x80"`; similarly `"\xC0"` as `"\xC3\x80"`, etc.
+
 An ECI value of 0 does not encode any ECI information in the code symbol (unless
 the data contains non-default character set characters). In this case, the
 default character set applies (see [#tbl:default_character_sets] above).
@@ -1928,10 +1932,10 @@ int main(int argc, char **argv)
 ```
 
 will print the SVG output to `stdout` (the file `"mem.svg"` is not created).
-This is particularly useful for the textual formats EPS and SVG,[^9] allowing
+This is particularly useful for the textual formats EPS and SVG,[^10] allowing
 the output to be manipulated and processed by the client.
 
-[^9]: BARCODE_MEMORY_FILE textual formats EPS and SVG will have Unix newlines
+[^10]: BARCODE_MEMORY_FILE textual formats EPS and SVG will have Unix newlines
 (LF) on both Windows and Unix, i.e. not CR+LF on Windows.
 
 ## 5.7 Setting Options
@@ -1952,7 +1956,7 @@ Member Name          Type        Meaning                    Default Value
 `height`             float       Symbol height in            Symbol dependent
                                  X-dimensions, excluding
                                  fixed width-to-height
-                                 symbols.[^10]
+                                 symbols.[^11]
 
 `scale`              float       Scale factor for            1.0
                                  adjusting size of image
@@ -2002,7 +2006,7 @@ Member Name          Type        Meaning                    Default Value
                                  `.eps`, `.pcx`, `.svg`,
                                  `.tif` or `.txt` followed
                                  by a terminating
-                                 `NUL`.[^11]
+                                 `NUL`.[^12]
 
 `primary`            character   Primary message data for    `""` (empty)
                      string      more complex symbols,
@@ -2129,13 +2133,13 @@ Member Name          Type        Meaning                    Default Value
 
 Table: API Structure `zint_symbol` {#tbl:api_structure_zint_symbol}
 
-[^10]: The `height` value is ignored for Aztec (including HIBC and Aztec Rune),
+[^11]: The `height` value is ignored for Aztec (including HIBC and Aztec Rune),
 Code One, Data Matrix (including HIBC), DotCode, Grid Matrix, Han Xin, MaxiCode,
 QR Code (including HIBC, Micro QR, rMQR and UPNQR), and Ultracode - all of which
 have a fixed width-to-height ratio (or, in the case of Code One, a fixed
 height).
 
-[^11]: For Windows, `outfile` is assumed to be UTF-8 encoded.
+[^12]: For Windows, `outfile` is assumed to be UTF-8 encoded.
 
 To alter these values use the syntax shown in the example below. This code has
 the same result as the previous example except the output is now taller and
@@ -2301,10 +2305,10 @@ Value                      Effect
 -------------------------  ---------------------------------------------------
  0                         No options selected.
 
-`BARCODE_BIND_TOP`         Boundary bar above the symbol only.[^12]
+`BARCODE_BIND_TOP`         Boundary bar above the symbol only.[^13]
 
 `BARCODE_BIND`             Boundary bars above and below the symbol and
-                           between rows if stacking multiple symbols.[^13]
+                           between rows if stacking multiple symbols.[^14]
 
 `BARCODE_BOX`              Add a box surrounding the symbol and whitespace.
 
@@ -2331,7 +2335,7 @@ Value                      Effect
                            Symbols in Memory (raster)].
 
 `BARCODE_QUIET_ZONES`      Add compliant quiet zones (additional to any
-                           specified whitespace).[^14]
+                           specified whitespace).[^15]
 
 `BARCODE_NO_QUIET_ZONES`   Disable quiet zones, notably those with defaults.
 
@@ -2353,13 +2357,13 @@ Value                      Effect
 
 Table: API `output_options` Values {#tbl:api_output_options}
 
-[^12]: The `BARCODE_BIND_TOP` flag is set by default for DPD - see [6.1.10.7 DPD
+[^13]: The `BARCODE_BIND_TOP` flag is set by default for DPD - see [6.1.10.7 DPD
 Code].
 
-[^13]: The `BARCODE_BIND` flag is always set for Codablock-F, Code 16K and Code
+[^14]: The `BARCODE_BIND` flag is always set for Codablock-F, Code 16K and Code
 49. Special considerations apply to ITF-14 - see [6.1.2.6 ITF-14].
 
-[^14]: Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN,
+[^15]: Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN,
 ITF-14, UPC-A and UPC-E have compliant quiet zones added by default.
 
 ## 5.11 Setting the Input Mode
@@ -2625,7 +2629,7 @@ Value                       Meaning
 `ZINT_CAP_STACKABLE`        Is the symbology stackable? Note that stacked
                             symbologies are not stackable.
 
-`ZINT_CAP_EANUPC`[^15]      Is the symbology EAN/UPC?
+`ZINT_CAP_EANUPC`[^16]      Is the symbology EAN/UPC?
 
 `ZINT_CAP_COMPOSITE`        Does the symbology support composite data? (see
                             [6.3 GS1 Composite Symbols (ISO 24723)] below)
@@ -2661,7 +2665,7 @@ Value                       Meaning
 
 Table: API Capability Flags {#tbl:api_cap}
 
-[^15]: `ZINT_CAP_EANUPC` was previously named `ZINT_CAP_EXTENDABLE`, which is
+[^16]: `ZINT_CAP_EANUPC` was previously named `ZINT_CAP_EXTENDABLE`, which is
 still recognised.
 
 For example:
@@ -2688,7 +2692,7 @@ On successful encodation (after using `ZBarcode_Encode()` etc.) the `option_1`,
 create the barcode. This is useful for feedback if the values were left as
 defaults or were overridden by Zint.
 
-In particular for symbologies that have masks,[^16] `option_3` will contain the
+In particular for symbologies that have masks,[^17] `option_3` will contain the
 mask used as `(N + 1) << 8`, N being the mask. Also Aztec Code will return the
 actual ECC percentage used in `option_1` as `P << 8`, where P is the integer
 percentage, the low byte containing the values given in [#tbl:aztec_eccs] (with
@@ -2705,7 +2709,7 @@ being set in `raw_seg_count` - which will always be at least one.
 The `source`, `length` and `eci` members of `zint_seg` will be set accordingly -
 the unconverted data in `source`, the data length in `length`, and the character
 set the data was converted to in `eci`. Any check characters encoded will be
-included,[^17] and for GS1 data any `FNC1` separators will be represented as
+included,[^18] and for GS1 data any `FNC1` separators will be represented as
 `GS` (ASCII 29) characters. UPC-A and UPC-E data will be expanded to EAN-13, as
 will EAN-8 but only if it has an add-on (otherwise it will remain at 8 digits),
 and any add-ons will follow the 13 digits directly (no separator). GS1 Composite
@@ -2717,16 +2721,16 @@ is `DATA_MODE`, it remains in binary; otherwise it will be in UTF-8. The UTF-8
 source may be converted to the character set of the corresponding `eci` member
 using the two helper functions discussed next.
 
-[^16]: DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have variable masks.
+[^17]: DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have variable masks.
 Rectangular Micro QR Code has a fixed mask (4).
 
-[^17]: Except for Japanese Postal Code, whose check character is not truly
+[^18]: Except for Japanese Postal Code, whose check character is not truly
 representable in the encoded data.
 
 ## 5.17 UTF-8 to ECI convenience functions
 
 As a convenience the conversion done by Zint from UTF-8 to ECIs is exposed in
-two helper functions (compatible with the `libzueci`[^18] functions
+two helper functions (compatible with the `libzueci`[^19] functions
 `zueci_utf8_to_eci()` and `zueci_dest_len_eci()`):
 
 
@@ -2746,7 +2750,7 @@ returned in `p_dest_length`, may be smaller than the estimate given by
 NUL-terminated. The destination buffer is not NUL-terminated. The obsolete ECIs
 0, 1 and 2 are supported.
 
-[^18]: The library `libzueci`, which can convert both to and from UTF-8 and ECI,
+[^19]: The library `libzueci`, which can convert both to and from UTF-8 and ECI,
 is available at [https://sourceforge.net/projects/libzueci/](
 https://sourceforge.net/projects/libzueci/).
 
@@ -3351,7 +3355,7 @@ alphanumerics) are not recommended.
 #### 6.1.10.2 Code 128 Suppress Code Set C (Code Sets A and B only)
 
 It is sometimes advantageous to stop Code 128 from using Code Set C which
-compresses numerical data. The `BARCODE_CODE128AB`[^19] variant (symbology 60)
+compresses numerical data. The `BARCODE_CODE128AB`[^20] variant (symbology 60)
 suppresses Code Set C in favour of Code Sets A and B.
 
 ![`zint -b CODE128AB -d "130170X178"`](images/code128ab.svg){.lin}
@@ -3359,7 +3363,7 @@ suppresses Code Set C in favour of Code Sets A and B.
 Note that the special extra escapes mentioned above are not available for this
 variant (nor for any other).
 
-[^19]: `BARCODE_CODE128AB` previously used the name `BARCODE_CODE128B`, which is
+[^20]: `BARCODE_CODE128AB` previously used the name `BARCODE_CODE128B`, which is
 still recognised.
 
 #### 6.1.10.3 GS1-128
@@ -3965,7 +3969,7 @@ first and last digit are ignored, leaving a 4-digit DX Extract number in any
 case, which must be in the range 16 to 2047. The second format `"NNN-NN"`
 represents the DX Extract as two numbers separated by a dash (`-`), the first
 number being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range
-0 to 15).[^20]
+0 to 15).[^21]
 
 The optional frame number is a number in the range 0 to 63, and may have a half
 frame indicator `"A"` appended. Special character sequences (with or without a
@@ -3975,7 +3979,7 @@ number 62, `"K"` or `"00"` means frame number 63, and `"F"` means frame number
 
 A parity bit is automatically added by Zint.
 
-[^20]: The DX number may be looked up in The (Modified) Big Film Database at
+[^21]: The DX number may be looked up in The (Modified) Big Film Database at
 [https://thebigfilmdatabase.merinorus.com](
 https://thebigfilmdatabase.merinorus.com).
 
diff --git a/docs/manual.txt b/docs/manual.txt
index 86381bf5..05ef6251 100644
--- a/docs/manual.txt
+++ b/docs/manual.txt
@@ -1444,7 +1444,7 @@ formatted. Zint automatically translates the data into the target encoding.
   34         UTF-32BE (High order bytes first)
   35         UTF-32LE (Low order bytes first)
   170        ISO/IEC 646 Invariant[8]
-  899        8-bit binary data
+  899        8-bit binary data[9]
 
   Table 8: ECI Codes
 
@@ -1950,7 +1950,7 @@ the buffer is given in memfile_size. For instance:
     }
 
 will print the SVG output to stdout (the file "mem.svg" is not created). This is
-particularly useful for the textual formats EPS and SVG,[9] allowing the output
+particularly useful for the textual formats EPS and SVG,[10] allowing the output
 to be manipulated and processed by the client.
 
 5.7 Setting Options
@@ -1971,7 +1971,7 @@ the following members:
   height               float        Symbol height in           Symbol dependent
                                     X-dimensions, excluding
                                     fixed width-to-height
-                                    symbols.[10]
+                                    symbols.[11]
 
   scale                float        Scale factor for adjusting 1.0
                                     size of image (sets
@@ -2018,7 +2018,7 @@ the following members:
                                     end in .png, .gif, .bmp,
                                     .emf, .eps, .pcx, .svg,
                                     .tif or .txt followed by a
-                                    terminating NUL.[11]
+                                    terminating NUL.[12]
 
   primary              character    Primary message data for   "" (empty)
                        string       more complex symbols, with
@@ -2293,10 +2293,10 @@ together when adjusting this value:
   -------------------------- ---------------------------------------------------
   0                          No options selected.
 
-  BARCODE_BIND_TOP           Boundary bar above the symbol only.[12]
+  BARCODE_BIND_TOP           Boundary bar above the symbol only.[13]
 
   BARCODE_BIND               Boundary bars above and below the symbol and
-                             between rows if stacking multiple symbols.[13]
+                             between rows if stacking multiple symbols.[14]
 
   BARCODE_BOX                Add a box surrounding the symbol and whitespace.
 
@@ -2323,7 +2323,7 @@ together when adjusting this value:
                              Symbols in Memory (raster).
 
   BARCODE_QUIET_ZONES        Add compliant quiet zones (additional to any
-                             specified whitespace).[14]
+                             specified whitespace).[15]
 
   BARCODE_NO_QUIET_ZONES     Disable quiet zones, notably those with defaults.
 
@@ -2581,7 +2581,7 @@ see which are set.
   ZINT_CAP_STACKABLE          Is the symbology stackable? Note that stacked
                               symbologies are not stackable.
 
-  ZINT_CAP_EANUPC[15]         Is the symbology EAN/UPC?
+  ZINT_CAP_EANUPC[16]         Is the symbology EAN/UPC?
 
   ZINT_CAP_COMPOSITE          Does the symbology support composite data? (see
                               6.3 GS1 Composite Symbols (ISO 24723) below)
@@ -2639,7 +2639,7 @@ option_2 and option_3 members will be set to the values used by Zint to create
 the barcode. This is useful for feedback if the values were left as defaults or
 were overridden by Zint.
 
-In particular for symbologies that have masks,[16] option_3 will contain the
+In particular for symbologies that have masks,[17] option_3 will contain the
 mask used as (N + 1) << 8, N being the mask. Also Aztec Code will return the
 actual ECC percentage used in option_1 as P << 8, where P is the integer
 percentage, the low byte containing the values given in Table 39: Aztec Code
@@ -2655,7 +2655,7 @@ in raw_seg_count - which will always be at least one.
 
 The source, length and eci members of zint_seg will be set accordingly - the
 unconverted data in source, the data length in length, and the character set the
-data was converted to in eci. Any check characters encoded will be included,[17]
+data was converted to in eci. Any check characters encoded will be included,[18]
 and for GS1 data any FNC1 separators will be represented as GS (ASCII 29)
 characters. UPC-A and UPC-E data will be expanded to EAN-13, as will EAN-8 but
 only if it has an add-on (otherwise it will remain at 8 digits), and any add-ons
@@ -2671,7 +2671,7 @@ two helper functions discussed next.
 5.17 UTF-8 to ECI convenience functions
 
 As a convenience the conversion done by Zint from UTF-8 to ECIs is exposed in
-two helper functions (compatible with the libzueci[18] functions
+two helper functions (compatible with the libzueci[19] functions
 zueci_utf8_to_eci() and zueci_dest_len_eci()):
 
     int ZBarcode_UTF8_To_ECI(int eci, const unsigned char *source, int length,
@@ -3230,7 +3230,7 @@ alphanumerics) are not recommended.
 6.1.10.2 Code 128 Suppress Code Set C (Code Sets A and B only)
 
 It is sometimes advantageous to stop Code 128 from using Code Set C which
-compresses numerical data. The BARCODE_CODE128AB[19] variant (symbology 60)
+compresses numerical data. The BARCODE_CODE128AB[20] variant (symbology 60)
 suppresses Code Set C in favour of Code Sets A and B.
 
 [zint -b CODE128AB -d "130170X178"]
@@ -3803,7 +3803,7 @@ first and last digit are ignored, leaving a 4-digit DX Extract number in any
 case, which must be in the range 16 to 2047. The second format "NNN-NN"
 represents the DX Extract as two numbers separated by a dash (-), the first
 number being 1 to 3 digits (range 1 to 127) and the second 1 to 2 digits (range
-0 to 15).[20]
+0 to 15).[21]
 
 The optional frame number is a number in the range 0 to 63, and may have a half
 frame indicator "A" appended. Special character sequences (with or without a
@@ -5851,40 +5851,44 @@ the yen sign (¥), and tilde (~) to overline (U+203E).
 [8] ISO/IEC 646 Invariant is a subset of ASCII with 12 characters undefined: #,
 $, @, [, \, ], ^, `, {, |, }, ~ (tilde).
 
-[9] BARCODE_MEMORY_FILE textual formats EPS and SVG will have Unix newlines (LF)
-on both Windows and Unix, i.e. not CR+LF on Windows.
+[9] Note that unless the --binary switch is used, 8-bit binary data for ECI 899
+must be given as UTF-8, e.g. a byte "\x80" must be represented as the 2 bytes
+"\xC2\x80"; similarly "\xC0" as "\xC3\x80", etc.
 
-[10] The height value is ignored for Aztec (including HIBC and Aztec Rune), Code
+[10] BARCODE_MEMORY_FILE textual formats EPS and SVG will have Unix newlines
+(LF) on both Windows and Unix, i.e. not CR+LF on Windows.
+
+[11] The height value is ignored for Aztec (including HIBC and Aztec Rune), Code
 One, Data Matrix (including HIBC), DotCode, Grid Matrix, Han Xin, MaxiCode, QR
 Code (including HIBC, Micro QR, rMQR and UPNQR), and Ultracode - all of which
 have a fixed width-to-height ratio (or, in the case of Code One, a fixed
 height).
 
-[11] For Windows, outfile is assumed to be UTF-8 encoded.
+[12] For Windows, outfile is assumed to be UTF-8 encoded.
 
-[12] The BARCODE_BIND_TOP flag is set by default for DPD - see 6.1.10.7 DPD
+[13] The BARCODE_BIND_TOP flag is set by default for DPD - see 6.1.10.7 DPD
 Code.
 
-[13] The BARCODE_BIND flag is always set for Codablock-F, Code 16K and Code 49.
+[14] The BARCODE_BIND flag is always set for Codablock-F, Code 16K and Code 49.
 Special considerations apply to ITF-14 - see 6.1.2.6 ITF-14.
 
-[14] Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN,
+[15] Codablock-F, Code 16K, Code 49, EAN-13, EAN-8, EAN/UPC add-ons, ISBN,
 ITF-14, UPC-A and UPC-E have compliant quiet zones added by default.
 
-[15] ZINT_CAP_EANUPC was previously named ZINT_CAP_EXTENDABLE, which is still
+[16] ZINT_CAP_EANUPC was previously named ZINT_CAP_EXTENDABLE, which is still
 recognised.
 
-[16] DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have variable masks.
+[17] DotCode, Han Xin, Micro QR Code, QR Code and UPNQR have variable masks.
 Rectangular Micro QR Code has a fixed mask (4).
 
-[17] Except for Japanese Postal Code, whose check character is not truly
+[18] Except for Japanese Postal Code, whose check character is not truly
 representable in the encoded data.
 
-[18] The library libzueci, which can convert both to and from UTF-8 and ECI, is
+[19] The library libzueci, which can convert both to and from UTF-8 and ECI, is
 available at https://sourceforge.net/projects/libzueci/.
 
-[19] BARCODE_CODE128AB previously used the name BARCODE_CODE128B, which is still
+[20] BARCODE_CODE128AB previously used the name BARCODE_CODE128B, which is still
 recognised.
 
-[20] The DX number may be looked up in The (Modified) Big Film Database at
+[21] The DX number may be looked up in The (Modified) Big Film Database at
 https://thebigfilmdatabase.merinorus.com.