From a42b89100e9f6045558478b3ee8aaba17cc989f8 Mon Sep 17 00:00:00 2001 From: vlakoff <544424+vlakoff@users.noreply.github.com> Date: Tue, 11 Nov 2025 07:26:55 +0100 Subject: [PATCH 1/7] Simplify encoding mode detection logic Refactor to simplify checks for numeric and alphanumeric content. --- src/Encoder/Encoder.php | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/Encoder/Encoder.php b/src/Encoder/Encoder.php index c363953..6cf6755 100644 --- a/src/Encoder/Encoder.php +++ b/src/Encoder/Encoder.php @@ -166,29 +166,21 @@ private static function chooseMode(string $content, ?string $encoding = null) : return self::isOnlyDoubleByteKanji($content) ? Mode::KANJI() : Mode::BYTE(); } - $hasNumeric = false; - $hasAlphanumeric = false; + if (ctype_digit($content)) { + return Mode::NUMERIC(); + } + $contentLength = strlen($content); for ($i = 0; $i < $contentLength; ++$i) { $char = $content[$i]; - if (ctype_digit($char)) { - $hasNumeric = true; - } elseif (-1 !== self::getAlphanumericCode(ord($char))) { - $hasAlphanumeric = true; - } else { + if (-1 === self::getAlphanumericCode(ord($char))) { return Mode::BYTE(); } } - if ($hasAlphanumeric) { - return Mode::ALPHANUMERIC(); - } elseif ($hasNumeric) { - return Mode::NUMERIC(); - } - - return Mode::BYTE(); + return Mode::ALPHANUMERIC(); } /** From 0d8280dc9505c57338f63600d985e60fb3114997 Mon Sep 17 00:00:00 2001 From: vlakoff <544424+vlakoff@users.noreply.github.com> Date: Tue, 11 Nov 2025 08:23:03 +0100 Subject: [PATCH 2/7] Fix empty string encoding regression: use Mode::BYTE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a regression introduced in the previous refactor where an empty string ('') was incorrectly encoded as Mode::ALPHANUMERIC() instead of Mode::BYTE(). While Mode::ALPHANUMERIC() is technically valid for an empty string, Mode::BYTE() is a more appropriate choice—it reflects general-purpose encoding and avoids the limitations of the alphanumeric character set. 
This change also improves clarity and helps prevent similar regressions in the future. Similarly, it ensures correct behavior for Encoder::chooseMode('', 'SHIFT-JIS'). --- src/Encoder/Encoder.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Encoder/Encoder.php b/src/Encoder/Encoder.php index 6cf6755..2830d53 100644 --- a/src/Encoder/Encoder.php +++ b/src/Encoder/Encoder.php @@ -162,6 +162,10 @@ private static function getAlphanumericCode(int $code) : int */ private static function chooseMode(string $content, ?string $encoding = null) : Mode { + if ('' === $content) { + return Mode::BYTE(); + } + if (null !== $encoding && 0 === strcasecmp($encoding, 'SHIFT-JIS')) { return self::isOnlyDoubleByteKanji($content) ? Mode::KANJI() : Mode::BYTE(); } From 589affd11cbf96a5383bf575b21708ee874bf0f3 Mon Sep 17 00:00:00 2001 From: vlakoff <544424+vlakoff@users.noreply.github.com> Date: Tue, 11 Nov 2025 14:16:09 +0100 Subject: [PATCH 3/7] Add unit test for Encoder::chooseMode with empty string and Shift-JIS encoding --- test/Encoder/EncoderTest.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/Encoder/EncoderTest.php b/test/Encoder/EncoderTest.php index 66459e9..bc90c7e 100644 --- a/test/Encoder/EncoderTest.php +++ b/test/Encoder/EncoderTest.php @@ -63,6 +63,10 @@ public function testGetAlphanumericCode() : void public function testChooseMode() : void { + // Empty string + $this->assertSame(Mode::BYTE(), $this->methods['chooseMode']->invoke(null, '')); + $this->assertSame(Mode::BYTE(), $this->methods['chooseMode']->invoke(null, '', 'SHIFT-JIS')); + // Numeric mode $this->assertSame(Mode::NUMERIC(), $this->methods['chooseMode']->invoke(null, '0')); $this->assertSame(Mode::NUMERIC(), $this->methods['chooseMode']->invoke(null, '0123456789')); @@ -77,7 +81,6 @@ public function testChooseMode() : void // 8-bit byte mode $this->assertSame(Mode::BYTE(), $this->methods['chooseMode']->invoke(null, 'a')); $this->assertSame(Mode::BYTE(), 
$this->methods['chooseMode']->invoke(null, '#')); - $this->assertSame(Mode::BYTE(), $this->methods['chooseMode']->invoke(null, '')); // AIUE in Hiragana in SHIFT-JIS $this->assertSame(Mode::BYTE(), $this->methods['chooseMode']->invoke(null, "\x8\xa\x8\xa\x8\xa\x8\xa6")); From 9abb8f18f137b4c10f737fd0ef910ec3fa0d42e2 Mon Sep 17 00:00:00 2001 From: vlakoff <544424+vlakoff@users.noreply.github.com> Date: Fri, 14 Nov 2025 12:44:29 +0100 Subject: [PATCH 4/7] Minor fixes and clarifications getAlphanumericCode(): - rename parameter from $code to $byte for clarity and consistency with docblock - simplify using null coalescing (also reduces lookups) isOnlyDoubleByteKanji(): - update docblock to note empty string returns true (which is important to consider in the caller) --- src/Encoder/Encoder.php | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Encoder/Encoder.php b/src/Encoder/Encoder.php index 2830d53..ccea88a 100644 --- a/src/Encoder/Encoder.php +++ b/src/Encoder/Encoder.php @@ -148,13 +148,9 @@ public static function encode( /** * Gets the alphanumeric code for a byte. */ - private static function getAlphanumericCode(int $code) : int + private static function getAlphanumericCode(int $byte) : int { - if (isset(self::ALPHANUMERIC_TABLE[$code])) { - return self::ALPHANUMERIC_TABLE[$code]; - } - - return -1; + return self::ALPHANUMERIC_TABLE[$byte] ?? -1; } /** @@ -201,7 +197,7 @@ private static function calculateMaskPenalty(ByteMatrix $matrix) : int } /** - * Checks if content only consists of double-byte kanji characters. + * Checks if content only consists of double-byte kanji characters (or is empty). 
*/ private static function isOnlyDoubleByteKanji(string $content) : bool { From c0915b5af3f5ae9257b346b820a680877f340875 Mon Sep 17 00:00:00 2001 From: vlakoff <544424+vlakoff@users.noreply.github.com> Date: Fri, 14 Nov 2025 12:54:45 +0100 Subject: [PATCH 5/7] Extract alphanumeric check into a dedicated method --- src/Encoder/Encoder.php | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/Encoder/Encoder.php b/src/Encoder/Encoder.php index ccea88a..177bb43 100644 --- a/src/Encoder/Encoder.php +++ b/src/Encoder/Encoder.php @@ -170,17 +170,11 @@ private static function chooseMode(string $content, ?string $encoding = null) : return Mode::NUMERIC(); } - $contentLength = strlen($content); - - for ($i = 0; $i < $contentLength; ++$i) { - $char = $content[$i]; - - if (-1 === self::getAlphanumericCode(ord($char))) { - return Mode::BYTE(); - } + if (self::isOnlyAlphanumeric($content)) { + return Mode::ALPHANUMERIC(); } - return Mode::ALPHANUMERIC(); + return Mode::BYTE(); } /** @@ -224,6 +218,24 @@ private static function isOnlyDoubleByteKanji(string $content) : bool return true; } + /** + * Checks if content only consists of alphanumeric characters (or is empty). + */ + private static function isOnlyAlphanumeric(string $content) : bool + { + $length = strlen($content); + + for ($i = 0; $i < $length; ++$i) { + $byte = ord($content[$i]); + + if (-1 === self::getAlphanumericCode($byte)) { + return false; + } + } + + return true; + } + /** * Chooses the best mask pattern for a matrix. */ From a45a746b823aa3470f7bc0a9c8f875d82f50010b Mon Sep 17 00:00:00 2001 From: vlakoff <544424+vlakoff@users.noreply.github.com> Date: Sun, 16 Nov 2025 14:27:40 +0100 Subject: [PATCH 6/7] Optimize isOnlyAlphanumeric() using strspn-based technique Implemented a technique that takes advantage of strspn() to optimize isOnlyAlphanumeric(). 
This approach is much, much faster (hundreds or even thousands of times) and highly scalable: the scan is still linear in the content length, but it runs entirely in native code, so in practice it appears to run in close to constant time. --- src/Encoder/Encoder.php | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Encoder/Encoder.php b/src/Encoder/Encoder.php index 177bb43..276867f 100644 --- a/src/Encoder/Encoder.php +++ b/src/Encoder/Encoder.php @@ -223,17 +223,9 @@ private static function isOnlyDoubleByteKanji(string $content) : bool */ private static function isOnlyAlphanumeric(string $content) : bool { - $length = strlen($content); - - for ($i = 0; $i < $length; ++$i) { - $byte = ord($content[$i]); + $allowed = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:'; - if (-1 === self::getAlphanumericCode($byte)) { - return false; - } - } - - return true; + return strlen($content) === strspn($content, $allowed); } /** From d6d5428bf48974cb48c200f27da62d12d10c3fe9 Mon Sep 17 00:00:00 2001 From: vlakoff <544424+vlakoff@users.noreply.github.com> Date: Sun, 16 Nov 2025 15:25:03 +0100 Subject: [PATCH 7/7] Use class const for alphanumeric characters --- src/Encoder/Encoder.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Encoder/Encoder.php b/src/Encoder/Encoder.php index 276867f..6a7a951 100644 --- a/src/Encoder/Encoder.php +++ b/src/Encoder/Encoder.php @@ -25,6 +25,11 @@ final class Encoder /** @deprecated use DEFAULT_BYTE_MODE_ENCODING */ public const DEFAULT_BYTE_MODE_ECODING = self::DEFAULT_BYTE_MODE_ENCODING; + /** + * Allowed characters for the Alphanumeric Mode. + */ + private const ALPHANUMERIC_CHARS = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:'; + /** * The original table is defined in the table 5 of JISX0510:2004 (p.19). 
*/ @@ -223,9 +228,7 @@ private static function isOnlyDoubleByteKanji(string $content) : bool */ private static function isOnlyAlphanumeric(string $content) : bool { - $allowed = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:'; - - return strlen($content) === strspn($content, $allowed); + return strlen($content) === strspn($content, self::ALPHANUMERIC_CHARS); } /**