From 5193039dabf892209ca24432ddce8e63b0e7ebed Mon Sep 17 00:00:00 2001 From: Henrique Moody Date: Sat, 31 Jan 2026 01:46:01 +0100 Subject: [PATCH 1/2] Add UppercaseFormatter with proper UTF-8 support The new UppercaseFormatter provides reliable UTF-8 aware uppercase conversion for international text, ensuring accented characters and non-Latin scripts are handled correctly using mb_strtoupper(). This formatter is essential for applications requiring proper internationalization support when manipulating text in various languages like French, German, Turkish, Greek, Cyrillic, and CJK languages. Includes comprehensive tests covering ASCII, Latin accents, non-Latin scripts, emoji, combining diacritics, right-to-left text, multi-byte characters, and mixed content scenarios. Assisted-by: OpenCode (GLM-4.7) --- README.md | 1 + docs/UppercaseFormatter.md | 88 +++++++++ src/Mixin/Builder.php | 2 + src/Mixin/Chain.php | 2 + src/UppercaseFormatter.php | 21 ++ tests/Unit/UppercaseFormatterTest.php | 269 ++++++++++++++++++++++++++ 6 files changed, 383 insertions(+) create mode 100644 docs/UppercaseFormatter.md create mode 100644 src/UppercaseFormatter.php create mode 100644 tests/Unit/UppercaseFormatterTest.php diff --git a/README.md b/README.md index 9451966..5df839b 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,7 @@ See the [PlaceholderFormatter documentation](docs/PlaceholderFormatter.md) and [ | [PatternFormatter](docs/PatternFormatter.md) | Pattern-based string filtering with placeholders | | [PlaceholderFormatter](docs/PlaceholderFormatter.md) | Template interpolation with placeholder replacement | | [TimeFormatter](docs/TimeFormatter.md) | Time promotion (mil, c, dec, y, mo, w, d, h, min, s, ms, us, ns) | +| [UppercaseFormatter](docs/UppercaseFormatter.md) | Convert string to uppercase | ## Contributing diff --git a/docs/UppercaseFormatter.md b/docs/UppercaseFormatter.md new file mode 100644 index 0000000..43859bd --- /dev/null +++ b/docs/UppercaseFormatter.md @@ 
-0,0 +1,88 @@ + + +# UppercaseFormatter + +The `UppercaseFormatter` converts strings to uppercase with proper UTF-8 character support for international text. + +## Usage + +### Basic Usage + +```php +use Respect\StringFormatter\UppercaseFormatter; + +$formatter = new UppercaseFormatter(); + +echo $formatter->format('hello world'); +// Outputs: "HELLO WORLD" +``` + +### Unicode Characters + +```php +use Respect\StringFormatter\UppercaseFormatter; + +$formatter = new UppercaseFormatter(); + +echo $formatter->format('café français'); +// Outputs: "CAFÉ FRANÇAIS" + +echo $formatter->format('こんにちは'); +// Outputs: "こんにちは" (kana have no case, so the text is unchanged) +``` + +### Mixed Content + +```php +use Respect\StringFormatter\UppercaseFormatter; + +$formatter = new UppercaseFormatter(); + +echo $formatter->format('Hello World 😊'); +// Outputs: "HELLO WORLD 😊" +``` + +## API + +### `UppercaseFormatter::__construct` + +- `__construct()` + +Creates a new uppercase formatter instance. + +### `format` + +- `format(string $input): string` + +Converts the input string to uppercase using UTF-8 aware conversion. + +**Parameters:** + +- `$input`: The string to convert to uppercase + +**Returns:** The uppercase string + +## Examples + +| Input | Output | Description | +| ------------ | ------------ | --------------------------------------- | +| `hello` | `HELLO` | Simple ASCII text | +| `café` | `CAFÉ` | Latin characters with accents | +| `привет` | `ПРИВЕТ` | Cyrillic text | +| `こんにちは` | `こんにちは` | Japanese text (caseless, left unchanged) | +| `Hello 😊` | `HELLO 😊` | Text with emoji | +| `éîôû` | `ÉÎÔÛ` | Multiple accented characters | + +## Notes + +- Uses `mb_strtoupper()` for proper Unicode handling +- Preserves accent marks and diacritical marks +- Converts every cased Unicode script (Latin, Cyrillic, Greek, etc.); caseless scripts (CJK, Arabic, Hebrew, Thai) are left unchanged 
+- Emoji and special symbols are preserved unchanged +- Combining diacritics are properly handled +- Numbers and special characters remain unchanged +- Empty strings return empty strings diff --git a/src/Mixin/Builder.php b/src/Mixin/Builder.php index 539fd0d..1320f4d 100644 --- a/src/Mixin/Builder.php +++ b/src/Mixin/Builder.php @@ -44,4 +44,6 @@ public static function pattern(string $pattern): FormatterBuilder; public static function placeholder(array $parameters): FormatterBuilder; public static function time(string $unit): FormatterBuilder; + + public static function uppercase(): FormatterBuilder; } diff --git a/src/Mixin/Chain.php b/src/Mixin/Chain.php index 780ba0b..c4ec7a8 100644 --- a/src/Mixin/Chain.php +++ b/src/Mixin/Chain.php @@ -44,4 +44,6 @@ public function pattern(string $pattern): FormatterBuilder; public function placeholder(array $parameters): FormatterBuilder; public function time(string $unit): FormatterBuilder; + + public function uppercase(): FormatterBuilder; } diff --git a/src/UppercaseFormatter.php b/src/UppercaseFormatter.php new file mode 100644 index 0000000..42a5293 --- /dev/null +++ b/src/UppercaseFormatter.php @@ -0,0 +1,21 @@ + + */ + +declare(strict_types=1); + +namespace Respect\StringFormatter; + +use function mb_strtoupper; + +final readonly class UppercaseFormatter implements Formatter +{ + public function format(string $input): string + { + return mb_strtoupper($input, 'UTF-8'); // Pin UTF-8: the default would follow mb_internal_encoding() + } +} diff --git a/tests/Unit/UppercaseFormatterTest.php b/tests/Unit/UppercaseFormatterTest.php new file mode 100644 index 0000000..f198f17 --- /dev/null +++ b/tests/Unit/UppercaseFormatterTest.php @@ -0,0 +1,269 @@ + + */ + +declare(strict_types=1); + +namespace Respect\StringFormatter\Test\Unit; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\TestCase; +use Respect\StringFormatter\UppercaseFormatter; + +#[CoversClass(UppercaseFormatter::class)] 
+final class UppercaseFormatterTest extends TestCase +{ + #[Test] + #[DataProvider('providerForValidFormattedString')] + public function testShouldFormatString(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + public function testShouldHandleEmptyString(): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format(''); + + self::assertSame('', $actual); + } + + #[Test] + #[DataProvider('providerForUnicodeString')] + public function testShouldHandleUnicodeCharacters(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForLatinAccents')] + public function testShouldHandleLatinCharactersWithAccents(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForNonLatinScripts')] + public function testShouldHandleNonLatinScripts(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForEmojiAndSpecialChars')] + public function testShouldHandleEmojiAndSpecialCharacters(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForCombiningDiacritics')] + public function testShouldHandleCombiningDiacritics(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + 
#[DataProvider('providerForRightToLeft')] + public function testShouldHandleRightToLeftText(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForMultiByte')] + public function testShouldHandleMultiByteCharacters(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForNumbersAndSpecial')] + public function testShouldHandleNumbersAndSpecialChars(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForMixed')] + public function testShouldHandleMixedContent(string $input, string $expected): void + { + $formatter = new UppercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + /** @return array<string, array{string, string}> */ + public static function providerForValidFormattedString(): array + { + return [ + 'empty string' => ['', ''], + 'single lowercase letter' => ['a', 'A'], + 'all lowercase' => ['hello', 'HELLO'], + 'already uppercase' => ['HELLO', 'HELLO'], + 'mixed case' => ['Hello World', 'HELLO WORLD'], + 'with punctuation' => ['hello, world!', 'HELLO, WORLD!'], + 'with numbers' => ['hello123', 'HELLO123'], + 'single word' => ['test', 'TEST'], + 'multiple words' => ['test string case', 'TEST STRING CASE'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForUnicodeString(): array + { + return [ + 'german umlauts' => ['über', 'ÜBER'], + 'french accents' => ['café', 'CAFÉ'], + 'spanish tilde' => ['niño', 'NIÑO'], + 'portuguese' => ['coração', 'CORAÇÃO'], + 'icelandic' => ['þingvellir', 'ÞINGVELLIR'], + 'scandinavian' => ['ørsted', 'ØRSTED'], + 'polish' => 
['łęski', 'ŁĘSKI'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForLatinAccents(): array + { + return [ + 'c-cedilla' => ['café français', 'CAFÉ FRANÇAIS'], + 'umlauts' => ['äöü', 'ÄÖÜ'], + 'tilde' => ['ãñõ', 'ÃÑÕ'], + 'circumflex' => ['êîôû', 'ÊÎÔÛ'], + 'acute' => ['áéíóú', 'ÁÉÍÓÚ'], + 'grave' => ['àèìòù', 'ÀÈÌÒÙ'], + 'mixed accents' => ['résumé déjà vu', 'RÉSUMÉ DÉJÀ VU'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForNonLatinScripts(): array + { + return [ + 'greek lowercase' => ['γεια σας', 'ΓΕΙΑ ΣΑΣ'], + 'cyrillic lowercase' => ['привет мир', 'ПРИВЕТ МИР'], + 'arabic' => ['مرحبا', 'مرحبا'], + 'hebrew' => ['שלום', 'שלום'], + 'thai' => ['สวัสดี', 'สวัสดี'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForEmojiAndSpecialChars(): array + { + return [ + 'smiley face' => ['hello 😊', 'HELLO 😊'], + 'multiple emoji' => ['hi 👋 bye 👋', 'HI 👋 BYE 👋'], + 'hearts' => ['❤️ love ❤️', '❤️ LOVE ❤️'], + 'special symbols' => ['© ™ ®', '© ™ ®'], + 'math symbols' => ['∑ π ∫', '∑ Π ∫'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForCombiningDiacritics(): array + { + return [ + 'e with combining acute' => ["e\u{0301}", "E\u{0301}"], + 'a with combining grave' => ["a\u{0300}", "A\u{0300}"], + 'multiple diacritics' => ["e\u{0301}\u{0301}", "E\u{0301}\u{0301}"], + 'word with combining marks' => ["cafe\u{0301}", "CAFE\u{0301}"], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForRightToLeft(): array + { + return [ + 'arabic word' => ['مرحبا', 'مرحبا'], + 'hebrew word' => ['שלום', 'שלום'], + 'mixed direction' => ['hello مرحبا', 'HELLO مرحبا'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForMultiByte(): array + { + return [ + 'e-acute' => ['é', 'É'], + 'u-umlaut' => ['ü', 'Ü'], + 'greek sigma' => ['σ', 'Σ'], + 'cyrillic de' => ['д', 'Д'], + 'polish l-stroke' => ['ł', 'Ł'], + 'full accented word' => ['résumé', 'RÉSUMÉ'], + 'mixed multibyte and ascii' => 
['über cool', 'ÜBER COOL'], + 'multibyte with cjk' => ['café你好', 'CAFÉ你好'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForNumbersAndSpecial(): array + { + return [ + 'digits only' => ['1234567890', '1234567890'], + 'mixed alphanumeric' => ['abc123def', 'ABC123DEF'], + 'special chars only' => ['!@#$%^&*()', '!@#$%^&*()'], + 'whitespace' => [' ', ' '], + 'tabs and newlines' => ["hello\tworld\n", "HELLO\tWORLD\n"], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForMixed(): array + { + return [ + 'unicode with numbers' => ['café123', 'CAFÉ123'], + 'emoji with text' => ['Hello World 😊', 'HELLO WORLD 😊'], + 'cjk with latin' => ['Hello你好', 'HELLO你好'], + 'mixed scripts' => ['Hello 世界 Мир', 'HELLO 世界 МИР'], + 'complex string' => ['CAFé 123 😊 你好', 'CAFÉ 123 😊 你好'], + ]; + } +} From ec6e5207b4a1941508bd6b2393d16cd711a507be Mon Sep 17 00:00:00 2001 From: Henrique Moody Date: Sat, 31 Jan 2026 01:46:05 +0100 Subject: [PATCH 2/2] Add LowercaseFormatter with proper UTF-8 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new LowercaseFormatter provides reliable UTF-8 aware lowercase conversion for international text, ensuring accented characters and Turkish special cases (İ/i) are handled correctly using mb_strtolower(). This formatter complements UppercaseFormatter and is essential for applications requiring proper internationalization support when manipulating text in various languages including those with special character mapping rules. Includes comprehensive tests covering ASCII, Latin accents, Turkish characters, non-Latin scripts, emoji, combining diacritics, right-to-left text, multi-byte characters, and mixed content. 
Assisted-by: OpenCode (GLM-4.7) --- README.md | 1 + docs/LowercaseFormatter.md | 88 ++++++++ src/LowercaseFormatter.php | 21 ++ src/Mixin/Builder.php | 2 + src/Mixin/Chain.php | 8 +- tests/Unit/LowercaseFormatterTest.php | 290 ++++++++++++++++++++++++++ 6 files changed, 407 insertions(+), 3 deletions(-) create mode 100644 docs/LowercaseFormatter.md create mode 100644 src/LowercaseFormatter.php create mode 100644 tests/Unit/LowercaseFormatterTest.php diff --git a/README.md b/README.md index 5df839b..05df52d 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ See the [PlaceholderFormatter documentation](docs/PlaceholderFormatter.md) and [ | [ImperialAreaFormatter](docs/ImperialAreaFormatter.md) | Imperial area promotion (in², ft², yd², ac, mi²) | | [ImperialLengthFormatter](docs/ImperialLengthFormatter.md) | Imperial length promotion (in, ft, yd, mi) | | [ImperialMassFormatter](docs/ImperialMassFormatter.md) | Imperial mass promotion (oz, lb, st, ton) | +| [LowercaseFormatter](docs/LowercaseFormatter.md) | Convert string to lowercase | | [MaskFormatter](docs/MaskFormatter.md) | Range-based string masking with Unicode support | | [MassFormatter](docs/MassFormatter.md) | Metric mass promotion (mg, g, kg, t) | | [MetricFormatter](docs/MetricFormatter.md) | Metric length promotion (mm, cm, m, km) | diff --git a/docs/LowercaseFormatter.md b/docs/LowercaseFormatter.md new file mode 100644 index 0000000..1becf63 --- /dev/null +++ b/docs/LowercaseFormatter.md @@ -0,0 +1,88 @@ + + +# LowercaseFormatter + +The `LowercaseFormatter` converts strings to lowercase with proper UTF-8 character support for international text. 
+ +## Usage + +### Basic Usage + +```php +use Respect\StringFormatter\LowercaseFormatter; + +$formatter = new LowercaseFormatter(); + +echo $formatter->format('HELLO WORLD'); +// Outputs: "hello world" +``` + +### Unicode Characters + +```php +use Respect\StringFormatter\LowercaseFormatter; + +$formatter = new LowercaseFormatter(); + +echo $formatter->format('CAFÉ FRANÇAIS'); +// Outputs: "café français" + +echo $formatter->format('コンニチハ'); +// Outputs: "コンニチハ" +``` + +### Mixed Content + +```php +use Respect\StringFormatter\LowercaseFormatter; + +$formatter = new LowercaseFormatter(); + +echo $formatter->format('HELLO WORLD 😊'); +// Outputs: "hello world 😊" +``` + +## API + +### `LowercaseFormatter::__construct` + +- `__construct()` + +Creates a new lowercase formatter instance. + +### `format` + +- `format(string $input): string` + +Converts the input string to lowercase using UTF-8 aware conversion. + +**Parameters:** + +- `$input`: The string to convert to lowercase + +**Returns:** The lowercase string + +## Examples + +| Input | Output | Description | +| ------------ | ------------ | ----------------------------- | +| `HELLO` | `hello` | Simple ASCII text | +| `CAFÉ` | `café` | Latin characters with accents | +| `ПРИВЕТ` | `привет` | Cyrillic text | +| `コンニチハ` | `コンニチハ` | Japanese text | +| `HELLO 😊` | `hello 😊` | Text with emoji | +| `ÉÎÔÛ` | `éîôû` | Multiple accented characters | + +## Notes + +- Uses `mb_strtolower()` for proper Unicode handling +- Preserves accent marks and diacritical marks +- Works with all Unicode scripts (Latin, Cyrillic, Greek, CJK, etc.) 
+- Emoji and special symbols are preserved unchanged +- Combining diacritics are properly handled +- Numbers and special characters remain unchanged +- Empty strings return empty strings diff --git a/src/LowercaseFormatter.php b/src/LowercaseFormatter.php new file mode 100644 index 0000000..906e256 --- /dev/null +++ b/src/LowercaseFormatter.php @@ -0,0 +1,21 @@ + + */ + +declare(strict_types=1); + +namespace Respect\StringFormatter; + +use function mb_strtolower; + +final readonly class LowercaseFormatter implements Formatter +{ + public function format(string $input): string + { + return mb_strtolower($input, 'UTF-8'); // Pin UTF-8: the default would follow mb_internal_encoding() + } +} diff --git a/src/Mixin/Builder.php b/src/Mixin/Builder.php index 1320f4d..57714b4 100644 --- a/src/Mixin/Builder.php +++ b/src/Mixin/Builder.php @@ -26,6 +26,8 @@ public static function imperialMass(string $unit): FormatterBuilder; public static function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder; + public static function lowercase(): FormatterBuilder; + public static function mask(string $range, string $replacement = '*'): FormatterBuilder; public static function metric(string $unit): FormatterBuilder; diff --git a/src/Mixin/Chain.php b/src/Mixin/Chain.php index c4ec7a8..09f5e5a 100644 --- a/src/Mixin/Chain.php +++ b/src/Mixin/Chain.php @@ -18,26 +18,28 @@ interface Chain extends Formatter { public function area(string $unit): FormatterBuilder; + public function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder; + public function imperialArea(string $unit): FormatterBuilder; public function imperialLength(string $unit): FormatterBuilder; public function imperialMass(string $unit): FormatterBuilder; - public function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder; + public function lowercase(): FormatterBuilder; public function mask(string $range, string $replacement = '*'): FormatterBuilder; public function metric(string $unit): FormatterBuilder; + public function metricMass(string $unit): FormatterBuilder; + public function 
number( int $decimals = 0, string $decimalSeparator = '.', string $thousandsSeparator = ',', ): FormatterBuilder; - public function metricMass(string $unit): FormatterBuilder; - public function pattern(string $pattern): FormatterBuilder; /** @param array $parameters */ diff --git a/tests/Unit/LowercaseFormatterTest.php b/tests/Unit/LowercaseFormatterTest.php new file mode 100644 index 0000000..5cb96e2 --- /dev/null +++ b/tests/Unit/LowercaseFormatterTest.php @@ -0,0 +1,290 @@ + + */ + +declare(strict_types=1); + +namespace Respect\StringFormatter\Test\Unit; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\TestCase; +use Respect\StringFormatter\LowercaseFormatter; + +#[CoversClass(LowercaseFormatter::class)] +final class LowercaseFormatterTest extends TestCase +{ + #[Test] + #[DataProvider('providerForValidFormattedString')] + public function testShouldFormatString(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + public function testShouldHandleEmptyString(): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format(''); + + self::assertSame('', $actual); + } + + #[Test] + #[DataProvider('providerForUnicodeString')] + public function testShouldHandleUnicodeCharacters(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForLatinAccents')] + public function testShouldHandleLatinCharactersWithAccents(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForNonLatinScripts')] + public function 
testShouldHandleNonLatinScripts(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForEmojiAndSpecialChars')] + public function testShouldHandleEmojiAndSpecialCharacters(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForTurkish')] + public function testShouldHandleTurkishCharacters(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForCombiningDiacritics')] + public function testShouldHandleCombiningDiacritics(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForRightToLeft')] + public function testShouldHandleRightToLeftText(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForMultiByte')] + public function testShouldHandleMultiByteCharacters(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForNumbersAndSpecial')] + public function testShouldHandleNumbersAndSpecialChars(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + #[Test] + #[DataProvider('providerForMixed')] + public function 
testShouldHandleMixedContent(string $input, string $expected): void + { + $formatter = new LowercaseFormatter(); + + $actual = $formatter->format($input); + + self::assertSame($expected, $actual); + } + + /** @return array<string, array{string, string}> */ + public static function providerForValidFormattedString(): array + { + return [ + 'empty string' => ['', ''], + 'single uppercase letter' => ['A', 'a'], + 'all uppercase' => ['HELLO', 'hello'], + 'already lowercase' => ['hello', 'hello'], + 'mixed case' => ['Hello World', 'hello world'], + 'with punctuation' => ['Hello, World!', 'hello, world!'], + 'with numbers' => ['Hello123', 'hello123'], + 'single word' => ['TEST', 'test'], + 'multiple words' => ['Test String Case', 'test string case'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForUnicodeString(): array + { + return [ + 'german umlauts' => ['ÜBER', 'über'], + 'french accents' => ['CAFÉ', 'café'], + 'spanish tilde' => ['NIÑO', 'niño'], + 'portuguese' => ['CORAÇÃO', 'coração'], + 'icelandic' => ['ÞINGVELLIR', 'þingvellir'], + 'scandinavian' => ['ØRSTED', 'ørsted'], + 'polish' => ['ŁĘSKI', 'łęski'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForLatinAccents(): array + { + return [ + 'c-cedilla' => ['CAFÉ FRANÇAIS', 'café français'], + 'umlauts' => ['ÄÖÜ', 'äöü'], + 'tilde' => ['ÃÑÕ', 'ãñõ'], + 'circumflex' => ['ÊÎÔÛ', 'êîôû'], + 'acute' => ['ÁÉÍÓÚ', 'áéíóú'], + 'grave' => ['ÀÈÌÒÙ', 'àèìòù'], + 'mixed accents' => ['RÉSUMÉ DÉJÀ VU', 'résumé déjà vu'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForNonLatinScripts(): array + { + return [ + 'greek uppercase' => ['ΓΕΙΑ ΣΑΣ', 'γεια σας'], + 'cyrillic uppercase' => ['ПРИВЕТ МИР', 'привет мир'], + 'arabic' => ['مرحبا', 'مرحبا'], + 'hebrew' => ['שלום', 'שלום'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForEmojiAndSpecialChars(): array + { + return [ + 'smiley face' => ['HELLO 😊', 'hello 😊'], + 'multiple emoji' => ['HI 👋 BYE 👋', 'hi 👋 bye 👋'], + 'hearts' 
=> ['❤️ LOVE ❤️', '❤️ love ❤️'], + 'special symbols' => ['© ™ ®', '© ™ ®'], + 'math symbols' => ['∑ π ∫', '∑ π ∫'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForTurkish(): array + { + return [ + 'turkish i' => ['İ', 'i̇'], + 'turkish I' => ['I', 'i'], + 'turkish mixed' => ['İSTANBUL', 'i̇stanbul'], + 'capital i with dot' => ['İi', 'i̇i'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForCombiningDiacritics(): array + { + return [ + 'E with combining acute' => ["E\u{0301}", "e\u{0301}"], + 'A with combining grave' => ["A\u{0300}", "a\u{0300}"], + 'combined character' => ['É', 'é'], + 'word with combining marks' => ["CAFE\u{0301}", "cafe\u{0301}"], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForRightToLeft(): array + { + return [ + 'arabic word' => ['مرحبا', 'مرحبا'], + 'hebrew word' => ['שלום', 'שלום'], + 'mixed direction' => ['HELLO مرحبا', 'hello مرحبا'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForMultiByte(): array + { + return [ + 'e-acute' => ['É', 'é'], + 'u-umlaut' => ['Ü', 'ü'], + 'greek sigma' => ['Σ', 'σ'], + 'cyrillic de' => ['Д', 'д'], + 'polish l-stroke' => ['Ł', 'ł'], + 'full accented word' => ['RÉSUMÉ', 'résumé'], + 'mixed multibyte and ascii' => ['ÜBER COOL', 'über cool'], + 'multibyte with cjk' => ['CAFÉ你好', 'café你好'], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForNumbersAndSpecial(): array + { + return [ + 'digits only' => ['1234567890', '1234567890'], + 'mixed alphanumeric' => ['ABC123DEF', 'abc123def'], + 'special chars only' => ['!@#$%^&*()', '!@#$%^&*()'], + 'whitespace' => [' ', ' '], + 'tabs and newlines' => ["HELLO\tWORLD\n", "hello\tworld\n"], + ]; + } + + /** @return array<string, array{string, string}> */ + public static function providerForMixed(): array + { + return [ + 'unicode with numbers' => ['CAFÉ123', 'café123'], + 'emoji with text' => ['HELLO WORLD 😊', 'hello world 😊'], + 'cjk with latin' => ['HELLO你好', 'hello你好'], + 'mixed scripts' => 
['HELLO 世界 МИР', 'hello 世界 мир'], + 'complex string' => ['CAFÉ 123 😊 你好', 'café 123 😊 你好'], + ]; + } +}