diff --git a/README.md b/README.md index 9451966..812452f 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,7 @@ See the [PlaceholderFormatter documentation](docs/PlaceholderFormatter.md) and [ | [PatternFormatter](docs/PatternFormatter.md) | Pattern-based string filtering with placeholders | | [PlaceholderFormatter](docs/PlaceholderFormatter.md) | Template interpolation with placeholder replacement | | [TimeFormatter](docs/TimeFormatter.md) | Time promotion (mil, c, dec, y, mo, w, d, h, min, s, ms, us, ns) | +| [TrimFormatter](docs/TrimFormatter.md) | Remove whitespace from string edges | ## Contributing diff --git a/docs/TrimFormatter.md b/docs/TrimFormatter.md new file mode 100644 index 0000000..b6b22fc --- /dev/null +++ b/docs/TrimFormatter.md @@ -0,0 +1,145 @@ + + +# TrimFormatter + +The `TrimFormatter` removes characters from the edges of strings with configurable masking and side selection, fully supporting UTF-8 Unicode characters. + +## Usage + +### Basic Usage + +```php +use Respect\StringFormatter\TrimFormatter; + +$formatter = new TrimFormatter(); + +echo $formatter->format(' hello world '); +// Outputs: "hello world" +``` + +### Trim Specific Side + +```php +use Respect\StringFormatter\TrimFormatter; + +$formatter = new TrimFormatter('left'); + +echo $formatter->format(' hello '); +// Outputs: "hello " + +$formatterRight = new TrimFormatter('right'); + +echo $formatterRight->format(' hello '); +// Outputs: " hello" +``` + +### Custom Mask + +```php +use Respect\StringFormatter\TrimFormatter; + +$formatter = new TrimFormatter('both', '-._'); + +echo $formatter->format('---hello---'); +// Outputs: "hello" + +echo $formatter->format('._hello_._'); +// Outputs: "hello" +``` + +### Unicode Characters + +```php +use Respect\StringFormatter\TrimFormatter; + +// CJK full-width spaces are trimmed by default +$formatter = new TrimFormatter(); + +echo $formatter->format(' hello世界 '); +// Outputs: "hello世界" + +// Trim emoji with custom mask +$formatterEmoji = 
new TrimFormatter('both', '😊'); + +echo $formatterEmoji->format('😊hello😊'); +// Outputs: "hello" +``` + +## API + +### `TrimFormatter::__construct` + +- `__construct(string $side = "both", string|null $mask = null)` + +Creates a new trim formatter instance. + +**Parameters:** + +- `$side`: Which side(s) to trim: "left", "right", or "both" (default: "both") +- `$mask`: The characters to trim from the string edges, or `null` for default Unicode whitespace (default: `null`) + +**Throws:** `InvalidFormatterException` when `$side` is not "left", "right", or "both" + +### `format` + +- `format(string $input): string` + +Removes characters from the specified side(s) of the input string. + +**Parameters:** + +- `$input`: The string to trim + +**Returns:** The trimmed string + +## Examples + +| Side | Mask | Input | Output | Description | +| --------- | -------------- | --------------- | ------------ | ----------------------------------- | +| `"both"` | `null` | `" hello "` | `"hello"` | Trim default whitespace both sides | +| `"left"` | `null` | `" hello "` | `"hello "` | Trim default whitespace left only | +| `"right"` | `null` | `" hello "` | `" hello"` | Trim default whitespace right only | +| `"both"` | `"-"` | `"---hello---"` | `"hello"` | Trim hyphens from both sides | +| `"both"` | `"-._"` | `"-._hello_.-"` | `"hello"` | Trim multiple custom characters | +| `"left"` | `":"` | `":::hello:::"` | `"hello:::"` | Trim colons from left only | +| `"both"` | `null` | `" hello"` | `"hello"` | CJK space trimmed by default | +| `"both"` | `"😊"` | `"😊hello😊"` | `"hello"` | Trim emoji with custom mask | + +## Notes + +- Uses PHP's `mb_trim`, `mb_ltrim`, and `mb_rtrim` functions for multibyte-safe trimming +- Fully UTF-8 aware - handles all Unicode scripts including CJK, emoji, and complex characters +- Empty strings return empty strings +- If the mask is empty or contains no characters present in the input, the string is returned unchanged +- Trimming operations are 
character-oriented, not byte-oriented + +### Default Mask + +When no mask is provided (`null`), the formatter uses `mb_trim`'s default which includes all Unicode whitespace characters: + +**ASCII whitespace:** +- ` ` (U+0020): Ordinary space +- `\t` (U+0009): Tab +- `\n` (U+000A): New line (line feed) +- `\r` (U+000D): Carriage return +- `\0` (U+0000): NUL-byte +- `\v` (U+000B): Vertical tab +- `\f` (U+000C): Form feed + +**Unicode whitespace:** +- U+00A0: No-break space +- U+1680: Ogham space mark +- U+2000–U+200A: Various width spaces (en quad, em quad, en space, em space, etc.) +- U+2028: Line separator +- U+2029: Paragraph separator +- U+202F: Narrow no-break space +- U+205F: Medium mathematical space +- U+3000: Ideographic space (CJK full-width space) +- U+0085: Next line (NEL) +- U+180E: Mongolian vowel separator + +See [mb_trim documentation](https://www.php.net/manual/en/function.mb-trim.php) for the complete list. diff --git a/src/Mixin/Builder.php b/src/Mixin/Builder.php index 539fd0d..3396c04 100644 --- a/src/Mixin/Builder.php +++ b/src/Mixin/Builder.php @@ -18,30 +18,33 @@ interface Builder { public static function area(string $unit): FormatterBuilder; + public static function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder; + public static function imperialArea(string $unit): FormatterBuilder; public static function imperialLength(string $unit): FormatterBuilder; public static function imperialMass(string $unit): FormatterBuilder; - public static function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder; - public static function mask(string $range, string $replacement = '*'): FormatterBuilder; public static function metric(string $unit): FormatterBuilder; + public static function metricMass(string $unit): FormatterBuilder; + public static function number( int $decimals = 0, string $decimalSeparator = '.', string $thousandsSeparator = ',', ): FormatterBuilder; - public static function metricMass(string $unit): FormatterBuilder; - public 
/**
 * Static counterparts of the formatter methods; every method returns a FormatterBuilder.
 *
 * NOTE(review): presumably consumed as a mixin for IDEs/static analysis rather than
 * implemented directly -- confirm against FormatterBuilder.
 */
interface Builder
{
    public static function area(string $unit): FormatterBuilder;

    public static function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder;

    public static function imperialArea(string $unit): FormatterBuilder;

    public static function imperialLength(string $unit): FormatterBuilder;

    public static function imperialMass(string $unit): FormatterBuilder;

    public static function mask(string $range, string $replacement = '*'): FormatterBuilder;

    public static function metric(string $unit): FormatterBuilder;

    public static function metricMass(string $unit): FormatterBuilder;

    public static function number(
        int $decimals = 0,
        string $decimalSeparator = '.',
        string $thousandsSeparator = ',',
    ): FormatterBuilder;

    public static function pattern(string $pattern): FormatterBuilder;

    /** @param array $parameters */
    public static function placeholder(array $parameters): FormatterBuilder;

    public static function time(string $unit): FormatterBuilder;

    /**
     * Signature mirrors TrimFormatter::__construct(): a null mask selects the default
     * Unicode whitespace set of the mb_* trim functions. An ASCII-only default string
     * here would advertise a narrower default than the formatter actually has.
     *
     * @param 'both'|'left'|'right' $side Which side(s) to trim
     * @param string|null $mask Characters to trim, or null for the default Unicode whitespace
     */
    public static function trim(string $side = 'both', string|null $mask = null): FormatterBuilder;
}

/**
 * Instance (chained) counterparts of the formatter methods; every method returns a FormatterBuilder.
 *
 * NOTE(review): presumably consumed as a mixin for IDEs/static analysis rather than
 * implemented directly -- confirm against FormatterBuilder.
 */
interface Chain extends Formatter
{
    public function area(string $unit): FormatterBuilder;

    public function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder;

    public function imperialArea(string $unit): FormatterBuilder;

    public function imperialLength(string $unit): FormatterBuilder;

    public function imperialMass(string $unit): FormatterBuilder;

    public function mask(string $range, string $replacement = '*'): FormatterBuilder;

    public function metric(string $unit): FormatterBuilder;

    public function metricMass(string $unit): FormatterBuilder;

    public function number(
        int $decimals = 0,
        string $decimalSeparator = '.',
        string $thousandsSeparator = ',',
    ): FormatterBuilder;

    public function pattern(string $pattern): FormatterBuilder;

    /** @param array $parameters */
    public function placeholder(array $parameters): FormatterBuilder;

    public function time(string $unit): FormatterBuilder;

    /**
     * Signature mirrors TrimFormatter::__construct(): a null mask selects the default
     * Unicode whitespace set of the mb_* trim functions. An ASCII-only default string
     * here would advertise a narrower default than the formatter actually has.
     *
     * @param 'both'|'left'|'right' $side Which side(s) to trim
     * @param string|null $mask Characters to trim, or null for the default Unicode whitespace
     */
    public function trim(string $side = 'both', string|null $mask = null): FormatterBuilder;
}
/**
 * Formatter that strips a set of characters from the left, right, or both ends of a string.
 *
 * The work is delegated to the multibyte trim functions. With a null mask those
 * functions fall back to their default character set, which covers Unicode
 * whitespace such as no-break space (U+00A0), em space (U+2003) and
 * ideographic space (U+3000) in addition to the usual ASCII whitespace.
 *
 * @see https://www.php.net/manual/en/function.mb-trim.php
 */
final readonly class TrimFormatter implements Formatter
{
    /**
     * @param 'both'|'left'|'right' $side End(s) of the string to trim
     * @param string|null $mask Characters to strip, or null for the default Unicode whitespace set
     *
     * @throws InvalidFormatterException when $side is not "left", "right", or "both"
     */
    public function __construct(
        private string $side = 'both',
        private string|null $mask = null,
    ) {
        $isKnownSide = in_array($this->side, ['left', 'right', 'both'], true);

        if ($isKnownSide) {
            return;
        }

        $message = sprintf('Invalid side "%s". Must be "left", "right", or "both".', $this->side);

        throw new InvalidFormatterException($message);
    }

    /**
     * Trims the configured side(s) of $input using the configured mask.
     */
    public function format(string $input): string
    {
        // The constructor only admits 'left', 'right' and 'both', so the default arm is 'both'.
        $trimmer = match ($this->side) {
            'left' => mb_ltrim(...),
            'right' => mb_rtrim(...),
            default => mb_trim(...),
        };

        return $trimmer($input, $this->mask);
    }
}
/**
 * Unit tests for TrimFormatter: side selection, default and custom masks,
 * multibyte input, and constructor validation.
 *
 * Invisible or easily-normalised characters in the fixtures are written as
 * "\u{...}" escapes so that the data keeps exercising the intended code points
 * even if an editor or formatter rewrites whitespace.
 */
#[CoversClass(TrimFormatter::class)]
final class TrimFormatterTest extends TestCase
{
    /** Trims with an optional side and mask taken from the data set. */
    #[Test]
    #[DataProvider('providerForValidFormattedString')]
    public function testShouldTrimString(
        string $input,
        string $expected,
        string $side = 'both',
        string|null $mask = null,
    ): void {
        // @phpstan-ignore argument.type
        $formatter = new TrimFormatter($side, $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** Only the left edge is trimmed; the right edge must be left untouched. */
    #[Test]
    #[DataProvider('providerForLeftTrim')]
    public function testShouldTrimLeft(string $input, string $expected, string|null $mask = null): void
    {
        $formatter = new TrimFormatter('left', $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** Only the right edge is trimmed; the left edge must be left untouched. */
    #[Test]
    #[DataProvider('providerForRightTrim')]
    public function testShouldTrimRight(string $input, string $expected, string|null $mask = null): void
    {
        $formatter = new TrimFormatter('right', $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** Both edges are trimmed when the side is given explicitly as 'both'. */
    #[Test]
    #[DataProvider('providerForBothTrim')]
    public function testShouldTrimBoth(string $input, string $expected, string|null $mask = null): void
    {
        $formatter = new TrimFormatter('both', $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** An empty input yields an empty output with the default configuration. */
    #[Test]
    public function testShouldHandleEmptyString(): void
    {
        $formatter = new TrimFormatter();

        $actual = $formatter->format('');

        self::assertSame('', $actual);
    }

    /** An unknown side is rejected at construction time. */
    #[Test]
    public function testShouldThrowExceptionForInvalidSide(): void
    {
        $this->expectException(InvalidFormatterException::class);
        $this->expectExceptionMessage('Invalid side "middle"');

        // @phpstan-ignore argument.type
        new TrimFormatter('middle');
    }

    /** Non-whitespace, non-ASCII characters are trimmed when named in the mask. */
    #[Test]
    #[DataProvider('providerForUnicode')]
    public function testShouldHandleUnicodeCharacters(string $input, string $expected, string $mask): void
    {
        $formatter = new TrimFormatter('both', $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** Emoji in the mask are trimmed from both edges. */
    #[Test]
    #[DataProvider('providerForEmoji')]
    public function testShouldHandleEmoji(string $input, string $expected, string $mask): void
    {
        $formatter = new TrimFormatter('both', $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** ASCII punctuation masks, including masks with several characters. */
    #[Test]
    #[DataProvider('providerForCustomMask')]
    public function testShouldHandleCustomMask(string $input, string $expected, string $mask): void
    {
        $formatter = new TrimFormatter('both', $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** Characters that are special in regular expressions are treated literally in the mask. */
    #[Test]
    #[DataProvider('providerForSpecialChars')]
    public function testShouldHandleSpecialCharactersInMask(string $input, string $expected, string $mask): void
    {
        $formatter = new TrimFormatter('both', $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** Multibyte content survives trimming; multibyte whitespace and mask characters are removed. */
    #[Test]
    #[DataProvider('providerForMultiByte')]
    public function testShouldHandleMultiByteCharacters(string $input, string $expected, string|null $mask = null): void
    {
        $formatter = new TrimFormatter('both', $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** Boundary inputs: empty strings, empty masks, fully-trimmed strings, one-sided trims. */
    #[Test]
    #[DataProvider('providerForEdgeCases')]
    public function testShouldHandleEdgeCases(string $input, string $expected, string $side, string $mask): void
    {
        // @phpstan-ignore argument.type
        $formatter = new TrimFormatter($side, $mask);

        $actual = $formatter->format($input);

        self::assertSame($expected, $actual);
    }

    /** @return array<string, array{0: string, 1: string, 2?: string, 3?: string}> */
    public static function providerForValidFormattedString(): array
    {
        return [
            'whitespace both sides' => [' hello ', 'hello'],
            'tab both sides' => ["\thello\t", 'hello'],
            'newline both sides' => ["\nhello\n", 'hello'],
            'mixed whitespace' => [" \t\n hello \t\n", 'hello'],
            'already trimmed' => ['hello', 'hello'],
            'only spaces' => [' ', ''],
            'no characters in mask' => ['hello', 'hello', 'both', 'xyz'],
            'all characters to trim' => [' !!! ', '!!!', 'both', ' '],
            // Unicode whitespace (trimmed by default with mb_trim)
            'ideographic space' => ["\u{3000}hello\u{3000}", 'hello'],
            'em space' => ["\u{2003}hello\u{2003}", 'hello'],
            'no-break space' => ["\u{00A0}hello\u{00A0}", 'hello'],
            'thin space' => ["\u{2009}hello\u{2009}", 'hello'],
            'mixed unicode whitespace' => ["\u{3000}\u{2003} hello \u{00A0}\u{2009}", 'hello'],
        ];
    }

    /** @return array<string, array{string, string}> */
    public static function providerForLeftTrim(): array
    {
        return [
            'spaces left' => [' hello', 'hello'],
            'spaces right not trimmed' => ['hello ', 'hello '],
            'spaces left and right' => [' hello ', 'hello '],
            'tabs left' => ["\thello\t", "hello\t"],
            'mixed whitespace left' => ["\t\n hello world", 'hello world'],
        ];
    }

    /** @return array<string, array{string, string}> */
    public static function providerForRightTrim(): array
    {
        return [
            'spaces right' => ['hello ', 'hello'],
            'spaces left not trimmed' => [' hello', ' hello'],
            'spaces left and right' => [' hello ', ' hello'],
            'tabs right' => ["\thello\t", "\thello"],
            'mixed whitespace right' => ["hello world \t", 'hello world'],
        ];
    }

    /** @return array<string, array{string, string}> */
    public static function providerForBothTrim(): array
    {
        return [
            'spaces both' => [' hello ', 'hello'],
            'tabs both' => ["\thello\t", 'hello'],
            'newlines both' => ["\nhello\n", 'hello'],
            'mixed whitespace' => [" \t\n hello \t\n ", 'hello'],
            'single space' => [' hello ', 'hello'],
            // Unicode whitespace (trimmed by default with mb_trim)
            'ideographic space both' => ["\u{3000}hello\u{3000}", 'hello'],
            'narrow no-break space' => ["\u{202F}hello \u{202F}", 'hello'],
        ];
    }

    /** @return array<string, array{string, string, string}> */
    public static function providerForUnicode(): array
    {
        return [
            // Non-whitespace Unicode characters require explicit mask
            'latin accented chars' => ['éééhelloééé', 'hello', 'é'],
            'greek letters' => ['αααhelloααα', 'hello', 'α'],
            'cyrillic letters' => ['бббhelloббб', 'hello', 'б'],
            // Every leading letter is in the mask; the final letter is not, so it is all that remains.
            'arabic letters' => ['مرحبا', 'ا', 'مرحب'],
            'chinese characters' => ['中中hello中中', 'hello', '中'],
            'japanese hiragana' => ['あああhelloあああ', 'hello', 'あ'],
        ];
    }

    /** @return array<string, array{string, string, string}> */
    public static function providerForEmoji(): array
    {
        return [
            'smiley faces' => ['😊😊hello😊😊', 'hello', '😊'],
            'mixed emoji' => ['👋👋hi👋👋', 'hi', '👋'],
            'hearts' => ['❤️❤️love❤️❤️', 'love', '❤️'],
        ];
    }

    /** @return array<string, array{string, string, string}> */
    public static function providerForCustomMask(): array
    {
        return [
            'custom characters' => ['---hello---', 'hello', '-'],
            'multiple custom chars' => ['-._hello-._', 'hello', '_.-'],
            'dots' => ['...hello...', 'hello', '.'],
            'underscores' => ['___hello___', 'hello', '_'],
            'mixed custom' => ['*-+hello+-*', 'hello', '+-*'],
        ];
    }

    /** @return array<string, array{string, string, string}> */
    public static function providerForSpecialChars(): array
    {
        return [
            'dash' => ['--hello--', 'hello', '-'],
            'asterisk' => ['**hello**', 'hello', '*'],
            'dot' => ['..hello..', 'hello', '.'],
            'dollar sign' => ['$$hello$$', 'hello', '$'],
            'caret' => ['^^hello^^', 'hello', '^'],
            'pipe' => ['||hello||', 'hello', '|'],
            'question mark' => ['??hello??', 'hello', '?'],
            'multiple special' => ['@#$hello$#@', 'hello', '@#$'],
        ];
    }

    /** @return array<string, array{0: string, 1: string, 2?: string}> */
    public static function providerForMultiByte(): array
    {
        return [
            // Ideographic space (U+3000) is trimmed by default with mb_trim.
            // Written as escapes so the fixture cannot silently degrade to ASCII spaces.
            'chinese with ideographic space' => ["\u{3000}你好\u{3000}", '你好'],
            'japanese with ideographic space' => ["\u{3000}こんにちは\u{3000}", 'こんにちは'],
            'korean with ideographic space' => ["\u{3000}안녕하세요\u{3000}", '안녕하세요'],
            // Custom mask for non-whitespace multibyte chars (U+FF41, fullwidth latin small letter a)
            'fullwidth letters with custom mask' => [
                "\u{FF41}\u{FF41}\u{FF41}hello\u{FF41}\u{FF41}\u{FF41}",
                'hello',
                "\u{FF41}",
            ],
            'mixed cjk and ascii' => ["\u{3000}hello 你好\u{3000}", 'hello 你好'],
        ];
    }

    /** @return array<string, array{string, string, string, string}> */
    public static function providerForEdgeCases(): array
    {
        return [
            'empty string' => ['', '', 'both', ' '],
            'string shorter than mask' => ['a', '', 'both', 'abcdef'],
            'all characters trimmed' => ['--', '', 'both', '-'],
            'only one side trimmed left' => ['--a', 'a', 'left', '-'],
            'only one side trimmed right' => ['a--', 'a', 'right', '-'],
            'no characters to trim' => ['hello', 'hello', 'both', 'xyz'],
            'mask longer than string' => ['hello', 'hello', 'both', 'abcdefgzij'],
            'empty mask' => ['hello', 'hello', 'both', ''],
            'repeated characters' => ['aaaaahelloaaaaa', 'hello', 'both', 'a'],
            'interleaved characters' => ['ababhelloabab', 'hello', 'both', 'ab'],
        ];
    }
}