Skip to content

Commit db5aa15

Browse files
committed
Add filter for normalize label - Close #5
1 parent f930eaa commit db5aa15

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed

src/Filter/NormalizeLabel.php

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?php
2+
3+
/**
4+
* @see https://github.com/open-code-modeling/php-filter for the canonical source repository
5+
* @copyright https://github.com/open-code-modeling/php-filter/blob/master/COPYRIGHT.md
6+
* @license https://github.com/open-code-modeling/php-filter/blob/master/LICENSE.md MIT License
7+
*/
8+
9+
declare(strict_types=1);
10+
11+
namespace OpenCodeModeling\Filter\Filter;
12+
13+
final class NormalizeLabel extends AbstractFilter
{
    /**
     * Turns an HTML label into a plain, single-line text label.
     *
     * Everything from the first `<hr id="null" ...>` tag onwards is discarded,
     * HTML entities are decoded, tags are stripped, non-breaking spaces become
     * regular spaces, whitespace runs are collapsed to one space and the
     * result is trimmed.
     *
     * @param string $value Raw label, possibly containing HTML markup and entities
     * @return string Normalized plain-text label
     */
    public function __invoke(string $value): string
    {
        // Special rule: only the part before the first horizontal line belongs to the label.
        // The remainder is cut off here and the kept part is still normalized below,
        // instead of being returned with its markup intact.
        $matches = [];
        if (\preg_match('/<hr id="null"[^>]*>/', $value, $matches) === 1) {
            $value = \explode($matches[0], $value, 2)[0];
        }

        // Decode entities before stripping tags, so entity-encoded markup is removed as well
        $normalizedName = \strip_tags(\html_entity_decode($value));

        // Replace the decoded nbsp UTF-8 space with a "normal" space
        $normalizedName = \str_replace("\xc2\xa0", ' ', $normalizedName);

        // Collapse whitespace runs, then turn any remaining single tab / newline into a space
        $collapsed = \preg_replace(['/\s{2,}/', '/[\t\n]/'], ' ', $normalizedName);

        // preg_replace() returns null on a PCRE error; fall back to the uncollapsed text
        return \trim($collapsed ?? $normalizedName);
    }
}

0 commit comments

Comments
 (0)