Skip to content

Commit 985661d

Browse files
committed
Draft impl of idn-hostname validator. Add libraries to simplify work with unicode
1 parent 02261d9 commit 985661d

File tree

7 files changed

+129
-3
lines changed

7 files changed

+129
-3
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,14 @@ You can see the results in the latest workflow execution.
389389
The update to Kotlin 1.9.22 came with an issue for JS incremental compilation.
390390
In case you see an error about main function that already bind please execute `clean` task.
391391

392+
When you build the project for the **linux** target, you might get an error about a missing native library.
393+
This is because `com.doist.x:normalize` requires the native `libunistring` library to perform string normalization.
394+
This is needed to support the `idn-hostname` format. Install the library with the following command:
395+
396+
```bash
397+
sudo apt-get install -y libunistring-dev
398+
```
399+
392400
### Devcontainer
393401

394402
Devcontainers is a cool feature. However, by default in Codespaces and DevPod you will use [VS Code](https://code.visualstudio.com/).

build.gradle.kts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ kotlin {
7777
dependencies {
7878
api(libs.kotlin.serialization.json)
7979
implementation(libs.uri)
80+
implementation("de.cketti.unicode:kotlin-codepoints:0.7.0") {
81+
because("simplifies work with unicode codepoints")
82+
}
83+
implementation("com.doist.x:normalize:1.0.5") {
84+
because("provides normalization required by IDN-hostname format")
85+
}
8086
}
8187
}
8288
commonTest {

src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import io.github.optimumcode.json.schema.internal.formats.DateFormatValidator
1717
import io.github.optimumcode.json.schema.internal.formats.DateTimeFormatValidator
1818
import io.github.optimumcode.json.schema.internal.formats.DurationFormatValidator
1919
import io.github.optimumcode.json.schema.internal.formats.HostnameFormatValidator
20+
import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValidator
2021
import io.github.optimumcode.json.schema.internal.formats.IpV4FormatValidator
2122
import io.github.optimumcode.json.schema.internal.formats.IpV6FormatValidator
2223
import io.github.optimumcode.json.schema.internal.formats.JsonPointerFormatValidator
@@ -70,6 +71,7 @@ internal sealed class FormatAssertionFactory(
7071
"ipv6" to IpV6FormatValidator,
7172
"uuid" to UuidFormatValidator,
7273
"hostname" to HostnameFormatValidator,
74+
"idn-hostname" to IdnHostnameFormatValidator,
7375
)
7476
}
7577
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package io.github.optimumcode.json.schema.internal.formats
2+
3+
import de.cketti.codepoints.codePointAt
4+
import de.cketti.codepoints.codePointBefore
5+
import io.github.optimumcode.json.schema.FormatValidationResult
6+
import io.github.optimumcode.json.schema.FormatValidator
7+
import io.github.optimumcode.json.schema.internal.hostname.Normalizer
8+
import io.github.optimumcode.json.schema.internal.hostname.Punycode
9+
import kotlin.text.CharCategory.COMBINING_SPACING_MARK
10+
import kotlin.text.CharCategory.ENCLOSING_MARK
11+
import kotlin.text.CharCategory.NON_SPACING_MARK
12+
13+
/**
 * Draft validator for the `idn-hostname` format.
 *
 * The value is split into labels on the separators recognised by [isLabelSeparator] and every
 * label is checked by [isValidLabel]. Bidi rules and the encoded-length limit are not checked yet
 * (see the TODOs in [isValidLabel]).
 */
internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
    /**
     * Validates [value] label by label.
     *
     * An empty string is invalid. A string that consists of a single label separator is accepted.
     * A single trailing separator is tolerated; any other empty label makes the value invalid.
     */
    override fun validate(value: String): FormatValidationResult {
        if (value.isEmpty()) {
            return FormatValidator.Invalid()
        }
        if (value.length == 1 && isLabelSeparator(value[0])) {
            return FormatValidator.Valid()
        }
        var pointer = 0
        while (pointer < value.length) {
            val dot = findDot(value, pointer)
            val label = value.substring(pointer, dot)
            if (!isValidLabel(label)) {
                return FormatValidator.Invalid()
            }
            // skip the separator itself
            pointer = dot + 1
        }
        return FormatValidator.Valid()
    }

    /**
     * Checks a single label (the text between two separators).
     *
     * Labels carrying the ACE prefix are decoded with [Punycode.decode] first; a label that cannot
     * be decoded is rejected. The resulting text must be non-empty, NFC-normalized, must not start
     * or end with a hyphen, must not have hyphens in both the third and fourth positions and must
     * not start with a combining mark.
     */
    private fun isValidLabel(label: String): Boolean {
        val unicode =
            if (isACE(label)) {
                Punycode.decode(label) ?: return false
            } else {
                label
            }

        // An empty label (e.g. "a..b" or ".a") is never valid.
        // The checks below index into the string, so they must not run on empty text.
        if (unicode.isEmpty()) {
            return false
        }

        if (!Normalizer.isNormalized(unicode)) {
            return false
        }

        // https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
        if (unicode[0] == '-' || unicode.codePointBefore(unicode.length) == '-'.code) {
            // cannot start or end with hyphen
            return false
        }

        if (unicode.length >= 4 && hasTwoConsecutiveHyphens(unicode)) {
            // cannot have two consecutive hyphens at 3 and 4 char position
            return false
        }

        val firstCodePoint = unicode.codePointAt(0)
        // https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
        if (isLeadingCombiningMark(firstCodePoint)) {
            return false
        }

        // TODO: check common rules and specific depending on char direction
        // TODO: encode using Punycode and check length

        return true
    }

    /**
     * Returns `true` when [codePoint] belongs to one of the mark categories (Mn, Mc, Me).
     *
     * Only code points that fit into a single [Char] are classified. `Int.toChar()` keeps just the
     * low 16 bits, so converting a supplementary code point would classify an unrelated BMP
     * character (for example U+10300 would be treated as U+0300, a combining mark).
     */
    private fun isLeadingCombiningMark(codePoint: Int): Boolean {
        if (codePoint > Char.MAX_VALUE.code) {
            // TODO: supplementary combining marks are not detected yet;
            //  this requires a category lookup that works on code points rather than chars
            return false
        }
        return when (codePoint.toChar().category) {
            NON_SPACING_MARK,
            COMBINING_SPACING_MARK,
            ENCLOSING_MARK,
            -> true

            else -> false
        }
    }

    /**
     * Checks the third and fourth positions (indices 2 and 3) for hyphens.
     * The caller must make sure that [value] has at least four chars.
     */
    private fun hasTwoConsecutiveHyphens(value: String): Boolean =
        value.codePointAt(2) == '-'.code && value.codePointAt(3) == '-'.code

    /**
     * Returns `true` when [label] starts with the ACE prefix and has at least one char after it.
     */
    // NOTE(review): the prefix comparison is case-sensitive, so a label starting with "XN--" is
    // treated as a plain label - confirm whether Punycode.decode accepts upper-case prefixes
    // before relaxing this check.
    private fun isACE(label: String): Boolean =
        label.length > Punycode.PREFIX_SIZE && label.startsWith(Punycode.PREFIX_STRING)

    /**
     * Recognised label separators: U+002E, U+3002, U+FF0E and U+FF61.
     */
    private fun isLabelSeparator(c: Char): Boolean = c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61'

    /**
     * Returns the index of the first label separator at or after [startIndex],
     * or `value.length` when there is none.
     */
    private fun findDot(
        value: String,
        startIndex: Int,
    ): Int {
        for (i in startIndex until value.length) {
            if (isLabelSeparator(value[i])) {
                return i
            }
        }
        return value.length
    }
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package io.github.optimumcode.json.schema.internal.hostname
2+
3+
import doist.x.normalize.Form
4+
import doist.x.normalize.normalize
5+
6+
/**
 * Helper for Unicode normalization checks on hostname labels.
 */
internal object Normalizer {
    /**
     * Returns `true` when converting [label] to NFC leaves it unchanged,
     * i.e. the label is already in Normalization Form C.
     */
    fun isNormalized(label: String): Boolean = label == label.normalize(Form.NFC)
}

src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/hostname/Punycode.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ package io.github.optimumcode.json.schema.internal.hostname
1010
*/
1111
@Suppress("detekt:MagicNumber")
1212
internal object Punycode {
13-
private const val PREFIX_STRING = "xn--"
14-
private const val PREFIX_SIZE = PREFIX_STRING.length
13+
const val PREFIX_STRING = "xn--"
14+
const val PREFIX_SIZE = PREFIX_STRING.length
1515
private const val MIN_SUPPLEMENTARY_CODE_POINT = 0x010000
1616
private const val MAX_CODE_POINT = 0x10FFFF
1717

test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ internal val COMMON_FORMAT_FILTER =
4848
mapOf(
4949
"email" to emptySet(),
5050
"idn-email" to emptySet(),
51-
"idn-hostname" to emptySet(),
5251
"iri" to emptySet(),
5352
"iri-reference" to emptySet(),
5453
"regex" to emptySet(),

0 commit comments

Comments
 (0)