Skip to content

Commit 408f1ac

Browse files
committed
chore: Test kTGHZ2013 field
1 parent 35b60cb commit 408f1ac

File tree

2 files changed

+98
-0
lines changed

2 files changed

+98
-0
lines changed

src/unihan_etl/validator.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Experimental pydantic models for unihan data."""
2+
import typing as t
3+
4+
import pydantic
5+
6+
from unihan_etl.expansion import expand_kTGHZ2013
7+
8+
9+
class UCNBaseModel(pydantic.BaseModel):
    """Base pydantic model for records identified by a UCN string.

    Declares the single required ``ucn`` field that subclasses inherit.
    """

    # Required string identifier, e.g. "U+4E07".
    ucn: str
13+
14+
15+
class kTGHZ2013Location(pydantic.BaseModel):
    """One kTGHZ2013 location: a page, a position and an entry type."""

    page: int
    position: int
    # No default is given, so this field is required like the two above.
    entry_type: int = pydantic.Field(
        description=(
            "0 for a main entry and greater than 0 for a parenthesized or bracketed "
            "variant of the main entry"
        ),
    )
26+
27+
28+
class kTGHZ2013Reading(pydantic.BaseModel):
    """A single kTGHZ2013 reading together with the locations it is listed at."""

    # The reading text, e.g. "wàn".
    reading: str
    # Every location recorded for this reading.
    locations: t.List[kTGHZ2013Location]
33+
34+
35+
class kTGHZ2013(UCNBaseModel):
    """Pydantic model for the kTGHZ2013 field of one character.

    Combines the inherited ``ucn`` with the list of readings obtained from
    ``expand_kTGHZ2013``.
    """

    readings: t.List[kTGHZ2013Reading]

    model_config = pydantic.ConfigDict(
        validate_assignment=True,
        arbitrary_types_allowed=True,
    )

    @classmethod
    def from_string(
        cls,
        value: t.Union[str, t.Dict[str, t.Any]],
    ) -> "kTGHZ2013":
        """Build a model from a raw UNIHAN line or from a mapping.

        Parameters
        ----------
        value : str or dict
            Either a tab-separated line with exactly three columns (UCN,
            field name, field value) whose value column holds
            space-separated entries, or a dict that is validated directly
            against this model.

        Returns
        -------
        kTGHZ2013
            The validated model instance.

        Raises
        ------
        ValueError
            If a string ``value`` does not split into exactly three
            tab-separated columns.
        TypeError
            If ``value`` is neither a ``str`` nor a ``dict``.
        pydantic.ValidationError
            If the parsed data does not satisfy the model.
        """
        if isinstance(value, str):
            # The middle column (field name) is not needed to build the model.
            ucn, _field, val = value.split("\t")
            outs = expand_kTGHZ2013(val.split(" "))

            return cls(
                ucn=ucn,
                readings=[
                    kTGHZ2013Reading(
                        reading=out["reading"],
                        locations=[
                            kTGHZ2013Location(
                                page=loc["page"],
                                position=loc["position"],
                                entry_type=loc["entry_type"],
                            )
                            for loc in out["locations"]
                        ],
                    )
                    for out in outs
                ],
            )

        if isinstance(value, dict):
            # model_validate is the pydantic v2 replacement for the deprecated
            # parse_obj_as helper.
            return cls.model_validate(value)

        # pydantic v2's ValidationError cannot be constructed from a plain
        # message (the attempt itself raises an opaque TypeError), so raise an
        # explicit TypeError describing the unsupported input instead.
        msg = (
            "Invalid input for kTGHZ2013 model: expected str or dict, "
            f"got {type(value).__name__}"
        )
        raise TypeError(msg)

tests/test_validator.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""Test expansion of multi-value fields in UNIHAN."""
2+
import typing as t
3+
4+
from unihan_etl import validator
5+
6+
if t.TYPE_CHECKING:
7+
pass
8+
9+
10+
def test_kTGHZ2013() -> None:
    """Example of kTGHZ2013 being parsed via pydantic."""
    # Column separators are written as explicit tab escapes: from_string splits
    # the line on a tab character, and literal tabs inside a string are easily
    # turned into spaces by editors or formatters, which would break unpacking.
    model = validator.kTGHZ2013.from_string("U+3447\tkTGHZ2013\t482.140:zhòu")
    assert model.ucn == "U+3447"

    # Multiple entries inside the value column stay separated by a single space.
    model = validator.kTGHZ2013.from_string(
        "U+4E07\tkTGHZ2013\t256.090:mò 379.160:wàn",
    )
    assert model.ucn == "U+4E07"
    assert model.readings[0].reading == "mò"
    assert model.readings[1].reading == "wàn"
    assert model.readings[1].locations[0] == validator.kTGHZ2013Location(
        page=379,
        position=16,
        entry_type=0,
    )

    print(f"\n{model}\n")

0 commit comments

Comments
 (0)