1
+ from pyspark .sql .types import StructType
2
+ from pyspark .sql .types import StructField
3
+ from pyspark .sql .types import StringType
4
+ from pyspark .sql .types import MapType
5
+ from pyspark .sql .types import LongType
6
+ from pyspark .sql .types import ArrayType
7
+
8
def AQSchema():
    """Build the AQAnnotation schema.

    Fields, in order:
        docId       -- Document Id (such as PII); non-nullable string.
        annotSet    -- Annotation set (such as scnlp, ge); non-nullable string.
        annotType   -- Annotation type (such as text, sentence); non-nullable string.
        startOffset -- Starting offset for the annotation (based on the text
                       file for the document); non-nullable long.
        endOffset   -- Ending offset for the annotation (based on the text
                       file for the document); non-nullable long.
        annotId     -- Annotation Id (after the annotations have been
                       reordered); non-nullable long.
        properties  -- Any attributes such as exclude annotations, original
                       annotation id, parent id, etc.; nullable map of
                       string to string.

    A new StructType is constructed on every call.
    """
    # Required string columns, in schema order.
    string_fields = [
        StructField(column, StringType(), False)
        for column in ('docId', 'annotSet', 'annotType')
    ]
    # Required long columns, in schema order.
    long_fields = [
        StructField(column, LongType(), False)
        for column in ('startOffset', 'endOffset', 'annotId')
    ]
    # The only nullable column: free-form attributes stored as a map.
    properties_field = StructField(
        'properties', MapType(StringType(), StringType()), True
    )
    return StructType(string_fields + long_fields + [properties_field])
25
+
26
def AQSchemaList():
    """Build the schema used for Preceding and Following functions.

    Fields, in order:
        annot  -- a single AQSchema() struct; non-nullable.
        annots -- an array of AQSchema() structs; the array itself is
                  nullable and its elements may be null.
    """
    single_annotation = StructField('annot', AQSchema(), False)
    annotation_array = ArrayType(AQSchema(), True)
    related_annotations = StructField('annots', annotation_array, True)
    return StructType([single_annotation, related_annotations])
31
+
32
def CATSchema():
    """Build the CATAnnotation schema.

    Fields, in order:
        docId       -- Document Id (such as PII); non-nullable string.
        annotSet    -- Annotation set (such as scnlp, ge); non-nullable string.
        annotType   -- Annotation type (such as text, sentence); non-nullable string.
        startOffset -- Starting offset for the annotation (based on the text
                       file for the document); non-nullable long.
        endOffset   -- Ending offset for the annotation (based on the text
                       file for the document); non-nullable long.
        annotId     -- Annotation Id (after the annotations have been
                       reordered); non-nullable long.
        other       -- Any attributes such as exclude annotations, original
                       annotation id, parent id, etc., stored as a
                       name-value & delimited string; nullable string.

    A new StructType is constructed on every call.
    """
    # Required string columns, in schema order.
    string_fields = [
        StructField(column, StringType(), False)
        for column in ('docId', 'annotSet', 'annotType')
    ]
    # Required long columns, in schema order.
    long_fields = [
        StructField(column, LongType(), False)
        for column in ('startOffset', 'endOffset', 'annotId')
    ]
    # The only nullable column: attributes kept as a single delimited string.
    other_field = StructField('other', StringType(), True)
    return StructType(string_fields + long_fields + [other_field])
0 commit comments