Skip to content

Commit e94509e

Browse files
committed
Initial Commit
1 parent 0a885ba commit e94509e

22 files changed

+2181
-1
lines changed

.gitignore

+2
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
2+
.DS_Store

AQPython/Annotation.py

+48
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,48 @@
1+
from pyspark.sql.types import StructType
2+
from pyspark.sql.types import StructField
3+
from pyspark.sql.types import StringType
4+
from pyspark.sql.types import MapType
5+
from pyspark.sql.types import LongType
6+
from pyspark.sql.types import ArrayType
7+
8+
def AQSchema():
    """Return the Spark ``StructType`` describing an AQAnnotation row.

    Fields, in order:

    * ``docId``       -- document id (such as PII); string, not nullable.
    * ``annotSet``    -- annotation set (such as scnlp, ge); string, not nullable.
    * ``annotType``   -- annotation type (such as text, sentence); string, not nullable.
    * ``startOffset`` -- starting offset of the annotation, based on the
      document's text file; long, not nullable.
    * ``endOffset``   -- ending offset of the annotation, based on the
      document's text file; long, not nullable.
    * ``annotId``     -- annotation id (after the annotations have been
      reordered); long, not nullable.
    * ``properties``  -- any further attributes (exclude annotations, original
      annotation id, parent id, etc.) stored as a string-to-string map;
      nullable.
    """
    # The three identifying columns are all required strings.
    columns = [StructField(label, StringType(), False)
               for label in ('docId', 'annotSet', 'annotType')]
    # Offsets and the annotation id are all required longs.
    columns += [StructField(label, LongType(), False)
                for label in ('startOffset', 'endOffset', 'annotId')]
    # Free-form attributes are optional.
    columns.append(
        StructField('properties', MapType(StringType(), StringType()), True))
    return StructType(columns)
25+
26+
def AQSchemaList():
    """Return the schema used for the Preceding and Following functions.

    The struct has two fields:

    * ``annot``  -- a single AQAnnotation struct (see ``AQSchema``); not nullable.
    * ``annots`` -- an array of AQAnnotation structs; both the array itself
      and its elements are nullable.
    """
    single = StructField('annot', AQSchema(), False)
    related = StructField('annots', ArrayType(AQSchema(), True), True)
    return StructType([single, related])
31+
32+
def CATSchema():
    """Return the Spark ``StructType`` describing a CATAnnotation row.

    Fields, in order:

    * ``docId``       -- document id (such as PII); string, not nullable.
    * ``annotSet``    -- annotation set (such as scnlp, ge); string, not nullable.
    * ``annotType``   -- annotation type (such as text, sentence); string, not nullable.
    * ``startOffset`` -- starting offset of the annotation, based on the
      document's text file; long, not nullable.
    * ``endOffset``   -- ending offset of the annotation, based on the
      document's text file; long, not nullable.
    * ``annotId``     -- annotation id (after the annotations have been
      reordered); long, not nullable.
    * ``other``       -- any further attributes (exclude annotations, original
      annotation id, parent id, etc.) stored as a name-value,
      ``&``-delimited string; nullable.
    """
    # The three identifying columns are all required strings.
    columns = [StructField(label, StringType(), False)
               for label in ('docId', 'annotSet', 'annotType')]
    # Offsets and the annotation id are all required longs.
    columns += [StructField(label, LongType(), False)
                for label in ('startOffset', 'endOffset', 'annotId')]
    # Extra attributes are carried as a single optional string.
    columns.append(StructField('other', StringType(), True))
    return StructType(columns)

AQPython/Concordancers.py

+371
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)