
Commit f9b295b

Author: Arun Tejasvi Chaganty
Fixed packaging and style issues
1 parent 09bbd1d commit f9b295b

File tree: 4 files changed (+37 -22 lines)

MANIFEST.in

Lines changed: 4 additions & 2 deletions
@@ -1,5 +1,7 @@
 # Include the license file
-include LICENSE.txt
+include *.md
+include LICENSE
 
 # Include the data files
-recursive-include data *
+recursive-include corenlp_protobuf *.py
+recursive-include doc *.proto
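These directives only take effect when a source distribution is built, so a quick sanity check is to inspect the resulting archive. A sketch, assuming a tarball already produced by python setup.py sdist; the filename under dist/ is hypothetical and depends on the package version:

import tarfile

# Substitute whatever archive `python setup.py sdist` actually wrote to dist/.
with tarfile.open('dist/corenlp_protobuf-X.Y.Z.tar.gz') as sdist:
    names = sdist.getnames()

# LICENSE and the .proto sources should now be included.
assert any(name.endswith('LICENSE') for name in names)
assert any(name.endswith('.proto') for name in names)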

corenlp_protobuf/__init__.py

Lines changed: 4 additions & 1 deletion
@@ -3,6 +3,7 @@
 from google.protobuf.internal.decoder import _DecodeVarint
 from .CoreNLP_pb2 import *
 
+
 def parseFromDelimitedString(obj, buf, offset=0):
     """
     Stanford CoreNLP uses the Java "writeDelimitedTo" function, which
@@ -15,9 +16,11 @@ def parseFromDelimitedString(obj, buf, offset=0):
     obj.ParseFromString(buf[offset+pos:offset+pos+size])
     return pos+size
 
+
 def to_text(sentence):
     """
-    Helper routine that converts a Sentence protobuf to a string from its tokens.
+    Helper routine that converts a Sentence protobuf to a string from
+    its tokens.
     """
     text = ""
     for i, tok in enumerate(sentence.token):
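The hunks above show only fragments of the two helpers. As context, a sketch of how the complete functions plausibly read: the _DecodeVarint call is inferred from the import and the slicing shown in the diff, and the token fields before and word used in to_text are assumptions about the CoreNLP token protobuf rather than anything this commit confirms.

from google.protobuf.internal.decoder import _DecodeVarint


def parseFromDelimitedString(obj, buf, offset=0):
    """Parse a message framed the way Java's writeDelimitedTo frames it:
    a varint-encoded size followed by that many bytes of message."""
    # Decode the leading varint; with offset 0, pos is the number of
    # bytes the varint itself occupied, so the message body starts there.
    size, pos = _DecodeVarint(buf, offset)
    obj.ParseFromString(buf[offset+pos:offset+pos+size])
    return pos+size


def to_text(sentence):
    """Reassemble a sentence string from its tokens (sketch; assumes
    word holds the surface form and before the whitespace preceding it)."""
    text = ""
    for i, tok in enumerate(sentence.token):
        if i != 0:
            text += tok.before
        text += tok.word
    return text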

tests/test_read.py

Lines changed: 26 additions & 15 deletions
@@ -9,36 +9,45 @@
 
 import os
 from pytest import fixture
-from corenlp_protobuf import Document, Sentence, Token, DependencyGraph, CorefChain
+from corenlp_protobuf import Document, Sentence, Token, DependencyGraph,\
+    CorefChain
 from corenlp_protobuf import parseFromDelimitedString, to_text
 
 
 # Thext that was annotated
 TEXT = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"
 
+
 @fixture
 def doc_pb():
-    test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'test.dat')
+    test_dir = os.path.dirname(os.path.abspath(__file__))
+    test_data = os.path.join(test_dir, 'data', 'test.dat')
     with open(test_data, 'rb') as f:
         buf = f.read()
     doc = Document()
     parseFromDelimitedString(doc, buf)
     return doc
 
+
 def test_parse_protobuf(doc_pb):
     assert doc_pb.ByteSize() == 4239
 
+
 def test_document_text(doc_pb):
     assert doc_pb.text == TEXT
 
+
 def test_sentences(doc_pb):
     assert len(doc_pb.sentence) == 1
 
     sentence = doc_pb.sentence[0]
     assert isinstance(sentence, Sentence)
-    assert sentence.characterOffsetEnd - sentence.characterOffsetBegin # Sentence length == 67
-    assert sentence.text == '' # Note that the sentence text should actually be recovered from the tokens.
-    assert to_text(sentence) == TEXT[:-1] # Note that the sentence text should actually be recovered from the tokens.
+    # check sentence length
+    assert sentence.characterOffsetEnd - sentence.characterOffsetBegin == 67
+    # Note that the sentence text should actually be recovered from the tokens.
+    assert sentence.text == ''
+    assert to_text(sentence) == TEXT[:-1]
+
 
 def test_tokens(doc_pb):
     sentence = doc_pb.sentence[0]
@@ -54,25 +63,26 @@ def test_tokens(doc_pb):
     # Lemma
     lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP .".split()
     lemmas_ = [t.lemma for t in tokens]
-    assert lemmas_ == lemmas 
+    assert lemmas_ == lemmas
 
     # POS
     pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP .".split()
     pos_ = [t.pos for t in tokens]
-    assert pos_ == pos 
+    assert pos_ == pos
 
     # NER
     ner = "PERSON O O O O O O O O ORGANIZATION O O".split()
     ner_ = [t.ner for t in tokens]
-    assert ner_ == ner 
+    assert ner_ == ner
 
     # character offsets
     begin = [int(i) for i in "0 6 12 14 21 30 35 38 45 50 59 66".split()]
     end = [int(i) for i in "5 11 13 20 29 34 37 44 49 58 66 67".split()]
     begin_ = [t.beginChar for t in tokens]
     end_ = [t.endChar for t in tokens]
-    assert begin_ == begin 
-    assert end_ == end 
+    assert begin_ == begin
+    assert end_ == end
+
 
 def test_dependency_parse(doc_pb):
     """
@@ -89,7 +99,7 @@ def test_dependency_parse(doc_pb):
     tree = sentence.enhancedPlusPlusDependencies
     isinstance(tree, DependencyGraph)
     # Indices are 1-indexd with 0 being the "pseudo root"
-    assert tree.root # 'wrote' is the root. == [2]
+    assert tree.root  # 'wrote' is the root. == [2]
     # There are as many nodes as there are tokens.
     assert len(tree.node) == len(sentence.token)
 
@@ -104,6 +114,7 @@
     assert edge.target == 1
     assert edge.dep == "nsubj"
 
+
 def test_coref_chain(doc_pb):
     """
     Extract the corefence chains from the annotation.
@@ -113,15 +124,15 @@
     chains = doc_pb.corefChain
 
     # In this document there is 1 chain with Chris and he.
-    assert len(chains) == 1 
+    assert len(chains) == 1
     chain = chains[0]
     assert isinstance(chain, CorefChain)
-    assert chain.mention[0].beginIndex == 0 # 'Chris'
+    assert chain.mention[0].beginIndex == 0  # 'Chris'
     assert chain.mention[0].endIndex == 1
     assert chain.mention[0].gender == "MALE"
 
-    assert chain.mention[1].beginIndex == 6 # 'he'
+    assert chain.mention[1].beginIndex == 6  # 'he'
     assert chain.mention[1].endIndex == 7
     assert chain.mention[1].gender == "MALE"
 
-    assert chain.representative == 0 # The head of the chain is 'Chris'
+    assert chain.representative == 0  # Head of the chain is 'Chris'
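The fixture and tests double as a usage recipe for the package. A minimal standalone sketch along the same lines, assuming it is run from the repository root so that tests/data/test.dat resolves:

import os

from corenlp_protobuf import Document, parseFromDelimitedString, to_text

# The same serialized annotation the doc_pb fixture reads.
test_data = os.path.join('tests', 'data', 'test.dat')
with open(test_data, 'rb') as f:
    buf = f.read()

doc = Document()
parseFromDelimitedString(doc, buf)

# Recover each sentence's text from its tokens, as test_sentences does.
for sentence in doc.sentence:
    print(to_text(sentence))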

tox.ini

Lines changed: 3 additions & 4 deletions
@@ -11,22 +11,21 @@
 # and also to help confirm pull requests to this project.
 
 [tox]
-envlist = py{27,33,34}
+envlist = py{27,33,35}
 
 [testenv]
 basepython =
     py27: python2.7
     py33: python3.3
-    py34: python3.4
+    py35: python3.5
 deps =
     check-manifest
     readme_renderer
-    flake8
     pytest
+    protobuf
 commands =
     check-manifest --ignore tox.ini,tests*
     python setup.py check -m -r -s
-    flake8 .
     py.test tests
 [flake8]
 exclude = .tox,*.egg,build,data
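With flake8 dropped from both deps and commands, style checks no longer run in this matrix, and the new protobuf dependency presumably exists because the package imports google.protobuf at test time. Assuming python2.7, python3.3, and python3.5 interpreters are on PATH, the environments run with:

tox            # exercises py27, py33 and py35 in sequence
tox -e py35    # runs a single environment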
