 """

 import os

-import unittest
+from pytest import fixture
 from corenlp_protobuf import Document, Sentence, Token, DependencyGraph, CorefChain
 from corenlp_protobuf import parseFromDelimitedString, to_text

-class TestProtobuf(unittest.TestCase):
-    def setUp(self):
-        self.text = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"
-        test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'test.dat')
-        with open(test_data, 'rb') as f:
-            self.buf = f.read()
-
-        self.doc = Document()
-        parseFromDelimitedString(self.doc, self.buf)
-
-    def test_parse_protobuf(self):
-        self.assertEqual(4239, self.doc.ByteSize(), "Could not read input correctly")
-
-    def test_document_text(self):
-        self.assertEqual(self.text, self.doc.text)
-
-    def test_sentences(self):
-        self.assertEqual(1, len(self.doc.sentence))
-
-        sentence = self.doc.sentence[0]
-        self.assertTrue(isinstance(sentence, Sentence))
-        self.assertEqual(67, sentence.characterOffsetEnd - sentence.characterOffsetBegin)  # Sentence length
-        self.assertEqual('', sentence.text)  # Note that the sentence text should actually be recovered from the tokens.
-        self.assertEqual(self.text[:-1], to_text(sentence))  # Note that the sentence text should actually be recovered from the tokens.
-
-    def test_tokens(self):
-        sentence = self.doc.sentence[0]
-        tokens = sentence.token
-        self.assertEqual(12, len(tokens))
-        self.assertTrue(isinstance(tokens[0], Token))
-
-        # Word
-        words = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP .".split()
-        words_ = [t.word for t in tokens]
-        self.assertEqual(words, words_)
-
-        # Lemma
-        lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP .".split()
-        lemmas_ = [t.lemma for t in tokens]
-        self.assertEqual(lemmas, lemmas_)
-
-        # POS
-        pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP .".split()
-        pos_ = [t.pos for t in tokens]
-        self.assertEqual(pos, pos_)
-
-        # NER
-        ner = "PERSON O O O O O O O O ORGANIZATION O O".split()
-        ner_ = [t.ner for t in tokens]
-        self.assertEqual(ner, ner_)
-
-        # character offsets
-        begin = [int(i) for i in "0 6 12 14 21 30 35 38 45 50 59 66".split()]
-        end = [int(i) for i in "5 11 13 20 29 34 37 44 49 58 66 67".split()]
-        begin_ = [t.beginChar for t in tokens]
-        end_ = [t.endChar for t in tokens]
-        self.assertEqual(begin, begin_)
-        self.assertEqual(end, end_)
-
-    def test_dependency_parse(self):
-        """
-        Extract the dependency parse from the annotation.
-        """
-        sentence = self.doc.sentence[0]
-
-        # You can choose from the following types of dependencies.
-        # In general, you'll want enhancedPlusPlus
-        self.assertTrue(sentence.basicDependencies.ByteSize() > 0)
-        self.assertTrue(sentence.enhancedDependencies.ByteSize() > 0)
-        self.assertTrue(sentence.enhancedPlusPlusDependencies.ByteSize() > 0)
-
-        tree = sentence.enhancedPlusPlusDependencies
-        self.assertTrue(isinstance(tree, DependencyGraph))
-        # Indices are 1-indexed, with 0 being the "pseudo root"
-        self.assertEqual([2], tree.root)  # 'wrote' is the root.
-        # There are as many nodes as there are tokens.
-        self.assertEqual(len(sentence.token), len(tree.node))
-
-        # Enhanced++ dependencies often contain additional edges and are
-        # not trees -- here, 'parsed' would also have an edge to
-        # 'sentence'
-        self.assertEqual(12, len(tree.edge))
-
-        # This edge goes from "wrote" to "Chris"
-        edge = tree.edge[0]
-        self.assertEqual(2, edge.source)
-        self.assertEqual(1, edge.target)
-        self.assertEqual("nsubj", edge.dep)
-
-    def test_coref_chain(self):
-        """
-        Extract the coreference chains from the annotation.
-        """
-        # Coreference chains span sentences and are stored in the
-        # document.
-        chains = self.doc.corefChain
-
-        # In this document there is 1 chain with Chris and he.
-        self.assertEqual(1, len(chains))
-        chain = chains[0]
-        self.assertTrue(isinstance(chain, CorefChain))
-        self.assertEqual(0, chain.mention[0].beginIndex)  # Starts at token 0 == 'Chris'
-        self.assertEqual(1, chain.mention[0].endIndex)
-        self.assertEqual("MALE", chain.mention[0].gender)
-
-        self.assertEqual(6, chain.mention[1].beginIndex)  # Starts at token 6 == 'he'
-        self.assertEqual(7, chain.mention[1].endIndex)
-        self.assertEqual("MALE", chain.mention[1].gender)
-
-        self.assertEqual(0, chain.representative)  # The head of the chain is 'Chris'
-
-
-if __name__ == "__main__":
-    unittest.main()
+
+# The text that was annotated
+TEXT = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"
+
+@fixture
+def doc_pb():
+    test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'test.dat')
+    with open(test_data, 'rb') as f:
+        buf = f.read()
+    doc = Document()
+    parseFromDelimitedString(doc, buf)
+    return doc
+
+def test_parse_protobuf(doc_pb):
+    assert doc_pb.ByteSize() == 4239
+
+def test_document_text(doc_pb):
+    assert doc_pb.text == TEXT
+
+def test_sentences(doc_pb):
+    assert len(doc_pb.sentence) == 1
+
+    sentence = doc_pb.sentence[0]
+    assert isinstance(sentence, Sentence)
+    assert sentence.characterOffsetEnd - sentence.characterOffsetBegin == 67  # Sentence length
+    assert sentence.text == ''  # Note that the sentence text should actually be recovered from the tokens.
+    assert to_text(sentence) == TEXT[:-1]  # Note that the sentence text should actually be recovered from the tokens.
+
+def test_tokens(doc_pb):
+    sentence = doc_pb.sentence[0]
+    tokens = sentence.token
+    assert len(tokens) == 12
+    assert isinstance(tokens[0], Token)
+
+    # Word
+    words = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP .".split()
+    words_ = [t.word for t in tokens]
+    assert words_ == words
+
+    # Lemma
+    lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP .".split()
+    lemmas_ = [t.lemma for t in tokens]
+    assert lemmas_ == lemmas
+
+    # POS
+    pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP .".split()
+    pos_ = [t.pos for t in tokens]
+    assert pos_ == pos
+
+    # NER
+    ner = "PERSON O O O O O O O O ORGANIZATION O O".split()
+    ner_ = [t.ner for t in tokens]
+    assert ner_ == ner
+
+    # character offsets
+    begin = [int(i) for i in "0 6 12 14 21 30 35 38 45 50 59 66".split()]
+    end = [int(i) for i in "5 11 13 20 29 34 37 44 49 58 66 67".split()]
+    begin_ = [t.beginChar for t in tokens]
+    end_ = [t.endChar for t in tokens]
+    assert begin_ == begin
+    assert end_ == end
+
+def test_dependency_parse(doc_pb):
+    """
+    Extract the dependency parse from the annotation.
+    """
+    sentence = doc_pb.sentence[0]
+
+    # You can choose from the following types of dependencies.
+    # In general, you'll want enhancedPlusPlus
+    assert sentence.basicDependencies.ByteSize() > 0
+    assert sentence.enhancedDependencies.ByteSize() > 0
+    assert sentence.enhancedPlusPlusDependencies.ByteSize() > 0
+
+    tree = sentence.enhancedPlusPlusDependencies
+    assert isinstance(tree, DependencyGraph)
+    # Indices are 1-indexed, with 0 being the "pseudo root"
+    assert tree.root == [2]  # 'wrote' is the root.
+    # There are as many nodes as there are tokens.
+    assert len(tree.node) == len(sentence.token)
+
+    # Enhanced++ dependencies often contain additional edges and are
+    # not trees -- here, 'parsed' would also have an edge to
+    # 'sentence'
+    assert len(tree.edge) == 12
+
+    # This edge goes from "wrote" to "Chris"
+    edge = tree.edge[0]
+    assert edge.source == 2
+    assert edge.target == 1
+    assert edge.dep == "nsubj"
+
+def test_coref_chain(doc_pb):
+    """
+    Extract the coreference chains from the annotation.
+    """
+    # Coreference chains span sentences and are stored in the
+    # document.
+    chains = doc_pb.corefChain
+
+    # In this document there is 1 chain with Chris and he.
+    assert len(chains) == 1
+    chain = chains[0]
+    assert isinstance(chain, CorefChain)
+    assert chain.mention[0].beginIndex == 0  # 'Chris'
+    assert chain.mention[0].endIndex == 1
+    assert chain.mention[0].gender == "MALE"
+
+    assert chain.mention[1].beginIndex == 6  # 'he'
+    assert chain.mention[1].endIndex == 7
+    assert chain.mention[1].gender == "MALE"
+
+    assert chain.representative == 0  # The head of the chain is 'Chris'
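For readers following the change: pytest matches each test's `doc_pb` argument to the `@fixture` function of the same name and calls it once per test, so the shared `setUp` state of the unittest version becomes an injected per-test value. A minimal self-contained sketch of the same pattern (the names here are illustrative, not from the repository):

    from pytest import fixture

    @fixture
    def doc():
        # Stand-in for the expensive setup (reading and parsing data/test.dat).
        return {"text": "Chris wrote a simple sentence."}

    def test_text(doc):
        # pytest injects the fixture's return value by argument name.
        assert doc["text"].startswith("Chris")

Since these tests only read the parsed document, declaring the fixture with `@fixture(scope='module')` would parse test.dat once for the whole module instead of once per test; the default scope is 'function'.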
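As a usage sketch outside the test suite, the same few calls deserialize any length-delimited, protobuf-serialized CoreNLP document. The input filename below is a placeholder, and the file is assumed to have been produced by CoreNLP's ProtobufAnnotationSerializer (e.g. via `-outputFormat serialized`):

    from corenlp_protobuf import Document, parseFromDelimitedString, to_text

    # 'example.dat' is a hypothetical file serialized by CoreNLP.
    with open('example.dat', 'rb') as f:
        buf = f.read()

    doc = Document()
    parseFromDelimitedString(doc, buf)  # fills `doc` from the delimited byte stream

    for sentence in doc.sentence:
        print(to_text(sentence))  # sentence text is recovered from the tokens
        for token in sentence.token:
            print(token.word, token.pos, token.ner)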