demo/NLTKTagParserDemo.py

   1 # This Python file uses the following encoding: utf-8
   2 '''
   3 Created on Jun 14, 2011
   4
   5 @author: mjacob
   6 '''
   7 from mjacob.nltk.grammar.TreeAdjoiningGrammar import TreeAdjoiningGrammar
   8 from mjacob.nltk.parse.tag.TagChartParser import TagChartParser
   9 from itertools import chain
  10 import yaml
  11
  12
  13 l4_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/L4-trinary/grammar.yaml')
  14 l4_parser = TagChartParser(l4_grammar)
  15
  16 print("Parsing non-context-free languages")
  17 print()
  18 print("EXAMPLE 1: L4 {aⁿbⁿcⁿdⁿ : n∈ℕ}")
  19 print("see https://secure.wikimedia.org/wikipedia/en/wiki/Context-sensitive_grammar")
  20 print("  %s" % (l4_grammar))
  21 for production in chain(sorted(l4_grammar.productions(is_auxiliary=False)), sorted(l4_grammar.productions(is_auxiliary=True))):
  22     print("   %s" % (production))
  23 print()
  24 for n in 3,4:
  25     string = "a"*n + "b"*n + "c"*n + "d"*n
  26     tokens = tuple(c for c in string)
  27     print("  parsing '%s': " % (string,))
  28     for parse in l4_parser.nbest_parse(tokens):
  29         print("    %s" % (parse.pprint(margin=100000)))
  30
  31 print()
  32 print()
  33
  34 xs_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/cross_serial_2/grammar.yaml')
  35 xs_parser = TagChartParser(xs_grammar)
  36
  37 print("EXAMPLE 2: Cross serial dependencies")
  38 print("see http://www.let.rug.nl/~vannoord/papers/acl94/node5.html")
  39 print("  %s" % (xs_grammar))
  40 for production in chain(sorted(xs_grammar.productions(is_auxiliary=False)), sorted(xs_grammar.productions(is_auxiliary=True))):
  41     print("   %s" % (production))
  42 print()
  43 examples = yaml.load(open('../tests/integration/tag_chart/cross_serial_2/tests.yaml').read())
  44 for string in examples:
  45     tokens = string.split(' ')
  46     print("  parsing '%s': " % (string,))
  47     for parse in xs_parser.nbest_parse(tokens):
  48         print("    %s" % (parse.pprint(margin=100000)))
  49
  50 print()
  51 print()
  52
  53 elephant_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/elephant/grammar.yaml')
  54 elephant_parser = TagChartParser(elephant_grammar)
  55 print("Example 3: An ambiguous sentence")
  56 print("  %s" % (elephant_grammar))
  57 for production in chain(sorted(elephant_grammar.productions(is_auxiliary=False)), sorted(elephant_grammar.productions(is_auxiliary=True))):
  58     print("   %s" % (production))
  59 print()
  60
  61 examples = yaml.load(open('../tests/integration/tag_chart/elephant/tests.yaml').read())
  62 for string in examples:
  63     tokens = string.split(' ')
  64     print("  parsing '%s': " % (string,))
  65     for parse in elephant_parser.nbest_parse(tokens):
  66         print("    %s" % (parse.pprint(margin=100000)))
  67
  68 print()
  69 print()
  70
  71 print("Performance testing results")
  72 with open('../tests/performance/PERFORMANCE_RESULTS.txt') as file:
  73     print(file.read())