"""Evaluate a hand-written regular-expression NP chunker on CoNLL-2000.

Builds an ``nltk.RegexpParser`` from a small cascade of NP chunk rules and
prints its chunking scores on the CoNLL-2000 training and test splits.
"""

import nltk
from nltk.corpus import conll2000

# NOTE(review): the ``<TAG>`` patterns of this grammar were lost from the
# stored copy of the file (only the quantifiers ``?``/``*``/``+``, the braces
# and the trailing rule comments survived).  The tag names below are a
# reconstruction that fits that skeleton and the rule comments, using Penn
# Treebank tags as found in CoNLL-2000 -- confirm against the original
# grammar before relying on the reported scores.
#
# Rules are tried top to bottom, and a chunk rule only applies to tokens that
# are not already inside a chunk, so the order of the rules matters.
grammar = r'''
  NP: {<PDT>?<DT>?<PRP\$>?<CD>?<RB.*>*<JJ.*>*<NN.*>}
      {<PDT>?<DT>?<PRP\$>?<CD>?<RB.*>*<JJ.*>*<NN.*><CC><PDT>?<DT>?<PRP\$>?<CD>?<RB.*>*<JJ.*>*<NN.*>+}  # coordination
      {<DT>?<PRP\$>?<CD>?<RB.*>*<JJ.*>*<NN.*>+}  # compounds
      {<PRP>}  # pronoun
      {<WDT>}  # that
      {<EX>}   # it/there expletive
'''

cp = nltk.RegexpParser(grammar)

# Restrict the gold standard to NP chunks so that only NPs are scored.
training_chunks = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
test_chunks = conll2000.chunked_sents('test.txt', chunk_types=['NP'])

# NOTE(review): recent NLTK releases deprecate ``evaluate()`` in favour of
# ``accuracy()``; the original call is kept for compatibility with older
# installs -- confirm against the NLTK version in use.
print('Evaluering på treningskorpuset:\n', cp.evaluate(training_chunks))
print('\nEvaluering på testkorpuset:\n', cp.evaluate(test_chunks))