from gini.matching import best_match_from_list,matches_from_list
import copy
class ConceptMatch(object):
    '''Record of a single match between a string and one item of a concept.

    Instances start out mostly empty; only ``concept`` is set by the
    constructor and the remaining attributes are filled in by whoever
    creates the match.
    '''

    def __init__(self, concept):
        #: A back-reference to the original matching concept
        self.concept = concept
        #: The string this concept was matched to
        self.string = None
        #: Matching item
        self.item = None
        #: List of matching example
        self.examples = None
        #: This concept is currently being used in the negative sense (i.e., "not ____")
        self.negative = False
        #: The type of match this was found in (e.g., ``exact`` or ``fuzzy``)
        self.type = None
        #: The degree of match (when using fuzzy matching)
        self.amount = None

    def __eq__(self, other):
        '''To make things easy, just follow the :class:`Concept`

        Equality is delegated to ``self.concept``. When ``other`` is itself a
        :class:`ConceptMatch` its concept is compared instead, so two matches
        for the same concept (including a match and itself) are equal.
        '''
        if isinstance(other, ConceptMatch):
            # Unwrap so the comparison is concept-to-concept; otherwise the
            # concept would be compared against the wrapper object.
            other = other.concept
        return self.concept == other

    def __ne__(self, other):
        '''Inverse of :meth:`__eq__` (Python 2 does not derive ``!=`` from ``==``)'''
        return not (self == other)

    def __repr__(self):
        return 'ConceptMatch(%s)' % self.concept
def sort_matches(matches):
    '''Sorts a ``list`` of matches best to worst

    Each match is scored as the weight of its ``type`` multiplied by its
    ``amount`` (a missing or zero ``amount`` counts as 1). Matches with the
    same score keep the order they had in ``matches``.

    Raises ``KeyError`` if a match has a ``type`` without a known weight.
    '''
    multipliers = {'exact': 10**5, 'fname': 10**4, 'fuzzy': 10**2, 'fuzzy_fragment': 1}

    def score(match):
        return multipliers[match.type] * (match.amount if match.amount else 1)

    # Sort on the score alone: sorting (score, match) tuples would fall back
    # to comparing the match objects themselves whenever two scores tie.
    return sorted(matches, key=score, reverse=True)
try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3: basestring no longer exists


class Concept(object):
    '''A named group of items, each described by a list of example strings.'''

    def __init__(self, name, example_lists, parent=None, action=None, meta=None):
        '''Create a :class:`Concept` named ``name`` using the given examples.

        ``example_lists`` can be given in multiple ways. As a ``list`` of strings, it's interpreted
        as multiple examples of a single item (stored under ``name``). If it's a ``list`` of
        ``list``s, each sub-``list`` is treated as an item, keyed by its first element. If a
        ``dict`` is given, the values of each entry are expected to be ``list``s of examples
        for each item, named by the key; the ``dict`` is stored as-is, not copied.

        An empty ``list`` yields a concept with no items. Only the first element of a
        ``list`` is inspected to decide between the two list forms.

        ``parent`` is accepted for interface compatibility but is not used here.

        Raises ``RuntimeError`` if ``example_lists`` is not in one of the forms above.
        '''
        #: Arbitrary label for concept
        self.name = name
        #: Method to run if this is used as an action
        self.action = action
        #: Any information to attach to this object
        self.meta = meta
        #: List of example strings for each item
        self.examples = {}
        if isinstance(example_lists, list):
            if example_lists:
                first = example_lists[0]
                if isinstance(first, _STRING_TYPES):
                    self.examples[name] = example_lists
                elif isinstance(first, list):
                    self.examples = {x[0]: x for x in example_lists}
                else:
                    raise RuntimeError('Cannot understand example_lists given')
        elif isinstance(example_lists, dict):
            self.examples = example_lists
        else:
            raise RuntimeError('Cannot understand example_lists given')

    def __repr__(self):
        return 'Concept(%s)' % self.name

    def __eq__(self, other):
        '''Concepts are equal if their names are equal, or if it is a string with the same name'''
        if isinstance(other, self.__class__):
            return other.name == self.name
        return isinstance(other, _STRING_TYPES) and self.name == other

    def __ne__(self, other):
        '''Inverse of :meth:`__eq__` (Python 2 does not derive ``!=`` from ``==``)'''
        return not (self == other)

    def __hash__(self):
        # Hash on the name so that objects comparing equal (another Concept
        # with the same name, or the name string itself) hash alike.
        return hash(self.name)

    def matches(self, string, fuzzy=90, fname_match=True, fuzzy_fragment=None, guess=False):
        '''Returns a ``list`` of :class:`ConceptMatch` objects, one for each item of
        this :class:`Concept` that matches ``string``, sorted best to worst
        (empty if nothing matches).

        The matching options are passed through unchanged to
        ``best_match_from_list``.
        '''
        matches = []
        for item in self.examples:
            m = best_match_from_list(string, self.examples[item], fuzzy, fname_match, fuzzy_fragment, guess)
            if m:
                # NOTE(review): m is indexed as a sequence; positions 0, 2 and 3
                # are taken to be the matched examples, the match type and the
                # match amount -- confirm against gini.matching.
                match = ConceptMatch(self)
                match.string = string
                match.item = item
                match.examples = m[0]
                match.type = m[2]
                match.amount = m[3]
                matches.append(match)
        return sort_matches(matches)
class Bottle(object):
    '''Container for semantic information (and magical creatures)'''

    def __init__(self, vocab=None):
        '''Create a :class:`Bottle` holding the :class:`Concept` objects in ``vocab``.

        If ``vocab`` is omitted, each instance gets its own new empty list.
        A list passed in is stored as-is, not copied.
        '''
        #: list of :class:`Concepts` to match with
        self.vocab = [] if vocab is None else vocab
        #: matching options
        self.fname_match = True
        self.fuzzy = 90
        self.fuzzy_fragment = None
        self.guess = False
        #: Allow the use of "not" and a "-" before a term to negate the term
        self.negative = True

    def set_action(self, concept_name, action_meth):
        '''helper function to set the ``action`` attr of any :class:`Concept`s in ``self.vocab`` that match ``concept_name`` to ``action_meth``'''
        for concept in self.vocab:
            if concept.name == concept_name:
                concept.action = action_meth

    def match_all_concepts(self, string):
        '''Returns sorted list of all :class:`Concept`s matching ``string``

        Every concept in ``self.vocab`` is queried with this object's matching
        options, and the combined results are sorted best to worst.
        '''
        matches = []
        for concept in self.vocab:
            matches += concept.matches(string, self.fuzzy, self.fname_match, self.fuzzy_fragment, self.guess)
        return sort_matches(matches)

    def match_concept(self, string):
        '''Find all matches in this :class:`Bottle` for ``string`` and return the best match

        Returns ``None`` when nothing matches.
        '''
        matches = self.match_all_concepts(string)
        if matches:
            return matches[0]
        return None

    def parse_string(self, string, best=False):
        '''Parses ``string`` trying to match each word to a :class:`Concept`. If ``best``, will only return the top matches

        ``string`` may be a single string (split on whitespace) or an already
        split ``list`` of words. The result has one entry per word, in order:
        the sorted list of matches (or only the top match if ``best``), or the
        word itself when nothing matched.

        When ``self.negative`` is set, the word ``not`` is dropped and marks
        the matches of the following word as negative; a leading ``-`` on a
        word is stripped and does the same for that word.
        '''
        if isinstance(string, list):
            items = string
        else:
            items = string.split()
        item_list = []
        not_next = False
        for item in items:
            if self.negative:
                if item == 'not':
                    not_next = True
                    continue
                # Slice instead of indexing so an empty token cannot raise IndexError
                if item[:1] == '-':
                    not_next = True
                    item = item[1:]
            concepts = self.match_all_concepts(item)
            if concepts:
                if not_next:
                    for concept in concepts:
                        concept.negative = True
                if best:
                    item_list.append(concepts[0])
                else:
                    item_list.append(concepts)
            else:
                item_list.append(item)
            # Negation only applies to the single term that follows it
            not_next = False
        return item_list

    def process_string(self, string):
        '''Searches the string (or list of strings) for an action word (a :class:`Concept` that has and ``action`` attached
        to it), then calls the appropriate function with a dictionary of the identified words (according to ``vocab``).

        The action of each word's best match is called, in word order, with the
        full parsed item list as its only argument.

        For examples, see ``demo.py``
        '''
        item_list = self.parse_string(string)
        for item in item_list:
            if not item:
                continue
            # Unmatched words are plain strings: their first element has no
            # ``concept`` attribute, so they are skipped here.
            concept = getattr(item[0], 'concept', None)
            action = getattr(concept, 'action', None)
            if action:
                action(item_list)