#-*- coding:utf-8 -*-
"""Build three verb lexicons from the dictionary file ``./dicovalence.txt``.

The input is read as a list of lines.  The code expects it to be made of
entries laid out as follows:

* an entry starts with a line whose first three characters are ``VAL``; the
  verb is the text between the 4-character tag and the first ``:``, minus
  one leading separator character;
* the following lines are fields; the field name is the first
  whitespace-separated token with its last character removed (presumably a
  trailing marker such as ``$`` -- confirm against the data file);
* an entry ends at the first blank (whitespace-only) line.

When run as a script, three lexicon files are written: pronominal verbs
conjugated with "être", other verbs conjugated with "être", and verbs that
carry an ``RP`` field.  The code is kept compatible with Python 2 and 3.
"""
import codecs
import os
import re

INPUT_PATH = './dicovalence.txt'
PRONOMINALS_PATH = '../rewriting_rules/lexicons/pronominal_verbs.lp'
VERBS_WITH_ETRE_PATH = '../rewriting_rules/lexicons/verbs_with_etre.lp'
PASSIVABLE_VERBS_PATH = '../rewriting_rules/lexicons/passivable_verbs.lp'


def _verbe_de_l_entree(val_line):
    """Return the verb named on the ``VAL`` line of an entry."""
    # Skip the 4-character tag, keep what precedes the first ':' and drop the
    # single separator character that sits between the tag and the verb.
    return val_line[4:].split(':')[0][1:]


def _est_aux_etre(tokens):
    """Tell whether a tokenised field line is an ``AUX`` field set to "être"."""
    # The length check protects against an AUX field that has no value.
    return (tokens[0][:-1] == u'AUX'
            and len(tokens) > 1
            and tokens[1] == u'être')


def _est_champ_rp(tokens):
    """Tell whether a tokenised field line is an ``RP`` field."""
    return tokens[0][:-1] == u'RP'


def _extraire_verbes(lentries, champ_correspond):
    """Collect the verbs of the entries that own a matching field.

    lentries         -- list of lines of the dictionary file.
    champ_correspond -- predicate called with the whitespace-split tokens of
                        each field line; the first field for which it returns
                        a true value selects the entry.

    Returns the selected verbs in file order.  Verbs whose first four
    characters are "être" are never returned.
    """
    verbes = []
    lines = iter(lentries)
    for line in lines:
        if line[:3] != 'VAL':
            continue
        # The inner loop deliberately consumes the SAME iterator: it reads the
        # fields of the current entry, and the outer loop then resumes right
        # after the line on which the inner loop stopped.
        for fline in lines:
            tokens = fline.split()
            if not tokens:
                # Blank (or whitespace-only, e.g. '\r\n') line: end of entry.
                break
            if champ_correspond(tokens):
                verb = _verbe_de_l_entree(line)
                # "être" itself is excluded; nothing else is skipped, so the
                # entry that follows is still examined.
                if verb[:4] != u'être':
                    verbes.append(verb)
                break
    return verbes


def extraire_verbes_avec_etre(lentries):
    """Extract the verbs whose ``AUX`` field has "être" as its first value.

    lentries -- list of lines of the dictionary file.

    Returns a tuple ``(pronominals, verbs_with_etre)``:
    pronominals     -- verbs that started with "s'" or "se ", with that
                       prefix removed;
    verbs_with_etre -- every other selected verb, unchanged.
    Both lists keep the file order and may contain duplicates.
    """
    pronominals = []
    verbs_with_etre = []
    for verb in _extraire_verbes(lentries, _est_aux_etre):
        if verb[:2] == u"s'":
            pronominals.append(verb[2:])
        elif verb[:3] == u'se ':
            pronominals.append(verb[3:])
        else:
            verbs_with_etre.append(verb)
    return (pronominals, verbs_with_etre)


def extraire_verbes_passivables(lentries):
    """Extract the verbs of the entries that contain an ``RP`` field.

    lentries -- list of lines of the dictionary file.

    Returns the verbs in file order; duplicates are possible.
    """
    return _extraire_verbes(lentries, _est_champ_rp)


def supprimer_doublons(input_list):
    """Return a copy of ``input_list`` without consecutive repetitions.

    Only adjacent equal items are merged, so the input has to be grouped
    (e.g. sorted) for every duplicate to disappear.  The first item is always
    kept, even when it is an empty string.
    """
    output_list = []
    for item in input_list:
        if not output_list or item != output_list[-1]:
            output_list.append(item)
    return output_list


def afficher(l):
    """Print every element of ``l`` on its own line."""
    for e in l:
        print(e)


def stocker(output, file_path):
    """Write every entry of ``output`` to ``file_path``, one per line (UTF-8)."""
    # The context manager closes the file even if a write fails.
    with codecs.open(file_path, mode='w', encoding='utf-8') as foutput:
        for entry in output:
            foutput.write(entry + "\n")


def main():
    """Read the dictionary file and write the three lexicon files."""
    with codecs.open(INPUT_PATH, encoding='utf-8') as finput:
        input_lines = finput.readlines()

    (pronominals_brut, verbs_with_etre_brut) = extraire_verbes_avec_etre(input_lines)
    verbes_passivables_brut = extraire_verbes_passivables(input_lines)

    pronominals = supprimer_doublons(pronominals_brut)
    verbs_with_etre = supprimer_doublons(verbs_with_etre_brut)
    verbes_passivables = supprimer_doublons(verbes_passivables_brut)

    stocker(pronominals, PRONOMINALS_PATH)
    stocker(verbs_with_etre, VERBS_WITH_ETRE_PATH)
    stocker(verbes_passivables, PASSIVABLE_VERBS_PATH)


if __name__ == '__main__':
    main()