#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This file supplies functions to change the subjects and verbs of a
tagged sentence from the third person to the first.

A tagged sentence is a list of ``(tag, text)`` pairs. The tags handled
here are 'S' (subject), 'V' (verb), 'P:PERS' (personal pronoun),
'D:POS' (possessive determiner) and 'P:POS' (possessive pronoun); any
other pair is copied unchanged.

Verb forms are looked up in the ``verbes`` table of the Lefff SQLite
database, which is opened when the module is imported.
"""

import sqlite3

LEFFF = sqlite3.connect('./Lefff/Lefff.db')
CURS = LEFFF.cursor()

# Words after which a subject/pronoun becomes the stressed form 'moi-même'.
_ARTICLES = ('de', 'un', 'une', 'des', 'le', 'la', 'les')

# Tags whose text is a subject or a personal pronoun.
_PRONOUN_TAGS = ('S', 'P:PERS')

# Letters that trigger the elision of the preceding pronoun.
_VOWELS = 'aáàâäeéèêëiíìîïoóòôöuúùûüyýỳŷÿ'

# Pronouns whose final vowel is dropped before a vowel (je -> j', ...).
# Anything else ('nous', 'moi-même', 'on', ...) must be left untouched.
_ELIDABLE = ('je', 'me', 'te', 'se', 'le', 'la')

# Third person -> first person personal pronouns.
_PPERS_MAP = {
    "il": "je", "il en": "j'en", "il s'": "je me", "il se": "je me",
    "elle": "je", "elle en": "j'en", "elle s'": "je me", "elle se": "je me",
    "ils": "nous", "ils en": "nous en",
    "ils s'": "nous nous", "ils se": "nous nous",
    "elles": "nous", "elles en": "nous en",
    "elles s'": "nous nous", "elles se": "nous nous",
    "lui": "me", "eux": "nous",
    "s'il": "si je", "s'ils": "si nous",
}

# Third person -> first person possessive determiners.
# First and second person determiners are returned unchanged.
_DPOS_MAP = {"son": "mon", "sa": "ma", "ses": "mes", "leurs": "nos"}

# Third person -> first person possessive pronouns.
# First and second person pronouns (mien, tien, nôtre, vôtre, ...) are
# not listed: the lookup falls back to the input, so they stay unchanged.
_PPOS_MAP = {
    "sien": "mien", "siens": "miens",
    "sienne": "mienne", "siennes": "miennes",
    "leur": "nôtre", "leurs": "nôtres",
}


def split(string, num):
    """
    A function that split a string in a list of n characters string

    Arguments:
        string {string} -- the text we want to cut
        num {integer} -- the length of the string at the output

    Returns:
        [list] -- Returns a list of string (the last one may be shorter)
    """
    return [string[start:start + num] for start in range(0, len(string), num)]


def write_file(sentence, file):
    """
    Writes the sentence in the file, 2096 char by 2096 char to avoid
    too long lines in the file.

    Arguments:
        sentence {string} -- the text to write
        file {file} -- a writable text file object
    """
    for part in split(sentence, 2096):
        file.write(str(part) + '\n')


def _follows_article(idx, new_phrase):
    """
    Tells whether the token before position idx ends with one of the
    words in _ARTICLES. An empty previous token counts as "no".
    """
    if idx <= 0:
        return False
    words = new_phrase[idx - 1][1].split()
    return bool(words) and words[-1] in _ARTICLES


def _capitalize_if_first(text, idx):
    """Capitalizes text when it opens the sentence (idx == 0)."""
    return text.capitalize() if idx == 0 else text


def new_sub(idx, new_phrase):
    """
    Returns a new subject according to the context.

    Arguments:
        idx {integer} -- position of the subject in the sentence
        new_phrase {list} -- the tagged pairs already rewritten

    Returns:
        [string] -- 'moi-même' after an article or 'de', otherwise
                    'Je' at the start of the sentence and 'je' elsewhere
    """
    if _follows_article(idx, new_phrase):
        return 'moi-même'
    return 'Je' if idx == 0 else 'je'


def new_ppers(idx, partie, new_phrase):
    """
    Returns a new personal pronoun according to the context.

    Arguments:
        idx {integer} -- position of the pronoun in the sentence
        partie {tuple} -- the original (tag, text) pair
        new_phrase {list} -- the tagged pairs already rewritten

    Returns:
        [string] -- 'moi-même' after an article or 'de' (except for
                    'se'), otherwise the first person pronoun,
                    capitalized at the start of the sentence
    """
    if partie[1] != 'se' and _follows_article(idx, new_phrase):
        return 'moi-même'
    return _capitalize_if_first(change_ppers(partie[1].lower()), idx)


def replace_sujet_verbe(phrases, file):
    """
    A function that replace subject, verb and possessive group in text

    Arguments:
        phrases {list} -- sentences where replace elements
        file {file} -- the report file

    Returns:
        [list] -- Returns modified tagged sentence
    """
    new_phrases = []
    file.write("\n---- Replace sujet-verb -----\n\n")
    for phrase in phrases:
        new_phrase = []
        for idx, partie in enumerate(phrase):
            tag = partie[0]
            if tag == 'S':
                new_phrase.append(('S', new_sub(idx, new_phrase)))
            elif tag == 'V':
                new_phrase.append(('V', find_new_form(partie[1])[0]))
            elif tag == 'P:PERS':
                new_phrase.append(
                    ('P:PERS', new_ppers(idx, partie, new_phrase)))
            elif tag == 'D:POS':
                new_dpos = change_dpos(partie[1].lower())
                new_phrase.append(
                    ('D:POS', _capitalize_if_first(new_dpos, idx)))
            elif tag == 'P:POS':
                new_ppos = change_ppos(partie[1].lower())
                new_phrase.append(
                    ('P:POS', _capitalize_if_first(new_ppos, idx)))
            else:
                # Every other tag is copied unchanged.
                new_phrase.append(partie)
            # Must run after the append: it reads new_phrase[idx].
            tricky_apostrophes(phrase, idx, new_phrase, partie)
        mod_sentence = ''.join(
            '("' + str(part[0]) + '","' + str(part[1]) + '")'
            for part in new_phrase)
        write_file(mod_sentence, file)
        new_phrases.append(new_phrase)
    return new_phrases


def find_new_form(form):
    """
    A function that search in database the form of a verb

    Looks for the same verb (same lemma) whose code matches the code
    of the given form with the person digits replaced by '1'.

    Arguments:
        form {string} -- form that we need to modify

    Returns:
        [tuple] -- Returns the database row of the new form, whose
                   first item is the form itself. When the form is
                   unknown, a one-item tuple holding the input is
                   returned; when no first person form can be found,
                   the row of the input form is returned.
    """
    # Values are bound as parameters: a form containing a double quote
    # would break a hand-built query, and a double-quoted value equal to
    # a column name (e.g. "code") is read by SQLite as that column.
    CURS.execute('SELECT * FROM verbes WHERE form IS ?;', (form,))
    # Several rows may match the same form.
    results = CURS.fetchall()
    if len(results) > 1:
        # With several matches, keep the third person ones.
        results = [res for res in results if '3' in res[2]]
    if not results:
        # Nothing usable in the database: give the input back.
        return (form,)
    result = results[0]
    new_code = result[2]
    if '1' not in new_code:
        # When a code holds several digits, they follow each other.
        dgt = ''.join(char for char in new_code if char.isdigit())
        if not dgt:
            # No person in the code: nothing to convert. (Replacing the
            # empty string would insert '1%' between every character.)
            return result
        new_code = new_code.replace(dgt, '1%')
    request = 'SELECT * FROM verbes WHERE lemma IS ? AND code LIKE ?'
    CURS.execute(request, (result[1], new_code))
    final_form = CURS.fetchone()
    # Loosen the pattern by dropping its second character until a row
    # matches. Stop at one character, where dropping changes nothing
    # and the loop would never end.
    while not final_form and len(new_code) > 1:
        new_code = new_code[0] + new_code[2:]
        CURS.execute(request, (result[1], new_code))
        final_form = CURS.fetchone()
    return final_form or result


def change_ppers(ppers):
    """
    A function that replace personal pronoun

    Arguments:
        ppers {string} -- ppers that we need to modify

    Returns:
        [string] -- Returns new ppers, or the input when it has no
                    first person counterpart in the table
    """
    return _PPERS_MAP.get(ppers, ppers)


def change_dpos(dpos):
    """
    A function that replace possessive determinant

    Arguments:
        dpos {string} -- dpos that we need to modify

    Returns:
        [string] -- Returns new dpos, or the input when it has no
                    first person counterpart in the table
    """
    return _DPOS_MAP.get(dpos, dpos)


def change_ppos(ppos):
    """
    A function that replace possessive pronoun

    Arguments:
        ppos {string} -- ppos that we need to modify

    Returns:
        [string] -- Returns new ppos, or the input when it has no
                    first person counterpart in the table
    """
    return _PPOS_MAP.get(ppos, ppos)


def tricky_apostrophes(phrase, idx, new_phrase, partie):
    """
    Adds and remove ' where it is needed.

    Works in place on the pair before position idx in new_phrase:
    an elidable pronoun loses its final vowel before a word starting
    with a vowel (je aime -> j' aime), and an elided word gets its 'e'
    back before a pronoun starting with a consonant (qu' je -> que je).

    Arguments:
        phrase {list} -- the original tagged sentence
        idx {integer} -- position of the pair that was just rewritten
        new_phrase {list} -- the rewritten pairs, up to idx included
        partie {tuple} -- the original (tag, text) pair at idx
    """
    if idx <= 0:
        return
    current = new_phrase[idx][1]
    starts_with_vowel = bool(current) and current[0].lower() in _VOWELS
    previous_tag = phrase[idx - 1][0]

    if previous_tag in _PRONOUN_TAGS and starts_with_vowel:
        trimmed = new_phrase[idx - 1][1].rstrip()
        words = trimmed.split()
        # Only real elidable pronouns: 'nous' must not become "nou'".
        if words and words[-1].lower() in _ELIDABLE:
            new_phrase[idx - 1] = (previous_tag, trimmed[:-1] + '\'')

    previous_text = new_phrase[idx - 1][1]
    # Before a vowel the apostrophe is still right (qu'on, j'en).
    if (partie[0] in _PRONOUN_TAGS
            and previous_text.endswith('\'')
            and not starts_with_vowel):
        new_phrase[idx - 1] = (previous_tag, previous_text[:-1] + 'e')


def __test():
    """
    Test function of modify_sentences.py

    Placeholder: the calls below need tagged sentences and a report
    file to be supplied before they can be enabled.
    """
    #print('\n'.join([' '.join([phr[1] for phr in phrase]) for phrase in phrases]))
    #new = replace_sujet_verbe(phrases)
    #print('\n')
    #print('\n'.join([' '.join([phr[1] for phr in phrase]) for phrase in new]).replace('\' ', '\''))


if __name__ == '__main__':
    __test()