Code Triplificator
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup

# Creation of the database: Wikipast hyperwords and their RDF equivalents
Wikipast = ["Naissance", "Décès", "Retraite", "Élection", "Obsèques", "Participation", "Mort", "Démission", "Nomination", "Diplôme", "Organisation", "Naturalisation", "Invention", "Rôle", "Meurtre", "Inauguration", "Rencontre", "Mariage"]
VocabORG = ["birth", "death", "retirement", "investiture", "funeral", "participant", "death", "resignation", "investiture", "graduation", "organization", "naturalization", "creator", "role", "murder", "inauguration", "has met", "marriage"]
LibORG = ["bio", "bio", "bio", "bio", "bio", "bio", "bio", "bio", "bio", "bio", "bio", "bio", "frbr/core", "participation", "bio", "bio", "relationship", "bio"]
Types = [["bio:principal"], ["bio:principal"], ["bio:principal"], ["bio:principal", "bio:position"], ["bio:principal"], ["bio:principal"], ["bio:principal"], [], ["bio:principal", "bio:position", "bio:organization"], ["bio:principal", "bio:organization"], ["bio:principal", "bio:organization"], [], ["bio:event", "bio:principal"], ["bio:principal", "bio:nationality"], ["bio:work", "bio:principal"], ["bio:principal", "frbr/core:work"], ["bio:principal", "bio:participant"], ["bio:organization"], ["bio:principal"], [], ["bio:principal"], ["bio:partner"], ["bio:principal", "bio:partner"]]

# Build a dictionary containing the data
BDD = {}
for terme in Wikipast:
    indice = Wikipast.index(terme)
    BDD[terme] = (VocabORG[indice], LibORG[indice], Types[indice])

# Extract the names of the biography pages
noms = []
baseurl = 'http://wikipast.epfl.ch/wikipast/'
resultat = requests.post(baseurl + 'api.php?action=query&titles=' + 'Biographies' + '&export&exportnowrap')
soup = BeautifulSoup(resultat.text, "lxml")
code_noms = ''
for primitive in soup.findAll("text"):
    code_noms += primitive.string
for phrase in code_noms.split("| [["):
    nom = phrase.split("]]")
    nom = nom[0].strip()
    nom = nom.replace(' ', '_')
    noms.append(nom)
noms = noms[1:]

# Processing of each biographical datafication by the bot
def work(pages):
    for nom in pages:
        sortie = ""
        code = ''
        result = requests.post(baseurl + 'api.php?action=query&titles=' + nom + '&export&exportnowrap')
        soup = BeautifulSoup(result.text, "lxml")
        for primitive in soup.findAll("text"):
            code += primitive.string
        entrees = code.split("*")
        entrees_avec_resultat = 0
        for entree in entrees:
            lieu = []
            date = []
            lieu_action = []  # stays empty if the entry has no "/" separator

            # Find the date
            date_lieu_action = entree.split("/")
            date = date_lieu_action[0]
            date = date.replace("[[", "")
            date = date.replace("]]", "")
            date = ''.join(date.split())  # remove all whitespace (tabs, line breaks, spaces...)
            if not date or not date[0].isdigit():  # if the first element is not a number, it is probably not a date
                continue

            # Find the place
            if len(date_lieu_action) > 1:  # if there is more than just a date
                lieu_action = date_lieu_action[1]
                lieu_action = lieu_action.replace("[https:", "")
                lieu_action = lieu_action.replace("[http:", "")
                points = lieu_action.count('.')
                if lieu_action.endswith('.'):
                    if points > 1:
                        lieu_action = lieu_action.split('.')
                        lieu = lieu_action[0]
                        lieu = lieu.replace("[[", "")
                        lieu = lieu.replace("]]", "")
                        lieu = ''.join(lieu.split())  # remove all whitespace (tabs, line breaks, spaces...)
                    else:
                        lieu = '-'
                        lieu_action = [lieu] + [lieu_action]
                else:
                    if points != 0:
                        lieu_action = lieu_action.split('.')
                        lieu = lieu_action[0]
                        lieu = lieu.replace("[[", "")
                        lieu = lieu.replace("]]", "")
                        lieu = ''.join(lieu.split())  # remove all whitespace (tabs, line breaks, spaces...)
                    else:
                        lieu = '-'
                        lieu_action = [lieu] + [lieu_action]
            if len(lieu_action) < 2:
                continue

            # Find the action and its arguments between the [[ ]] markers
            split_phase_1 = lieu_action[1].split("[[")
            actions = []
            for mot in split_phase_1:
                action = mot.split("]]")
                actions.append(action[0].strip())
            if "" in actions:
                actions.remove("")
            if len(actions) != 0:
                for hypermot in BDD:
                    if hypermot == actions[0]:
                        entrees_avec_resultat += 1
                        sortie = sortie + "_:e a " + BDD[hypermot][1] + ":" + BDD[hypermot][0] + "\n ; dc:date '" + date + "'\n ; bio:Place '" + lieu + "'\n"
                        for type in BDD[hypermot][2]:
                            indice = BDD[hypermot][2].index(type) + 1
                            try:
                                actions[indice]
                            except IndexError:
                                continue
                            sortie = sortie + " ; " + type + " '" + actions[indice] + "' \n"
                        sortie += "\n"

        # Create the corresponding RDF page
        if entrees_avec_resultat > 1:
            nouveau_nom = nom.replace('_', ' ') + " RDF"
            user = 'MasterBot'
            passw = 'dhbot2019'
            summary = 'Triplificator update'
            payload = {'action': 'query', 'format': 'json', 'utf8': '', 'meta': 'tokens', 'type': 'login'}
            r1 = requests.post(baseurl + 'api.php', data=payload)
            login_token = r1.json()['query']['tokens']['logintoken']
            payload = {'action': 'login', 'format': 'json', 'utf8': '', 'lgname': user, 'lgpassword': passw, 'lgtoken': login_token}
            r2 = requests.post(baseurl + 'api.php', data=payload, cookies=r1.cookies)
            params3 = '?format=json&action=query&meta=tokens&continue='
            r3 = requests.get(baseurl + 'api.php' + params3, cookies=r2.cookies)
            edit_token = r3.json()['query']['tokens']['csrftoken']
            edit_cookie = r2.cookies.copy()
            edit_cookie.update(r3.cookies)
            content = ''
            content += '<nowiki>\n'
            content += sortie
            content += '\n'
            content += 'Page originale: ' + nom.replace('_', ' ') + '\n'
            payload = {'action': 'edit', 'assert': 'user', 'format': 'json', 'utf8': '', 'text': content, 'summary': summary, 'title': nouveau_nom, 'token': edit_token}
            # Remove a possible earlier link to the RDF version before re-adding it
            to_replace = 'Version RDF : ' + nouveau_nom
            code = code.replace(to_replace, ' ')
            payload2 = {'action': 'edit', 'assert': 'user', 'format': 'json', 'utf8': '', 'text': code + '\n' + 'Version RDF : ' + nouveau_nom, 'summary': summary, 'title': nom, 'token': edit_token}
            r4 = requests.post(baseurl + 'api.php', data=payload, cookies=edit_cookie)
            r4 = requests.post(baseurl + 'api.php', data=payload2, cookies=edit_cookie)

def main(*args):
    if len(args) == 0:
        work(noms)
    else:
        work(args)
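As posted, the script defines main() but never invokes it. The short sketch below shows one way the bot could be launched and what a generated triple block looks like; the __main__ guard, the sample page title, and the illustrated entry are assumptions added for illustration, not part of the original bot.

# Minimal launch sketch (assumed, not in the original script); append at the end of the file.
if __name__ == "__main__":
    # Run the bot on every name harvested from the Biographies page...
    main()
    # ...or restrict it to explicit page titles (underscores instead of spaces), e.g.:
    # main("Henri_Dunant")

# For a hypothetical biography entry such as:
#   *[[1910.10.30]] / [[Heiden]]. [[Décès]] de [[Henri Dunant]].
# work() would append a block of this shape to `sortie` before writing the "<name> RDF" page:
#   _:e a bio:death
#    ; dc:date '1910.10.30'
#    ; bio:Place 'Heiden'
#    ; bio:principal 'Henri Dunant'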