Code ImageBot
Aller à la navigation
Aller à la recherche
"""ImageBot: give every Wikipast biography page a lead image.

For each page the bot either
  * looks up a freely reusable picture on Yahoo Images, uploads it to the wiki
    and inserts it at the top of the page (when the page has no image yet), or
  * moves the first existing ``.jpg`` image of the page to the top.

Importing this module logs the bot in to the wiki (network access required);
``main()`` runs the bot on every known biography, ``main('Some Name', ...)``
runs it on the given pages only.
"""
import os
import re
import urllib.request as urllib2
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

# NOTE(security): the bot credentials are hard-coded in the source.  They can
# be overridden through the environment; the literals are only kept as a
# backward-compatible fallback and should be removed from the code base.
user = os.environ.get('WIKIPAST_USER', 'MasterBot')
passw = os.environ.get('WIKIPAST_PASSWORD', 'dhbot2019')
baseurl = 'http://wikipast.epfl.ch/wikipast/'
baseur2 = 'https://images.search.yahoo.com/search/images;_ylt=AwrB8plXwf9YqRkAXSWJzbkF?p='
# Index pages from which the list of biography titles is extracted.
abcd = ['Biographies', 'Naissance']

# Seconds to wait on any single HTTP exchange before giving up, so that a
# stalled server cannot hang the bot forever.
REQUEST_TIMEOUT = 60

# Wikitext fragments shared by the page-editing helpers.
_IMAGE_OPEN = '[[Fichier:'
_IMAGE_OPTIONS = 'jpg|right|thumb|Premier résulat libre de réutilisation sur Yahoo Images]]\n'

# --- Login: fetch a login token ---------------------------------------------
payload = {'action': 'query', 'format': 'json', 'utf8': '',
           'meta': 'tokens', 'type': 'login'}
r1 = requests.post(baseurl + 'api.php', data=payload, timeout=REQUEST_TIMEOUT)

# --- Login: confirm with the credentials -------------------------------------
login_token = r1.json()['query']['tokens']['logintoken']
payload = {'action': 'login', 'format': 'json', 'utf8': '',
           'lgname': user, 'lgpassword': passw, 'lgtoken': login_token}
r2 = requests.post(baseurl + 'api.php', data=payload, cookies=r1.cookies,
                   timeout=REQUEST_TIMEOUT)

# --- Fetch the CSRF token needed by the edit and upload actions ---------------
params3 = '?format=json&action=query&meta=tokens&continue='
r3 = requests.get(baseurl + 'api.php' + params3, cookies=r2.cookies,
                  timeout=REQUEST_TIMEOUT)
edit_token = r3.json()['query']['tokens']['csrftoken']
edit_cookie = r2.cookies.copy()
edit_cookie.update(r3.cookies)


def _fetch_wikitext(title):
    """Return the wikitext of page *title* from the export API ('' if none)."""
    response = requests.post(
        baseurl + 'api.php',
        # Passing the title through ``params`` lets requests URL-encode it.
        params={'action': 'query', 'titles': title,
                'export': '', 'exportnowrap': ''},
        timeout=REQUEST_TIMEOUT,
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    # get_text() never yields None, unlike ``.string`` on an empty element.
    return ''.join(node.get_text() for node in soup.find_all('text'))


def _first_link(text):
    """Return the target of the first ``[[...]]`` link in *text*, or None."""
    pieces = text.split('[[')
    if len(pieces) < 2:
        return None
    return pieces[1].split(']]')[0]


def _names_from_biographies(wikitext):
    """Return the page names linked from the first table of 'Biographies'."""
    sections = wikitext.split('{|')
    if len(sections) < 2:  # no table on the page
        return []
    table = sections[1].split('|}')[0]
    found = []
    for cell in table.split('|'):
        if ']]' not in cell:
            continue
        target = _first_link(cell)
        if target is not None:
            found.append(target.replace(' ', '_'))
    return found


def _names_from_naissance(wikitext):
    """Return the page names linked from the bullet list of 'Naissance'."""
    found = []
    for entry in wikitext.split('*')[1:]:
        fields = entry.split('/')
        if len(fields) < 2:  # bullet without a '/' separator
            continue
        target = _first_link(fields[1])
        if target is None:
            continue
        target = target.replace(' ', '_')
        if target != 'Naissance':
            found.append(target)
    return found


def extract_names():
    """Return the biography page names listed on the index pages in ``abcd``.

    Names are returned with spaces replaced by underscores, in page order;
    a name listed on both index pages appears twice.  Malformed entries
    (missing table, bullet without '/', cell without a link) are skipped.
    """
    names = []
    for abc in abcd:
        wikitext = _fetch_wikitext(abc)
        if abc == 'Biographies':
            names.extend(_names_from_biographies(wikitext))
        elif abc == 'Naissance':
            names.extend(_names_from_naissance(wikitext))
    return names


def _find_image_url(name):
    """Return the URL of the first Yahoo Images hit for *name*, or None."""
    search_url = (baseur2 + quote(name, safe='')
                  + '&imgsz=medium&imgty=photo&ei=UTF-8&fr=sfp&imgl=fsu&fr2=p%3As%2Cv%3Ai')
    try:
        with urllib2.urlopen(search_url, timeout=REQUEST_TIMEOUT) as response:
            html = response.read()
    except OSError as err:  # URLError, HTTPError and timeouts all derive from it
        print(name + ': image search failed (' + str(err) + ')')
        return None
    # Read the attribute itself: serialising the tag and matching it with a
    # regex would return the URL with '&' escaped as '&amp;'.
    img = BeautifulSoup(html, 'html.parser').find('img', attrs={'data-src': True})
    if img is None:
        return None
    return img['data-src'] or None


def _save_page(title, text):
    """Replace the content of page *title* with *text*; return the response."""
    payload = {'action': 'edit', 'assert': 'user', 'format': 'json', 'utf8': '',
               'text': text, 'title': title, 'token': edit_token}
    return requests.post(baseurl + 'api.php', data=payload, cookies=edit_cookie,
                         timeout=REQUEST_TIMEOUT)


def _move_first_image_to_top(content):
    """Return *content* with its first ``.jpg`` image moved to the top.

    Returns None when nothing has to change: no ``[[Fichier:`` link, the
    first image already sits on the first line, or the first image is
    neither ``...jpg|right]]`` nor ``...jpg]]`` (for instance an image that
    already carries the bot's thumbnail caption).
    """
    segments = content.split(_IMAGE_OPEN)
    if len(segments) < 2:
        return None
    before = segments[0]
    if '\n' not in before:
        return None
    # Only the text up to the next image link is searched for the closing
    # marker, so a later image can never be mistaken for the first one.
    for closing in ('jpg|right]]', 'jpg]]'):
        stem, marker, rest = segments[1].partition(closing)
        if marker:
            remainder = _IMAGE_OPEN.join([rest] + segments[2:])
            return _IMAGE_OPEN + stem + _IMAGE_OPTIONS + before + remainder
    return None


def main(*args):
    """Run the bot on the given page names, or on every biography if none.

    Args:
        *args: page titles; spaces are converted to underscores.  When no
            title is given, the list comes from ``extract_names()``.
    """
    if args:
        pages = [name.replace(' ', '_') for name in args]
    else:
        pages = extract_names()

    # Look for a reusable image and put it on the server.
    for name in pages:
        code = _fetch_wikitext(name)

        if 'Fichier' not in code:
            print('\n****************\n' + 'search image for ' + name
                  + '\n****************\n')
            image_url = _find_image_url(name)
            if image_url:
                upload = requests.post(
                    baseurl + 'api.php',
                    data={'action': 'upload', 'filename': name + '.jpg',
                          'format': 'json', 'token': edit_token,
                          'url': image_url},
                    cookies=edit_cookie,
                    timeout=REQUEST_TIMEOUT,
                )
                print(upload.text)
                # Add the image to the page.  The text is fetched again
                # right before the edit because the upload may take a while.
                content = _fetch_wikitext(name)
                _save_page(name, _IMAGE_OPEN + name + '.' + _IMAGE_OPTIONS + content)
            continue

        print(name + ': Picture already online\n****************\n')
        updated = _move_first_image_to_top(code)
        if updated is not None:
            print(name + ': Picture put on top of page\n****************\n')
            _save_page(name, updated)