Code ImageBot
Aller à la navigation
Aller à la recherche
import re
import urllib.request as urllib2
import requests
import os
from bs4 import BeautifulSoup
# -*- coding: utf-8 -*-
# --- Configuration -----------------------------------------------------------
# Bot credentials. The historical values remain the defaults so existing
# deployments keep working, but they can now be overridden with the
# WIKIPAST_USER / WIKIPAST_PASSWORD environment variables instead of editing
# this file.
# NOTE(review): a password committed to source should be rotated and the
# default removed once every deployment sets the environment variables.
user=os.environ.get('WIKIPAST_USER', 'MasterBot')
passw=os.environ.get('WIKIPAST_PASSWORD', 'dhbot2019')
# Base URL of the wiki; 'api.php' is appended for every API call.
baseurl='http://wikipast.epfl.ch/wikipast/'
# Yahoo image-search URL prefix; the search term is appended after 'p='.
baseur2='https://images.search.yahoo.com/search/images;_ylt=AwrB8plXwf9YqRkAXSWJzbkF?p='
# Wiki pages that extract_names() scans for page names.
abcd=['Biographies','Naissance']

# --- Login -------------------------------------------------------------------
# Step 1: request a login token.
payload={'action':'query',
         'format':'json',
         'utf8':'',
         'meta':'tokens',
         'type':'login'}
r1=requests.post(baseurl + 'api.php', data=payload)

# Step 2: log in with that token, sending back the cookies from step 1.
login_token=r1.json()['query']['tokens']['logintoken']
payload={'action':'login',
         'format':'json',
         'utf8':'',
         'lgname':user,
         'lgpassword':passw,
         'lgtoken':login_token}
r2=requests.post(baseurl + 'api.php', data=payload, cookies=r1.cookies)

# Report a failed login instead of continuing silently: without a session the
# edit requests sent later (which pass 'assert': 'user') cannot succeed.
try:
    login_result=r2.json().get('login', {}).get('result')
except ValueError:
    # The response body was not JSON.
    login_result=None
if login_result != 'Success':
    print('Login as ' + user + ' failed: ' + str(login_result))

# Step 3: fetch the CSRF token required by the edit and upload actions.
params3='?format=json&action=query&meta=tokens&continue='
r3=requests.get(baseurl + 'api.php' + params3, cookies=r2.cookies)
edit_token=r3.json()['query']['tokens']['csrftoken']

# Cookie jar sent with every authenticated request: login cookies plus any
# cookies set by the token request.
edit_cookie=r2.cookies.copy()
edit_cookie.update(r3.cookies)
#extract names of biographies
def _first_link_target(fragment):
    """Return the target of the first ``[[...]]`` link in *fragment*.

    The target is the text after the first ``[[`` up to the first ``]]`` or
    ``|`` (whichever comes first), with spaces replaced by underscores.
    Returns None when the fragment has no ``[[`` or the target is empty.
    """
    _, opener, rest = fragment.partition("[[")
    if not opener:
        return None
    target = rest.split("]]")[0].split("|")[0]
    return target.replace(' ', '_') or None


def _names_from_biographies(dump):
    """Collect link targets from the first ``{| ... |}`` table in *dump*.

    The table is cut into cells on ``|`` and the first link of each cell is
    kept. Cells without a ``[[`` are skipped; this includes the display half
    of a piped link, which the ``|`` split separates from its target.
    Returns an empty list when *dump* contains no ``{|``.
    """
    sections = dump.split("{|")
    if len(sections) < 2:
        return []
    table = sections[1].split("|}")[0]
    names = []
    for cell in table.split("|"):
        name = _first_link_target(cell)
        if name:
            names.append(name)
    return names


def _names_from_naissance(dump):
    """Collect link targets from the ``*`` bullet entries in *dump*.

    For every chunk following a ``*``, the text between the first and the
    second ``/`` is inspected and its first link is kept, unless that link is
    'Naissance' itself. Chunks without a ``/`` or without a link are skipped.
    """
    names = []
    for bullet in dump.split("*")[1:]:
        pieces = bullet.split("/")
        if len(pieces) < 2:
            continue
        name = _first_link_target(pieces[1])
        if name and name != 'Naissance':
            names.append(name)
    return names


def extract_names():
    """Return the page names listed on the wiki pages named in ``abcd``.

    Each page is fetched through the wiki export API and parsed according to
    its title ('Biographies' as a table, 'Naissance' as a bullet list). Titles
    in ``abcd`` other than those two contribute nothing. Malformed fragments
    are skipped rather than raising IndexError.

    Returns:
        A list of names with spaces replaced by underscores, in page order.
    """
    names = []
    for abc in abcd:
        result = requests.post(baseurl+'api.php?action=query&titles='+abc+'&export&exportnowrap')
        # The whole export document is scanned, not only its <text> element.
        dump = str(BeautifulSoup(result.text, 'html.parser'))
        if abc == 'Biographies':
            names.extend(_names_from_biographies(dump))
        elif abc == 'Naissance':
            names.extend(_names_from_naissance(dump))
    return names
def _image_tag(filename):
    """Return the wikitext line that embeds *filename* as a right-hand thumbnail."""
    return ('[[Fichier:' + filename
            + '|right|thumb|Premier résulat libre de réutilisation sur Yahoo Images]]\n')


def _fetch_wikitext(title):
    """Return the concatenated content of every <text> element exported for *title*."""
    # Passing the title through ``params`` lets requests URL-encode it, so
    # names containing '&', '#', '+' or non-ASCII characters are sent intact.
    response = requests.post(
        baseurl + 'api.php',
        params={'action': 'query', 'titles': title, 'export': '', 'exportnowrap': ''},
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    # get_text() never returns None, unlike .string on an element that does
    # not hold exactly one string.
    return ''.join(node.get_text() for node in soup.find_all("text"))


def _find_image_url(name):
    """Return the ``data-src`` of the first <img> on the Yahoo result page, or None."""
    from urllib.parse import quote

    search_url = (baseur2 + quote(name)
                  + '&imgsz=medium&imgty=photo&ei=UTF-8&fr=sfp&imgl=fsu&fr2=p%3As%2Cv%3Ai')
    # The context manager closes the HTTP response once it has been read.
    with urllib2.urlopen(search_url) as response:
        html = response.read()
    first_img = BeautifulSoup(html, 'html.parser').find("img")
    if first_img is None:
        return None
    # Read the attribute directly: matching a regex against str(tag) would
    # return the value with '&' re-escaped as '&amp;', i.e. a corrupted URL.
    return first_img.get('data-src') or None


def _upload_image(name, image_url):
    """Ask the wiki to upload *image_url* as '<name>.jpg'; return False on an API error."""
    upload = requests.post(
        baseurl + 'api.php',
        data={'action': 'upload',
              'filename': name + '.jpg',
              'format': 'json',
              'token': edit_token,
              'url': image_url},
        cookies=edit_cookie,
    )
    print(upload.text)
    try:
        return 'error' not in upload.json()
    except ValueError:
        # A non-JSON body is treated as a failed upload.
        return False


def _save_page(title, text):
    """Replace the content of wiki page *title* with *text*."""
    payload = {'action': 'edit',
               'assert': 'user',
               'format': 'json',
               'utf8': '',
               'text': text,
               'title': title,
               'token': edit_token}
    requests.post(baseurl + 'api.php', data=payload, cookies=edit_cookie)


def _move_first_image_to_top(content):
    """Return *content* with its first image tag moved to the top, or None.

    None means there is nothing to change: no ``[[Fichier:`` tag, no line
    break before the first tag, or a tag that is not of the form
    ``[[Fichier:<...>jpg]]`` / ``[[Fichier:<...>jpg|right]]``. Tags in any
    other form are left where they are so their options are not overwritten.
    """
    before, marker, after = content.partition("[[Fichier:")
    if not marker or '\n' not in before:
        return None
    tag_body, closed, rest = after.partition("]]")
    if not closed:
        return None
    filename = tag_body[:-len("|right")] if tag_body.endswith("|right") else tag_body
    if "|" in filename or not filename.endswith("jpg"):
        return None
    return _image_tag(filename) + before + rest


def main(*args):
    """Make sure each page starts with an image.

    Args:
        *args: Page names to process (spaces are replaced by underscores).
            When none are given, the names returned by extract_names() are
            used.

    For a page whose text does not contain "Fichier", the first Yahoo image
    result is uploaded as '<name>.jpg' and an image tag is prepended to the
    page. The page is left unchanged when no image is found or the upload
    reports an API error. For a page that already contains "Fichier", the
    first image tag is moved to the top when it is preceded by a line break.
    """
    if args:
        pages = [name.replace(' ', '_') for name in args]
    else:
        pages = extract_names()

    # Look up an image for each page and put it on the server.
    for name in pages:
        code = _fetch_wikitext(name)

        if "Fichier" not in code:
            print('\n****************\n'+'search image for '+name+'\n****************\n')
            image_url = _find_image_url(name)
            if image_url and _upload_image(name, image_url):
                # Add the image to the page, re-reading it first so the edit
                # is based on the text as it is after the upload.
                _save_page(name, _image_tag(name + '.jpg') + _fetch_wikitext(name))
            continue

        print(name+': Picture already online\n****************\n')
        moved = _move_first_image_to_top(code)
        if moved is not None:
            print(name+': Picture put on top of page\n****************\n')
            _save_page(name, moved)