Code ImageBot

# -*- coding: utf-8 -*-
# ImageBot: for each biography page that does not yet embed an image, search
# Yahoo Images for a freely reusable photo, upload the first result to the wiki
# and insert it at the top of the page; if an image is already there, move it to the top.
import re
import urllib.request as urllib2
import requests
from bs4 import BeautifulSoup

user='MasterBot'
passw='dhbot2019'
baseurl='http://wikipast.epfl.ch/wikipast/'
# Yahoo Images search URL; size, type and licence filters are appended at query time
baseur2='https://images.search.yahoo.com/search/images;_ylt=AwrB8plXwf9YqRkAXSWJzbkF?p='
# Index pages from which the people's page names are extracted
abcd=['Biographies','Naissance']

# Request a login token
payload={'action':'query',
         'format':'json',
         'utf8':'',
         'meta':'tokens',
         'type':'login'}
r1=requests.post(baseurl + 'api.php', data=payload)

# Complete the login with the token
login_token=r1.json()['query']['tokens']['logintoken']
payload={'action':'login',
         'format':'json',
         'utf8':'',
         'lgname':user,
         'lgpassword':passw,
         'lgtoken':login_token}
r2=requests.post(baseurl + 'api.php', data=payload, cookies=r1.cookies)

# Retrieve a CSRF (edit) token for the authenticated session
params3='?format=json&action=query&meta=tokens&continue='
r3=requests.get(baseurl + 'api.php' + params3, cookies=r2.cookies)
edit_token=r3.json()['query']['tokens']['csrftoken']
edit_cookie=r2.cookies.copy()
edit_cookie.update(r3.cookies)
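# edit_token and edit_cookie are reused below to authenticate every upload and edit request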

#extract names of biographies

def extract_names():
    names=[]  
    for abc in abcd:
        # Fetch the raw wikitext export of the index page
        result=requests.post(baseurl+'api.php?action=query&titles='+abc+'&export&exportnowrap')
        soup=BeautifulSoup(result.text,'html.parser')
        resultt=str(soup)
        if (abc=='Biographies'):
            # The 'Biographies' page lists the names in a wiki table ({| ... |}):
            # keep every cell that contains a [[...]] link
            resulttt=resultt.split("{|")
            resultttt=resulttt[1].split("|}")
            test=resultttt[0].split("|")
          
            for xx in test:
                if (xx.find("]]")!=-1):
                   
                    xx=xx.split("[[")[1].split("]]")[0]
                    a=xx.replace(' ','_')
                    names.append(a)
        if (abc=='Naissance'):
            # On the 'Naissance' page, each bullet entry is split on '/'
            # and the first [[...]] link in the second part gives the person's name
            resulttt=resultt.split("*")
        
            test=resulttt[1:len(resulttt)]
            for xx in test:
                xxx=xx.split("/")[1]
                xxx=xxx.split("[[")
                if len(xxx)>1:
                    xxx=xxx[1].split("]]")[0]
                    xxx=xxx.replace(' ','_')
                    if(xxx!='Naissance'):
                        names.append(xxx)
    return names
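# The names returned above use underscores instead of spaces, matching the page titles used in the API calls below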


def main(*args):
    # With no arguments, process every biography found on the index pages;
    # otherwise process only the pages passed as arguments.
    pages = list()
    if len(args) == 0:
        pages = extract_names()
    else:
        for name in args:
            clear = name.replace(' ', '_')
            pages.append(clear)
    # Search for Creative Commons images and put them on the server
    for name in pages:
        result=requests.post(baseurl+'api.php?action=query&titles='+name+'&export&exportnowrap')
        soup=BeautifulSoup(result.text, 'html.parser')
        # Concatenate the wikitext of the page's export
        code=''
        for primitive in soup.findAll("text"):
            code+=primitive.string
        # No file embedded yet: look for a freely reusable image
        if (code.find("Fichier")==-1):
            print('\n****************\n'+'search image for '+name+'\n****************\n')
            # Careful with the encoding: the page name goes into the URL as-is
            result=urllib2.urlopen(baseur2+name+'&imgsz=medium&imgty=photo&ei=UTF-8&fr=sfp&imgl=fsu&fr2=p%3As%2Cv%3Ai')
            content=result.read()
            soup=BeautifulSoup(content,'html.parser')
            # First thumbnail on the results page; its data-src attribute holds the image URL
            ssoup=soup.find("img")
            result=str(ssoup)
            url=re.search('data-src="(.+?)"',result)
            if (url):
                # Upload the image to the wiki by URL under the name <page>.jpg
                upload=requests.post(
                    baseurl + 'api.php' ,
                    data={'action':'upload',
                          'filename':name+'.jpg',
                          'format':'json',
                          'token':edit_token,
                          'url':url.group(1)
                          },
                    cookies=edit_cookie,
                  )
                print(upload.text)
                ### Add the image to the page
                content=''
                result=requests.post(baseurl+'api.php?action=query&titles='+name+'&export&exportnowrap')
                soup=BeautifulSoup(result.text,'html.parser')
                for primitive in soup.findAll("text"):
                    content+=primitive.string
                content='[[Fichier:'+name+'.jpg|right|thumb|Premier résultat libre de réutilisation sur Yahoo Images]]\n'+content
               
                payload={'action':'edit',
                         'assert':'user',
                         'format':'json',
                         'utf8':'',
                         'text':content,
                         'title':name,
                         'token':edit_token}
                r4=requests.post(baseurl+'api.php',data=payload,cookies=edit_cookie)    
        else:
            print(name+': Picture already online\n****************\n')
            # The page already embeds a file: if it is not at the very top,
            # move the first [[Fichier:...]] tag there
            result=requests.post(baseurl+'api.php?action=query&titles='+name+'&export&exportnowrap')
            soup=BeautifulSoup(result.text,'html.parser')
            content=''
            for primitive in soup.findAll("text"):
                content+=primitive.string
            xx=content.split("[[Fichier:")
            try:
                lol=xx[1].split("jpg|right]]")[0]
                xx[1]=xx[1].split("jpg|right]]")[1]
            except IndexError:
                lol=xx[1].split("jpg]]")[0]
                xx[1]=xx[1].split("jpg]]")[1]
            # A newline before the first file tag means the image is not at the top yet
            if(xx[0].find('\n')!=-1):
                content='[[Fichier:'+lol+'jpg|right|thumb|Premier résultat libre de réutilisation sur Yahoo Images]]\n'+xx[0]+'[[Fichier:'.join(xx[1:len(xx)])
                print(name+': Picture put on top of page\n****************\n')
                payload={'action':'edit',
                         'assert':'user',
                         'format':'json',
                         'utf8':'',
                         'text':content,
                         'title':name,
                         'token':edit_token}
                r4=requests.post(baseurl+'api.php',data=payload,cookies=edit_cookie)
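
# Hypothetical entry point, a minimal sketch assuming the script is run standalone:
# with no command-line arguments the bot processes every biography found on the
# index pages; otherwise it processes only the page names given as arguments.
if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])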