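# Code BioPathBot
# (The wiki skip links "Aller à la navigation" and "Aller à la recherche" were
# captured here when the page was copied; they are not part of the program.)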
import urllib.request
import requests
from bs4 import BeautifulSoup
import re
import math
import numpy as np
import datetime
import random
import copy
from geopy.geocoders import Nominatim
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from colorsys import hsv_to_rgb
from matplotlib.colors import rgb2hex
import pdb
import time
import itertools
from geopy.exc import GeocoderTimedOut
# Module-level configuration and one-time login against the wiki API.
# Everything below runs at import time and performs network requests.

# Number of sub-segments each leg between two consecutive points is split into
# when drawmap_colors draws its colour gradient.
SEGMENTS = 100
# draw plots inline rather than in a separate window
# %matplotlib inline
# draw plots bigger
plt.rcParams["figure.figsize"] = [20.0, 10.0]
# Bot account credentials.
# NOTE(review): the password is hard-coded in the source; consider loading it
# from the environment or a config file kept out of version control.
bot_user='BioPathBot'
passw='chkiroju'
# NOTE(review): plain http -- the credentials and tokens below travel unencrypted.
baseurl='http://wikipast.epfl.ch/wikipast/'
# Edit summary attached to every page edit made by the functions below.
summary='Wikipastbot update'
# protected_logins and depuis_date are not referenced in the visible part of
# this file.
protected_logins=["Frederickaplan","Maud","Vbuntinx","Testbot","IB","SourceBot","PageUpdaterBot","Orthobot","BioPathBot","ChronoBOT","Amonbaro","AntoineL","AntoniasBanderos","Arnau","Arnaudpannatier","Aureliver","Brunowicht","Burgerpop","Cedricviaccoz","Christophe","Claudioloureiro","Ghislain","Gregoire3245","Hirtg","Houssm","Icebaker","JenniCin","JiggyQ","JulienB","Kl","Kperrard","Leandro Kieliger","Marcus","Martin","MatteoGiorla","Mireille","Mj2905","Musluoglucem","Nacho","Nameless","Nawel","O'showa","PA","Qantik","QuentinB","Raphael.barman","Roblan11","Romain Fournier","Sbaaa","Snus","Sonia","Tboyer","Thierry","Titi","Vlaedr","Wanda"]
depuis_date='2017-02-02T16:00:00Z'
# Step 1: request a login token
payload={'action':'query','format':'json','utf8':'','meta':'tokens','type':'login'}
r1=requests.post(baseurl + 'api.php', data=payload)
# Step 2: log in with the bot credentials, the login token and the cookies of step 1
login_token=r1.json()['query']['tokens']['logintoken']
payload={'action':'login','format':'json','utf8':'','lgname':bot_user,'lgpassword':passw,'lgtoken':login_token}
r2=requests.post(baseurl + 'api.php', data=payload, cookies=r1.cookies)
# Step 3: fetch the CSRF token that is sent with every edit/upload request
params3='?format=json&action=query&meta=tokens&continue='
r3=requests.get(baseurl + 'api.php' + params3, cookies=r2.cookies)
edit_token=r3.json()['query']['tokens']['csrftoken']
# Cookie jar used by all write requests: login cookies merged with those of step 3
edit_cookie=r2.cookies.copy()
edit_cookie.update(r3.cookies)
# Shared geocoder used by getDataFromPage to turn place names into coordinates
geolocator = Nominatim(timeout=30)
def uploadMap(filename):
    """Upload a local file to the wiki through the API ``upload`` action.

    The file is posted under its own name with ``ignorewarnings`` set, using
    the module-level ``edit_token`` and ``edit_cookie`` obtained at login.

    Args:
        filename: Path of the local file; also used as the target file name.

    Raises:
        OSError: If the local file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises.
    with open(filename, "rb") as upload_file:
        upload_contents = upload_file.read()
    # ref: https://www.mediawiki.org/wiki/API:Upload
    payload = {'action': 'upload', 'filename': filename, 'ignorewarnings': 1, 'token': edit_token}
    files = {'file': upload_contents}
    print("Uploading file to %s via API..." % (baseurl+"index.php/Fichier:"+filename))
    response = requests.post(baseurl+'api.php', data=payload, files=files, cookies=edit_cookie)
    # Surface HTTP-level failures instead of dropping them silently.
    if not response.ok:
        print(response.text)
def addLinkToOriginalPage(name):
    """Append a ``[[<name> BioPathBot]]`` link to page ``name`` if it is missing.

    The current page text is read through the export API; when the link is not
    found, it is appended with an ``edit`` request using ``appendtext``.

    Args:
        name: Title of the wiki page that should link to its BioPathBot page.
    """
    result = requests.post(baseurl+'api.php?action=query&titles='+name+'&export&exportnowrap')
    soup = BeautifulSoup(result.text, "lxml")
    # A <text> node without a single string child has .string == None; skip it
    # rather than fail on str + None.
    code = ''.join(node.string for node in soup.findAll("text") if node.string)
    link = "[[" + name + " BioPathBot]]"
    # Literal substring test: a title containing regex metacharacters
    # (parentheses, dots, ...) must still be matched exactly.
    if link in code:
        return
    payload = {'action': 'edit', 'assert': 'user', 'format': 'json', 'utf8': '',
               'appendtext': "\n\n" + link, 'summary': summary, 'title': name,
               'token': edit_token}
    requests.post(baseurl+'api.php', data=payload, cookies=edit_cookie)
def addToPage(name, images, legend):
    """Write the page ``"<name> BioPathBot"`` with a back-link, legend and images.

    The page content is sent as the ``text`` parameter of an ``edit`` request,
    and the API response body is printed.

    Args:
        name: Title of the original page; the target is ``name + " BioPathBot"``.
        images: Iterable of wiki file names, each embedded as
            ``[[Fichier: <img>|left]]``.
        legend: HTML legend placed inside an inline-block ``<div>``.
    """
    title = name + " BioPathBot"
    content = "[["+name+"]]<br>"+'<div style="display:inline-block;">'+legend+'</div>'
    content += "".join("[[Fichier: "+ img +"|left]]" for img in images)
    # No preliminary export query is issued: its result was never used.
    payload = {'action': 'edit', 'assert': 'user', 'format': 'json', 'utf8': '',
               'text': content, 'summary': summary, 'title': title,
               'token': edit_token}
    r4 = requests.post(baseurl+'api.php', data=payload, cookies=edit_cookie)
    print(r4.text)
# Content of a [[...]] link made only of digits and dots (e.g. 1879.03.14).
# A single character class avoids nested quantifiers and their exponential
# backtracking on long digit runs.
_BIOPATH_DATE_RE = re.compile(r"(?<=\[\[)[\d.]*(?=\]\])")
# Content of a [[...]] link that directly follows "/ " or "/".
# The class is A-Za-z (not A-z): A-z also contains "[", "\", "]", "^", "_" and
# "`", which let a match run across "]] [[" into the next link.
_BIOPATH_PLACE_SPACED_RE = re.compile(r"(?<=/\s\[\[)[A-Za-zÀ-ÿ\s\-]*(?=\]\])")
_BIOPATH_PLACE_TIGHT_RE = re.compile(r"(?<=/\[\[)[A-Za-zÀ-ÿ\s\-]*(?=\]\])")


def _biopath_page_wikitext(name):
    """Return the concatenated <text> contents of the export of page *name*."""
    result = requests.post(baseurl+'api.php?action=query&titles='+name+'&export&exportnowrap')
    soup = BeautifulSoup(result.text, "lxml")
    return ''.join(node.string for node in soup.findAll("text") if node.string)


def _biopath_first_date(line):
    """Return the first date-like link content in *line*, or ""."""
    found = _BIOPATH_DATE_RE.search(line)
    return found.group(0) if found else ""


def _biopath_first_place(line):
    """Return the first place link in *line* ("/ [[..]]" preferred over "/[[..]]"), or ""."""
    found = _BIOPATH_PLACE_SPACED_RE.search(line) or _BIOPATH_PLACE_TIGHT_RE.search(line)
    place = found.group(0) if found else ""
    # Special case carried over from the original code.
    return "Roma" if place == "Rome" else place


def _biopath_geocode(place, attempts=5):
    """Geocode *place*, retrying on GeocoderTimedOut; return the location or None."""
    for _ in range(attempts):
        try:
            return geolocator.geocode(place)
        except GeocoderTimedOut:
            continue
    return None


def getDataFromPage(name):
    """Extract the dated, geolocated events listed on wiki page ``name``.

    The page text is split on ``*`` (list items). For every item that holds
    both a date link and a place link, the place is geocoded; items whose place
    cannot be geocoded are dropped. Parsing stops after the item that matches
    the "[[Décès]] de [[<name>" pattern.

    Args:
        name: Title of the wiki page to read.

    Returns:
        ``[data, dates, places]``: three parallel lists, where ``data`` holds
        ``[longitude, latitude]`` pairs, ``dates`` the date strings and
        ``places`` the place names.
    """
    data = []
    dates = []
    places = []
    print("Page Created: " + name)
    code = _biopath_page_wikitext(name)
    # The page name is escaped so titles with regex metacharacters match literally.
    death_re = re.compile(r"(\[\[Décès*\]\] (de |d)\[\[" + re.escape(name) + ")")
    # Successful lookups are remembered so a place that appears several times
    # costs one request (and one pause) only.
    geocoded = {}
    for line in code.split("*"):
        dateToAdd = _biopath_first_date(line)
        placeToAdd = _biopath_first_place(line)
        # if both the date and the location are available, append in data array
        if dateToAdd and placeToAdd:
            location = geocoded.get(placeToAdd)
            if location is None:
                location = _biopath_geocode(placeToAdd)
                # geopy usage policy max 1 request/sec
                # https://operations.osmfoundation.org/policies/nominatim/
                time.sleep(2)
                if location:
                    geocoded[placeToAdd] = location
            if location:
                print("Location: " + placeToAdd + " : " + str(location.longitude) + "," + str(location.latitude))
                dates.append(dateToAdd)
                places.append(placeToAdd)
                data.append([location.longitude, location.latitude])
        # stop getting data if find [[Décès]]
        if death_re.search(line):
            break
    return [data, dates, places]
def findCorners(pts):
    """Return the bounding box of ``pts`` as ``[minlon, maxlon, minlat, maxlat]``.

    Args:
        pts: Non-empty sequence of points; index 0 of a point is its longitude
            and index 1 its latitude.

    Raises:
        IndexError: If ``pts`` is empty.
    """
    # Seed every bound with the first point, then widen with each point.
    lon_lo = lon_hi = pts[0][0]
    lat_lo = lat_hi = pts[0][1]
    for point in pts:
        lon = point[0]
        lon_lo = min(lon_lo, lon)
        lon_hi = max(lon_hi, lon)
        lat = point[1]
        lat_lo = min(lat_lo, lat)
        lat_hi = max(lat_hi, lat)
    return [lon_lo, lon_hi, lat_lo, lat_hi]
def drawmap_colors(pts, dates, places, filename, export=False):
    """Draw the path through ``pts`` on a map with a hue gradient.

    The map covers the bounding box of the points plus one degree on each
    side. Each leg is drawn as ``SEGMENTS`` short lines whose hue advances
    along the path, and each point is drawn as a dot in its own hue.

    Args:
        pts: Array of ``[longitude, latitude]`` rows (array arithmetic is used
            on the rows).
        dates: Date string for each point.
        places: Place name for each point.
        filename: Output image path, used only when ``export`` is true.
        export: When true, save the figure to ``filename`` and close it.

    Returns:
        HTML legend: one coloured ``<span>`` ("date / place") per point.
    """
    count = len(pts)
    lon_min, lon_max, lat_min, lat_max = findCorners(pts)
    world = Basemap(llcrnrlon=lon_min-1, llcrnrlat=lat_min-1,
                    urcrnrlon=lon_max+1, urcrnrlat=lat_max+1, resolution='i')
    world.drawmapboundary(fill_color='0.6')
    world.drawcountries(linewidth=1.0, color='0.6')
    world.fillcontinents(color='white', lake_color='white')

    # Legs: many short segments so the colour can change along the line.
    for leg in range(count - 1):
        for step in range(SEGMENTS):
            seg_start = pts[leg] + (pts[leg+1]-pts[leg])*(step/SEGMENTS)
            seg_end = pts[leg] + (pts[leg+1]-pts[leg])*((step+1)/SEGMENTS)
            world.plot([seg_start[0], seg_end[0]], [seg_start[1], seg_end[1]],
                       color=hsv_to_rgb((leg+step/SEGMENTS)/count, 1, 1))

    # Points, plus one legend entry per point in the matching colour.
    legend_parts = []
    for idx, point in enumerate(pts):
        colour = hsv_to_rgb(idx/count, 1, 1)
        world.plot(point[0], point[1], marker='o', color=colour, fillstyle='full', markeredgewidth=0.0)
        legend_parts.append("<br><span style='color:" + rgb2hex(colour) + "; font-weight:bold'>" + dates[idx] + " / " + places[idx] + ". </span>")

    if export:
        plt.savefig(filename, bbox_inches='tight')
        plt.close()
    return "".join(legend_parts)
def line_box(inp, hs, outp):
    """Return where the ray from ``inp`` towards ``outp`` leaves a box centred on ``inp``.

    Args:
        inp: Centre of the box (array of 2 coordinates).
        hs: Half width and half height of the box.
        outp: Another point giving the direction of the ray.

    Returns:
        Array with the coordinates of the crossing point on the box border.
    """
    delta = outp - inp
    # A zero component would divide by zero below; nudge it to the smallest
    # positive float instead.
    tiny = np.nextafter(0, 1)
    for axis in (0, 1):
        if delta[axis] == 0:
            delta[axis] = tiny
    # Half sizes carrying the sign of the direction on each axis.
    reach_x = np.copysign(hs[0], delta[0])
    reach_y = np.copysign(hs[1], delta[1])
    # Offset to the horizontal edge and offset to the vertical edge.
    to_horizontal = np.array([(reach_y / delta[1]) * delta[0], reach_y])
    to_vertical = np.array([reach_x, (reach_x / delta[0]) * delta[1]])
    # The edge that is reached first is the real exit point.
    if np.linalg.norm(to_horizontal) < np.linalg.norm(to_vertical):
        offset = to_horizontal
    else:
        offset = to_vertical
    return inp + offset
def repulsion_force(pos1, pos2, bbox1, bbox2):
    """Return the displacement to apply to box 1 when it overlaps box 2.

    Args:
        pos1: Corner position of box 1; its centre is ``pos1`` plus half its size.
        pos2: Corner position of box 2.
        bbox1: Object with ``width`` and ``height`` attributes for box 1.
        bbox2: Object with ``width`` and ``height`` attributes for box 2.

    Returns:
        Array of 2 values: zeros when the boxes (enlarged by 1.5) do not
        overlap, a small random vector when the centres coincide, otherwise
        the vector from box 1's border point to box 2's border point.
    """
    half1 = np.array([bbox1.width, bbox1.height]) / 2
    half2 = np.array([bbox2.width, bbox2.height]) / 2
    center1 = pos1 + half1
    center2 = pos2 + half2

    # Coincident centres give no direction: pick a random one.
    if (center1 == center2).all():
        return (np.random.rand(2) - np.array([0.5, 0.5])) * half1[1]

    # Border points of each enlarged box on the line joining the centres.
    border1 = line_box(center1, half1 * 1.5, center2)
    border2 = line_box(center2, half2 * 1.5, center1)
    push = border2 - border1
    # The border points have crossed each other when `push` points back
    # towards centre 1.
    if np.dot(border1 - center1, push) < 0:
        return push
    return np.zeros(2)
def adjust_text(texts, text_width, text_height,num_iterations=20,eta=0.5):
    """Move text labels apart and replace them with annotations tied to their anchors.

    Args:
        texts: Text objects of the current figure; each one is removed and
            replaced by a draggable annotation.
        text_width: Unused.
        text_height: Unused.
        num_iterations: Number of relaxation passes over all label pairs.
        eta: Factor applied to each repulsion displacement.
    """
    positions = [np.array(label.get_position()) for label in texts]
    order = list(range(len(texts)))

    # Label bounding boxes converted from window to data coordinates.
    renderer = plt.gcf().canvas.get_renderer()
    axes = plt.gca()
    boxes = [label.get_window_extent(renderer=renderer).transformed(axes.transData.inverted())
             for label in texts]

    # Centre each label on its marker.
    for pos, box in zip(positions, boxes):
        pos[0] -= box.width/2
        pos[1] -= box.height/2

    # Relaxation: for every pair, only the first label of the pair is moved.
    for _ in range(num_iterations):
        random.shuffle(order)
        for first, second in itertools.combinations(order, 2):
            push = repulsion_force(positions[first], positions[second], boxes[first], boxes[second])
            positions[first] += push*eta

    # Replace each text by an annotation at the adjusted position, linked to
    # the original anchor.
    for label, target in zip(texts, positions):
        note = plt.annotate(label.get_text(), xy=label.get_position(), xytext=target,
                            arrowprops=dict(arrowstyle="-", color='k', lw=0.5, alpha=0.6),
                            bbox=dict(facecolor='b', alpha=0.2))
        note.draggable()
        label.remove()
def drawmap_date(pts, dates, places, filename, export=False):
    """Draw the points on a 2:1 map, each labelled with its date.

    The view is the bounding box of the points, padded, stretched to a 2:1
    longitude/latitude ratio around its centre and shifted back inside
    [-180, 180] x [-90, 90] when it overflows.

    Args:
        pts: Sequence of ``[longitude, latitude]`` points.
        dates: Date string for each point (used as the label).
        places: Place name for each point (used in the legend).
        filename: Output image path, used only when ``export`` is true.
        export: When true, save the figure to ``filename`` and close it.

    Returns:
        HTML legend: one coloured ``<span>`` ("date / place") per point.
    """
    count = len(pts)
    west, east, south, north = findCorners(pts)

    # Padded extents, capped to the size of the globe.
    lon_span = min((east-west+10)*1.1, 360)
    lat_span = min((north-south+10)*1.1, 180)
    lon_mid = (east+west)/2
    lat_mid = (north+south)/2

    # ratio correction to 2:1 (grow the smaller extent)
    if lon_span > lat_span*2:
        lat_span = lon_span/2
    else:
        lon_span = lat_span*2
    west = lon_mid-lon_span/2
    east = lon_mid+lon_span/2
    south = lat_mid-lat_span/2
    north = lat_mid+lat_span/2

    # Slide the window back inside the valid longitude range.
    if west < -180:
        east -= west-(-180)
        west = -180
    elif east > 180:
        west -= east-180
        east = 180
    # Same for latitude.
    if south < -90:
        north -= south-(-90)
        south = -90
    elif north > 90:
        south -= north-90
        north = 90

    # draw map background
    world = Basemap(llcrnrlon=west, llcrnrlat=south, urcrnrlon=east, urcrnrlat=north, resolution='i')
    world.drawmapboundary(fill_color='0.6')
    world.drawcountries(linewidth=1.0, color='0.6')
    world.fillcontinents(color='white', lake_color='white')

    # Points with a date label each, plus the matching legend entries.
    labels = []
    legend_parts = []
    for idx in range(count):
        colour = hsv_to_rgb(idx/count, 1, 1)
        x, y = world(pts[idx][0], pts[idx][1])
        world.plot(x, y, marker='o', color=colour, fillstyle='full', markeredgewidth=0.0,alpha=0.7)
        labels.append(plt.text(x, y, dates[idx]))
        legend_parts.append("<span style='color:" + rgb2hex(colour) + "; font-weight:bold'>" + dates[idx] + " / " + places[idx] + ". </span> <br>")

    adjust_text(labels, 1, 0.4)
    if export:
        plt.savefig(filename, bbox_inches='tight')
        plt.close()
    return "".join(legend_parts)
def main(*names):
    """Build, upload and publish the BioPath maps for each page title.

    For every title the page is parsed with ``getDataFromPage``; when at least
    one geolocated event is found, both maps are drawn and uploaded, the
    "<name> BioPathBot" page is written and a link to it is added to the
    original page. Titles without usable data are skipped.

    Args:
        *names: Titles of the wiki pages to process.
    """
    for name in names:
        colors_png = (name + "_colors_biopath.png").replace(" ","_")
        date_png = (name + "_date_biopath.png").replace(" ","_")
        coords, dates, places = getDataFromPage(name)
        # Nothing to draw for this page.
        if len(coords) == 0:
            continue
        legend_colors = drawmap_colors(np.array(coords), dates, places, colors_png, True)
        drawmap_date(np.array(coords), dates, places, date_png, True)
        uploadMap(date_png)
        uploadMap(colors_png)
        addToPage(name, [colors_png, date_png], legend_colors)
        addLinkToOriginalPage(name)
    print("end")