User:DschwenBot/source
Jump to navigation
Jump to search
#!/usr/bin/python
import sys, os
# Show the home directory in the log, then make the bundled pywikipedia
# checkout under it importable for the `import wikipedia` that follows.
home_dir = os.environ['HOME']
print(home_dir)
sys.path.append(home_dir + '/dschwen_bot/pywikipedia')
import wikipedia
import MySQLdb
import pyexiv2
import re
import math
import string
import unicodedata
import htmlentitydefs
import marshal
import urllib
from urllib import FancyURLopener
from PHPUnserialize import *
from datetime import timedelta
from datetime import datetime
# Only images uploaded within the last three days are scanned for GPS EXIF
# data; `cut` is the oldest upload timestamp still considered.
dt = timedelta(days=3)
cut = datetime.now() - dt
class MyOpener(FancyURLopener):
    """FancyURLopener that sends a browser-like User-Agent string."""

    # urllib's URLopener uses the `version` attribute as its User-Agent header.
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'


# Route the module-level urllib helpers through one shared opener so every
# download in this script carries the User-Agent above.  (Previously three
# separate opener instances were created and `myopener` itself went unused.)
myopener = MyOpener()
urllib.urlopen = myopener.open
urllib.urlretrieve = myopener.retrieve
def unescape_charref(ref):
    """Decode a numeric character reference such as ``&#65;`` or ``&#x41;``.

    ref -- the complete reference, including the leading ``&#`` and the
           trailing ``;``.

    Returns the one-character unicode string for that code point.
    Raises ValueError if the digits are not valid for the detected base
    (or the code point is out of range for the interpreter).
    """
    name = ref[2:-1]
    base = 10
    # Hexadecimal references may be written with either "x" or "X".
    if name[:1] in ("x", "X"):
        name = name[1:]
        base = 16
    # unichr is the Python 2 builtin; fall back to chr where it is absent.
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    return to_char(int(name, base))
def replace_entities(match):
    """re.sub callback: return the replacement text for one matched entity.

    match -- a match object whose full match is an entity of the form
             ``&name;`` or ``&#digits;`` / ``&#xhex;``.

    Numeric references are decoded with unescape_charref(); named entities are
    looked up in htmlentitydefs.name2codepoint.  Anything that cannot be
    decoded (unknown name, malformed or out-of-range number) is returned
    unchanged instead of raising, so one bad entity does not abort the whole
    substitution.
    """
    ent = match.group()
    if ent[1] == "#":
        try:
            return unescape_charref(ent)
        except (ValueError, OverflowError):
            # e.g. "&#abc;" matches the caller's pattern but is not a number
            return ent
    codepoint = htmlentitydefs.name2codepoint.get(ent[1:-1])
    if codepoint is None:
        return ent
    return unichr(codepoint)
def unescape(data):
    """Return `data` with every ``&...;`` entity passed through replace_entities."""
    entity_pattern = r"&#?[A-Za-z0-9]+?;"
    return re.sub(entity_pattern, replace_entities, data)
# Uploaders for whom the EXIF GPSTrack value (direction of movement) is
# accepted as the camera heading when no GPSImgDirection tag is present.
gpstrackusers = set(['Ikiwaner'])

# Patterns are raw strings so the regex escapes (\{, \|, \[ ...) are not
# interpreted as (invalid) Python string escapes; the compiled patterns are
# unchanged.
# {{Location dec|lat|lon|...}} -- captures the first two parameters
loc1RE = re.compile(r'\{\{[Ll]ocation[ _]dec\|([^\|]+)\|([^\|]+)[\|\}]')
# {{Location dec|...}} -- captures the whole parameter list
loc2RE = re.compile(r'\{\{[Ll]ocation[ _]dec\|([^\}\{]+)\}\}')
# {{Location|...}} -- captures the whole parameter list
loc3RE = re.compile(r'\{\{[Ll]ocation\|([^\}\{]+)\}\}')
# [[Category:Location not applicable]]
nolocRE = re.compile(r'\[\[[Cc]ategory:[Ll]ocation[ _]not[ _]applicable\]\]')
# {{GPS EXIF}} request template plus an optional trailing newline.  The last
# fragment stays a normal string so the pattern still holds a literal newline.
gpsRE = re.compile(r'\{\{(Template:|template:|10:|)[Gg]PS[_ ]EXIF\}\}' '(\n|)')
#gpsRE = re.compile( '\{\{Template:[Gg]PS[ _]EXIF\}\}' )
# single-letter hemisphere references
latrefRE = re.compile(r'^[NnSs]$')
lonrefRE = re.compile(r'^[EeOoWw]$')
# Site object from pywikipedia's getSite(); used further down to build the
# Page object for each image.  (Presumably the wiki configured for this bot's
# pywikipedia installation -- confirm against its user-config.)
site = wikipedia.getSite()
def load_marshal_dict(path):
    """Load a marshalled dictionary from `path`.

    Returns an empty dict when the file cannot be opened or does not contain
    readable marshal data (first run, truncated or corrupt file).  The file
    handle is always closed.
    """
    try:
        f = open(path, "rb")
    except IOError:
        return {}
    try:
        try:
            return marshal.load(f)
        except (EOFError, ValueError, TypeError, IOError):
            return {}
    finally:
        f.close()


# Persistent state from earlier runs, keyed by image name:
# badlist -- images that get skipped by the processing loop
# taglist -- images collected for processing (True = still pending)
badlist = load_marshal_dict("badlist.gps")
taglist = load_marshal_dict("taglist.gps")
# Refresh taglist from the Commons database replica.
#
# Candidate pages are collected in a temporary table with a score: recently
# uploaded images whose metadata mentions GPS tags and pages carrying
# {{GPS EXIF}} score 0, pages transcluding {{Location}} / {{Location dec}}
# score 1.  Pages whose summed score is 0 (GPS data or a request, but no
# location template yet) are marked pending in taglist, which is then saved.
try:
    # Credentials are read from the MySQL option file rather than being
    # written into the source.
    connection = MySQLdb.connect(host="commonswiki-p.db.ts.wikimedia.org",
                                 db="commonswiki_p",
                                 read_default_file=os.path.expanduser("~/.my.cnf"))
    #connection = MySQLdb.connect(host="sql-s1.toolserver.org", user="xxx", passwd="xxx", db="commonswiki_p" )
    try:
        cursor = connection.cursor()
        try:
            cursor.execute( "create temporary table u_dschwen.dump (tl_from int, score int)" )
            print("Looking for GPS EXIF data (images > %s)" % cut.strftime( "%Y%m%d%H%M%S" ))
            #cursor.execute( "insert into u_dschwen.dump SELECT page_id, 0 from image, page where img_name = page_title and img_name = 'Freaky_Age_4_Luc_Viatour.jpg'" )
            cursor.execute( "insert into u_dschwen.dump SELECT page_id, 0 from image, page where img_timestamp > '%s' and img_name = page_title and page_namespace = 6 and ( img_metadata like '%%\"GPSAltitude\"%%' or img_metadata like '%%\"GPSLatitudeRef\"%%' )" % cut.strftime( "%Y%m%d%H%M%S" ) )
            print("Looking for {{Location}}")
            cursor.execute( "insert into u_dschwen.dump select tl_from, 1 from templatelinks where tl_namespace = 10 and tl_title = 'Location'" )
            print("Looking for {{Location dec}}")
            cursor.execute( "insert into u_dschwen.dump select tl_from, 1 from templatelinks where tl_namespace = 10 and tl_title = 'Location_dec'" )
            print("Looking for {{GPS EXIF}}")
            cursor.execute( "insert into u_dschwen.dump select tl_from, 0 from templatelinks where tl_namespace = 10 and tl_title = 'GPS_EXIF'" )
            print("subtracting...")
            cursor.execute( "select page_title, tl_from, SUM( score ) as s from u_dschwen.dump, page where page_id = tl_from group by tl_from having s = 0" )
            print("fetching results...")
            data = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a query fails.
        connection.close()

    # The first column is the page title; mark each hit as pending.
    for record in data:
        taglist[ record[0] ] = True

    out = open( "taglist.gps", "wb" )
    try:
        marshal.dump( taglist, out )
    finally:
        out.close()
except MySQLdb.OperationalError as message:
    # Report the failure (it used to be formatted but never shown) and carry
    # on with whatever taglist was loaded from disk.
    errorMessage = "Error %d:\n%s" % (message.args[ 0 ], message.args[ 1 ] )
    print(errorMessage)
#
# get potential images from taglist
#
# For every pending image: skip pages that opt out or were already handled,
# download the file, read its GPS EXIF tags and either insert a {{Location}}
# template after {{Information}} or, when the page already has a location that
# disagrees with the EXIF data, prepend a hidden suggestion.
#
# NOTE(review): the published source lost its indentation; the nesting below
# is reconstructed from the statement order and should be confirmed.


def trimmed(fmt, value):
    """Format `value` with `fmt` ('...%f') and drop trailing zeros and dot."""
    return (fmt % value).rstrip('0').rstrip('.')


def exif_rational(value):
    """Convert an EXIF rational (numerator / denominator) to a float."""
    return float(value.numerator) / float(value.denominator)


def read_dms(image, key):
    """Read a degrees/minutes/seconds EXIF tag.

    Returns (total, fields): `total` is the value in arc seconds and `fields`
    the three components formatted as '|d|m|s' template parameters.
    """
    total = 0.0
    fields = ''
    for i in range(3):
        val = exif_rational(image[key][i])
        fields += trimmed("|%f", val)
        total = total * 60.0 + val
    return total, fields


def find_template_end(text, start):
    """Return the index just past the template that opens at `start`.

    `start` is the index of the opening '{{'.  Nested '{{ }}' and '[[ ]]'
    pairs are tracked.  Once two characters have been consumed as a pair they
    are not reused for the next pair, so '}}}}' closes exactly two templates
    (the earlier loop counted it as three and could stop inside the template).
    Returns len(text) when the template is never closed.
    """
    pos = start + 2
    curl = 1
    squr = 0
    last = ''
    while pos < len(text):
        pair = last + text[pos]
        if pair == '[[':
            squr += 1
            last = ''
        elif pair == ']]':
            squr -= 1
            last = ''
        elif pair == '{{':
            curl += 1
            last = ''
        elif pair == '}}':
            curl -= 1
            last = ''
        else:
            last = text[pos]
        pos += 1
        if curl == 0 and squr <= 0:
            break
    return pos


def drop_request_template(page, oldtext, text):
    """Save `text` if stripping {{GPS EXIF}} changed the page content."""
    if oldtext != text:
        wikipedia.setAction("removed gps exif request template")
        page.put(text)
        print("removed a superfluous gps exif request template")


def lookup_uploader(name):
    """Return img_user_text for image `name` from the database replica.

    Credentials come from the MySQL option file (no password in the source)
    and the image name is passed as a query parameter, so names containing
    quotes cannot break or alter the statement.
    """
    connection = MySQLdb.connect(host="commonswiki-p.db.ts.wikimedia.org",
                                 db="commonswiki_p",
                                 read_default_file=os.path.expanduser("~/.my.cnf"))
    try:
        cursor = connection.cursor()
        try:
            print("lets see if the user is known...")
            cursor.execute("select img_user_text from image where img_name = %s", (name,))
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()
    return rows[0][0]


def save_state():
    """Write badlist and taglist back to their marshal files."""
    for path, state in (("badlist.gps", badlist), ("taglist.gps", taglist)):
        out = open(path, "wb")
        try:
            marshal.dump(state, out)
        finally:
            out.close()


for name in list(taglist):
    if not taglist[name] or name in badlist:
        continue

    #exif = PHPUnserialize().unserialize(data[row][1])
    print(name)
    page = wikipedia.Page(site, 'Image:' + name.decode('utf-8'))
    # NOTE(review): assumes nothing is to be done for pages that do not exist
    # (the image stays pending) -- confirm against the original nesting.
    if not page.exists():
        continue

    # remove {{GPS EXIF}}
    oldtext = page.get()
    text = gpsRE.sub('', oldtext)

    # Location not applicable
    if nolocRE.search(text) is not None:
        taglist[name] = False
        print("Location not applicable")
        continue

    # coordinates already on the page, compared with the EXIF values below
    lat_in_dec = 0
    lon_in_dec = 0

    # already contains a Location
    has_location = '{{Location' in text
    if has_location:
        # already contains a generated Location
        if 'source:exif' in text:
            print("HMM, %s looks already processed" % name)
            drop_request_template(page, oldtext, text)
            taglist[name] = False
            continue

        # extract location to compare to exif
        #{{Location dec|47.5059|-122.0343|type:forest_region:US}}
        for match in loc1RE.findall(text):
            lat_in_dec = float(match[0])
            lon_in_dec = float(match[1])

    # already contains a suggestion
    if '<!-- EXIF_BOT' in text:
        print("HMM, looks already processed")
        drop_request_template(page, oldtext, text)
        taglist[name] = False
        continue

    print("downloading http://commons.wikimedia.org/wiki/Special:Filepath/%s ..." % name)
    try:
        urllib.urlretrieve("http://commons.wikimedia.org/wiki/Special:Filepath/%s" % urllib.quote(name), ".temp.jpg")
    except Exception as e:
        print("Exception while downloading: %s" % e)
        continue

    print("analyzing GPS EXIF data ...")
    try:
        image = pyexiv2.Image(".temp.jpg")
        image.readMetadata()

        temp = '{{Location'

        lat_dec, fields = read_dms(image, 'Exif.GPSInfo.GPSLatitude')
        temp += fields
        ref = image['Exif.GPSInfo.GPSLatitudeRef']
        if ref == 'S':
            lat_dec = -lat_dec
        elif ref != 'N':
            print("Broken lattitude ref!")
            temp = "<!-- GPS: Broken lattitude ref! --><br>" + temp
            ref = 'N'
        temp += '|' + ref

        lon_dec, fields = read_dms(image, 'Exif.GPSInfo.GPSLongitude')
        temp += fields
        ref = image['Exif.GPSInfo.GPSLongitudeRef']
        if ref == 'W':
            lon_dec = -lon_dec
        elif ref != 'E':
            print("Broken longitude ref!")
            temp = "<!-- GPS: Broken longitude ref! --><br>" + temp
            ref = 'E'
        temp += '|' + ref
    except Exception:
        # unreadable file or missing/malformed GPS tags: never retry this image
        print("Broken Tag")
        taglist[name] = False
        badlist[name] = True
        continue

    #
    # Jump through several hoops to try to determine a heading and make [User:Ikiwaner] happy
    #
    heading = '?'
    try:
        val = exif_rational(image['Exif.GPSInfo.GPSImgDirection'])
        heading = trimmed("%f", val)
        print("Heading found :-)")
    except Exception:
        print("No heading found :-(")
        try:
            val = exif_rational(image['Exif.GPSInfo.GPSTrack'])
            try:
                uploader = lookup_uploader(name)
                print("Uploaded by: %s" % uploader)
                # the direction of movement is only trusted for listed users
                if uploader in gpstrackusers:
                    heading = trimmed("%f", val)
            except Exception:
                print("Database error")
        except Exception:
            print("No dir of movement found either :-(")

    # A heading of exactly zero is treated as "unknown".  `heading` is a
    # string here, so it must be compared with '0' (comparing with the
    # integer 0 never matched).
    if heading == '0':
        heading = '?'

    #
    # deal with missing altitude data
    #
    try:
        alt = exif_rational(image['Exif.GPSInfo.GPSAltitude'])
        try:
            # the code negates the altitude when GPSAltitudeRef is 1
            if int(image['Exif.GPSInfo.GPSAltitudeRef']) == 1:
                alt = -alt
        except Exception:
            print("no AltitudeRef, assuming above sea level!")
        temp += trimmed("|alt:%f", alt) + "_"
    except Exception:
        print("No altitude data")
        temp += "|"

    temp += ('source:exif_heading:%s}}' % (heading))
    print(temp)

    if lon_dec == 0.0 and lat_dec == 0.0:
        print("apparently INVALID GPS data!")
        taglist[name] = False
        badlist[name] = True
        continue

    # arc seconds -> decimal degrees
    lat_dec /= 3600.0
    lon_dec /= 3600.0
    #print "Old: %f,%f" % ( lat_in_dec, lon_in_dec )
    #print "New: %f,%f" % ( lat_dec, lon_dec )

    if not has_location:
        print("YAY! tagging...")
        wikipedia.setAction("creating {{Location}} from EXIF data, please visit [[Commons:Geocoding]] for further information")
        infopos = text.find('{{Information')
        if infopos < 0:
            text2 = temp + "\n" + text
        else:
            # insert right after the closing braces of {{Information ...}}
            infopos = find_template_end(text, infopos)
            text2 = text[:infopos] + "\n" + temp + text[infopos:]
        page.put(text2)
        taglist[name] = False
    else:
        # Pages that already carry 'source:exif' or an EXIF_BOT suggestion
        # were dealt with by the early checks above, so only a manually added
        # location can reach this point.
        if math.fabs(lat_in_dec - lat_dec) < 0.0001 and math.fabs(lon_in_dec - lon_dec) < 0.0001:
            print("OK, existing geocoding seems reasonably accurate")
            taglist[name] = False
            continue
        print("OK, just inserting hidden suggestion")
        wikipedia.setAction("adding suggested {{Location}} from EXIF data")
        temp = ("%f|%f check EWNS!\n" % (lat_dec, lon_dec)) + temp
        text = '<!-- EXIF_BOT suggests: ' + temp + " -->\n" + text.replace('{{GPS EXIF}}', '')
        page.put(text)
        taglist[name] = False

    # checkpoint after each edited page so an aborted run keeps its progress
    # NOTE(review): presumably the first of the two save sequences in the
    # original sat inside the loop -- confirm.
    save_state()

# final save, also covering entries changed on paths that `continue` early
save_state()