User:JarektBot/Commons creator maintenance.py
Jump to navigation
Jump to search
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Script for maintenance and automatic improvements of Creator templates and their home categories on Wikimedia Commons.
Specific tasks:
1) Creator template home-categories:
* if multiple [[category:people by name]] then delete some
* if creator template is missing then add it
* Add birth and death categories if needed
* Add DEFAULTSORT if needed
2) Creator templates:
* Add {{LangSwitch}} to "Name" field based on interwiki links if present in the home-category
* If current "Name" field has content that does not fit several predefined patterns then add
{{LangSwitch}} to "Name1" field so it can be merged with "Name" field by hand
* Add "Linkback" field if missing, fix if needed
* Add "Option" field if missing, fix if needed
* make sure all {{Authority Control}} templates use "|bare=1" option
* move {{Authority Control}} templates from Category to Creator
* In case other edits are done perform bunch of cosmetic edits.
Other possible tasks:
* copy interwiki links from gallery to the category if needed (separate bot for all pages?)
* update existing interwiki links inside "Name" {{LangSwitch}} block
* copy nationality and occupation from Category to Creator if missing
* copy nationality and occupation from Creator to Category if missing
* copy {{Authority Control}} templates from German or English Wikipedia to Creator (done as separate bot)
* copy data (dates, places, etc.) from English or German Wikipedia.
* tag problematic pages: autocategorizing, with <noinclude> blocks, with links in "Description", etc.
Other notes:
* Run the bot after recent run of interwiki.py to ensure that all the links are up to date
"""
#
# (C) Jarekt, 2011
#
# Distributed under the terms of the MIT license.
#
import sys, os.path, glob, string, urllib2, json
import wikipedia as pywikibot
import pywikibot.textlib as textlib
import config, catlib, time, re
# ===============================================================
def Interwiki2LangSwitch(page, Gallery_name, nationality):
    """Build the text for a Creator "Name" field from a page's interwiki links.

    Inputs:
    * page         - Commons page with interwiki links (usually a category page)
    * Gallery_name - name which could be likely for a name of a gallery
                     associated with the interwiki links
    * nationality  - 2 character nationality code

    Returns a ``(text, nLinks)`` tuple:
    * ``(u'', 0)`` when the page has no interwiki links,
    * a single ``[[:lang:Title|Gallery_name]]`` link when exactly one entry
      was collected,
    * otherwise a complete ``{{LangSwitch ...}}`` block.
    """
    commons = pywikibot.getSite(u'commons', u'commons')
    # Preferred languages for the "default" entry; a lower index wins.
    preferred = [nationality, "de", "fr", "nl", "it", "es", "pl", "pt"]
    best_rank = 10
    # Gallery name with any "(disambiguation)" part removed.
    plain_name = re.sub(r'\([^\)]*\)', '', Gallery_name)
    default_entry = plain_name
    missing_default = True      # no "en" link and no gallery seen so far
    rows = []                   # LangSwitch lines, one per interwiki link
    count = 0

    # Collect the interwiki links keyed by their site.
    by_site = {}
    for link in page.interwiki():
        by_site[link.site()] = link
    if not by_site:
        return u'', 0

    for site in textlib.interwikiSort(by_site.keys(), insite=commons):
        lang = site.lang
        link = by_site[site]
        target = link.title()
        # Displayed label: title without section and disambiguation part.
        label = re.sub(r'\s*\([^\)]*\)', '', link.sectionFreeTitle())
        if lang == 'en':
            missing_default = False
        if lang == 'ru':
            # Flip "LASTNAME, FIRST MIDDLE" into "FIRST MIDDLE LASTNAME".
            flipped = re.search(r"([^\s\,]*)\, (.*)", label)
            if flipped is not None:
                label = flipped.group(2) + ' ' + flipped.group(1)
        if lang in preferred:
            rank = preferred.index(lang)
            if rank < best_rank:
                best_rank = rank
                default_entry = '[[:' + lang + ':' + target + u'|' + plain_name + u']]'
        rows.append(u' | ' + lang + u' = [[:' + lang + ':' + target + u'|' + label + u']]\n')
        # Remembered for the single-link case (the last link processed).
        single_link = u'[[:' + lang + ':' + target + u'|' + Gallery_name + u']]'
        count += 1

    if count == 0:
        return u'', count

    # Add gallery page if one exists
    gallery = pywikibot.Page(commons, Gallery_name)
    if gallery.exists() and not gallery.isRedirectPage():
        pywikibot.output('[[' + Gallery_name + ']] exists')
        rows.append(u' | default = [[' + Gallery_name + u']]\n')
        missing_default = False
        count += 1

    if count == 1:
        return single_link, count

    # Add default value if EN version and gallery do not exist
    if missing_default:
        rows.append(u' | default = ' + default_entry + u'\n')

    return u'{{LangSwitch\n' + u''.join(rows) + u' }}', count
def Wikiproject2LangSwitch(links, name, letter, nationality):
    """Build the value of a sister-project field (e.g. Wikisource) from links.

    Inputs:
    * links       - dict mapping a key to a dict that has a 'title' entry
    * name        - substring selecting the relevant keys; the remainder of
                    the key after removing it is used as the language code
    * letter      - interwiki prefix letter put in front of each link
    * nationality - language code whose link, if present, becomes the default

    Returns u'' when no key matches, the single "letter:lang:title\\n" link
    when exactly one key matches, and otherwise a {{LangSwitch}} block.
    Every link string carries a trailing newline.
    """
    per_language = {}
    national_link = u''     # link whose language equals `nationality`
    last_link = u''         # most recently built link
    matches = 0
    english_present = False

    for key, entry in links.items():
        if name not in key:
            continue
        code = key.replace(name, '')
        last_link = u'%s:%s:%s\n' % (letter, code, entry['title'])
        per_language[code] = last_link
        if code == 'en':
            english_present = True
        if code == nationality:
            national_link = last_link
        matches += 1

    if matches < 2:
        return last_link

    rows = u''.join(u' |%s=%s\n' % (code, per_language[code])
                    for code in sorted(per_language))
    if national_link:
        # A link in the author's own language becomes the default.
        block = u'{{LangSwitch\n%s |default=%s\n }}' % (rows, national_link)
    elif english_present:
        # No explicit default is added when an "en" entry exists.
        block = u'{{LangSwitch\n' + rows + u' }}'
    else:
        # Otherwise whichever link was built last serves as the default.
        block = u'{{LangSwitch\n%s |default=%s\n }}' % (rows, last_link)
    print(block)
    return block
#================================================
class CreatorMaintenanceBot:
    """Bot that reconciles Creator templates with their home categories.

    For every Creator page handed to run() the template parameters are
    parsed into instance attributes, then EditCreator() updates the template
    and EditCategory() updates the home category.
    """

    # Creator fields whose "| Field = " spacing is normalized, in the order
    # in which the substitutions are applied.
    _SPACED_FIELDS = (
        u'Name1', u'Name', u'Alternative names', u'Nationality', u'Gender',
        u'Occupation', u'Description', u'Birthdate', u'Birthyear',
        u'Birthloc', u'Deathdate', u'Deathyear', u'Deathloc', u'Workperiod',
        u'Workloc', u'Image', u'Sortkey', u'Homecat', u'Option', u'Linkback',
        u'Authority', u'Wikisource', u'Wikidata', u'Wikiquote',
    )

    # (regex for the spelled-out nationality, replacement code)
    _NATIONALITY_CODES = (
        (r'[Ff]rench', u'FR'), (r'[Gg]erman', u'DE'), (r'[Ii]talian', u'IT'),
        (r'[Gg]reek', u'GR'), (r'[Jj]apanese', u'JP'), (r'[Dd]utch', u'NL'),
        (r'[Pp]olish', u'PL'), (r'[Rr]ussian', u'RU'), (r'[Ss]panish', u'ES'),
        (r'[Bb]ritish', u'GB'), (r'[Aa]merican', u'US'),
    )

    # attribute name -> regex applied to each Creator template parameter;
    # group(1) is stored in the attribute of the same name.
    _FIELD_REGEXP = {
        "homecat":     r"Homecat\s*=\s*([^\n]*)",
        "option":      r"Option\s*=\s*([^\n]*)",
        "sortkey":     r"Sortkey\s*=\s*([^\n]*)",
        "deathdate":   r"Deathdate\s*=\s*([^\n]*)",
        "birthdate":   r"Birthdate\s*=\s*([^\n]*)",
        "deathyear1":  r"Deathdate\s*=\s*(\d\d\d\d?)",
        # was keyed "birthyear2", which left self.birthyear1 always empty
        "birthyear1":  r"Birthdate\s*=\s*(\d\d\d\d?)",
        "deathyear":   r"Deathyear\s*=\s*(\d\d\d\d?)",
        "birthyear":   r"Birthyear\s*=\s*(\d\d\d\d?)",
        "authority":   r"Authority\s*=\s*(\{\{Authority control[^\}]*\}\})",
        "name":        r"Name\s*=\s*([^\n]*)",
        "name1":       r"Name1\s*=\s*([^\n]*)",
        "nationality": r"Nationality\s*=\s*([^\n]*)",
        "type":        r"Type\s*=\s*([^\n]*)",
        "linkback":    r"Linkback\s*=\s*([^\n]*)",
        "wikisource":  r"Wikisource\s*=\s*([^\n]*)",
        "wikiquote":   r"Wikiquote\s*=\s*([^\n]*)",
        "wikidata":    r"Wikidata\s*=\s*([^\n]*)",
    }

    # === constructor ======================================================================
    def __init__(self, always=False):
        """Create the bot.

        always - save without asking for confirmation (default False).
        """
        self.always = always        # always save without asking
        self.dry_run = False        # run the code without saving?
        self.iEditCount = 0         # number of successful saves so far
        self.maxEditCount = -200    # stop after number of edits (used only if >0)
        self.init()                 # initialize the per-page attributes

    # === initialize ======================================================================
    def init(self):
        """Reset the attributes that are re-populated for every Creator page."""
        self.homecat = u""
        self.option = u""
        self.sortkey = u""
        self.deathdate = u""
        self.birthdate = u""
        self.deathyear = u""
        self.birthyear = u""
        self.deathyear1 = u""
        self.birthyear1 = u""
        self.authority = u""
        self.name = u""
        self.name1 = u""
        self.nationality = u""
        self.type = u""
        self.linkback = u""
        self.wikisource = u""
        self.wikiquote = u""
        self.wikidata = u""
        self.AC_template = u""
        self.interwiki = u""

    # === cosmetic_changes ======================================================================
    def creator_cosmetic_changes(self, creator_txt):
        """Return creator_txt after minor clean-up.

        Meant to be applied only when the page is going to be edited anyway.
        """
        creator_txt = creator_txt.replace('|PND=', '|GND=')         # authority control template parameter
        creator_txt = creator_txt.replace('\n\n', '\n')             # delete empty lines
        creator_txt = creator_txt.replace('maleale', 'male')        # clean up after some old bot runs
        creator_txt = creator_txt.replace('femaleemale', 'female')

        # spacing correction
        for field in self._SPACED_FIELDS:
            creator_txt = re.sub(r'\n\s*\|\s*' + field + r'\s*= *',
                                 r'\n | ' + field + r' = ', creator_txt)
        # spacing of lines in LangSwitch block
        creator_txt = re.sub(r'\n\s*\|\s*([^=]*)\s*=\[\[w?\:\1\:', r'\n |\1=[[:\1:', creator_txt)

        # correct values of specific fields
        creator_txt = re.sub(r'\n\s*\|\s*Gender\s*= *m *\n', r'\n | Gender = male\n', creator_txt)
        creator_txt = re.sub(r'\n\s*\|\s*Gender\s*= *f *\n', r'\n | Gender = female\n', creator_txt)
        for adjective, code in self._NATIONALITY_CODES:
            # " *" tolerates trailing blanks for every nationality; five of
            # the original patterns had lost the space before the "*".
            creator_txt = re.sub(r'\n\s*\|\s*Nationality\s*= *' + adjective + r' *\n',
                                 r'\n | Nationality = ' + code + r'\n', creator_txt)

        # drop empty year fields
        creator_txt = re.sub(r'\n\s*\|\s*Birthyear\s*= *\n', r'\n', creator_txt)
        creator_txt = re.sub(r'\n\s*\|\s*Deathyear\s*= *\n', r'\n', creator_txt)
        return creator_txt

    # === Save page ======================================================================
    def save(self, text, page, comment, minorEdit=False, botflag=True):
        """Show the diff and save `text` to `page` if it differs.

        Returns True only when the page was actually saved. Exits the program
        when the operator chooses 'Quit' or the edit limit is reached.
        """
        old_txt = page.get()
        # only save if something was changed
        if text == old_txt:
            return False
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # show what was changed
        pywikibot.showDiff(old_txt, text)
        pywikibot.output(u'Comment: %s' % comment)
        if self.dry_run:
            return False
        if not self.always:
            choice = pywikibot.inputChoice(
                u'Do you want to accept these changes?',
                ['Yes', 'No', 'Always', 'Quit'],
                ['y', 'N', 'a', 'q'], 'N')
            if choice == 'a':
                self.always = True
            elif choice == 'q':
                # Uses the module-level sys: a function-local "import sys"
                # here would make the other sys.exit() below fail with
                # UnboundLocalError whenever this branch did not run.
                sys.exit()
            elif choice != 'y':
                return False
        try:
            # Save the page
            page.put(text, comment=comment,
                     minorEdit=minorEdit, botflag=botflag)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output(
                u'Skipping %s because of edit conflict'
                % (page.title()))
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
        else:
            self.iEditCount += 1
            if self.iEditCount == self.maxEditCount:
                sys.exit()
            return True
        return False

    # === Query CreatorLinks database ======================================================================
    def _set_wikiproject_link(self, text, message, field, letter, current, link):
        """Add or update a sister-project field (Wikisource / Wikiquote).

        field   - template field name, also used in the edit summary
        letter  - interwiki prefix of plain links of that project
        current - value currently parsed from the template
        link    - new value built by Wikiproject2LangSwitch
        Returns the updated (text, message).
        """
        pywikibot.output(u'self.%s = %s' % (field.lower(), current))
        pywikibot.output(u'%s = %s' % (field.lower(), link))
        if len(link) == 0 or current == link.strip():
            return text, message
        if len(current) > 0:
            # Replace an existing LangSwitch block; if there was none,
            # replace a plain "<letter>:xx:Title" link instead.
            updated = re.sub(field + r'\s*=\s*\{\{[Ll]ang[Ss]witch[^\}]*\}\}',
                             r'%s = %s' % (field, link), text)
            if updated == text:
                updated = re.sub(field + r'\s*=\s*' + letter + r':\w\w:.*',
                                 r'%s = %s' % (field, link), text)
            return updated, message + u'Modify %s; ' % field
        # Field is missing: insert it just before the Option line.
        text = re.sub(r'(\n\s*\|\s*)(Option\s*=[^\n]*)\n',
                      r'\1%s = %s\n\1\2\n' % (field, link), text)
        return text, message + u'Add %s; ' % field

    def QueryCreatorLinks(self, creator_page, new_creator_txt, message):
        """Merge data from the CreatorLinks database into the template text.

        Queries the web service for creator_page and, where the template has
        no value yet, adds birth/death year, Wikidata, authority control,
        Wikisource and Wikiquote. Also rebuilds self.interwiki.
        Returns the updated (new_creator_txt, message); on any failure to
        fetch or parse the response both are returned unchanged.
        """
        # create query url and call CreatorLinks database
        # NOTE(review): the title is not URL-quoted before being appended.
        name = creator_page.title(underscore=True)
        urlstr = u"http://creatorlinks.wmflabs.org/index.php?site=commons&format=json&title=" + name
        urlstr = urlstr.encode('utf-8')
        pywikibot.output(urlstr)
        try:
            response = urllib2.urlopen(urlstr)
            try:
                content = response.read()
            finally:
                response.close()
            output_json = json.loads(content)
        except Exception:
            # Best effort: the lookup is optional. Unlike a bare "except:"
            # this lets KeyboardInterrupt and SystemExit propagate.
            return new_creator_txt, message
        if not isinstance(output_json, dict):
            return new_creator_txt, message

        # add birthdate / deathdate to creator template
        for field, json_key, known in (
                (u'Birthdate', 'birthYear', self.birthyear + self.birthdate),
                (u'Deathdate', 'deathYear', self.deathyear + self.deathdate)):
            year = output_json.get(json_key) or u''
            if year and len(known) == 0:
                new_creator_txt = re.sub(field + r'[^\n]*\n',
                                         r'%s = %s\n' % (field, year), new_creator_txt)
                message = message + u'Add %s; ' % field

        # get links from the database
        links = output_json.get('links') or {}
        if not isinstance(links, dict) or len(links) == 0:
            return new_creator_txt, message

        # add wikidata to creator template
        wikidata = links.get('wikidata', '')
        if len(self.wikidata) == 0 and len(wikidata) > 0:    # not in the template, but present in the DB
            pywikibot.output(u'wikidata = ' + wikidata['title'])
            if 'Wikidata' in new_creator_txt:
                new_creator_txt = re.sub(r'(Wikidata[^\n]*)\n',
                                         r'Wikidata = %s\n' % wikidata['title'], new_creator_txt)
            else:
                new_creator_txt = re.sub(r'(\n\s*\|\s*)(Option\s*=[^\n]*)\n',
                                         r'\1Wikidata = %s\n\1\2\n' % wikidata['title'], new_creator_txt)
            message = message + u'Add Wikidata; '

        # add or modify authority control data in the creator template
        AC_key = ['ulan', 'isni', 'bnf', 'viaf', 'lccn', 'gnd', 'nla']    # links to look up
        if len(self.authority) == 0:
            self.authority = u'{{Authority control|bare=1}}'
        self.authority = self.authority.replace('|PND=', '|GND=')
        # Delete empty fields. Two passes are needed because a match consumes
        # the closing "|", so the empty field right after it is skipped.
        self.authority = re.sub(r'\|[^=]*= *\|', r'|', self.authority)
        self.authority = re.sub(r'\|[^=]*= *\|', r'|', self.authority)
        new_authority = self.authority
        pywikibot.output(new_authority)
        for key in AC_key:                                    # add creatorlinks AC links
            lnk = links.get(key, '')
            if len(lnk) > 0 and key.upper() not in new_authority:
                new_authority = new_authority.replace(
                    '|bare', '|%s=%s|bare' % (key.upper(), lnk['title']))
        if new_authority != self.authority:
            if 'Authority' in new_creator_txt:
                new_creator_txt = re.sub(r'(Authority[^\n]*)\n',
                                         r'Authority = %s\n' % new_authority, new_creator_txt)
                message = message + u'Modify Authority control; '
            else:
                new_creator_txt = re.sub(r'(\n\s*\|\s*)(Linkback\s*=[^\n]*)\n',
                                         r'\1\2\1Authority = %s\n' % new_authority, new_creator_txt)
                message = message + u'Add Authority control; '
            # NOTE(review): original nesting of this log line is unknown.
            pywikibot.output(new_authority)

        # add or modify wikisource links in creator template
        wikisource = Wikiproject2LangSwitch(links, 'source', 's', self.nationality.lower())
        new_creator_txt, message = self._set_wikiproject_link(
            new_creator_txt, message, u'Wikisource', u's', self.wikisource, wikisource)

        # add or modify wikiquote links in creator template; plain links use
        # the "q:" prefix (the original fallback regex looked for "s:")
        wikiquote = Wikiproject2LangSwitch(links, 'quote', 'q', self.nationality.lower())
        new_creator_txt, message = self._set_wikiproject_link(
            new_creator_txt, message, u'Wikiquote', u'q', self.wikiquote, wikiquote)

        # create interwiki text, one "[[lang:Title]]" line per "...wiki" key
        lang_link = {}
        for subkey, subvalue in links.items():
            if 'wiki' in subkey:
                lang = subkey.replace('wiki', '')
                if 'data' not in lang:                        # skip the wikidata entry
                    lang_link[lang] = u'[[%s:%s]]\n' % (lang, subvalue['title'])
        self.interwiki = u''.join(lang_link[lang] for lang in sorted(lang_link))
        return new_creator_txt, message

    # === Edit category based on Creator page ======================================================================
    def EditCategory(self, cat_txt, creator_page, cat_page):
        """Edit Creator home category based on text in Creator template.

        Adds the creator template, birth/death year categories, DEFAULTSORT
        and interwiki links when they are missing. Categories that are
        redirects or lack [[Category:People by name]] are only logged.
        """
        message = u'Add '
        cat_txt_l = cat_txt.lower().replace('_', ' ')    # lower case, "_" turned into spaces

        # If there are issues with home category then write it to the log
        if cat_page.isRedirectPage() or 'category:people by name' not in cat_txt_l:
            site = pywikibot.getSite(u'commons', u'commons')
            page = pywikibot.Page(site, u'User:Jarekt/Brudnopis')
            page.put(page.get() + u'\n*[[:' + cat_page.title() + ']]', 'New Creators')
            return

        # if there are multiple category:people by name then delete some
        if ('[[category:people by name]]' in cat_txt_l
                and '[[category:people by name|' in cat_txt_l):
            cat_txt = re.sub(r'\[\[Category:People by name\|[^\]]*\]\]', '', cat_txt,
                             flags=re.IGNORECASE)
            message = u'Delete category double; Add '

        # minor fixes only added when other fixes are performed
        cat_txt = cat_txt.replace('[[category:', '[[Category:')
        m = re.search(r"{{DEFAULTSORT:([^\}]*)\}\}", cat_txt)
        if m is not None:
            # a DEFAULTSORT key makes identical per-category sort keys redundant
            sortkey = m.group(1).strip()
            cat_txt = cat_txt.replace('|' + sortkey + ']]', ']]')
        new_cat_txt = cat_txt

        # if creator template is missing then add it
        if '{{creator:' not in cat_txt_l and '{{:creator:' not in cat_txt_l:
            new_cat_txt = '{{' + creator_page.title() + '}}\n' + new_cat_txt
            message = message + u'creator template, '

        # Add birth and death categories in front of "People by name";
        # the explicit year field wins over the year parsed from the date.
        for kind, label, year in (
                ('births', u'birth', self.birthyear or self.birthyear1),
                ('deaths', u'death', self.deathyear or self.deathyear1)):
            if kind not in cat_txt and len(year) > 0:
                new_cat_txt = new_cat_txt.replace(
                    '[[Category:People by name',
                    '[[Category:' + year + ' ' + kind + ']]\n[[Category:People by name')
                message = message + label + u' year category, '

        # Add DEFAULTSORT
        if '{{DEFAULTSORT:' not in cat_txt and len(self.sortkey) > 0:
            new_cat_txt = '{{DEFAULTSORT:' + self.sortkey + '}}\n' + new_cat_txt
            message = message + u'DEFAULTSORT, '

        # Add interwiki links if missing
        cat_interwiki = cat_page.interwiki()
        if len(cat_interwiki) == 0 and len(self.interwiki) > 0:
            new_cat_txt = new_cat_txt + '\n' + self.interwiki
            message = message + u'interwiki links, '

        # save changed text if any
        if new_cat_txt != cat_txt:
            self.save(new_cat_txt, cat_page, message)

    # === Edit creator page ======================================================================
    def _add_name_links(self, creator_txt, message, Name, NameLangSwitch, nLang):
        """Put NameLangSwitch into the Name field, or into Name1 as fallback.

        The current Name value is replaced only when it matches one of a few
        known shapes; any other content is kept and the new text goes to a
        new "Name1" field so it can be merged by hand.
        Returns the updated (creator_txt, message).
        """
        if nLang == 1:
            msg = u'Add wikipedia link to Name; '
        else:
            msg = u'Add {{LangSwitch}} to Name; '
        prefix = r'(Name\s*=\s*)'
        # Escaped so that characters such as "." or "+" in a name are
        # matched literally instead of being read as regex syntax.
        name_re = re.escape(Name)

        # Name string is a template -> leave as is
        if re.search(prefix + r'\{\{' + name_re + r'\}\}', creator_txt) is not None:
            return creator_txt, message

        replaceable = (
            prefix + name_re,                                                       # basic text
            prefix + r'\[\[' + name_re + r'\]\]',                                   # gallery
            prefix + r'\{\{w\|' + name_re + r'[^\}]*\}\}',                          # {{w|...}}
            prefix + r'\[\[:[Cc]ategory:' + name_re + r'\|' + name_re + r'\]\]',    # category
            prefix + r'\[\[[a-z\:]*' + name_re + r'\]\]',                           # bad wiki link
        )
        for pattern in replaceable:
            if re.search(pattern, creator_txt) is not None:
                return re.sub(pattern, r'\1' + NameLangSwitch, creator_txt), message + msg

        # piped wiki link -> replace with LangSwitch
        pattern = prefix + r'\[\[[a-z\:]*[^\|]*\|' + name_re + r'\]\]'
        if nLang == 1:
            pywikibot.output('"%s", "%s"' % (self.name, NameLangSwitch))
        if re.search(pattern, creator_txt) is not None:
            return re.sub(pattern, r'\1' + NameLangSwitch, creator_txt), message + msg

        # Otherwise -> Add Name1 with LangSwitch
        creator_txt = re.sub(r'(\n\s*\|\s*)Name',
                             r'\1Name1 = ' + NameLangSwitch + r'\1Name', creator_txt)
        return creator_txt, message + msg.replace(u'Name', u'Name1')

    def EditCreator(self, creator_txt, cat_txt, creator_page, cat_page):
        """Edit Creator template based on text in Creator home category.

        Fixes Homecat, Name, Linkback, Option and Authority, merges data from
        the CreatorLinks database and saves the page if anything changed.
        """
        message = u''
        new_creator_txt = creator_txt

        # follow redirect
        if cat_page.titleWithoutNamespace() != self.homecat:
            new_creator_txt = re.sub(r'(Homecat\s*=)[^\n]*',
                                     r'\1 ' + cat_page.titleWithoutNamespace(), new_creator_txt)
            message = message + u'Correct homecat; '

        # Add {{LangSwitch}} to Name
        nLang = 0
        Name = re.sub(r'\([^\)]*\)', '', creator_page.titleWithoutNamespace())    # person's name no disambiguations
        if len(self.name1) == 0 and 'langswitch' not in self.name.lower():
            NameLangSwitch, nLang = Interwiki2LangSwitch(cat_page, Name, self.nationality.lower())
            # before and after is the same -> skip
            if nLang > 0 and self.name != NameLangSwitch:
                new_creator_txt, message = self._add_name_links(
                    new_creator_txt, message, Name, NameLangSwitch, nLang)

        # Add Linkback
        if len(self.linkback) == 0:
            new_creator_txt = re.sub(r'(\n\s*\|\s*)(Homecat\s*=[^\n]*)\n',
                                     r'\1\2\1Linkback = {{subst:FULLPAGENAME}}\n', new_creator_txt)
            message = message + u'Add Linkback; '
        # The literal in SOURCE was a bare ''' (invalid Python). Restored as
        # the HTML entity for an apostrophe, on the assumption that the page
        # scrape decoded it: substituted FULLPAGENAME emits apostrophes in
        # that form, so such a Linkback never equals title() yet is correct.
        elif self.linkback != creator_page.title() and u'&#39;' not in self.linkback:
            new_creator_txt = re.sub(r'(Linkback\s*=\s*)[^\n]*',
                                     r'\1{{subst:FULLPAGENAME}}', new_creator_txt)
            message = message + u'Correct Linkback; '

        # Add Option
        if len(self.option) == 0:
            new_creator_txt = re.sub(r'(\n\s*\|\s*)(Homecat\s*=[^\n]*)\n',
                                     r'\1\2\1Option = {{{1|}}} <!-- Do not modify -->\n', new_creator_txt)
            message = message + u'Add Option; '
        elif "{{{1|}}}" not in self.option:
            new_creator_txt = re.sub(r'(Option\s*=)[^\n]*',
                                     r'\1 {{{1|}}} <!-- Do not modify -->', new_creator_txt)
            message = message + u'Correct Option; '

        # Correct authority control
        if len(self.authority) > 0 and "|bare=1" not in self.authority:
            new_creator_txt = re.sub(r'(\{\{Authority control[^\}]*)\}\}',
                                     r'\1|bare=1}}', new_creator_txt)
            message = message + u'Correct authority; '
        if len(self.authority) == 0 and u'Authority control' in cat_txt:
            m = re.search(r'(\{\{Authority control[^\}]*\}\})', cat_txt)
            if m is not None:
                # move the template from the category to the creator page
                ac_str = m.group(1).strip()
                cat_txt = cat_page.get()
                new_cat_txt = cat_txt.replace(ac_str, '')
                if new_cat_txt != cat_txt:
                    self.save(new_cat_txt, cat_page,
                              "move Authority control from category to creator template")
                ac_str = ac_str.replace('}}', u'|bare=1}}')
                # NOTE(review): lower-case 'authority' kept from the original;
                # other checks in this class test for 'Authority'.
                if 'authority' in creator_txt:
                    new_creator_txt = re.sub(r'(Authority\s*=\s*)[^\n]*',
                                             r'\1' + ac_str, new_creator_txt)
                else:
                    new_creator_txt = re.sub(r'(\n\s*\|\s*)(Linkback\s*=[^\n]*)\n',
                                             r'\1\2\1Authority = %s\n' % ac_str, new_creator_txt)
                message = message + u'Add Authority control; '

        new_creator_txt, message = self.QueryCreatorLinks(creator_page, new_creator_txt, message)

        # save changed text if any
        if new_creator_txt != creator_txt:
            # cosmetic changes are made only if the page is being changed
            new_creator_txt = self.creator_cosmetic_changes(new_creator_txt)
            self.save(new_creator_txt, creator_page, message)

    #=================================================================
    def run(self, generator, start_title=u'Creator:Floris Balthasarsz. van Berckenrode'):
        """Main loop over the pages yielded by `generator`.

        Only pages in namespace 100 are handled. Pages are skipped until one
        titled `start_title` is reached; the default is the resume point that
        was hard-coded in the script. Pass None to process from the start.
        """
        site = pywikibot.getSite(u'commons', u'commons')
        skip = start_title is not None              # fast-forward to the page of interest
        for creator_page in generator:              # page from Category:Creator templates category
            if creator_page.namespace() != 100:     # make sure it is of correct namespace
                continue
            if skip and creator_page.title() == start_title:
                skip = False
            if skip:
                continue

            creator_str = '{{' + creator_page.title() + '}}'
            creator_txt = creator_page.get()
            pywikibot.output(u'Getting %s:' % creator_str)
            self.init()

            # parse the template parameters into attributes of self
            templates = creator_page.templatesWithParams(creator_txt.replace('{{{1|}}}', ''))
            for template in templates:
                if template[0] == 'Creator':        # make sure Creator template is present
                    for param in template[1]:
                        for field, pattern in self._FIELD_REGEXP.items():
                            m = re.search(pattern, param)
                            if m is not None:
                                setattr(self, field, m.group(1).strip())
                if template[0] == 'Authority control':
                    self.AC_template = template[1]

            # Option is read from the raw text, because {{{1|}}} was removed
            # from the text handed to templatesWithParams above
            m = re.search(self._FIELD_REGEXP['option'], creator_txt)
            if m is not None:
                self.option = m.group(1).strip()

            if len(self.homecat) == 0:
                continue
            cat_name = 'Category:' + self.homecat
            pywikibot.output(u' homecat = "%s"' % cat_name)
            pywikibot.output(u' name = "%s"' % self.name)
            cat_page = catlib.Category(site, cat_name)
            if not cat_page.exists():               # only if homecat exists
                continue
            if cat_page.isRedirectPage():
                cat_page = cat_page.getRedirectTarget()
                pywikibot.output(u" Redirect -> %s" % cat_page.title(asLink=True))
            cat_txt = cat_page.get()                # get the homecat text
            self.EditCreator(creator_txt, cat_txt, creator_page, cat_page)
            self.EditCategory(cat_txt, creator_page, cat_page)
# ================================================
def main():
    """Run the maintenance bot over the pages of Category:Creator templates."""
    commons = pywikibot.getSite(u'commons', u'commons')
    templates_cat = catlib.Category(commons, u'Category:Creator templates')
    # pages from the Category:Creator templates category
    pages = templates_cat.articles(startFrom=None)
    CreatorMaintenanceBot().run(pages)
# Script entry point: run the bot only when executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always call pywikibot.stopme(), even when main() raised or exited.
        pywikibot.stopme()