File:Edit rates by users and bots on wikimedia Commons July1 July27 2012.png

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

Original file (2,400 × 1,230 pixels, file size: 663 KB, MIME type: image/png)

Captions

Captions

Add a one-line explanation of what this file represents

Summary

[edit]
Description
English: Time evolution of the edit rate on Wikimedia Commons over a time period of 27 days from July 1 to July 27, 2012. The edits are split into user edits and bot edits. Data collected with SlaungerBot.
Date
Source Own work
Author Slaunger

Source

[edit]
 
This plot was created with Matplotlib.
'''
Created on 09/07/2012

@author: Kim
'''

from calendar import timegm
from time import strptime, strftime, gmtime

from numpy import arange
from pylab import plot, xlabel, ylabel, legend, grid, title, show, axhline, figure, scatter, xlim, ylim

import wikipedia as pywikibot

# --- Query window ---------------------------------------------------------
# Timestamp layout shared by the window bounds below and by the parsing and
# formatting done in recent_changes_iter.
datefmtstr = "%Y-%m-%dT%H:%M:%SZ"
# recent_changes_iter walks backwards in time, so rcstart is the LATER
# timestamp and rcend the EARLIER one.
rcstart = "2012-07-27T00:00:00Z"
rcend = "2012-07-01T00:00:00Z"

# --- Units and reference values -------------------------------------------
secs_per_min = 60
# Only referenced from a commented-out axhline() call in the plotting code.
default_max_bot_edit_rate_per_minute = 6

# --- Accumulators filled while fetching -----------------------------------
# Maps the second value yielded by recent_changes_iter (used as the bot's
# name) to a dict of {UTC minute index: number of edits in that minute}.
bot_edit_rates = dict()
# Maps UTC minute index to the number of non-bot edits in that minute.
user_edit_rates = dict()

def recent_changes_iter(rcstart, rcend, rcshow):
    """Yield ``(utc_seconds, item[4])`` for recent changes, newest window first.

    The span from *rcstart* (later timestamp) back to *rcend* (earlier
    timestamp) is fetched in consecutive one-hour windows through the
    module-level ``site`` object, which must be set before iteration starts.

    Parameters:
        rcstart: newest timestamp, formatted according to ``datefmtstr``.
        rcend: oldest timestamp, formatted according to ``datefmtstr``.
        rcshow: filter passed through unchanged to ``site.recentchanges``
            (the script uses ``"bot"`` and ``"!bot"``).

    Yields:
        A 2-tuple: ``item[1]`` parsed with ``datefmtstr`` and converted to
        seconds since the epoch (UTC), and ``item[4]`` unchanged. The caller
        in this script treats ``item[4]`` as the editing account's name.

    NOTE(review): ``number=5000`` is presumably a per-request cap; if an hour
    holds more changes than that, the surplus may be silently missing.
    Verify against the pywikibot ``recentchanges`` implementation.
    """
    interval = 60 * 60
    newest_sec = timegm(strptime(rcstart, datefmtstr))
    oldest_sec = timegm(strptime(rcend, datefmtstr))

    window_newest = newest_sec
    while True:
        window_oldest = max(oldest_sec, window_newest - interval)
        is_final_window = window_oldest == oldest_sec
        # Consecutive windows share their boundary second. The API bounds are
        # inclusive, so querying [window_oldest, window_newest] every time
        # would return a change stamped exactly on a boundary twice. Leave
        # the boundary second to the next (older) window; only the final
        # window includes its own oldest second.
        if is_final_window:
            query_oldest = window_oldest
        else:
            query_oldest = window_oldest + 1
        rc1str = strftime(datefmtstr, gmtime(window_newest))
        rc2str = strftime(datefmtstr, gmtime(query_oldest))
        # Progress output; the single-argument form prints identically under
        # the Python 2 print statement and the print function.
        print("%s : %s - %s" % (rcshow, rc2str, rc1str))
        for item in site.recentchanges(number=5000, rcshow=rcshow,
                                       rcstart=rc1str,
                                       rcend=rc2str):
            yield timegm(strptime(item[1], datefmtstr)), item[4]
        if is_final_window:
            break
        window_newest = window_oldest

# Fetch all bot and non-bot changes in the configured window and tally them
# per UTC minute. pywikibot.stopme() is always called afterwards, even when
# fetching fails.
try:
    site = pywikibot.getSite()

    # Bot edits: one {minute: count} dict per bot. Changes arrive newest
    # first, so the first one seen is the latest bot edit and the last one
    # seen is the earliest.
    last_bot_edit = None
    first_bot_edit = None
    for utc_sec, bot_str in recent_changes_iter(rcstart, rcend, rcshow="bot"):
        utc_min = utc_sec // secs_per_min
        edit_rates = bot_edit_rates.setdefault(bot_str, {})
        edit_rates[utc_min] = edit_rates.get(utc_min, 0) + 1
        if last_bot_edit is None:
            last_bot_edit = utc_sec
        first_bot_edit = utc_sec

    # The time axis built later is anchored on the bot edits; without any,
    # fail with a clear message instead of an unbound-name error.
    if first_bot_edit is None:
        raise RuntimeError("No bot edits found between %s and %s"
                           % (rcend, rcstart))

    # Non-bot edits: a single {minute: count} dict; the second tuple element
    # is not needed here.
    for utc_sec, _ in recent_changes_iter(rcstart, rcend, rcshow="!bot"):
        utc_min = utc_sec // secs_per_min
        user_edit_rates[utc_min] = user_edit_rates.get(utc_min, 0) + 1

finally:
    pywikibot.stopme()
    
# Names of the bots whose busiest single minute held more than 7 edits.
fast_bots = frozenset(
    name
    for name, per_minute in bot_edit_rates.iteritems()
    if max(per_minute.itervalues()) > 7
)
    
# Minute indices covered by the plots: one minute is dropped at each end of
# the span between the earliest and the latest bot edit.
utc_min_minimum = first_bot_edit // 60 + 1
utc_min_maximum = last_bot_edit // 60 - 1

# X axes: elapsed minutes since utc_min_minimum, and the same in days.
minute_span = utc_min_maximum - utc_min_minimum + 1
minutes_after_first_bot_edit = arange(minute_span)
minutes_per_day = 60.0 * 24
days_after_first_bot_edit = minutes_after_first_bot_edit / minutes_per_day

# Build one value per minute of the plotted span for each series:
#   all_user_edit_rates   - non-bot edits
#   all_bots_edit_rates   - edits by every bot
#   other_bots_edit_rates - edits by bots that are not in fast_bots
#   accum_edit_rate       - user edits plus all bot edits
minute_indices = xrange(utc_min_minimum, utc_min_maximum + 1)

accum_edit_rate = []
other_bots_edit_rates = []
all_bots_edit_rates = []
all_user_edit_rates = []

for utc_min in minute_indices:
    user_count = user_edit_rates.get(utc_min, 0)
    bot_total = 0
    slow_bot_total = 0
    for bot_str, ber in bot_edit_rates.iteritems():
        count = ber.get(utc_min, 0)
        bot_total += count
        if bot_str not in fast_bots:
            slow_bot_total += count
    all_user_edit_rates.append(user_count)
    all_bots_edit_rates.append(bot_total)
    other_bots_edit_rates.append(slow_bot_total)
    accum_edit_rate.append(user_count + bot_total)

# Peak rates; max_edit_rate gives both scatter-plot axes the same upper limit.
max_user_edit_rate = max(all_user_edit_rates)
max_bot_edit_rate = max(all_bots_edit_rates)
max_edit_rate = max(max_user_edit_rate, max_bot_edit_rate)
    


#plot(minutes_after_first_bot_edit, accum_edit_rate, '-', label="All edits", lw=2, alpha = 0.7)

# Per-minute series for each fast bot, in sorted name order.
# NOTE: edit_rate_arr is rebuilt for every fast bot but never consumed,
# because the plot call that used it is commented out below.
for bot_str in sorted(bot_edit_rates.iterkeys()):
    if bot_str not in fast_bots:
        continue
    d = bot_edit_rates[bot_str]
    edit_rate_arr = []
    for utc_min in xrange(utc_min_minimum, utc_min_maximum + 1):
        edit_rate_arr.append(d.get(utc_min, 0))
    #plot(minutes_after_first_bot_edit, edit_rate_arr, ls="-", lw=2.5, label=repr(bot_str)[2:-1], alpha=0.7)

# Both titles quote the queried period, oldest bound first.
period = (rcend, rcstart)

# Figure 1: user and bot edit rates per minute against elapsed days.
figure(figsize=(12, 9))
for series, fmt, series_label in (
        (all_user_edit_rates, 'b.', "All user edits"),
        (all_bots_edit_rates, 'r.', "All bot edits")):
    plot(days_after_first_bot_edit, series, fmt, label=series_label, lw=1, alpha=0.5)
xlabel('Days after %s' % rcend)
ylabel('Edit rate [edits/min]')
title("Edit rates on Wikimedia Commons between %s and %s" % period)
day_limits = (0, days_after_first_bot_edit[-1])
xlim(day_limits)
legend()
grid()

# Figure 2: bot edit rate against user edit rate, one point per minute,
# drawn with identical limits on both axes.
figure(figsize=(11, 11))
scatter(all_user_edit_rates, all_bots_edit_rates)
xlabel('Accumulated user edit rate [edits/min]')
ylabel('Accumulated bot edit rate [edits/min]')
#axhline(default_max_bot_edit_rate_per_minute, ls='--', c="red", label="Default max bot edit rate")
#legend()
grid()
rate_limits = (0, max_edit_rate)
xlim(rate_limits)
ylim(rate_limits)
title("Correlation between user and bot edit rates\nbetween %s and %s" % period)

#plot(minutes_after_first_bot_edit, other_bots_edit_rates, label="Other bots (accumulated)", alpha=0.7)

# Show both figures.
show()

Licensing

[edit]
I, the copyright holder of this work, hereby publish it under the following license:
w:en:Creative Commons
attribution share alike
This file is licensed under the Creative Commons Attribution-Share Alike 3.0 Unported license.
You are free:
  • to share – to copy, distribute and transmit the work
  • to remix – to adapt the work
Under the following conditions:
  • attribution – You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
  • share alike – If you remix, transform, or build upon the material, you must distribute your contributions under the same or compatible license as the original.

File history

Click on a date/time to view the file as it appeared at that time.

Date/Time | Thumbnail | Dimensions | User | Comment
current | 16:49, 28 July 2012 | Thumbnail for version as of 16:49, 28 July 2012 | 2,400 × 1,230 (663 KB) | Slaunger (talk | contribs) |

Metadata