#!/usr/bin/python # -*- coding: utf-8 -*- # # Published by zhuyifei1999 (https://wikitech.wikimedia.org/wiki/User:Zhuyifei1999) # under the terms of Creative Commons Attribution-ShareAlike 3.0 Unported (CC BY-SA 3.0) # https://creativecommons.org/licenses/by-sa/3.0/ from __future__ import unicode_literals import calendar import collections import itertools import re import weakref import pywikibot pywikibot.handle_args() SITE = pywikibot.Site('en', 'wikipedia') linenos = weakref.WeakSet() class Lineno(object): def __init__(self, lineno): self.lineno = lineno linenos.add(self) def get(self): assert self.lineno is not None return self.lineno def rm_lineno(self, lineno): if self.lineno is None: return if lineno == self.lineno: self.lineno = None elif lineno < self.lineno: self.lineno -= 1 def __repr__(self): return 'Lineno(%d)' % self.lineno H2 = collections.namedtuple('H2', 'lineno year month') H3 = collections.namedtuple('H2', 'lineno pagename') reqpage = pywikibot.Page( SITE, 'Wikipedia:WikiProject_Guild_of_Copy_Editors/Requests') reqpage.origtext = reqpage.text def rm_lineno(lineno): for linenoobj in linenos: linenoobj.rm_lineno(lineno) text = reqpage.text.split('\n') text.pop(lineno) reqpage.text = '\n'.join(text) def rm_empty_lineno(lineno): text = reqpage.text.split('\n') if lineno < len(text) and not text[lineno]: rm_lineno(lineno) sections = collections.defaultdict( lambda: collections.defaultdict(lambda: lambda: None)) sections_flat = [] h3 = h2 = None for i, line in enumerate(reqpage.text.split('\n')): if not line: continue i = Lineno(i) reobj = re.match(r'^== *(?P%s) +(?P\d{4,}) *== *$' % ('|'.join( filter(None, set(calendar.month_name) | set(calendar.month_abbr)))), line) if reobj: month = reobj.group('month') try: month = list(calendar.month_name).index(month) except ValueError: month = list(calendar.month_abbr).index(month) year = int(reobj.group('year')) h2 = H2(i, year, month) h3 = None if not h2: continue reobj = re.match(r"^=== *(?:~~)?('*\[\[[^\[\]]+\]\]'*)(?:~~)? *=== *$", line) if reobj: h3 = H3(i, reobj.group(1)) obj = sections[h2][h3] sections_flat.append(obj) obj.h2 = h2 obj.h3 = h3 obj.begin = obj.end = i obj.acronym = set() obj.requester = None obj.copyeditors = [] obj.status = None continue if not h3: continue obj = sections[h2][h3] obj.end = i obj.acronym.update(re.findall(r'\b(?:ACN|ACR|BCN|BCR|CCN|DYK|FAC|FAR|FARC' r'|FLC|FLRC|GAN|GAR|PR|TFA|OTD)\b', line)) user = re.findall(r'\[\[\s*user(?:[ _]talk)?:[^\[\]]+\]\]', line, re.I) if user: user = pywikibot.link_regex.match(user[-1]) user = pywikibot.Link(user.group('title'), source=SITE) user = pywikibot.Page(user) if user.isTalkPage(): user = user.toggleTalkPage() else: user = None timestamp = re.findall( r'\d{2}:\d{2}, \d{1,2} [A-Z][a-z]+ \d{4,} $[A-Z]+$', line) if timestamp: timestamp = pywikibot.Timestamp.strptime( timestamp[-1], '%H:%M, %d %B %Y (%Z)') else: timestamp = None if timestamp: # Poor man's goto for _ in range(1): reobj = re.search(r'\{\{(declined|withdrawn)\}\}', line, re.I) if reobj: obj.status = reobj.group(1).capitalize(), timestamp break reobj = re.search(r'\{\{((?:partly[ _])?done)\}\}', line, re.I) if reobj and user: if user not in obj.copyeditors: obj.copyeditors.append(user) if reobj.group(1).lower() == 'done': obj.status = True, timestamp break if obj.status is None and obj.requester is None and user: obj.requester = user, timestamp archived = [] archives = {} for obj in sections_flat: if not obj.status: continue if not obj.requester: continue requester, request_time = obj.requester purpose, complete_time = obj.status if (pywikibot.Timestamp.utcnow() - complete_time).days < 1: continue def format_username(user): user = pywikibot.User(user) if not user.isAnonymous(): return user.title( with_ns=False, as_link=True, allow_interwiki=False) else: displayname = user.username if len(displayname) > 25: reobj = re.match(r'^((?:[0-9a-fA-F]{1,4}:){4})' r'((?:[0-9a-fA-F]{1,4}:){3}' r'[0-9a-fA-F]{1,4})$', displayname) if reobj: displayname = ' '.join(reobj.groups()) return '[[Special:Contributions/%s|%s]]' % ( user.username, displayname) if purpose is True: purpose = ' / '.join(sorted(obj.acronym)) editors = ', '.join(map(format_username, obj.copyeditors)) else: editors = 'n/a' tr = '''|- | %s | %s | %s | %s | %s | %s ''' % ( request_time.strftime('%Y-%m-%d'), obj.h3.pagename, format_username(requester), complete_time.strftime('%Y-%m-%d'), editors, purpose ) if request_time.year in archives: archive = archives[request_time.year][0] else: archive = pywikibot.Page( SITE, 'Wikipedia:WikiProject_Guild_of_Copy_Editors/' 'Requests/Archives/%d' % request_time.year) archive.origtext = archive.text archives[request_time.year] = [archive, []] partition = (request_time.month - 1) // 3 * 3 + 1 thispart_title = '==%s–%s==\n' % ( calendar.month_name[partition], calendar.month_name[partition + 2]) nextpart_title = None if partition + 3 > 12 else '==%s–%s==\n' % ( calendar.month_name[partition + 3], calendar.month_name[partition + 5]) try: startpos = archive.text.index(thispart_title) endpos = archive.text.index(nextpart_title) if nextpart_title else None except ValueError: continue def findnext(substr): if endpos: return archive.text.index(substr, startpos, endpos) else: return archive.text.index(substr, startpos) class Fail(Exception): pass try: for i in itertools.count(): try: startpos = min(findnext('\n|-\n'), findnext('\n|}\n')) + 1 except ValueError: raise Fail if archive.text[startpos:startpos + 2] == '|}': break if archive.text[startpos:startpos + 5] == '|-\n|}': break elif archive.text[startpos:startpos + 5] != '|-\n| ': print(repr(archive.text[startpos:startpos + 6])) raise Fail reobj = re.match( r'^\d{4,}-\d{2}-\d{2}', archive.text[startpos + 5:]) if not reobj: raise Fail row_time = pywikibot.Timestamp.strptime(reobj.group(0), '%Y-%m-%d') if request_time >= row_time: continue break except Fail: __import__('traceback').print_exc() continue archive.text = archive.text[:startpos] + tr + archive.text[startpos:] begin = obj.begin.get() for i in range(obj.end.get(), obj.begin.get() - 1, -1): rm_lineno(i) rm_empty_lineno(begin) del sections[obj.h2][obj.h3] if not sections[obj.h2]: lineno = obj.h2.lineno.get() rm_lineno(lineno) rm_empty_lineno(lineno) archived.append((obj.h3.pagename, purpose)) archives[request_time.year][1].append((obj.h3.pagename, purpose)) def _mk_summary(archived_lst): summ = [ '%s (%s)' % (pagename, purpose) if purpose else pagename for pagename, purpose in archived_lst] if len(summ) > 1: summ[-1] = 'and ' + summ[-1] # Special case: [[pagename]] (purpose) and [[pagename]] (purpose) if len(summ) == 2 and archived_lst[0][1]: return ' '.join(summ) return ', '.join(summ) def mk_summary(archived_lst): if len(archived_lst) == 1: return 'Bot: Archived request for ' + _mk_summary(archived_lst) return 'Bot: Archived requests for ' + _mk_summary(archived_lst) for archive, archived_inner in archives.values(): if archived_inner: archive.save(mk_summary(archived_inner)) if archived: reqpage.save(mk_summary(archived))