#!/usr/bin/python
# -*- coding: utf-8  -*-

"""
This bot goes over multiple pages of the home wiki, and if it is a user page with non-user templates,
will correct it.

"A user is not an article"

This script understands various command-line arguments:

	-start:		used as -start:page_name, specifies that the robot should
				   go alphabetically through all pages on the home wiki,
				   starting at the named page.

	-file:		 used as -file:file_name, read a list of pages to treat
				   from the named textfile. Page titles should be enclosed
				   in [[double-squared brackets]].

	-ref:		  used as -ref:page_name, specifies that the robot should
				   touch all pages referring to the named page.

	-links:		used as -links:page_name, specifies that the robot should
				   touch all pages referred to from the named page.

	-cat:		  used as -cat:category_name, specifies that the robot should
				   touch all pages in the named category.

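	-redir		 specifies that the robot should touch redirect pages;
				   otherwise, they will be skipped.

	-confirm	   specifies that the robot should ask for confirmation
				   before saving the edits made to a page.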

All other parameters will be regarded as a page title; in this case, the bot
will only touch a single page.
"""

# Revision identifier in "$Id: ... $" keyword form (presumably expanded by the
# version-control system on checkout -- confirm before editing by hand).
__version__='$Id: wrongtemplate.py,v 1.1 2008/08/12 11:57:06 wendel Exp $'

import wikipedia, pagegenerators, catlib
import sys
import re
import codecs

# Bot action messages keyed by language code; run() hands this table to
# wikipedia.translate() together with the current site and passes the result
# to wikipedia.setAction().
msg = {
	'en': u'Wrong Template Bot - userpage is not an article',
	'pt': u'Bot de Predefs Erradas - userpages não são artigos',
}

class WrongTemplateBot:
	"""Strip blacklisted templates from the pages yielded by a generator.

	Every ``{{template}}`` call found in a page is normalized (lower-cased,
	spaces turned into underscores, namespace prefix dropped) and looked up
	in two persisted lists:

	* whitelist -- templates that may stay on the page;
	* blacklist -- templates that are removed from the page.

	Templates found in neither list are classified interactively by the
	operator and remembered in the matching list.
	"""

	# Persisted template lists, relative to the current working directory.
	WHITELIST_PATH = 'wrongtemplate/whitelist.txt'
	BLACKLIST_PATH = 'wrongtemplate/blacklist.txt'

	# Matches {{name}} or {{name|args}}. '.' does not match a newline and the
	# groups are lazy, so only single-line, non-nested calls are matched.
	TEMPLATE_RE = re.compile(r'\{\{\s*(.*?)\s*(\|.*?)?\}\}')

	# Normalized names starting with one of these are never classified.
	SKIPPED_PREFIXES = (
		'ub/', 'userbox/', 'babel/', 'user:', u'usuário:', u'#',
		'displaytitle:',
	)

	def __init__(self, generator, touch_redirects, confirm_edits):
		"""Store the settings and load both template lists from disk.

		generator       -- iterable of page objects to process.
		touch_redirects -- passed to page.get() as get_redirect.
		confirm_edits   -- when true, ask the operator before saving a page.

		Raises IOError if either list file cannot be opened.
		"""
		self.generator = generator
		self.touch_redirects = touch_redirects
		self.confirm_edits = confirm_edits
		self.blacklist = []
		self.whitelist = []
		print('Whitelist:')
		self.whitelist = self._load_list(self.WHITELIST_PATH)
		print(self.whitelist)
		print('Blacklist:')
		self.blacklist = self._load_list(self.BLACKLIST_PATH)
		print(self.blacklist)

	def _load_list(self, path):
		"""Return the normalized template names stored one per line in path."""
		names = []
		listfile = codecs.open(path, 'rb', 'utf-8')
		try:
			for line in listfile:
				name = line.rstrip().lower().replace(" ", "_")
				# A blank line would otherwise become an empty-name entry.
				if name:
					names.append(name)
		finally:
			# Release the handle even if decoding a line fails.
			listfile.close()
		return names

	def _write_list(self, path, names):
		"""Overwrite path with names, one per line, encoded as UTF-8."""
		listfile = codecs.open(path, 'wb', 'utf-8')
		try:
			for name in names:
				listfile.write("%s\n" % name)
		finally:
			listfile.close()

	def confirm(self):
		"""Return True if the pending edits should be saved.

		Without confirm_edits this is always True. Otherwise the operator is
		asked; any answer starting with 'n' or 'N' declines, everything else
		(including an empty answer) accepts.
		"""
		if not self.confirm_edits:
			return True
		answer = wikipedia.input('Perform edits ? [Y/n]')
		return not answer.strip().lower().startswith('n')

	def want_whitelist(self, template):
		"""Ask whether an unknown template goes on the whitelist.

		The template argument is not used in the prompt; run() prints the
		template just before asking. Any answer starting with 'b' or 'B'
		selects the blacklist (returns False); everything else returns True.
		"""
		answer = wikipedia.input('Whitelist or Blacklist ? [W/b]')
		return not answer.strip().lower().startswith('b')

	def save_lists(self):
		"""Write the blacklist, then the whitelist, back to their files."""
		self._write_list(self.BLACKLIST_PATH, self.blacklist)
		self._write_list(self.WHITELIST_PATH, self.whitelist)

	def _classify_templates(self, text):
		"""Return the template calls in text that have to be removed.

		Unknown templates are classified by the operator and appended to
		self.whitelist or self.blacklist as a side effect.
		"""
		to_delete = []
		for m in self.TEMPLATE_RE.finditer(text):
			full_template = m.group()
			template = m.group(1).lower().replace(" ", "_")
			template_args = m.group(2)
			template = template.replace(u"predefinição:", "").replace(u"template:", "")
			print("Found template %s , key = {{%s}}, args = (%s)" % (full_template, template, template_args))
			if template.startswith(self.SKIPPED_PREFIXES):
				continue

			if template in self.blacklist:
				to_delete.append(full_template)
			elif template not in self.whitelist:
				if self.want_whitelist(template):
					self.whitelist.append(template)
				else:
					self.blacklist.append(template)
					to_delete.append(full_template)
		return to_delete

	def run(self):
		"""Process every page of the generator, removing blacklisted templates.

		Pages whose title contains '/' are skipped as subpages. Missing,
		redirect and locked pages are reported and skipped.
		"""
		mysite = wikipedia.getSite()
		wikipedia.setAction(wikipedia.translate(mysite, msg))
		for page in self.generator:
			try:
				print("Reading article: %s" % page.title())
				if "/" in page.title():
					print("Skipping - this is a subpage")
					continue

				text = page.get(get_redirect=self.touch_redirects)
				to_delete = self._classify_templates(text)
				# Persist the operator's decisions before trying to save the
				# page, so that a failed save does not throw them away.
				self.save_lists()
				if to_delete:
					print("Will delete these templates from user page : ")
					print(to_delete)
					if self.confirm():
						for template in to_delete:
							text = text.replace(template, "")
						page.put(text)
			except wikipedia.NoPage:
				print("Page %s does not exist?!" % page.aslink())
			except wikipedia.IsRedirectPage:
				print("Page %s is a redirect; skipping." % page.aslink())
			except wikipedia.LockedPage:
				print("Page %s is locked?!" % page.aslink())


def main():
	"""Build a page generator from the command line and run the bot on it.

	Recognized arguments: -start:, -ref:, -links:, -file:, -cat:, -redir and
	-confirm. Every other argument is collected as one word of a single page
	title, which then takes precedence over any generator option. When no
	pages were selected at all, the help for 'wrongtemplate' is shown instead.
	"""
	def page_named(title):
		# The site is looked up only when a page is actually needed.
		return wikipedia.Page(wikipedia.getSite(), title)

	generator = None
	touch_redirects = False
	confirm_edits = False
	title_words = []
	for arg in wikipedia.handleArgs():
		if arg.startswith('-start:'):
			first = page_named(arg[len('-start:'):])
			generator = pagegenerators.AllpagesPageGenerator(
				first.titleWithoutNamespace(), namespace=first.namespace())
		elif arg.startswith('-ref:'):
			generator = pagegenerators.ReferringPageGenerator(
				page_named(arg[len('-ref:'):]))
		elif arg.startswith('-links:'):
			generator = pagegenerators.LinkedPageGenerator(
				page_named(arg[len('-links:'):]))
		elif arg.startswith('-file:'):
			generator = pagegenerators.TextfilePageGenerator(arg[len('-file:'):])
		elif arg.startswith('-cat:'):
			category = catlib.Category(wikipedia.getSite(), arg[len('-cat:'):])
			generator = pagegenerators.CategorizedPageGenerator(category)
		elif arg.startswith('-redir'):
			touch_redirects = True
		elif arg.startswith('-confirm'):
			confirm_edits = True
		else:
			title_words.append(arg)

	if title_words:
		# Loose words form one title and override any generator option.
		generator = iter([page_named(' '.join(title_words))])

	if not generator:
		wikipedia.showHelp('wrongtemplate')
		return

	preloading = pagegenerators.PreloadingGenerator(generator)
	WrongTemplateBot(preloading, touch_redirects, confirm_edits).run()

if __name__ == "__main__":
	# Run the bot only when executed as a script. wikipedia.stopme() is called
	# on the way out whether main() returns normally or raises.
	try:
		main()
	finally:
		wikipedia.stopme()
