#!/usr/bin/python
# -*- coding: utf-8  -*-

"""
This bot goes over multiple pages of the home wiki, removing repetitions
repetitions repetitions.

"SPAM SPAM SPAM SPAM SPAM SPAM SPAM SPAM SPAM"
 - Monty Python about SPAM SPAM SPAM SPAM SPAM SPAM SPAM

This script understands various command-line arguments:

    -start:        used as -start:page_name, specifies that the robot should
                   go alphabetically through all pages on the home wiki,
                   starting at the named page.

    -file:         used as -file:file_name, read a list of pages to treat
                   from the named textfile. Page titles should be enclosed
                   in [[double-squared brackets]].

    -ref:          used as -ref:page_name, specifies that the robot should
                   touch all pages referring to the named page.

    -links:        used as -links:page_name, specifies that the robot should
                   touch all pages referred to from the named page.

    -cat:          used as -cat:category_name, specifies that the robot should
                   touch all pages in the named category.

    -redir         specifies that the robot should touch redirect pages;
                   otherwise, they will be skipped.

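    -confirm       ask before editing any page

    -words:        used as -words:number; the value is parsed and stored as
                   the bot's minimum word count, but currently has no effect.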

All other parameters will be regarded as a page title; in this case, the bot
will only touch a single page.
"""

# Revision identifier in version-control `$Id$` keyword format.
__version__='$Id: monty.py,v 1.1 2007/02/05 07:13:06 wendel Exp $'

import wikipedia, pagegenerators, catlib
import sys
import re


# Bot action messages keyed by language code. MontyBot.run() passes this dict
# to wikipedia.translate() and hands the result to wikipedia.setAction()
# (presumably it ends up as the edit summary -- confirm against the framework).
msg={
    'en':u'Repeated text Bot',
    'pt':u'Bot removedor de repetições (SPAM SPAM SPAM)',
}

class MontyBot:
    """Bot that collapses a text fragment repeated three or more times in a row.

    For every page yielded by the generator the bot fetches the wikitext,
    replaces each run of repetitions by a single copy, shows the diff and
    (optionally after asking the operator) saves the page.
    """

    # A repeated fragment: it starts and ends with a non-whitespace character
    # with 4 to 40 arbitrary characters (newlines included) in between, matched
    # lazily, and is followed by at least two more whitespace-separated copies
    # of itself. Compiled once here instead of once per page.
    REPETITION_RE = re.compile(r"([^\s](.|\r|\n){4,40}?[^\s])(\s+\1){2,}")

    def __init__(self, generator, touch_redirects, confirm_edits, minimum_words):
        """Store the bot configuration.

        generator       -- iterable yielding the page objects to process.
        touch_redirects -- passed as ``get_redirect`` when fetching page text.
        confirm_edits   -- if true, ask the operator before every save.
        minimum_words   -- stored only; nothing in this class reads it yet.
        """
        self.generator = generator
        self.touch_redirects = touch_redirects
        self.confirm_edits = confirm_edits
        self.minimum_words = minimum_words

    def confirm(self):
        """Return True if the pending edit should be saved.

        Without ``confirm_edits`` every edit is approved. Otherwise the
        operator is prompted; only an answer of "n" or "no" (any case,
        surrounding whitespace ignored) rejects the edit.
        """
        if not self.confirm_edits:
            return True
        answer = wikipedia.input(u'Perform edits ? [Y/n]')
        return answer.strip().lower() not in (u'n', u'no')

    def _despam(self, text):
        """Return ``text`` with every run of repetitions reduced to one copy."""
        return self.REPETITION_RE.sub(r"\1", text)

    def run(self):
        """Process every page from the generator.

        Pages that do not exist, are redirects (when redirects are not being
        touched) or are locked are reported and skipped.
        """
        mysite = wikipedia.getSite()
        wikipedia.setAction(wikipedia.translate(mysite, msg))
        for page in self.generator:
            try:
                # wikipedia.output() is used instead of print so that
                # non-ASCII titles cannot raise UnicodeEncodeError.
                wikipedia.output(u"Reading article: %s" % page.title())
                original_text = page.get(get_redirect=self.touch_redirects)
                new_text = self._despam(original_text)
                # A substitution always shortens the text, so equal texts
                # mean that nothing matched.
                if new_text == original_text:
                    continue
                wikipedia.output(u"Article has SPAM SPAM SPAM - despamifying")
                # One colour entry per output character: 5 for u'\n>>> ',
                # then the title (colour code 13), then 4 for u' <<<'.
                colors = [None] * 5 + [13] * len(page.title()) + [None] * 4
                wikipedia.output(u'\n>>> %s <<<' % page.title(), colors=colors)
                wikipedia.showDiff(original_text, new_text)
                if self.confirm():
                    page.put(new_text)
            except wikipedia.NoPage:
                wikipedia.output(u"Page %s does not exist?!" % page.aslink())
            except wikipedia.IsRedirectPage:
                wikipedia.output(u"Page %s is a redirect; skipping."
                                 % page.aslink())
            except wikipedia.LockedPage:
                wikipedia.output(u"Page %s is locked?!" % page.aslink())

def main():
    """Parse the command line, build a page generator and run the bot.

    Recognised options are -start:, -ref:, -links:, -file:, -cat:, -redir,
    -confirm and -words: (options are matched by prefix). Every other
    argument is treated as part of a page title; the parts are joined with
    spaces and, if present, that single page replaces any generator selected
    by an option. Without any page source the help text is shown instead.

    A non-numeric -words: value raises ValueError.
    """
    gen = None
    redirs = False
    confirm = False
    minwords = 500
    pageTitle = []
    for arg in wikipedia.handleArgs():
        if arg.startswith('-start:'):
            page = wikipedia.Page(wikipedia.getSite(), arg[len('-start:'):])
            gen = pagegenerators.AllpagesPageGenerator(
                page.titleWithoutNamespace(), namespace=page.namespace())
        elif arg.startswith('-ref:'):
            referredPage = wikipedia.Page(wikipedia.getSite(),
                                          arg[len('-ref:'):])
            gen = pagegenerators.ReferringPageGenerator(referredPage)
        elif arg.startswith('-links:'):
            linkingPage = wikipedia.Page(wikipedia.getSite(),
                                         arg[len('-links:'):])
            gen = pagegenerators.LinkedPageGenerator(linkingPage)
        elif arg.startswith('-file:'):
            gen = pagegenerators.TextfilePageGenerator(arg[len('-file:'):])
        elif arg.startswith('-cat:'):
            cat = catlib.Category(wikipedia.getSite(), arg[len('-cat:'):])
            gen = pagegenerators.CategorizedPageGenerator(cat)
        elif arg.startswith('-redir'):
            redirs = True
        elif arg.startswith('-confirm'):
            confirm = True
        elif arg.startswith('-words:'):
            minwords = int(arg[len('-words:'):])
        else:
            pageTitle.append(arg)

    if pageTitle:
        # An explicit title wins over any generator option given earlier.
        page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    # Compare against None explicitly: a generator object is always truthy,
    # so "no page source selected" is exactly "gen was never assigned".
    if gen is None:
        wikipedia.showHelp('monty')
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = MontyBot(preloadingGen, redirs, confirm, minwords)
        bot.run()

# Script entry point: run the bot only when executed directly, not on import.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether or not main() raised. Presumably this lets the
        # framework release its resources -- confirm against wikipedia.stopme.
        wikipedia.stopme()
