#!/usr/bin/python
# -*- coding: utf-8  -*-

"""
This bot goes over multiple pages of the home wiki, and if a page has a
lot of text but no images, inserts a warning about that.

"All words and no image makes uncyclopedia a dull site"

This script understands various command-line arguments:

    -start:        used as -start:page_name, specifies that the robot should
                   go alphabetically through all pages on the home wiki,
                   starting at the named page.

    -file:         used as -file:file_name, read a list of pages to treat
                   from the named textfile. Page titles should be enclosed
                   in [[double-squared brackets]].

    -ref:          used as -ref:page_name, specifies that the robot should
                   touch all pages referring to the named page.

    -links:        used as -links:page_name, specifies that the robot should
                   touch all pages referred to from the named page.

    -cat:          used as -cat:category_name, specifies that the robot should
                   touch all pages in the named category.

    -redir         specifies that the robot should touch redirect pages;
                   otherwise, they will be skipped.

All other parameters will be regarded as a page title; in this case, the bot
will only touch a single page.
"""

__version__='$Id: imageless.py,v 1.13 2007/01/24 01:02:06 wendel Exp $'

import wikipedia, pagegenerators, catlib
import sys
import re


# Edit summaries keyed by language code. ImageBot.run passes this table to
# wikipedia.translate() and hands the result to wikipedia.setAction().
msg = {
    'en': u'Imageless article bot',
    'pt': u'Bot de artigos sem imagens',
}

class ImageBot:
    """Keep the {{imagens}} warning in sync with whether a page has images.

    For every page yielded by the generator that has at least ``min_words``
    whitespace-separated words:

    * no images and no marker  -> ``{{imagens}}`` is prepended;
    * no images, marker present -> left alone (the page is already tagged);
    * images and marker present -> every ``{{imagens}}`` marker is removed;
    * images and no marker      -> left alone.

    Shorter pages are never edited.
    """

    # HACK: these patterns only work for pt: aka desciclopedia (call me lazy).
    # An image is an [[Image:...]]/[[Imagem:...]] link or a <gallery> tag.
    # The gallery tag may carry attributes (<gallery caption="...">), and the
    # whole match is case-insensitive so [[imagem:...]] also counts.
    IMAGE_RE = re.compile(r"\[\[Imagem?:|<gallery[\s>]", re.IGNORECASE)
    # Detects the warning template anywhere in the page.
    MARKER_RE = re.compile(r"\{\{imagens\}\}")
    # Same template plus the newline that follows it, used when removing it.
    MARKER_LINE_RE = re.compile(r"\{\{imagens\}\}\n?")
    # Text prepended to an untagged, imageless page.
    MARKER = "{{imagens}}\n"

    def __init__(self, generator, touch_redirects, min_words=500):
        """Store the bot configuration.

        generator       -- iterable of page objects to examine.
        touch_redirects -- passed to page.get() as get_redirect.
        min_words       -- minimum number of whitespace-separated words a
                           page needs before it is considered at all.
        """
        self.generator = generator
        self.touch_redirects = touch_redirects
        self.min_words = min_words

    def revise_text(self, text):
        """Return the corrected wikitext, or None when no edit is needed.

        Prints the same progress messages the bot has always printed.
        """
        if len(text.split()) < self.min_words:
            return None

        marked = self.MARKER_RE.search(text) is not None

        if self.IMAGE_RE.search(text):
            print("Article has images")
            if not marked:
                return None
            print("Article was marked as having no images")
            return self.MARKER_LINE_RE.sub("", text)

        print("Article has no images")
        if marked:
            # Already tagged: prepending again would stack one more
            # {{imagens}} on the page every time the bot runs.
            return None
        return self.MARKER + text

    def run(self):
        """Examine every page from the generator and save the needed edits.

        NoPage, IsRedirectPage and LockedPage are reported on stdout and the
        bot moves on to the next page; any other exception propagates.
        """
        mysite = wikipedia.getSite()
        wikipedia.setAction(wikipedia.translate(mysite, msg))
        for page in self.generator:
            try:
                text = page.get(get_redirect=self.touch_redirects)
                new_text = self.revise_text(text)
                if new_text is not None:
                    page.put(new_text)
            except wikipedia.NoPage:
                print("Page %s does not exist?!" % page.aslink())
            except wikipedia.IsRedirectPage:
                print("Page %s is a redirect; skipping." % page.aslink())
            except wikipedia.LockedPage:
                print("Page %s is locked?!" % page.aslink())

def main():
    """Parse the command line, build a page generator and run ImageBot.

    Recognised options are -start:, -ref:, -links:, -file:, -cat: and
    -redir (see the module docstring).  Every other argument left over by
    wikipedia.handleArgs() is collected as part of a page title; when a
    title is given it replaces any generator selected by an option.  With
    no page source at all, this script's help text is shown instead.
    """
    # Page generator chosen by the options; None until one is selected.
    gen = None
    redirs = False
    pageTitle = []
    for arg in wikipedia.handleArgs():
        if arg.startswith('-start:'):
            page = wikipedia.Page(wikipedia.getSite(), arg[7:])
            gen = pagegenerators.AllpagesPageGenerator(
                page.titleWithoutNamespace(), namespace=page.namespace())
        elif arg.startswith('-ref:'):
            referredPage = wikipedia.Page(wikipedia.getSite(), arg[5:])
            gen = pagegenerators.ReferringPageGenerator(referredPage)
        elif arg.startswith('-links:'):
            linkingPage = wikipedia.Page(wikipedia.getSite(), arg[7:])
            gen = pagegenerators.LinkedPageGenerator(linkingPage)
        elif arg.startswith('-file:'):
            gen = pagegenerators.TextfilePageGenerator(arg[6:])
        elif arg.startswith('-cat:'):
            cat = catlib.Category(wikipedia.getSite(), arg[5:])
            gen = pagegenerators.CategorizedPageGenerator(cat)
        elif arg == '-redir':
            redirs = True
        else:
            # Title words arrive as separate arguments; rejoined below.
            pageTitle.append(arg)

    if pageTitle:
        page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle))
        gen = iter([page])

    if gen is None:
        # Show the help of this script (imageless.py), not touch.py's.
        wikipedia.showHelp('imageless')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ImageBot(preloadingGen, redirs)
    bot.run()

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returns normally or raises.
        # NOTE(review): presumably tells the framework this bot has finished
        # (e.g. for its throttle bookkeeping) -- confirm against wikipedia.py.
        wikipedia.stopme()
