Localisation updates for extension messages from Betawiki (2008-01-10 18:51 CET)
[toast/cookiecaptcha.git] / captcha.py
index e4dbc17fe3d815d586fa13db56fc01e2355ab4fd..effdd3681737dd5e18e56147ecd9b7343ee21ced 100644 (file)
@@ -21,6 +21,9 @@
 #
 # Further tweaks by Brion Vibber <brion@pobox.com>:
 # 2006-01-26: Add command-line options for the various parameters
+# 2007-02-19: Add --dirs param for hash subdirectory splits
+# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
+# 2008-01-06: Add regex check to skip words containing characters other than a-z
 
 import random
 import Image
@@ -32,6 +35,7 @@ import math, string, md5
 import getopt
 import os
 import sys
+import re
 
 # Does X-axis wobbly copy, sandwiched between two rotates
 def wobbly_copy(src, wob, col, scale, ang):
@@ -98,6 +102,49 @@ def gen_captcha(text, fontname, fontsize, file_name):
        # save the image, in format determined from filename
        im.save(file_name)
 
+def gen_subdir(basedir, hash, levels):
+       """Return a relative path built from the first _levels_ characters
+       of _hash_, one directory per character, creating each level under
+       _basedir_ if it is missing. Returns None if _levels_ is below 1."""
+       relpath = None
+       for depth in range(levels):
+               piece = hash[depth]
+               if not relpath:
+                       relpath = piece
+               else:
+                       relpath = os.path.join(relpath, piece)
+               target = os.path.join(basedir, relpath)
+               if not os.path.exists(target):
+                       os.mkdir(target)
+       return relpath
+
+def try_pick_word(words, blacklist, verbose):
+       """Join two random entries of _words_ into one candidate. Returns it,
+       or None if it has characters other than a-z or contains an entry
+       of _blacklist_; skip reasons are printed only when _verbose_."""
+       word = random.choice(words) + random.choice(words)
+       if verbose:
+               print "word is %s" % word
+       if re.search('[^a-z]', word):
+               if verbose:
+                       print "skipping word pair '%s' because it contains non-alphabetic characters" % word
+               return None
+       for naughty in blacklist:
+               if naughty and naughty in word:  # '' is in every string; ignore blank entries
+                       if verbose:
+                               print "skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty)
+                       return None
+       return word
+
+def pick_word(words, blacklist, verbose):
+       candidate = None  # loop until try_pick_word() yields a word
+       while not candidate:
+               candidate = try_pick_word(words, blacklist, verbose)
+       return candidate
+
+def read_wordlist(filename):  # one lower-cased, stripped entry per line of _filename_
+       return [string.lower(x.strip()) for x in open(filename).readlines()]
+
 if __name__ == '__main__':
        """This grabs random words from the dictionary 'words' (one
        word per line) and generates a captcha image for each one,
@@ -108,18 +155,22 @@ if __name__ == '__main__':
        """
        font = "VeraBd.ttf"
        wordlist = "awordlist.txt"
+       blacklistfile = None
        key = "CHANGE_THIS_SECRET!"
        output = "."
        count = 20
        fill = 0
+       dirs = 0
        verbose = False
        
-       opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "key=", "output=", "count=", "fill=", "verbose"])
+       opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "blacklist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"])
        for o, a in opts:
                if o == "--font":
                        font = a
                if o == "--wordlist":
                        wordlist = a
+               if o == "--blacklist":
+                       blacklistfile = a
                if o == "--key":
                        key = a
                if o == "--output":
@@ -128,6 +179,8 @@ if __name__ == '__main__':
                        count = int(a)
                if o == "--fill":
                        fill = int(a)
+               if o == "--dirs":
+                       dirs = int(a)
                if o == "--verbose":
                        verbose = True
        
@@ -136,19 +189,26 @@ if __name__ == '__main__':
                # files after...
                count = max(0, fill - len(os.listdir(output)))
        
-       words = [string.lower(x.strip()) for x in open(wordlist).readlines()]
+       words = read_wordlist(wordlist)
        words = [x for x in words
                if len(x) <= 5 and len(x) >= 4 and x[0] != "f"
                and x[0] != x[1] and x[-1] != x[-2]
                and (not "'" in x)]
+       
+       if blacklistfile:
+               blacklist = read_wordlist(blacklistfile)
+       else:
+               blacklist = []
+       
        for i in range(count):
-               word1 = words[random.randint(0,len(words)-1)]
-               word2 = words[random.randint(0,len(words)-1)]
-               word = word1+word2
+               word = pick_word(words, blacklist, verbose)
                salt = "%08x" % random.randrange(2**32)
                # 64 bits of hash is plenty for this purpose
                hash = md5.new(key+salt+word+key+salt).hexdigest()[:16]
                filename = "image_%s_%s.png" % (salt, hash)
+               if dirs:
+                       subdir = gen_subdir(output, hash, dirs)
+                       filename = os.path.join(subdir, filename)
                if verbose:
                        print filename
                gen_captcha(word, font, 40, os.path.join(output, filename))