X-Git-Url: https://git.toastfreeware.priv.at/toast/cookiecaptcha.git/blobdiff_plain/cf1c61a3bd7805fe0fa74acb7e11cea07ca35686..6207682c65e21b25ca3ca186c8944a2340da2cae:/captcha.py diff --git a/captcha.py b/captcha.py index e4dbc17..effdd36 100644 --- a/captcha.py +++ b/captcha.py @@ -21,6 +21,9 @@ # # Further tweaks by Brion Vibber : # 2006-01-26: Add command-line options for the various parameters +# 2007-02-19: Add --dirs param for hash subdirectory splits +# Tweaks by Greg Sabino Mullane : +# 2008-01-06: Add regex check to skip words containing other than a-z import random import Image @@ -32,6 +35,7 @@ import math, string, md5 import getopt import os import sys +import re # Does X-axis wobbly copy, sandwiched between two rotates def wobbly_copy(src, wob, col, scale, ang): @@ -98,6 +102,49 @@ def gen_captcha(text, fontname, fontsize, file_name): # save the image, in format determined from filename im.save(file_name) +def gen_subdir(basedir, hash, levels): + """Generate a subdirectory path out of the first _levels_ + characters of _hash_, and ensure the directories exist + under _basedir_.""" + subdir = None + for i in range(0, levels): + char = hash[i] + if subdir: + subdir = os.path.join(subdir, char) + else: + subdir = char + fulldir = os.path.join(basedir, subdir) + if not os.path.exists(fulldir): + os.mkdir(fulldir) + return subdir + +def try_pick_word(words, blacklist, verbose): + word1 = words[random.randint(0,len(words)-1)] + word2 = words[random.randint(0,len(words)-1)] + word = word1+word2 + if verbose: + print "word is %s" % word + r = re.compile('[^a-z]'); + if r.search(word): + print "skipping word pair '%s' because it contains non-alphabetic characters" % word + return None + + for naughty in blacklist: + if naughty in word: + if verbose: + print "skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty) + return None + return word + +def pick_word(words, blacklist, verbose): + while True: + word = try_pick_word(words, blacklist, verbose) + if word: + return word + +def read_wordlist(filename): + return [string.lower(x.strip()) for x in open(wordlist).readlines()] + if __name__ == '__main__': """This grabs random words from the dictionary 'words' (one word per line) and generates a captcha image for each one, @@ -108,18 +155,22 @@ if __name__ == '__main__': """ font = "VeraBd.ttf" wordlist = "awordlist.txt" + blacklistfile = None key = "CHANGE_THIS_SECRET!" output = "." count = 20 fill = 0 + dirs = 0 verbose = False - opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "key=", "output=", "count=", "fill=", "verbose"]) + opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "blacklist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"]) for o, a in opts: if o == "--font": font = a if o == "--wordlist": wordlist = a + if o == "--blacklist": + blacklistfile = a if o == "--key": key = a if o == "--output": @@ -128,6 +179,8 @@ if __name__ == '__main__': count = int(a) if o == "--fill": fill = int(a) + if o == "--dirs": + dirs = int(a) if o == "--verbose": verbose = True @@ -136,19 +189,26 @@ if __name__ == '__main__': # files after... count = max(0, fill - len(os.listdir(output))) - words = [string.lower(x.strip()) for x in open(wordlist).readlines()] + words = read_wordlist(wordlist) words = [x for x in words if len(x) <= 5 and len(x) >= 4 and x[0] != "f" and x[0] != x[1] and x[-1] != x[-2] and (not "'" in x)] + + if blacklistfile: + blacklist = read_wordlist(blacklistfile) + else: + blacklist = [] + for i in range(count): - word1 = words[random.randint(0,len(words)-1)] - word2 = words[random.randint(0,len(words)-1)] - word = word1+word2 + word = pick_word(words, blacklist, verbose) salt = "%08x" % random.randrange(2**32) # 64 bits of hash is plenty for this purpose hash = md5.new(key+salt+word+key+salt).hexdigest()[:16] filename = "image_%s_%s.png" % (salt, hash) + if dirs: + subdir = gen_subdir(output, hash, dirs) + filename = os.path.join(subdir, filename) if verbose: print filename gen_captcha(word, font, 40, os.path.join(output, filename))