#
# Further tweaks by Brion Vibber <brion@pobox.com>:
# 2006-01-26: Add command-line options for the various parameters
+# 2007-02-19: Add --dirs param for hash subdirectory splits
+# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
+# 2008-01-06: Add regex check to skip words containing other than a-z
import random
import Image
import getopt
import os
import sys
+import re
# Does X-axis wobbly copy, sandwiched between two rotates
def wobbly_copy(src, wob, col, scale, ang):
# save the image, in format determined from filename
im.save(file_name)
def gen_subdir(basedir, hash, levels):
    """Generate a subdirectory path out of the first _levels_
    characters of _hash_, and ensure the directories exist
    under _basedir_.

    basedir -- existing directory under which the tree is created
    hash    -- string whose leading characters name the nested
               directories (the name shadows the builtin, but is kept
               because it is part of the call signature)
    levels  -- number of nesting levels, one hash character per level

    Returns the path relative to _basedir_ (e.g. "a/b" for levels=2),
    or None when _levels_ is 0.

    Raises IndexError if _hash_ has fewer than _levels_ characters, and
    OSError if a directory cannot be created (including when a
    non-directory already occupies the path).
    """
    subdir = None
    for i in range(levels):
        char = hash[i]
        if subdir:
            subdir = os.path.join(subdir, char)
        else:
            subdir = char
        fulldir = os.path.join(basedir, subdir)
        # Attempt the mkdir and inspect the outcome instead of testing
        # for existence first: the check-then-create form can fail when
        # another process creates the directory in between, and it
        # silently accepted a plain file sitting at this path.
        try:
            os.mkdir(fulldir)
        except OSError:
            if not os.path.isdir(fulldir):
                raise
    return subdir
+
def try_pick_word(words, blacklist, verbose):
    """Make one attempt at building a captcha word from two random words.

    words     -- non-empty sequence of candidate words
    blacklist -- iterable of substrings that must not appear in the result
    verbose   -- when true, print the candidate and any blacklist rejection

    Returns the concatenation of two randomly chosen entries of _words_,
    or None if the pair contains anything other than a-z or contains a
    blacklisted substring.
    """
    word = random.choice(words) + random.choice(words)
    if verbose:
        print("word is %s" % word)

    # Only lowercase ASCII letters are allowed in the final word.
    if re.search(r'[^a-z]', word):
        # This message is emitted regardless of _verbose_, matching the
        # long-standing behaviour of the script.
        print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
        return None

    for naughty in blacklist:
        # An empty entry (e.g. from a blank line in the blacklist file)
        # is a substring of every word; honouring it would reject every
        # pair and make the caller's retry loop spin forever.
        if naughty and naughty in word:
            if verbose:
                print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
            return None
    return word
+
def pick_word(words, blacklist, verbose):
    """Return an acceptable captcha word, retrying until one is found.

    Repeatedly calls try_pick_word() with the same arguments and returns
    the first truthy result.

    NOTE: there is no retry limit, so this does not return if no pair of
    _words_ can pass the checks in try_pick_word().
    """
    while True:
        word = try_pick_word(words, blacklist, verbose)
        if word:
            return word
+
def read_wordlist(filename):
    """Read _filename_ (one word per line) into a list of words.

    Each line is stripped of surrounding whitespace and lowercased;
    blank lines are dropped so that no empty entries are returned.

    Raises IOError/OSError if the file cannot be opened.
    """
    # Open the file named by the argument: the previous code opened the
    # global 'wordlist' instead, so a blacklist file was never read.
    handle = open(filename)
    # try/finally rather than 'with' so the handle is always closed
    # without requiring a newer interpreter than the rest of the script.
    try:
        stripped = [line.strip() for line in handle]
    finally:
        handle.close()
    return [entry.lower() for entry in stripped if entry]
+
# Script entry point: parse command-line options, load and filter the word
# list, then generate the captcha images.
# NOTE(review): this section reads as a flattened diff hunk ('+' added lines,
# '-' removed lines, indentation lost) — confirm against the real file.
if __name__ == '__main__':
"""This grabs random words from the dictionary 'words' (one
word per line) and generates a captcha image for each one,
"""
# Defaults; each can be overridden by the matching --option below.
font = "VeraBd.ttf"
wordlist = "awordlist.txt"
+ blacklistfile = None
key = "CHANGE_THIS_SECRET!"
output = "."
count = 20
fill = 0
+ dirs = 0
verbose = False
- opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "key=", "output=", "count=", "fill=", "verbose"])
+ opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "blacklist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"])
for o, a in opts:
if o == "--font":
font = a
if o == "--wordlist":
wordlist = a
+ if o == "--blacklist":
+ blacklistfile = a
if o == "--key":
key = a
# NOTE(review): as shown, --output assigns count rather than output, and
# "--count" is accepted by getopt but has no visible branch; lines look
# elided between the next two — confirm against the full file.
if o == "--output":
count = int(a)
if o == "--fill":
fill = int(a)
+ if o == "--dirs":
+ dirs = int(a)
if o == "--verbose":
verbose = True
# files after...
# Sets count to how many more entries are needed for the output directory
# listing to reach 'fill', never below zero.
# NOTE(review): presumably guarded by a check on fill that is not visible here.
count = max(0, fill - len(os.listdir(output)))
- words = [string.lower(x.strip()) for x in open(wordlist).readlines()]
+ words = read_wordlist(wordlist)
# Keep only words of 4 or 5 characters that do not start with "f", whose
# first two characters differ, whose last two characters differ, and that
# contain no apostrophe.
words = [x for x in words
if len(x) <= 5 and len(x) >= 4 and x[0] != "f"
and x[0] != x[1] and x[-1] != x[-2]
and (not "'" in x)]
+
+ if blacklistfile:
+ blacklist = read_wordlist(blacklistfile)
+ else:
+ blacklist = []
+
for i in range(count):
- word1 = words[random.randint(0,len(words)-1)]
- word2 = words[random.randint(0,len(words)-1)]
- word = word1+word2
+ word = pick_word(words, blacklist, verbose)
# Random value below 2**32, formatted as 8 zero-padded hex digits.
salt = "%08x" % random.randrange(2**32)
# 64 bits of hash is plenty for this purpose
# NOTE(review): md5 is not imported in the visible lines — confirm it is
# imported elsewhere in the file.
hash = md5.new(key+salt+word+key+salt).hexdigest()[:16]
filename = "image_%s_%s.png" % (salt, hash)
# With --dirs N, place the image in a subdirectory named after the first
# N characters of the hash (created under the output directory).
+ if dirs:
+ subdir = gen_subdir(output, hash, dirs)
+ filename = os.path.join(subdir, filename)
if verbose:
print filename
gen_captcha(word, font, 40, os.path.join(output, filename))