Provide parameters for setting the min/max length of the captcha "word" (challenge).
author Platonides <platonides@gmail.com>
Fri, 27 Jul 2012 23:47:21 +0000 (01:47 +0200)
committer Gerrit Code Review <gerrit@wikimedia.org>
Thu, 1 Nov 2012 15:48:18 +0000 (15:48 +0000)
Change-Id: Ic2968cec884534dfa8ae7479589a1622a4db7de0

captcha.py

index f1a050113461b677cf0f7c2fe38bac3f6eab3f1e..47456a65b016b5138bb3ec878c416885eed358b2 100644 (file)
@@ -124,12 +124,23 @@ def gen_subdir(basedir, md5hash, levels):
                        os.mkdir(fulldir)
        return subdir
 
-def try_pick_word(words, blacklist, verbose):
+def try_pick_word(words, blacklist, verbose, min_length, max_length):
        word1 = words[random.randint(0,len(words)-1)]
        word2 = words[random.randint(0,len(words)-1)]
        word = word1+word2
        if verbose:
                print "word is %s" % word
+
+       if len(word) < min_length:
+               if verbose:
+                       print "skipping word pair '%s' because it has fewer than %d characters" % (word, min_length)
+               return None
+
+       if max_length > 0 and len(word) > max_length:
+               if verbose:
+                       print "skipping word pair '%s' because it has more than %d characters" % (word, max_length)
+               return None
+
        if nonalpha.search(word):
                if verbose:
                        print "skipping word pair '%s' because it contains non-alphabetic characters" % word
@@ -142,9 +153,9 @@ def try_pick_word(words, blacklist, verbose):
                        return None
        return word
 
-def pick_word(words, blacklist, verbose):
+def pick_word(words, blacklist, verbose, min_length, max_length):
        for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
-               word = try_pick_word(words, blacklist, verbose)
+               word = try_pick_word(words, blacklist, verbose, min_length, max_length)
                if word:
                        return word
        sys.exit("Unable to find valid word combinations")
@@ -170,7 +181,9 @@ if __name__ == '__main__':
        parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE")
        parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
        parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
-       parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')    
+       parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
+       parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
+       parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
        
        opts, args = parser.parse_args()
 
@@ -197,7 +210,7 @@ if __name__ == '__main__':
        dirs = opts.dirs
        verbose = opts.verbose
        fontsize = opts.font_size
-       
+
        if fill:
                count = max(0, fill - len(os.listdir(output)))
        
@@ -212,7 +225,7 @@ if __name__ == '__main__':
                blacklist = []
        
        for i in range(count):
-               word = pick_word(words, blacklist, verbose)
+               word = pick_word(words, blacklist, verbose, opts.min_length, opts.max_length)
                salt = "%08x" % random.randrange(2**32)
                # 64 bits of hash is plenty for this purpose
                md5hash = hashlib.md5(key+salt+word+key+salt).hexdigest()[:16]