Merge "File backend support for Captchas."
author Tim Starling <tstarling@wikimedia.org>
Wed, 26 Sep 2012 06:34:05 +0000 (06:34 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Wed, 26 Sep 2012 06:34:05 +0000 (06:34 +0000)
FancyCaptcha.class.php
FancyCaptcha.php
maintenance/GenerateFancyCaptchas.php [new file with mode: 0644]

index 27b6f2850c5546e2a286f1539b998c2be6884dac..f559f0f3c9f15f70ab0c78ecbb6e671da20fe2b7 100644 (file)
@@ -1,6 +1,53 @@
 <?php
 
 class FancyCaptcha extends SimpleCaptcha {
+       /**
+        * Get the file backend that captcha images are stored in.
+        *
+        * Uses the backend named by $wgCaptchaFileBackend when that is set.
+        * Otherwise a local FSFileBackend is built lazily (once per process)
+        * whose 'captcha-render' container maps to $wgCaptchaDirectory.
+        *
+        * @return FileBackend
+        */
+       public function getBackend() {
+               global $wgCaptchaFileBackend, $wgCaptchaDirectory;
+
+               if ( $wgCaptchaFileBackend ) {
+                       return FileBackendGroup::singleton()->get( $wgCaptchaFileBackend );
+               }
+
+               static $backend = null; // fallback backend, created on first use
+               if ( !$backend ) {
+                       $backend = new FSFileBackend( array(
+                               'name'           => 'captcha-backend',
+                               'lockManager'    => 'nullLockManager',
+                               'containerPaths' => array( 'captcha-render' => $wgCaptchaDirectory ),
+                               // Permission bits must be an octal literal; decimal 777 is 01411.
+                               'fileMode'       => 0777
+                       ) );
+               }
+
+               return $backend;
+       }
+
+       /**
+        * Estimate how many captcha files are stored by counting the files
+        * under one randomly chosen shard directory and scaling the count up.
+        *
+        * @return integer Estimate of the number of captchas files
+        */
+       public function estimateCaptchaCount() {
+               global $wgCaptchaDirectoryLevels;
+
+               $store = $this->getBackend();
+               $path = $store->getRootStoragePath() . '/captcha-render';
+
+               // How many shard levels to descend before counting:
+               // one level (1/16 sample) if sharded at all, two (1/256 sample) for 3+ levels.
+               $depth = 0;
+               if ( $wgCaptchaDirectoryLevels >= 1 ) {
+                       $depth = 1;
+               }
+               if ( $wgCaptchaDirectoryLevels >= 3 ) {
+                       $depth = 2;
+               }
+
+               $multiplier = 1;
+               for ( $level = 0; $level < $depth; $level++ ) {
+                       $path .= '/' . dechex( mt_rand( 0, 15 ) );
+                       $multiplier *= 16;
+               }
+
+               $seen = 0;
+               foreach ( $store->getFileList( array( 'dir' => $path ) ) as $ignored ) {
+                       $seen++;
+               }
+
+               return $seen * $multiplier;
+       }
+
        /**
         * Check if the submitted form matches the captcha session data provided
         * by the plugin when the form was generated.
@@ -85,99 +132,151 @@ class FancyCaptcha extends SimpleCaptcha {
 
        /**
         * Select a previously generated captcha image from the queue.
-        * @fixme subject to race conditions if lots of files vanish
         * @return mixed tuple of (salt key, text hash) or false if no image to find
         */
-       function pickImage() {
-               global $wgCaptchaDirectory, $wgCaptchaDirectoryLevels;
-               return $this->pickImageDir(
-                       $wgCaptchaDirectory,
-                       $wgCaptchaDirectoryLevels );
+       protected function pickImage() {
+               global $wgCaptchaDirectoryLevels;
+
+               // Counter handed by reference down the whole directory walk
+               $claimCounter = 0;
+               $root = $this->getBackend()->getRootStoragePath();
+
+               return $this->pickImageDir(
+                       $root . '/captcha-render',
+                       $wgCaptchaDirectoryLevels,
+                       $claimCounter
+               );
        }
 
-       function pickImageDir( $directory, $levels ) {
-               if ( $levels ) {
-                       $dirs = array();
+       /**
+        * Recursively descend the shard directories below $directory and pick a
+        * captcha image, starting at a random subdirectory and cycling through
+        * the remaining ones until one yields an image.
+        *
+        * Subdirectory listings are cached in $wgMemc for a day; the cache entry
+        * is deleted whenever a subdirectory turns out to yield no image.
+        *
+        * @param $directory string Storage path of the directory to search
+        * @param $levels integer Number of shard levels still below $directory
+        * @param $lockouts integer Counter passed by reference down to pickImageFromList()
+        * @return Array|bool Captcha info array, or false if none was found
+        */
+       protected function pickImageDir( $directory, $levels, &$lockouts ) {
+               global $wgMemc;
 
-                       // Check which subdirs are actually present...
-                       $dir = opendir( $directory );
-                       if ( !$dir ) {
-                               return false;
-                       }
-                       while ( false !== ( $entry = readdir( $dir ) ) ) {
+               if ( $levels <= 0 ) { // $directory has regular files
+                       return $this->pickImageFromDir( $directory, $lockouts );
+               }
+
+               $backend = $this->getBackend();
+
+               $key  = "fancycaptcha:dirlist:{$backend->getWikiId()}:" . sha1( $directory );
+               $dirs = $wgMemc->get( $key ); // check cache
+               if ( !is_array( $dirs ) ) { // cache miss
+                       $dirs = array(); // subdirs actually present...
+                       foreach ( $backend->getTopDirectoryList( array( 'dir' => $directory ) ) as $entry ) {
                                if ( ctype_xdigit( $entry ) && strlen( $entry ) == 1 ) {
                                        $dirs[] = $entry;
                                }
                        }
-                       closedir( $dir );
-
-                       $place = mt_rand( 0, count( $dirs ) - 1 );
-                       // In case all dirs are not filled,
-                       // cycle through next digits...
-                       for ( $j = 0; $j < count( $dirs ); $j++ ) {
-                               $char = $dirs[( $place + $j ) % count( $dirs )];
-                               $return = $this->pickImageDir( "$directory/$char", $levels - 1 );
-                               if ( $return ) {
-                                       return $return;
-                               }
-                       }
-                       // Didn't find any images in this directory... empty?
-                       return false;
-               } else {
-                       return $this->pickImageFromDir( $directory );
+                       wfDebug( "Cache miss for $directory subdirectory listing.\n" );
+                       $wgMemc->set( $key, $dirs, 86400 );
                }
-       }
 
-       function pickImageFromDir( $directory ) {
-               if ( !is_dir( $directory ) ) {
-                       return false;
+               if ( !count( $dirs ) ) {
+                       // Remove this directory if empty so callers don't keep looking here
+                       $backend->clean( array( 'dir' => $directory ) );
+                       return false; // none found
                }
-               $dirCount = $this->countFiles( $directory );
-               if ( $dirCount === 0 ) {
-                       return false;
+
+               $place = mt_rand( 0, count( $dirs ) - 1 ); // pick a random subdir
+               // In case all dirs are not filled, cycle through next digits...
+               for ( $j = 0; $j < count( $dirs ); $j++ ) {
+                       $char = $dirs[( $place + $j ) % count( $dirs )];
+                       $info = $this->pickImageDir( "$directory/$char", $levels - 1, $lockouts );
+                       if ( $info ) {
+                               return $info; // found a captcha
+                       } else {
+                               wfDebug( "Could not find captcha in $directory.\n" );
+                               $wgMemc->delete( $key ); // files changed on disk?
+                       }
                }
-               $n = mt_rand( 0, $dirCount - 1 );
-               $dir = opendir( $directory );
 
-               $count = 0;
+               return false; // didn't find any images in this directory... empty?
+       }
 
-               $entry = readdir( $dir );
-               $pick = false;
-               while ( false !== $entry ) {
-                       $entry = readdir( $dir );
-                       if ( preg_match( '/^image_([0-9a-f]+)_([0-9a-f]+)\\.png$/', $entry, $matches ) ) {
-                               $size = getimagesize( "$directory/$entry" );
-                               $pick = array(
-                                       'salt' => $matches[1],
-                                       'hash' => $matches[2],
-                                       'width' => $size[0],
-                                       'height' => $size[1],
-                                       'viewed' => false,
-                               );
-                               if ( $count++ == $n ) {
+       /**
+        * Pick a captcha image from a directory that directly contains image files.
+        *
+        * The file listing (capped at 500 entries) is cached in $wgMemc for a day
+        * and deleted again if no image could be picked from it.
+        *
+        * @param $directory string Storage path of the directory holding the files
+        * @param $lockouts integer Counter passed by reference to pickImageFromList()
+        * @return Array|bool Captcha info array, or false if none was found
+        */
+       protected function pickImageFromDir( $directory, &$lockouts ) {
+               global $wgMemc;
+
+               $backend = $this->getBackend();
+
+               $key   = "fancycaptcha:filelist:{$backend->getWikiId()}:" . sha1( $directory );
+               $files = $wgMemc->get( $key ); // check cache
+               if ( !is_array( $files ) ) { // cache miss
+                       $files = array(); // captcha files
+                       foreach ( $backend->getTopFileList( array( 'dir' => $directory ) ) as $entry ) {
+                               $files[] = $entry;
+                               if ( count( $files ) >= 500 ) { // sanity
+                                       wfDebug( 'Skipping some captchas; $wgCaptchaDirectoryLevels set too low?.' );
                                        break;
                                }
                        }
+                       $wgMemc->set( $key, $files, 86400 );
+                       wfDebug( "Cache miss for $directory captcha listing.\n" );
+               }
+
+               if ( !count( $files ) ) {
+                       // Remove this directory if empty so callers don't keep looking here
+                       $backend->clean( array( 'dir' => $directory ) );
+                       return false;
+               }
+
+               $info = $this->pickImageFromList( $directory, $files, $lockouts );
+               if ( !$info ) {
+                       wfDebug( "Could not find captcha in $directory.\n" );
+                       $wgMemc->delete( $key ); // files changed on disk?
                }
-               closedir( $dir );
-               return $pick;
+
+               return $info;
        }
 
        /**
-        * Count the number of files in a directory.
-        * @param $dirname
-        * @return int
+        * Pick a usable captcha image from a file listing, starting at a random
+        * position and cycling through the remaining entries.
+        *
+        * When $wgCaptchaDeleteOnSolve is set, each candidate is first claimed
+        * via $wgMemc->add() for 600 seconds; entries that cannot be claimed are
+        * skipped. Once $lockouts exceeds 10 the claim step is bypassed. The
+        * search stops after 5 listed files turn out not to exist.
+        *
+        * @param $directory string Storage path of the directory holding $files
+        * @param $files array File names found in $directory
+        * @param $lockouts integer Counter (by reference) incremented before each claim attempt
+        * @return Array|bool Info array (salt, hash, width, height, viewed), or false if none found
         */
-       function countFiles( $dirname ) {
-               $dir = opendir( $dirname );
-               $count = 0;
-               while ( false !== ( $entry = readdir( $dir ) ) ) {
-                       if ( $entry != '.' && $entry != '..' ) {
-                               $count++;
+       protected function pickImageFromList( $directory, array $files, &$lockouts ) {
+               global $wgMemc, $wgCaptchaDeleteOnSolve;
+
+               if ( !count( $files ) ) {
+                       return false; // none found
+               }
+
+               $backend  = $this->getBackend();
+               $place    = mt_rand( 0, count( $files ) - 1 ); // pick a random file
+               $misses   = 0; // number of files in listing that don't actually exist
+               for ( $j = 0; $j < count( $files ); $j++ ) {
+                       $entry = $files[( $place + $j ) % count( $files )];
+                       if ( preg_match( '/^image_([0-9a-f]+)_([0-9a-f]+)\\.png$/', $entry, $matches ) ) {
+                               if ( $wgCaptchaDeleteOnSolve ) { // captcha will be deleted when solved
+                                       $key = "fancycaptcha:filelock:{$backend->getWikiId()}:" . sha1( $entry );
+                                       // Try to claim this captcha for 10 minutes (for the user to solve)...
+                                       if ( ++$lockouts <= 10 && !$wgMemc->add( $key, '1', 600 ) ) {
+                                               continue; // could not acquire (skip it to avoid race conditions)
+                                       }
+                               }
+                               $fsFile = $backend->getLocalReference( array( 'src' => "$directory/$entry" ) );
+                               if ( !$fsFile || !$fsFile->exists() ) {
+                                       if ( ++$misses >= 5 ) { // too many files in the listing don't exist
+                                               break; // listing cache too stale? break out so it will be cleared
+                                       }
+                                       continue; // try next file
+                               }
+                               $size = getimagesize( $fsFile->getPath() );
+                               return array(
+                                       'salt'   => $matches[1],
+                                       'hash'   => $matches[2],
+                                       'width'  => $size[0],
+                                       'height' => $size[1],
+                                       'viewed' => false,
+                               );
                        }
                }
-               closedir( $dir );
-               return $count;
+
+               return false; // none found
        }
 
        function showImage() {
@@ -193,32 +292,47 @@ class FancyCaptcha extends SimpleCaptcha {
 
                        $salt = $info['salt'];
                        $hash = $info['hash'];
-                       $file = $this->imagePath( $salt, $hash );
-
-                       if ( file_exists( $file ) ) {
-                               global $IP;
-                               require_once "$IP/includes/StreamFile.php";
-                               header( "Cache-Control: private, s-maxage=0, max-age=3600" );
-                               StreamFile::stream( $file );
-                               return true;
-                       }
+
+                       return $this->getBackend()->streamFile( array(
+                               'src'     => $this->imagePath( $salt, $hash ),
+                               'headers' => array( "Cache-Control: private, s-maxage=0, max-age=3600" )
+                       ) )->isOK();
                }
+
                wfHttpError( 500, 'Internal Error', 'Requested bogus captcha image' );
                return false;
        }
 
-       function imagePath( $salt, $hash ) {
-               global $wgCaptchaDirectory, $wgCaptchaDirectoryLevels;
-               $file = $wgCaptchaDirectory;
-               $file .= DIRECTORY_SEPARATOR;
+       /**
+        * Build the storage path of a captcha image inside the 'captcha-render'
+        * container. One directory level is added per leading character of
+        * $hash, up to $wgCaptchaDirectoryLevels levels.
+        *
+        * @param $salt string
+        * @param $hash string
+        * @return string Path ending in "image_{salt}_{hash}.png"
+        */
+       public function imagePath( $salt, $hash ) {
+               global $wgCaptchaDirectoryLevels;
+
+               $file = $this->getBackend()->getRootStoragePath() . '/captcha-render/';
                for ( $i = 0; $i < $wgCaptchaDirectoryLevels; $i++ ) {
-                       $file .= $hash { $i } ;
-                       $file .= DIRECTORY_SEPARATOR;
+                       // Square brackets: the curly-brace offset syntax was removed in PHP 8
+                       $file .= $hash[$i] . '/';
                }
                $file .= "image_{$salt}_{$hash}.png";
+
                return $file;
        }
 
+       /**
+        * Extract the salt and hash encoded in a captcha image file name.
+        *
+        * @param $basename string File name of the form "image_<salt>_<hash>.png"
+        * @return Array (salt, hash)
+        * @throws MWException If the name does not match that form
+        */
+       public function hashFromImageName( $basename ) {
+               $parts = array();
+               if ( !preg_match( '/^image_([0-9a-f]+)_([0-9a-f]+)\\.png$/', $basename, $parts ) ) {
+                       throw new MWException( "Invalid filename '$basename'.\n" );
+               }
+
+               return array( $parts[1], $parts[2] );
+       }
+
        /**
         * Show a message asking the user to enter a captcha on edit
         * The result will be treated as wiki text
@@ -231,7 +345,8 @@ class FancyCaptcha extends SimpleCaptcha {
                $text = wfMessage( $name )->text();
                # Obtain a more tailored message, if possible, otherwise, fall back to
                # the default for edits
-               return wfMessage( $name, $text )->isDisabled() ? wfMessage( 'fancycaptcha-edit' )->text() : $text;
+               return wfMessage( $name, $text )->isDisabled() ?
+                       wfMessage( 'fancycaptcha-edit' )->text() : $text;
        }
 
        /**
@@ -244,10 +359,9 @@ class FancyCaptcha extends SimpleCaptcha {
                $pass = parent::passCaptcha();
 
                if ( $pass && $wgCaptchaDeleteOnSolve ) {
-                       $filename = $this->imagePath( $info['salt'], $info['hash'] );
-                       if ( file_exists( $filename ) ) {
-                               unlink( $filename );
-                       }
+                       $this->getBackend()->quickDelete( array(
+                               'src' => $this->imagePath( $info['salt'], $info['hash'] )
+                       ) );
                }
 
                return $pass;
index 6de858a3f32c7e493eeaa8fe6e888dd74271c1a6..69e3be01235a990d69d309b65dfbdb8ad8383f45 100644 (file)
@@ -33,6 +33,12 @@ $dir = __DIR__;
 require_once $dir . '/ConfirmEdit.php';
 $wgCaptchaClass = 'FancyCaptcha';
 
+/**
+ * The name of a file backend ($wgFileBackends) to be used for storing files.
+ * Defaults to FSFileBackend using $wgCaptchaDirectory as a base path.
+ */
+$wgCaptchaFileBackend = '';
+
 global $wgCaptchaDirectory;
 $wgCaptchaDirectory = "$wgUploadDirectory/captcha"; // bad default :D
 
diff --git a/maintenance/GenerateFancyCaptchas.php b/maintenance/GenerateFancyCaptchas.php
new file mode 100644 (file)
index 0000000..e562075
--- /dev/null
@@ -0,0 +1,128 @@
+<?php
+/**
+ * Generate captchas using a python script and copy them into storage.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ * @ingroup Maintenance
+ */
+// Prefer an explicitly configured install path; otherwise fall back to
+// the directory three levels above this script.
+$IP = getenv( 'MW_INSTALL_PATH' );
+if ( !$IP ) {
+       $IP = __DIR__ . '/../../..';
+}
+
+require_once "$IP/maintenance/Maintenance.php";
+
+/**
+ * Maintenance script that generates new captcha images with captcha.py
+ * and copies them into the FancyCaptcha storage backend.
+ *
+ * execute() estimates how many captchas are currently stored, generates
+ * enough new ones in a temporary directory to reach the --fill target,
+ * stores each generated file through the backend, and finally removes
+ * the temporary directory.
+ *
+ * @ingroup Maintenance
+ */
+class GenerateFancyCaptchas extends Maintenance {
+       public function __construct() {
+               parent::__construct();
+               // See captcha.py for argument usage
+               $this->addOption( "wordlist", 'A list of words', true, true );
+               $this->addOption( "font", "The font to use", true, true );
+               $this->addOption( "font-size", "The font size ", false, true );
+               $this->addOption( "blacklist", "A blacklist of words that should not be used", false, true );
+               $this->addOption( "fill", "Fill the captcha container to N files", true, true );
+               $this->addOption( "verbose", "Show debugging information" );
+               $this->mDescription = "Generate new captchas and move them into storage";
+       }
+
+       public function execute() {
+               global $wgCaptchaSecret, $wgCaptchaDirectoryLevels;
+
+               $instance = ConfirmEditHooks::getInstance();
+               if ( !( $instance instanceof FancyCaptcha ) ) {
+                       $this->error( "\$wgCaptchaClass is not FancyCaptcha.\n", 1 );
+               }
+               $backend = $instance->getBackend();
+
+               $countAct = $instance->estimateCaptchaCount();
+               $this->output( "Estimated number of captchas is $countAct.\n" );
+
+               $countGen = (int)$this->getOption( 'fill' ) - $countAct;
+               if ( $countGen <= 0 ) {
+                       $this->output( "No need to generate anymore captchas.\n" );
+                       return;
+               }
+
+               $tmpDir = wfTempDir() . '/mw-fancycaptcha-' . time() . '-' . wfRandomString( 6 );
+               if ( !wfMkdirParents( $tmpDir ) ) {
+                       $this->error( "Could not create temp directory.\n", 1 );
+               }
+
+               $e = null; // exception
+               try {
+                       $cmd = sprintf( "python %s --key %s --output %s --count %s --dirs %s",
+                               wfEscapeShellArg( __DIR__ . '/../captcha.py' ),
+                               wfEscapeShellArg( $wgCaptchaSecret ),
+                               wfEscapeShellArg( $tmpDir ),
+                               wfEscapeShellArg( $countGen ),
+                               wfEscapeShellArg( $wgCaptchaDirectoryLevels )
+                       );
+                       foreach ( array( 'wordlist', 'font', 'font-size', 'blacklist', 'verbose' ) as $par ) {
+                               if ( $this->hasOption( $par ) ) {
+                                       $cmd .= " --$par " . wfEscapeShellArg( $this->getOption( $par ) );
+                               }
+                       }
+
+                       $this->output( "Generating $countGen new captchas...\n" );
+                       $retVal = 1;
+                       wfShellExec( $cmd, $retVal );
+                       if ( $retVal != 0 ) {
+                               wfRecursiveRemoveDir( $tmpDir );
+                               $this->error( "Could not run generation script.\n", 1 );
+                       }
+
+                       $flags = FilesystemIterator::SKIP_DOTS;
+                       $iter = new RecursiveIteratorIterator(
+                               new RecursiveDirectoryIterator( $tmpDir, $flags ),
+                               RecursiveIteratorIterator::CHILD_FIRST // include dirs
+                       );
+
+                       $this->output( "Copying the new captchas to storage...\n" );
+                       foreach ( $iter as $fileInfo ) {
+                               if ( !$fileInfo->isFile() ) {
+                                       continue;
+                               }
+                               list( $salt, $hash ) = $instance->hashFromImageName( $fileInfo->getBasename() );
+                               $status = $backend->quickStore( array(
+                                       'src' => $fileInfo->getPathname(),
+                                       'dst' => $instance->imagePath( $salt, $hash )
+                               ) );
+                               if ( !$status->isOK() ) {
+                                       $this->error( "Could not save file '{$fileInfo->getPathname()}'.\n" );
+                               }
+                       }
+               } catch ( Exception $e ) {
+                       wfRecursiveRemoveDir( $tmpDir );
+                       throw $e;
+               }
+
+               $this->output( "Removing temporary files...\n" );
+               wfRecursiveRemoveDir( $tmpDir );
+               $this->output( "Done.\n" );
+       }
+}
+
+$maintClass = 'GenerateFancyCaptchas';
+require_once RUN_MAINTENANCE_IF_MAIN;