Support precomputed data in GitInfo
author Bryan Davis <bd808@wikimedia.org>
Mon, 28 Apr 2014 19:50:03 +0000 (13:50 -0600)
committer Ori.livneh <ori@wikimedia.org>
Mon, 5 May 2014 23:50:12 +0000 (23:50 +0000)
Support reading git repository information from a JSON file in the cache
directory. When present, this file serves to provide information needed
by getHead, getHeadSHA1, getHeadCommitDate, getCurrentBranch and a new
getRemoteUrl method. A GitInfo::precomputeValues method is also provided
which can generate the cache file for a given GitInfo instance.

This support can be combined with a deployment step to reduce the need
to repeatedly gather information from the .git files/git binary. It also
allows computing information that can be lost when directory structures
differ between deployment staging hosts and hosts running MediaWiki.

This change also adds memoization of computed values for a given GitInfo
instance which may provide a small performance boost even for
deployments which are not using precomputed cache files.

Bug: 53972
Change-Id: I66e058acc5a71e5d82644f85d819f49d6ee9d1e6

includes/GitInfo.php
tests/phpunit/data/gitinfo/info-testValidJsonData.json [new file with mode: 0644]
tests/phpunit/includes/GitInfoTest.php [new file with mode: 0644]

index 6b092d9..dc2fff1 100644 (file)
@@ -35,33 +35,91 @@ class GitInfo {
         */
        protected $basedir;
 
+       /**
+        * Path to JSON cache file for pre-computed git information.
+        */
+       protected $cacheFile;
+
+       /**
+        * Cached git information.
+        */
+       protected $cache = array();
+
        /**
         * Map of repo URLs to viewer URLs. Access via static method getViewers().
         */
        private static $viewers = false;
 
        /**
-        * @param string $dir The root directory of the repo where the .git dir can be found
+        * @param string $repoDir The root directory of the repo where .git can be found
+        * @param bool $usePrecomputed Use precomputed information if available
+        * @see precomputeValues
+        */
+       public function __construct( $repoDir, $usePrecomputed = true ) {
+               $this->cacheFile = self::getCacheFilePath( $repoDir );
+               if ( $usePrecomputed &&
+                       $this->cacheFile !== null &&
+                       is_readable( $this->cacheFile )
+               ) {
+                       $this->cache = FormatJson::decode(
+                               file_get_contents( $this->cacheFile ),
+                               true
+                       );
+               }
+
+               if ( !$this->cacheIsComplete() ) {
+                       $this->basedir = $repoDir . DIRECTORY_SEPARATOR . '.git';
+                       if ( is_readable( $this->basedir ) && !is_dir( $this->basedir ) ) {
+                               $GITfile = file_get_contents( $this->basedir );
+                               if ( strlen( $GITfile ) > 8 &&
+                                       substr( $GITfile, 0, 8 ) === 'gitdir: '
+                               ) {
+                                       $path = rtrim( substr( $GITfile, 8 ), "\r\n" );
+                                       if ( $path[0] === '/' || substr( $path, 1, 1 ) === ':' ) {
+                                               // Path from GITfile is absolute
+                                               $this->basedir = $path;
+                                       } else {
+                                               $this->basedir = $repoDir . DIRECTORY_SEPARATOR . $path;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Compute the path to the cache file for a given directory.
+        *
+        * @param string $repoDir The root directory of the repo where .git can be found
+        * @return string Path to GitInfo cache file in $wgCacheDirectory or null if
+        * $wgCacheDirectory is false (cache disabled).
         */
-       public function __construct( $dir ) {
-               $this->basedir = $dir . DIRECTORY_SEPARATOR . '.git';
-               if ( is_readable( $this->basedir ) && !is_dir( $this->basedir ) ) {
-                       $GITfile = file_get_contents( $this->basedir );
-                       if ( strlen( $GITfile ) > 8 && substr( $GITfile, 0, 8 ) === 'gitdir: ' ) {
-                               $path = rtrim( substr( $GITfile, 8 ), "\r\n" );
-                               $isAbsolute = $path[0] === '/' || substr( $path, 1, 1 ) === ':';
-                               $this->basedir = $isAbsolute ? $path : $dir . DIRECTORY_SEPARATOR . $path;
+       protected static function getCacheFilePath( $repoDir ) {
+               global $IP, $wgCacheDirectory;
+               if ( $wgCacheDirectory ) {
+                       // Transform path to git repo to something we can safely embed in a filename
+                       $repoName = $repoDir;
+                       if ( strpos( $repoName, $IP ) === 0 ) {
+                               // Strip $IP from path
+                               $repoName = substr( $repoName, strlen( $IP ) );
                        }
+                       $repoName = strtr( $repoName, DIRECTORY_SEPARATOR, '-' );
+                       $fileName = 'info' . $repoName . '.json';
+                       return implode(
+                               DIRECTORY_SEPARATOR,
+                               array( $wgCacheDirectory, 'gitinfo', $fileName )
+                       );
                }
+               return null;
        }
 
        /**
-        * Return a singleton for the repo at $IP
+        * Get the singleton for the repo at $IP
+        *
         * @return GitInfo
         */
        public static function repo() {
-               global $IP;
                if ( is_null( self::$repo ) ) {
+                       global $IP;
                        self::$repo = new self( $IP );
                }
                return self::$repo;
@@ -78,50 +136,56 @@ class GitInfo {
        }
 
        /**
-        * Return the HEAD of the repo (without any opening "ref: ")
-        * @return string The HEAD
+        * Get the HEAD of the repo (without any opening "ref: ")
+        *
+        * @return string|bool The HEAD (git reference or SHA1) or false
         */
        public function getHead() {
-               $headFile = "{$this->basedir}/HEAD";
+               if ( !isset( $this->cache['head'] ) ) {
+                       $headFile = "{$this->basedir}/HEAD";
+                       $head = false;
 
-               if ( !is_readable( $headFile ) ) {
-                       return false;
-               }
+                       if ( is_readable( $headFile ) ) {
+                               $head = file_get_contents( $headFile );
 
-               $head = file_get_contents( $headFile );
-
-               if ( preg_match( "/ref: (.*)/", $head, $m ) ) {
-                       return rtrim( $m[1] );
-               } else {
-                       return rtrim( $head );
+                               if ( preg_match( "/ref: (.*)/", $head, $m ) ) {
+                                       $head = rtrim( $m[1] );
+                               } else {
+                                       $head = rtrim( $head );
+                               }
+                       }
+                       $this->cache['head'] = $head;
                }
+               return $this->cache['head'];
        }
 
        /**
-        * Return the SHA1 for the current HEAD of the repo
-        * @return string A SHA1 or false
+        * Get the SHA1 for the current HEAD of the repo
+        *
+        * @return string|bool A SHA1 or false
         */
        public function getHeadSHA1() {
-               $head = $this->getHead();
-
-               // If detached HEAD may be a SHA1
-               if ( self::isSHA1( $head ) ) {
-                       return $head;
-               }
-
-               // If not a SHA1 it may be a ref:
-               $refFile = "{$this->basedir}/{$head}";
-               if ( !is_readable( $refFile ) ) {
-                       return false;
+               if ( !isset( $this->cache['headSHA1'] ) ) {
+                       $head = $this->getHead();
+                       $sha1 = false;
+
+                       // If detached HEAD may be a SHA1
+                       if ( self::isSHA1( $head ) ) {
+                               $sha1 = $head;
+                       } else {
+                               // If not a SHA1 it may be a ref:
+                               $refFile = "{$this->basedir}/{$head}";
+                               if ( is_readable( $refFile ) ) {
+                                       $sha1 = rtrim( file_get_contents( $refFile ) );
+                               }
+                       }
+                       $this->cache['headSHA1'] = $sha1;
                }
-
-               $sha1 = rtrim( file_get_contents( $refFile ) );
-
-               return $sha1;
+               return $this->cache['headSHA1'];
        }
 
        /**
-        * Return the commit date of HEAD entry of the git code repository
+        * Get the commit date of HEAD entry of the git code repository
         *
         * @since 1.22
         * @return int|bool Commit date (UNIX timestamp) or false
@@ -129,67 +193,51 @@ class GitInfo {
        public function getHeadCommitDate() {
                global $wgGitBin;
 
-               if ( !is_file( $wgGitBin ) || !is_executable( $wgGitBin ) ) {
-                       return false;
-               }
-
-               $environment = array( "GIT_DIR" => $this->basedir );
-               $cmd = wfEscapeShellArg( $wgGitBin ) . " show -s --format=format:%ct HEAD";
-               $retc = false;
-               $commitDate = wfShellExec( $cmd, $retc, $environment );
-
-               if ( $retc !== 0 ) {
-                       return false;
-               } else {
-                       return (int)$commitDate;
+               if ( !isset( $this->cache['headCommitDate'] ) ) {
+                       $date = false;
+                       if ( is_file( $wgGitBin ) && is_executable( $wgGitBin ) ) {
+                               $environment = array( "GIT_DIR" => $this->basedir );
+                               $cmd = wfEscapeShellArg( $wgGitBin ) .
+                                       " show -s --format=format:%ct HEAD";
+                               $retc = false;
+                               $commitDate = wfShellExec( $cmd, $retc, $environment );
+                               if ( $retc === 0 ) {
+                                       $date = (int)$commitDate;
+                               }
+                       }
+                       $this->cache['headCommitDate'] = $date;
                }
+               return $this->cache['headCommitDate'];
        }
 
        /**
-        * Return the name of the current branch, or HEAD if not found
-        * @return string The branch name, HEAD, or false
+        * Get the name of the current branch, or HEAD if not found
+        *
+        * @return string|bool The branch name, HEAD, or false
         */
        public function getCurrentBranch() {
-               $head = $this->getHead();
-               if ( $head && preg_match( "#^refs/heads/(.*)$#", $head, $m ) ) {
-                       return $m[1];
-               } else {
-                       return $head;
+               if ( !isset( $this->cache['branch'] ) ) {
+                       $branch = $this->getHead();
+                       if ( $branch &&
+                               preg_match( "#^refs/heads/(.*)$#", $branch, $m )
+                       ) {
+                               $branch = $m[1];
+                       }
+                       $this->cache['branch'] = $branch;
                }
+               return $this->cache['branch'];
        }
 
        /**
         * Get an URL to a web viewer link to the HEAD revision.
         *
-        * @return string|bool string if a URL is available or false otherwise.
+        * @return string|bool String if a URL is available or false otherwise
         */
        public function getHeadViewUrl() {
-               $config = "{$this->basedir}/config";
-               if ( !is_readable( $config ) ) {
-                       return false;
-               }
-
-               wfSuppressWarnings();
-               $configArray = parse_ini_file( $config, true );
-               wfRestoreWarnings();
-               $remote = false;
-
-               // Use the "origin" remote repo if available or any other repo if not.
-               if ( isset( $configArray['remote origin'] ) ) {
-                       $remote = $configArray['remote origin'];
-               } elseif ( is_array( $configArray ) ) {
-                       foreach ( $configArray as $sectionName => $sectionConf ) {
-                               if ( substr( $sectionName, 0, 6 ) == 'remote' ) {
-                                       $remote = $sectionConf;
-                               }
-                       }
-               }
-
-               if ( $remote === false || !isset( $remote['url'] ) ) {
+               $url = $this->getRemoteUrl();
+               if ( $url === false ) {
                        return false;
                }
-
-               $url = $remote['url'];
                if ( substr( $url, -4 ) !== '.git' ) {
                        $url .= '.git';
                }
@@ -209,6 +257,91 @@ class GitInfo {
                return false;
        }
 
+       /**
+        * Get the URL of the remote origin.
+        * @return string|bool string if a URL is available or false otherwise.
+        */
+       protected function getRemoteUrl() {
+               if ( !isset( $this->cache['remoteURL'] ) ) {
+                       $config = "{$this->basedir}/config";
+                       $url = false;
+                       if ( is_readable( $config ) ) {
+                               wfSuppressWarnings();
+                               $configArray = parse_ini_file( $config, true );
+                               wfRestoreWarnings();
+                               $remote = false;
+
+                               // Use the "origin" remote repo if available or any other repo if not.
+                               if ( isset( $configArray['remote origin'] ) ) {
+                                       $remote = $configArray['remote origin'];
+                               } elseif ( is_array( $configArray ) ) {
+                                       foreach ( $configArray as $sectionName => $sectionConf ) {
+                                               if ( substr( $sectionName, 0, 6 ) == 'remote' ) {
+                                                       $remote = $sectionConf;
+                                               }
+                                       }
+                               }
+
+                               if ( $remote !== false && isset( $remote['url'] ) ) {
+                                       $url = $remote['url'];
+                               }
+                       }
+                       $this->cache['remoteURL'] = $url;
+               }
+               return $this->cache['remoteURL'];
+       }
+
+       /**
+        * Check to see if the current cache is fully populated.
+        *
+        * Note: This method is public only to make unit testing easier. There's
+        * really no strong reason that anything other than a test should want to
+        * call this method.
+        *
+        * @return bool True if all expected cache keys exist, false otherwise
+        */
+       public function cacheIsComplete() {
+               return isset( $this->cache['head'] ) &&
+                       isset( $this->cache['headSHA1'] ) &&
+                       isset( $this->cache['headCommitDate'] ) &&
+                       isset( $this->cache['branch'] ) &&
+                       isset( $this->cache['remoteURL'] );
+       }
+
+       /**
+        * Precompute and cache git information.
+        *
+        * Creates a JSON file in the cache directory associated with this
+        * GitInfo instance. This cache file will be used by subsequent GitInfo objects referencing
+        * the same directory to avoid needing to examine the .git directory again.
+        *
+        * @since 1.24
+        */
+       public function precomputeValues() {
+               if ( $this->cacheFile !== null ) {
+                       // Try to completely populate the cache
+                       $this->getHead();
+                       $this->getHeadSHA1();
+                       $this->getHeadCommitDate();
+                       $this->getCurrentBranch();
+                       $this->getRemoteUrl();
+
+                       if ( !$this->cacheIsComplete() ) {
+                               wfDebugLog( "Failed to compute GitInfo for \"{$this->basedir}\"" );
+                               return;
+                       }
+
+                       $cacheDir = dirname( $this->cacheFile );
+                       if ( !file_exists( $cacheDir ) &&
+                               !wfMkdirParents( $cacheDir, null, __METHOD__ )
+                       ) {
+                               throw new MWException( "Unable to create GitInfo cache \"{$cacheDir}\"" );
+                       }
+
+                       file_put_contents( $this->cacheFile, FormatJson::encode( $this->cache ) );
+               }
+       }
+
        /**
         * @see self::getHeadSHA1
         * @return string
diff --git a/tests/phpunit/data/gitinfo/info-testValidJsonData.json b/tests/phpunit/data/gitinfo/info-testValidJsonData.json
new file mode 100644 (file)
index 0000000..e955a2b
--- /dev/null
@@ -0,0 +1 @@
+{\r    "head": "refs/heads/master",\r    "headSHA1": "0123456789abcdef0123456789abcdef01234567",\r    "headCommitDate": "1070884800",\r    "branch": "master",\r    "remoteURL": "https://gerrit.wikimedia.org/r/mediawiki/core"\r}\r
\ No newline at end of file
diff --git a/tests/phpunit/includes/GitInfoTest.php b/tests/phpunit/includes/GitInfoTest.php
new file mode 100644 (file)
index 0000000..7c684d5
--- /dev/null
@@ -0,0 +1,42 @@
+<?php
+/**
+ * @covers GitInfo
+ */
+class GitInfoTest extends MediaWikiTestCase {
+
+       protected function setUp() {
+               parent::setUp();
+               $this->setMwGlobals( 'wgCacheDirectory', __DIR__ . '/../data' );
+       }
+
+       public function testValidJsonData() {
+               $dir = $GLOBALS['IP'] . '/testValidJsonData';
+               $fixture = new GitInfo( $dir );
+
+               $this->assertTrue( $fixture->cacheIsComplete() );
+               $this->assertEquals( 'refs/heads/master', $fixture->getHead() );
+               $this->assertEquals( '0123456789abcdef0123456789abcdef01234567',
+                       $fixture->getHeadSHA1() );
+               $this->assertEquals( '1070884800', $fixture->getHeadCommitDate() );
+               $this->assertEquals( 'master', $fixture->getCurrentBranch() );
+               $this->assertContains( '0123456789abcdef0123456789abcdef01234567',
+                       $fixture->getHeadViewUrl() );
+       }
+
+       public function testMissingJsonData() {
+               $dir = $GLOBALS['IP'] . '/testMissingJsonData';
+               $fixture = new GitInfo( $dir );
+
+               $this->assertFalse( $fixture->cacheIsComplete() );
+
+               $this->assertEquals( false, $fixture->getHead() );
+               $this->assertEquals( false, $fixture->getHeadSHA1() );
+               $this->assertEquals( false, $fixture->getHeadCommitDate() );
+               $this->assertEquals( false, $fixture->getCurrentBranch() );
+               $this->assertEquals( false, $fixture->getHeadViewUrl() );
+
+               // After calling all the outputs, the cache should be complete
+               $this->assertTrue( $fixture->cacheIsComplete() );
+       }
+
+}