Merge "Improve the shell cgroup feature"
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Thu, 7 Feb 2013 18:40:49 +0000 (18:40 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Thu, 7 Feb 2013 18:40:49 +0000 (18:40 +0000)
RELEASE-NOTES-1.21
bin/ulimit5.sh [deleted file]
includes/DefaultSettings.php
includes/GlobalFunctions.php
includes/limit.sh [new file with mode: 0644]

index 36e660e..bf1a20b 100644 (file)
@@ -87,6 +87,9 @@ production.
   a security fix (bug 42202).
 * Added the ability to limit the wall clock time used by shell processes,
   as well as the CPU time. Configurable with $wgMaxShellWallClockTime.
+* Allow memory of shell subprocesses to be limited using Linux cgroups
+  instead of ulimit -v, which tends to cause deadlocks in recent versions
+  of ImageMagick. Configurable with $wgShellCgroup.
 * Added $wgWhitelistReadRegexp for regex whitelisting.
 * (bug 5346) Categories that are redirects will be displayed italic in
   the category links section at the bottom of a page.
diff --git a/bin/ulimit5.sh b/bin/ulimit5.sh
deleted file mode 100644 (file)
index 36c3176..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-if [ "$1" -gt 0 ]; then
-       ulimit -t "$1"
-fi
-if [ "$2" -gt 0 ]; then
-       if [ -e /sys/fs/cgroup/memory/mediawiki/job/ ]; then
-               mkdir -m 0700 /sys/fs/cgroup/memory/mediawiki/job/$$
-               echo $$ > /sys/fs/cgroup/memory/mediawiki/job/$$/tasks
-               echo "1" > /sys/fs/cgroup/memory/mediawiki/job/$$/notify_on_release
-               #memory
-               echo $(($2*1024)) > /sys/fs/cgroup/memory/mediawiki/job/$$/memory.limit_in_bytes
-               #memory+swap
-               echo $(($2*1024)) > /sys/fs/cgroup/memory/mediawiki/job/$$/memory.memsw.limit_in_bytes
-       fi
-       ulimit -v "$2"
-fi
-if [ "$3" -gt 0 ]; then
-       ulimit -f "$3"
-fi
-if [ "$4" -gt 0 -a -x "/usr/bin/timeout" ]; then
-       /usr/bin/timeout $4 /bin/bash -c "$5"
-       STATUS="$?"
-       if [ "$STATUS" == 124 ]; then
-               echo "ulimit5.sh: timed out." 1>&2
-       fi
-       exit "$STATUS"
-else
-       eval "$5"
-fi
index d0eb89c..3b76c57 100644 (file)
@@ -6188,6 +6188,31 @@ $wgMaxShellTime = 180;
  */
 $wgMaxShellWallClockTime = 180;
 
+/**
+ * Under Linux: a cgroup directory used to constrain memory usage of shell 
+ * commands. The directory must be writable by the user which runs MediaWiki.
+ *
+ * If specified, this is used instead of ulimit, which is inaccurate, and
+ * causes malloc() to return NULL, which exposes bugs in C applications, making
+ * them segfault or deadlock.
+ *
+ * A wrapper script will create a cgroup for each shell command that runs, as
+ * a subgroup of the specified cgroup. If the memory limit is exceeded, the 
+ * kernel will send a SIGKILL signal to a process in the subgroup.
+ *
+ * @par Example:
+ * @code
+ *    mkdir -p /sys/fs/cgroup/memory/mediawiki
+ *    mkdir -m 0777 /sys/fs/cgroup/memory/mediawiki/job
+ *    echo '$wgShellCgroup = "/sys/fs/cgroup/memory/mediawiki/job";' >> LocalSettings.php
+ * @endcode
+ *
+ * The reliability of cgroup cleanup can be improved by installing a 
+ * notify_on_release script in the root cgroup, see e.g.
+ * https://gerrit.wikimedia.org/r/#/c/40784
+ */
+$wgShellCgroup = false;
+
 /**
  * Executable path of the PHP cli binary (php/php5). Should be set up on install.
  */
index 90bc891..3fa816f 100644 (file)
@@ -2778,7 +2778,7 @@ function wfEscapeShellArg( ) {
  */
 function wfShellExec( $cmd, &$retval = null, $environ = array(), $limits = array() ) {
        global $IP, $wgMaxShellMemory, $wgMaxShellFileSize, $wgMaxShellTime,
-               $wgMaxShellWallClockTime;
+               $wgMaxShellWallClockTime, $wgShellCgroup;
 
        static $disabled;
        if ( is_null( $disabled ) ) {
@@ -2837,8 +2837,15 @@ function wfShellExec( $cmd, &$retval = null, $environ = array(), $limits = array
                $filesize = intval ( isset( $limits['filesize'] ) ? $limits['filesize'] : $wgMaxShellFileSize );
 
                if ( $time > 0 || $mem > 0 || $filesize > 0 || $wallTime > 0 ) {
-                       $cmd = '/bin/bash ' . escapeshellarg( "$IP/bin/ulimit5.sh" ) .
-                               " $time $mem $filesize $wallTime " . escapeshellarg( $cmd );
+                       $cmd = '/bin/bash ' . escapeshellarg( "$IP/includes/limit.sh" ) . ' ' .
+                               escapeshellarg( $cmd ) . ' ' .
+                               escapeshellarg(
+                                       "MW_CPU_LIMIT=$time; " .
+                                       'MW_CGROUP=' . escapeshellarg( $wgShellCgroup ) . '; ' .
+                                       "MW_MEM_LIMIT=$mem; " .
+                                       "MW_FILE_SIZE_LIMIT=$filesize; " .
+                                       "MW_WALL_CLOCK_LIMIT=$wallTime"
+                               );
                }
        }
        wfDebug( "wfShellExec: $cmd\n" );
diff --git a/includes/limit.sh b/includes/limit.sh
new file mode 100644 (file)
index 0000000..44b9edc
--- /dev/null
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Resource limiting wrapper for command execution
+#
+# Why is this in shell script? Because bash has a setrlimit() wrapper
+# and is available on most Linux systems. If Perl was distributed with
+# BSD::Resource included, we would happily use that instead, but it isn't.
+
+MW_CPU_LIMIT=0
+MW_CGROUP=
+MW_MEM_LIMIT=0
+MW_FILE_SIZE_LIMIT=0
+MW_WALL_CLOCK_LIMIT=0
+
+# Override settings
+eval "$2"
+
+if [ "$MW_CPU_LIMIT" -gt 0 ]; then
+       ulimit -t "$MW_CPU_LIMIT"
+fi
+if [ "$MW_MEM_LIMIT" -gt 0 ]; then
+       if [ -n "$MW_CGROUP" ]; then
+               # Create cgroup
+               if ! mkdir -m 0700 "$MW_CGROUP"/$$; then
+                       echo "limit.sh: failed to create the cgroup." 1>&2
+                       exit 1
+               fi
+               echo $$ > "$MW_CGROUP"/$$/tasks
+               if [ -n "$MW_CGROUP_NOTIFY" ]; then
+                       echo "1" > "$MW_CGROUP"/$$/notify_on_release
+               fi
+               # Memory
+               echo $(($MW_MEM_LIMIT*1024)) > "$MW_CGROUP"/$$/memory.limit_in_bytes
+               # Memory+swap
+               echo $(($MW_MEM_LIMIT*1024)) > "$MW_CGROUP"/$$/memory.memsw.limit_in_bytes
+       else
+               ulimit -v "$MW_MEM_LIMIT"
+       fi
+fi
+if [ "$MW_FILE_SIZE_LIMIT" -gt 0 ]; then
+       ulimit -f "$MW_FILE_SIZE_LIMIT"
+fi
+if [ "$MW_WALL_CLOCK_LIMIT" -gt 0 -a -x "/usr/bin/timeout" ]; then
+       /usr/bin/timeout $MW_WALL_CLOCK_LIMIT /bin/bash -c "$1"
+       STATUS="$?"
+       if [ "$STATUS" == 124 ]; then
+               echo "limit.sh: timed out." 1>&2
+       fi
+else
+       eval "$1"
+       STATUS="$?"
+fi
+
+# Clean up cgroup
+cleanup() {
+       # First we have to move the current task into a "garbage" group, otherwise 
+       # the cgroup will not be empty, and attempting to remove it will fail with
+       # "Device or resource busy"
+       if [ -w "$MW_CGROUP"/tasks ]; then
+               GARBAGE="$MW_CGROUP"
+       else
+               GARBAGE="$MW_CGROUP"/garbage-"$USER"
+               if [ ! -e "$GARBAGE" ]; then
+                       mkdir -m 0700 "$GARBAGE"
+               fi
+       fi
+       echo $BASHPID > "$GARBAGE"/tasks
+
+       # Suppress errors in case the cgroup has disappeared due to a release script
+       rmdir "$MW_CGROUP"/$$ 2>/dev/null
+}
+
+updateTaskCount() {
+       # There are lots of ways to count lines in a file in shell script, but this
+       # is one of the few that doesn't create another process, which would
+       # increase the returned number of tasks.
+       readarray < "$MW_CGROUP"/$$/tasks
+       NUM_TASKS=${#MAPFILE[*]}
+}
+
+if [ -n "$MW_CGROUP" ]; then
+       updateTaskCount
+
+       if [ $NUM_TASKS -gt 1 ]; then
+               # Spawn a monitor process which will continue to poll for completion 
+               # of all processes in the cgroup after termination of the parent shell
+               (
+                       while [ $NUM_TASKS -gt 1 ]; do
+                               sleep 10
+                               updateTaskCount
+                       done
+                       cleanup
+               ) >&/dev/null < /dev/null &
+               disown -a
+       else
+               cleanup
+       fi
+fi
+exit "$STATUS"
+