SpawnManager.h

00001 /*
00002  *  Phusion Passenger - http://www.modrails.com/
00003  *  Copyright (C) 2008  Phusion
00004  *
00005  *  This program is free software; you can redistribute it and/or modify
00006  *  it under the terms of the GNU General Public License as published by
00007  *  the Free Software Foundation; version 2 of the License.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License along
00015  *  with this program; if not, write to the Free Software Foundation, Inc.,
00016  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00017  */
00018 #ifndef _PASSENGER_SPAWN_MANAGER_H_
00019 #define _PASSENGER_SPAWN_MANAGER_H_
00020 
00021 #include <string>
00022 #include <list>
00023 #include <boost/shared_ptr.hpp>
00024 #include <boost/thread/mutex.hpp>
00025 
00026 #include <sys/types.h>
00027 #include <sys/wait.h>
00028 #include <sys/stat.h>
00029 #include <arpa/inet.h>
00030 #include <cstdio>
00031 #include <cstdarg>
00032 #include <unistd.h>
00033 #include <errno.h>
00034 #include <pwd.h>
00035 #ifdef TESTING_SPAWN_MANAGER
00036         #include <signal.h>
00037 #endif
00038 
00039 #include "Application.h"
00040 #include "MessageChannel.h"
00041 #include "Exceptions.h"
00042 #include "Logging.h"
00043 
00044 namespace Passenger {
00045 
00046 using namespace std;
00047 using namespace boost;
00048 
00049 /**
00050  * @brief Spawning of Ruby on Rails application instances.
00051  *
00052  * This class is responsible for spawning new instances of Ruby on Rails applications.
00053  * Use the spawn() method to do so.
00054  *
00055  * @note This class is fully thread-safe.
00056  *
00057  * <h2>Implementation details</h2>
00058  * Internally, this class makes use of a spawn server, which is written in Ruby. This server
00059  * is automatically started when a SpawnManager instance is created, and automatically
00060  * shut down when that instance is destroyed. The existence of the spawn server is almost
00061  * totally transparent to users of this class. Spawn requests are sent to the server,
00062  * and details about the spawned process are returned.
00063  *
00064  * If the spawn server dies during the middle of an operation, it will be restarted.
00065  * See spawn() for full details.
00066  *
00067  * The communication channel with the server is anonymous, i.e. no other processes
00068  * can access the communication channel, so communication is guaranteed to be safe
00069  * (unless, of course, if the spawn server itself is a trojan).
00070  *
00071  * The server will try to keep the spawning time as small as possible, by keeping
00072  * corresponding Ruby on Rails frameworks and application code in memory. So the second
00073  * time an instance of the same application is spawned, the spawn time is significantly
00074  * lower than the first time. Nevertheless, spawning is a relatively expensive operation
00075  * (compared to the processing of a typical HTTP request/response), and so should be
00076  * avoided whenever possible.
00077  *
00078  * See the documentation of the spawn server for full implementation details.
00079  *
00080  * @ingroup Support
00081  */
00082 class SpawnManager {
00083 private:
00084         static const int SPAWN_SERVER_INPUT_FD = 3;
00085 
00086         string spawnServerCommand;
00087         string logFile;
00088         string environment;
00089         string rubyCommand;
00090         string user;
00091         
00092         mutex lock;
00093         
00094         MessageChannel channel;
00095         pid_t pid;
00096         bool serverNeedsRestart;
00097 
00098         /**
00099          * Restarts the spawn server.
00100          *
00101          * @throws SystemException An error occured while trying to setup the spawn server.
00102          * @throws IOException The specified log file could not be opened.
00103          */
00104         void restartServer() {
00105                 if (pid != 0) {
00106                         channel.close();
00107                         
00108                         // Wait at most 5 seconds for the spawn server to exit.
00109                         // If that doesn't work, kill it, then wait at most 5 seconds
00110                         // for it to exit.
00111                         time_t begin = time(NULL);
00112                         bool done = false;
00113                         while (!done && time(NULL) - begin < 5) {
00114                                 if (waitpid(pid, NULL, WNOHANG) > 0) {
00115                                         done = true;
00116                                 } else {
00117                                         usleep(100000);
00118                                 }
00119                         }
00120                         if (!done) {
00121                                 P_TRACE(2, "Spawn server did not exit in time, killing it...");
00122                                 kill(pid, SIGTERM);
00123                                 begin = time(NULL);
00124                                 while (time(NULL) - begin < 5) {
00125                                         if (waitpid(pid, NULL, WNOHANG) > 0) {
00126                                                 break;
00127                                         } else {
00128                                                 usleep(100000);
00129                                         }
00130                                 }
00131                                 P_TRACE(2, "Spawn server has exited.");
00132                         }
00133                         pid = 0;
00134                 }
00135                 
00136                 int fds[2];
00137                 FILE *logFileHandle = NULL;
00138                 
00139                 serverNeedsRestart = true;
00140                 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == -1) {
00141                         throw SystemException("Cannot create a Unix socket", errno);
00142                 }
00143                 if (!logFile.empty()) {
00144                         logFileHandle = fopen(logFile.c_str(), "a");
00145                         if (logFileHandle == NULL) {
00146                                 string message("Cannot open log file '");
00147                                 message.append(logFile);
00148                                 message.append("' for writing.");
00149                                 throw IOException(message);
00150                         }
00151                 }
00152 
00153                 pid = fork();
00154                 if (pid == 0) {
00155                         if (!logFile.empty()) {
00156                                 dup2(fileno(logFileHandle), STDERR_FILENO);
00157                                 fclose(logFileHandle);
00158                         }
00159                         dup2(STDERR_FILENO, STDOUT_FILENO);
00160                         if (!environment.empty()) {
00161                                 setenv("RAILS_ENV", environment.c_str(), true);
00162                         }
00163                         dup2(fds[1], SPAWN_SERVER_INPUT_FD);
00164                         
00165                         // Close all unnecessary file descriptors
00166                         for (long i = sysconf(_SC_OPEN_MAX) - 1; i >= 0; i--) {
00167                                 if (i > SPAWN_SERVER_INPUT_FD) {
00168                                         close(i);
00169                                 }
00170                         }
00171                         
00172                         if (!user.empty()) {
00173                                 struct passwd *entry = getpwnam(user.c_str());
00174                                 if (entry != NULL) {
00175                                         if (setgid(entry->pw_gid) != 0) {
00176                                                 int e = errno;
00177                                                 fprintf(stderr, "*** Passenger: cannot run spawn "
00178                                                         "manager as group %d: %s (%d)\n",
00179                                                         entry->pw_gid,
00180                                                         strerror(e),
00181                                                         e);
00182                                         }
00183                                         if (setuid(entry->pw_uid) != 0) {
00184                                                 int e = errno;
00185                                                 fprintf(stderr, "*** Passenger: cannot run spawn "
00186                                                         "manager as user %s (%d): %s (%d)\n",
00187                                                         user.c_str(), entry->pw_uid,
00188                                                         strerror(e),
00189                                                         e);
00190                                         }
00191                                 } else {
00192                                         fprintf(stderr, "*** Passenger: cannot run spawn manager "
00193                                                 "as nonexistant user '%s'.\n",
00194                                                 user.c_str());
00195                                 }
00196                                 fflush(stderr);
00197                         }
00198                         
00199                         execlp(rubyCommand.c_str(),
00200                                 rubyCommand.c_str(),
00201                                 spawnServerCommand.c_str(),
00202                                 // The spawn server changes the process names of the subservers
00203                                 // that it starts, for better usability. However, the process name length
00204                                 // (as shown by ps) is limited. Here, we try to expand that limit by
00205                                 // deliberately passing a useless whitespace string to the spawn server.
00206                                 // This argument is ignored by the spawn server. This works on some
00207                                 // systems, such as Ubuntu Linux.
00208                                 "                                                             ",
00209                                 NULL);
00210                         int e = errno;
00211                         fprintf(stderr, "*** Passenger ERROR: Could not start the spawn server: %s: %s\n",
00212                                 rubyCommand.c_str(), strerror(e));
00213                         fflush(stderr);
00214                         _exit(1);
00215                 } else if (pid == -1) {
00216                         int e = errno;
00217                         close(fds[0]);
00218                         close(fds[1]);
00219                         if (logFileHandle != NULL) {
00220                                 fclose(logFileHandle);
00221                         }
00222                         pid = 0;
00223                         throw SystemException("Unable to fork a process", e);
00224                 } else {
00225                         close(fds[1]);
00226                         if (!logFile.empty()) {
00227                                 fclose(logFileHandle);
00228                         }
00229                         channel = MessageChannel(fds[0]);
00230                         serverNeedsRestart = false;
00231                         
00232                         #ifdef TESTING_SPAWN_MANAGER
00233                                 if (nextRestartShouldFail) {
00234                                         kill(pid, SIGTERM);
00235                                         usleep(500000);
00236                                 }
00237                         #endif
00238                 }
00239         }
00240         
00241         /**
00242          * Send the spawn command to the spawn server.
00243          *
00244          * @param appRoot The application root of the application to spawn.
00245          * @param lowerPrivilege Whether to lower the application's privileges.
00246          * @param lowestUser The user to fallback to if lowering privilege fails.
00247          * @return An Application smart pointer, representing the spawned application.
00248          * @throws SpawnException Something went wrong.
00249          */
00250         ApplicationPtr sendSpawnCommand(const string &appRoot, bool lowerPrivilege, const string &lowestUser) {
00251                 vector<string> args;
00252                 int ownerPipe;
00253                 
00254                 try {
00255                         channel.write("spawn_application",
00256                                 appRoot.c_str(),
00257                                 (lowerPrivilege) ? "true" : "false",
00258                                 lowestUser.c_str(),
00259                                 NULL);
00260                 } catch (const SystemException &e) {
00261                         throw SpawnException(string("Could not write 'spawn_application' "
00262                                 "command to the spawn server: ") + e.sys());
00263                 }
00264                 
00265                 try {
00266                         // Read status.
00267                         if (!channel.read(args)) {
00268                                 throw SpawnException("The spawn server has exited unexpectedly.");
00269                         }
00270                         if (args.size() != 1) {
00271                                 throw SpawnException("The spawn server sent an invalid message.");
00272                         }
00273                         if (args[0] == "error_page") {
00274                                 string errorPage;
00275                                 
00276                                 if (!channel.readScalar(errorPage)) {
00277                                         throw SpawnException("The spawn server has exited unexpectedly.");
00278                                 }
00279                                 throw SpawnException("An error occured while spawning the application.",
00280                                         errorPage);
00281                         } else if (args[0] != "ok") {
00282                                 throw SpawnException("The spawn server sent an invalid message.");
00283                         }
00284                         
00285                         // Read application info.
00286                         if (!channel.read(args)) {
00287                                 throw SpawnException("The spawn server has exited unexpectedly.");
00288                         }
00289                 } catch (const SystemException &e) {
00290                         throw SpawnException(string("Could not read from the spawn server: ") + e.sys());
00291                 }
00292                 
00293                 try {
00294                         ownerPipe = channel.readFileDescriptor();
00295                 } catch (const SystemException &e) {
00296                         throw SpawnException(string("Could not receive the spawned "
00297                                 "application's owner pipe from the spawn server: ") +
00298                                 e.sys());
00299                 } catch (const IOException &e) {
00300                         throw SpawnException(string("Could not receive the spawned "
00301                                 "application's owner pipe from the spawn server: ") +
00302                                 e.what());
00303                 }
00304                 
00305                 if (args.size() != 3) {
00306                         close(ownerPipe);
00307                         throw SpawnException("The spawn server sent an invalid message.");
00308                 }
00309                 
00310                 pid_t pid = atoi(args[0]);
00311                 bool usingAbstractNamespace = args[2] == "true";
00312                 
00313                 if (!usingAbstractNamespace) {
00314                         chmod(args[1].c_str(), S_IRUSR | S_IWUSR);
00315                         chown(args[1].c_str(), getuid(), getgid());
00316                 }
00317                 return ApplicationPtr(new Application(appRoot, pid, args[1],
00318                         usingAbstractNamespace, ownerPipe));
00319         }
00320         
00321         ApplicationPtr
00322         handleSpawnException(const SpawnException &e, const string &appRoot,
00323                              bool lowerPrivilege, const string &lowestUser) {
00324                 bool restarted;
00325                 try {
00326                         P_DEBUG("Spawn server died. Attempting to restart it...");
00327                         restartServer();
00328                         P_DEBUG("Restart seems to be successful.");
00329                         restarted = true;
00330                 } catch (const IOException &e) {
00331                         P_DEBUG("Restart failed: " << e.what());
00332                         restarted = false;
00333                 } catch (const SystemException &e) {
00334                         P_DEBUG("Restart failed: " << e.what());
00335                         restarted = false;
00336                 }
00337                 if (restarted) {
00338                         return sendSpawnCommand(appRoot, lowerPrivilege, lowestUser);
00339                 } else {
00340                         throw SpawnException("The spawn server died unexpectedly, and restarting it failed.");
00341                 }
00342         }
00343         
00344         /**
00345          * Send the reload command to the spawn server.
00346          *
00347          * @param appRoot The application root to reload.
00348          * @throws SystemException Something went wrong.
00349          */
00350         void sendReloadCommand(const string &appRoot) {
00351                 try {
00352                         channel.write("reload", appRoot.c_str(), NULL);
00353                 } catch (const SystemException &e) {
00354                         throw SystemException("Could not write 'reload' command "
00355                                 "to the spawn server", e.code());
00356                 }
00357         }
00358         
00359         void handleReloadException(const SystemException &e, const string &appRoot) {
00360                 bool restarted;
00361                 try {
00362                         P_DEBUG("Spawn server died. Attempting to restart it...");
00363                         restartServer();
00364                         P_DEBUG("Restart seems to be successful.");
00365                         restarted = true;
00366                 } catch (const IOException &e) {
00367                         P_DEBUG("Restart failed: " << e.what());
00368                         restarted = false;
00369                 } catch (const SystemException &e) {
00370                         P_DEBUG("Restart failed: " << e.what());
00371                         restarted = false;
00372                 }
00373                 if (restarted) {
00374                         return sendReloadCommand(appRoot);
00375                 } else {
00376                         throw SpawnException("The spawn server died unexpectedly, and restarting it failed.");
00377                 }
00378         }
00379         
00380         IOException prependMessageToException(const IOException &e, const string &message) {
00381                 return IOException(message + ": " + e.what());
00382         }
00383         
00384         SystemException prependMessageToException(const SystemException &e, const string &message) {
00385                 return SystemException(message + ": " + e.brief(), e.code());
00386         }
00387 
00388 public:
00389         #ifdef TESTING_SPAWN_MANAGER
00390                 bool nextRestartShouldFail;
00391         #endif
00392 
00393         /**
00394          * Construct a new SpawnManager.
00395          *
00396          * @param spawnServerCommand The filename of the spawn server to use.
00397          * @param logFile Specify a log file that the spawn server should use.
00398          *            Messages on its standard output and standard error channels
00399          *            will be written to this log file. If an empty string is
00400          *            specified, no log file will be used, and the spawn server
00401          *            will use the same standard output/error channels as the
00402          *            current process.
00403          * @param environment The RAILS_ENV environment that all RoR applications
00404          *            should use. If an empty string is specified, the current value
00405          *            of the RAILS_ENV environment variable will be used.
00406          * @param rubyCommand The Ruby interpreter's command.
00407          * @param user The user that the spawn manager should run as. This
00408          *             parameter only has effect if the current process is
00409          *             running as root. If the empty string is given, or if
00410          *             the <tt>user</tt> is not a valid username, then
00411          *             the spawn manager will be run as the current user.
00412          * @throws SystemException An error occured while trying to setup the spawn server.
00413          * @throws IOException The specified log file could not be opened.
00414          */
00415         SpawnManager(const string &spawnServerCommand,
00416                      const string &logFile = "",
00417                      const string &environment = "production",
00418                      const string &rubyCommand = "ruby",
00419                      const string &user = "") {
00420                 this->spawnServerCommand = spawnServerCommand;
00421                 this->logFile = logFile;
00422                 this->environment = environment;
00423                 this->rubyCommand = rubyCommand;
00424                 this->user = user;
00425                 pid = 0;
00426                 #ifdef TESTING_SPAWN_MANAGER
00427                         nextRestartShouldFail = false;
00428                 #endif
00429                 try {
00430                         restartServer();
00431                 } catch (const IOException &e) {
00432                         throw prependMessageToException(e, "Could not start the spawn server");
00433                 } catch (const SystemException &e) {
00434                         throw prependMessageToException(e, "Could not start the spawn server");
00435                 }
00436         }
00437         
00438         ~SpawnManager() throw() {
00439                 if (pid != 0) {
00440                         channel.close();
00441                         waitpid(pid, NULL, 0);
00442                 }
00443         }
00444         
00445         /**
00446          * Spawn a new instance of a Ruby on Rails application.
00447          *
00448          * If the spawn server died during the spawning process, then the server
00449          * will be automatically restarted, and another spawn attempt will be made.
00450          * If restarting the server fails, or if the second spawn attempt fails,
00451          * then an exception will be thrown.
00452          *
00453          * If <tt>lowerPrivilege</tt> is true, then it will be attempt to
00454          * switch the spawned application instance to the user who owns the
00455          * application's <tt>config/environment.rb</tt>, and to the default
00456          * group of that user.
00457          *
00458          * If that user doesn't exist on the system, or if that user is root,
00459          * then it will be attempted to switch to the username given by
00460          * <tt>lowestUser</tt> (and to the default group of that user).
00461          * If <tt>lowestUser</tt> doesn't exist either, or if switching user failed
00462          * (because the spawn server process does not have the privilege to do so),
00463          * then the application will be spawned anyway, without reporting an error.
00464          *
00465          * It goes without saying that lowering privilege is only possible if
00466          * the spawn server is running as root (and thus, by induction, that
00467          * Passenger and Apache's control process are also running as root).
00468          * Note that if Apache is listening on port 80, then its control process must
00469          * be running as root. See "doc/Security of user switching.txt" for
00470          * a detailed explanation.
00471          *
00472          * @param appRoot The application root of a RoR application, i.e. the folder that
00473          *             contains 'app/', 'public/', 'config/', etc. This must be a valid directory,
00474          *             but the path does not have to be absolute.
00475          * @param lowerPrivilege Whether to lower the application's privileges.
00476          * @param lowestUser The user to fallback to if lowering privilege fails.
00477          * @return A smart pointer to an Application object, which represents the application
00478          *         instance that has been spawned. Use this object to communicate with the
00479          *         spawned application.
00480          * @throws SpawnException Something went wrong.
00481          */
00482         ApplicationPtr spawn(const string &appRoot, bool lowerPrivilege = true, const string &lowestUser = "nobody") {
00483                 mutex::scoped_lock l(lock);
00484                 try {
00485                         return sendSpawnCommand(appRoot, lowerPrivilege, lowestUser);
00486                 } catch (const SpawnException &e) {
00487                         if (e.hasErrorPage()) {
00488                                 throw;
00489                         } else {
00490                                 return handleSpawnException(e, appRoot, lowerPrivilege, lowestUser);
00491                         }
00492                 }
00493         }
00494         
00495         /**
00496          * Remove the cached application instances at the given application root.
00497          *
00498          * Application code might be cached in memory. But once it a while, it will
00499          * be necessary to reload the code for an application, such as after
00500          * deploying a new version of the application. This method makes sure that
00501          * any cached application code is removed, so that the next time an
00502          * application instance is spawned, the application code will be freshly
00503          * loaded into memory.
00504          *
00505          * @throws SystemException Unable to communicate with the spawn server,
00506          *         even after a restart.
00507          * @throws SpawnException The spawn server died unexpectedly, and a
00508          *         restart was attempted, but it failed.
00509          */
00510         void reload(const string &appRoot) {
00511                 try {
00512                         return sendReloadCommand(appRoot);
00513                 } catch (const SystemException &e) {
00514                         return handleReloadException(e, appRoot);
00515                 }
00516         }
00517         
00518         /**
00519          * Get the Process ID of the spawn server. This method is used in the unit tests
00520          * and should not be used directly.
00521          */
00522         pid_t getServerPid() const {
00523                 return pid;
00524         }
00525 };
00526 
00527 /** Convenient alias for SpawnManager smart pointer. */
00528 typedef shared_ptr<SpawnManager> SpawnManagerPtr;
00529 
00530 } // namespace Passenger
00531 
00532 #endif /* _PASSENGER_SPAWN_MANAGER_H_ */

Generated on Wed May 7 20:28:18 2008 for Passenger by  doxygen 1.5.3