astcanary.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 2007, Digium, Inc.
00005  *
00006  * Tilghman Lesher <tlesher AT digium DOT com>
00007  *
00008  * See http://www.asterisk.org for more information about
00009  * the Asterisk project. Please do not directly contact
00010  * any of the maintainers of this project for assistance;
00011  * the project provides a web site, mailing lists and IRC
00012  * channels for your use.
00013  *
00014  * This program is free software, distributed under the terms of
00015  * the GNU General Public License Version 2. See the LICENSE file
00016  * at the top of the source tree.
00017  */
00018 
00019 /*** MODULEINFO
00020    <support_level>core</support_level>
00021  ***/
00022 
00023 #include <sys/types.h>
00024 #include <sys/stat.h>
00025 #include <sys/time.h>
00026 #include <sys/resource.h>
00027 #include <utime.h>
00028 #include <fcntl.h>
00029 #include <unistd.h>
00030 #include <stdlib.h>
00031 #include <string.h>
00032 #include <stdio.h>
00033 
00034 /*!\brief
00035  * At one time, canaries were carried along with coal miners down
00036  * into a mine.  Their purpose was to alert the miners when they
00037  * had drilled into a pocket of methane gas or another noxious
00038  * substance.  The canary, being the most sensitive animal, would
00039  * immediately fall over.  Seeing this, the miners could take
00040  * action to escape the mine, seeing an imminent danger.
00041  *
00042  * This process serves a similar purpose, though with the realtime
00043  * priority being the reason.  When a thread starts running away
00044  * with the processor, it is typically difficult to tell what
00045  * thread caused the problem, as the machine acts as if it is
00046  * locked up (in fact, what has happened is that Asterisk runs at
00047  * a higher priority than even the login shell, so the runaway
00048  * thread hogs all available CPU time).
00049  *
00050  * If that happens, this canary process will cease to get any
00051  * process time, which we can monitor with a realtime thread in
00052  * Asterisk.  Should that happen, that monitoring thread may take
00053  * immediate action to slow down Asterisk to regular priority,
00054  * thus allowing an administrator to login to the system and
00055  * restart Asterisk or perhaps take another course of action
00056  * (such as retrieving a backtrace to let the developers know
00057  * what precisely went wrong).
00058  *
00059  * Note that according to POSIX.1, all threads inside a single
00060  * process must share the same priority, so when the monitoring
00061  * thread deprioritizes itself, it deprioritizes all threads at
00062  * the same time.  This is also why this canary must exist as a
00063  * completely separate process and not simply as a thread within
00064  * Asterisk itself.
00065  *
00066  * Quote:
00067  * "The nice value set with setpriority() shall be applied to the
00068  * process. If the process is multi-threaded, the nice value shall
00069  * affect all system scope threads in the process."
00070  *
00071  * Source:
00072  * http://www.opengroup.org/onlinepubs/000095399/functions/setpriority.html
00073  *
00074  * In answer to the question, what aren't system scope threads, the
00075  * answer is, in Asterisk, nothing.  Process scope threads are the
00076  * alternative, but they aren't supported in Linux.
00077  */
00078 
/*!\brief
 * Human-readable text stored in the monitor file whenever astcanary has to
 * (re)create it, so that an administrator who finds the file knows what it
 * is for and why it should be left alone.
 */
static const char explanation[] =
   /* Paragraph 1: what the file is and why it must stay in place */
   "This file is created when Asterisk is run with a realtime priority (-p).  It\n"
   "must continue to exist, and the astcanary process must be allowed to continue\n"
   "running, or else the Asterisk process will, within a short period of time,\n"
   "slow itself down to regular priority.\n"
   "\n"
   /* Paragraph 2: the technical background */
   "The technical explanation for this file is to provide an assurance to Asterisk\n"
   "that there are no threads that have gone into runaway mode, thus hogging the\n"
   "CPU, and making the Asterisk machine seem to be unresponsive.  When that\n"
   "happens, the astcanary process will be unable to update the timestamp on this\n"
   "file, and Asterisk will notice within 120 seconds and react.  Slowing the\n"
   "Asterisk process down to regular priority will permit an administrator to\n"
   "intervene, thus avoiding a need to reboot the entire machine.\n";
00091 
00092 int main(int argc, char *argv[])
00093 {
00094    int fd;
00095    pid_t parent;
00096 
00097    if (argc < 3) {
00098       fprintf(stderr, "Usage: %s <monitor-filename> <ppid>\n", argv[0]);
00099       exit(1);
00100    }
00101 
00102    /* Run at normal priority */
00103    setpriority(PRIO_PROCESS, 0, 0);
00104 
00105    /*!\note
00106     * See http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap03.html#tag_03_265
00107     * for a justification of this approach.  The PPID after the creator dies in Linux and
00108     * most other Unix-like systems will be 1, but this is not strictly the case.  The POSIX
00109     * specification allows it to be an implementation-defined system process.  However, it
00110     * most certainly will not be the original parent PID, which makes the following code
00111     * POSIX-compliant.
00112     */
00113    for (parent = atoi(argv[2]); parent == getppid() ;) {
00114       /* Update the modification times (checked from Asterisk) */
00115       if (utime(argv[1], NULL)) {
00116          /* Recreate the file if it doesn't exist */
00117          if ((fd = open(argv[1], O_RDWR | O_TRUNC | O_CREAT, 0777)) > -1) {
00118             if (write(fd, explanation, strlen(explanation)) < 0) {
00119                exit(1);
00120             }
00121             close(fd);
00122          } else {
00123             exit(1);
00124          }
00125          continue;
00126       }
00127 
00128       /* Run occasionally */
00129       sleep(5);
00130    }
00131 
00132    /* Exit when the parent dies */
00133    return 0;
00134 }
00135 

Generated on Thu Apr 16 06:27:14 2015 for Asterisk - The Open Source Telephony Project by  doxygen 1.5.6