很多童鞋在啟動mysql的時候,碰到過這個錯誤, 首先,澄清一點,出現這個錯誤的前提是:通過服務腳本來啟動mysql。通過mysqld_safe或mysqld啟動mysql實例並不會報這個錯誤。 那麼,出現這個錯誤的原因具體是什麼呢? 哈哈,對分析過程不care的童鞋可直接跳到文末的總結部分~ 總結 ...
很多童鞋在啟動mysql的時候,碰到過這個錯誤:"The server quit without updating PID file"。
首先,澄清一點,出現這個錯誤的前提是:通過服務腳本來啟動mysql。通過mysqld_safe或mysqld啟動mysql實例並不會報這個錯誤。
那麼,出現這個錯誤的原因具體是什麼呢?
哈哈,對分析過程不care的童鞋可直接跳到文末的總結部分~
下面,來分析下mysql的服務啟動腳本
腳本完整內容如下:
#!/bin/sh
# Copyright Abandoned 1996 TCX DataKonsult AB & Monty Program KB & Detron HB
# This file is public domain and comes with NO WARRANTY of any kind

# MySQL daemon start/stop script.

# Usually this is put in /etc/init.d (at least on machines SYSV R4 based
# systems) and linked to /etc/rc3.d/S99mysql and /etc/rc0.d/K01mysql.
# When this is done the mysql server will be started when the machine is
# started and shut down when the systems goes down.

# Comments to support chkconfig on RedHat Linux
# chkconfig: 2345 64 36
# description: A very fast and reliable SQL database engine.

# Comments to support LSB init script conventions
### BEGIN INIT INFO
# Provides: mysql
# Required-Start: $local_fs $network $remote_fs
# Should-Start: ypbind nscd ldap ntpd xntpd
# Required-Stop: $local_fs $network $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: start and stop MySQL
# Description: MySQL is a very fast and reliable SQL database engine.
### END INIT INFO

# If you install MySQL on some other places than /usr/local/mysql, then you
# have to do one of the following things for this script to work:
#
# - Run this script from within the MySQL installation directory
# - Create a /etc/my.cnf file with the following information:
#   [mysqld]
#   basedir=<path-to-mysql-installation-directory>
# - Add the above to any other configuration file (for example ~/.my.ini)
#   and copy my_print_defaults to /usr/bin
# - Add the path to the mysql-installation-directory to the basedir variable
#   below.
#
# If you want to affect other MySQL variables, you should make your changes
# in the /etc/my.cnf, ~/.my.cnf or other MySQL configuration files.

# If you change base dir, you must also change datadir. These may get
# overwritten by settings in the MySQL configuration files.

basedir=
datadir=

# Default value, in seconds, after which the script should timeout waiting
# for server start.
# Value here is overridden by value in my.cnf.
# 0 means don't wait at all
# Negative numbers mean to wait indefinitely
service_startup_timeout=900

# Lock directory for RedHat / SuSE.
lockdir='/var/lock/subsys'
lock_file_path="$lockdir/mysql"

# The following variables are only set for letting mysql.server find things.

# Set some defaults
mysqld_pid_file_path=
if test -z "$basedir"
then
  basedir=/usr/local/mysql
  bindir=/usr/local/mysql/bin
  if test -z "$datadir"
  then
    datadir=/usr/local/mysql/data
  fi
  sbindir=/usr/local/mysql/bin
  libexecdir=/usr/local/mysql/bin
else
  bindir="$basedir/bin"
  if test -z "$datadir"
  then
    datadir="$basedir/data"
  fi
  sbindir="$basedir/sbin"
  libexecdir="$basedir/libexec"
fi

# datadir_set is used to determine if datadir was set (and so should be
# *not* set inside of the --basedir= handler.)
datadir_set=

#
# Use LSB init script functions for printing messages, if possible
#
lsb_functions="/lib/lsb/init-functions"
if test -f $lsb_functions ; then
  . $lsb_functions
else
  log_success_msg()
  {
    echo " SUCCESS! $@"
  }
  log_failure_msg()
  {
    echo " ERROR! $@"
  }
fi

PATH="/sbin:/usr/sbin:/bin:/usr/bin:$basedir/bin"
export PATH

mode=$1    # start or stop

[ $# -ge 1 ] && shift

other_args="$*"   # uncommon, but needed when called from an RPM upgrade action
           # Expected: "--skip-networking --skip-grant-tables"
           # They are not checked here, intentionally, as it is the responsibility
           # of the "spec" file author to give correct arguments only.

# Detect how this shell's echo suppresses the trailing newline (-n or \c).
case `echo "testing\c"`,`echo -n testing` in
    *c*,-n*) echo_n=   echo_c=     ;;
    *c*,*)   echo_n=-n echo_c=     ;;
    *)       echo_n=   echo_c='\c' ;;
esac

# Scan the given option words and pick up --basedir, --datadir, --pid-file
# and --service-startup-timeout; every other option is ignored.
# Writes: basedir, bindir, sbindir, libexecdir, datadir, datadir_set,
#         mysqld_pid_file_path, service_startup_timeout
parse_server_arguments() {
  for arg do
    case "$arg" in
      --basedir=*)  basedir=`echo "$arg" | sed -e 's/^[^=]*=//'`
                    bindir="$basedir/bin"
                    if test -z "$datadir_set"; then
                      datadir="$basedir/data"
                    fi
                    sbindir="$basedir/sbin"
                    libexecdir="$basedir/libexec"
        ;;
      --datadir=*)  datadir=`echo "$arg" | sed -e 's/^[^=]*=//'`
                    datadir_set=1
        ;;
      --pid-file=*) mysqld_pid_file_path=`echo "$arg" | sed -e 's/^[^=]*=//'` ;;
      --service-startup-timeout=*) service_startup_timeout=`echo "$arg" | sed -e 's/^[^=]*=//'` ;;
    esac
  done
}

# Poll once per second until the PID file has been created / removed.
# Arguments: $1 - created | removed
#            $2 - PID of the process expected to touch the PID file
#            $3 - path to the PID file
# Returns:   0 when the awaited state is reached; 1 when the process is gone
#            or the loop counter reaches $service_startup_timeout
wait_for_pid () {
  verb="$1"           # created | removed
  pid="$2"            # process ID of the program operating on the pid-file
  pid_file_path="$3"  # path to the PID file.

  i=0
  avoid_race_condition="by checking again"

  while test $i -ne $service_startup_timeout ; do

    case "$verb" in
      'created')
        # wait for a PID-file to pop into existence.
        test -s "$pid_file_path" && i='' && break
        ;;
      'removed')
        # wait for this PID-file to disappear
        test ! -s "$pid_file_path" && i='' && break
        ;;
      *)
        echo "wait_for_pid () usage: wait_for_pid created|removed pid pid_file_path"
        exit 1
        ;;
    esac

    # if server isn't running, then pid-file will never be updated
    if test -n "$pid"; then
      if kill -0 "$pid" 2>/dev/null; then
        :  # the server still runs
      else
        # The server may have exited between the last pid-file check and now.
        if test -n "$avoid_race_condition"; then
          avoid_race_condition=""
          continue  # Check again.
        fi

        # there's nothing that will affect the file.
        log_failure_msg "The server quit without updating PID file ($pid_file_path)."
        return 1  # not waiting any more.
      fi
    fi

    echo $echo_n ".$echo_c"
    i=`expr $i + 1`
    sleep 1

  done

  if test -z "$i" ; then
    log_success_msg
    return 0
  else
    log_failure_msg
    return 1
  fi
}

# Get arguments from the my.cnf file,
# the only group, which is read from now on is [mysqld]
if test -x ./bin/my_print_defaults
then
  print_defaults="./bin/my_print_defaults"
elif test -x $bindir/my_print_defaults
then
  print_defaults="$bindir/my_print_defaults"
elif test -x $bindir/mysql_print_defaults
then
  print_defaults="$bindir/mysql_print_defaults"
else
  # Try to find basedir in /etc/my.cnf
  conf=/etc/my.cnf
  print_defaults=
  if test -r $conf
  then
    subpat='^[^=]*basedir[^=]*=\(.*\)$'
    dirs=`sed -e "/$subpat/!d" -e 's//\1/' $conf`
    for d in $dirs
    do
      d=`echo $d | sed -e 's/[ ]//g'`
      if test -x "$d/bin/my_print_defaults"
      then
        print_defaults="$d/bin/my_print_defaults"
        break
      fi
      if test -x "$d/bin/mysql_print_defaults"
      then
        print_defaults="$d/bin/mysql_print_defaults"
        break
      fi
    done
  fi

  # Hope it's in the PATH ... but I doubt it
  test -z "$print_defaults" && print_defaults="my_print_defaults"
fi

#
# Read defaults file from 'basedir'. If there is no defaults file there
# check if it's in the old (deprecated) place (datadir) and read it from there
#

extra_args=""
if test -r "$basedir/my.cnf"
then
  extra_args="-e $basedir/my.cnf"
else
  if test -r "$datadir/my.cnf"
  then
    extra_args="-e $datadir/my.cnf"
  fi
fi

parse_server_arguments `$print_defaults $extra_args mysqld server mysql_server mysql.server`

#
# Set pid file if not given
#
if test -z "$mysqld_pid_file_path"
then
  mysqld_pid_file_path=$datadir/`hostname`.pid
else
  case "$mysqld_pid_file_path" in
    /* ) ;;
    * )  mysqld_pid_file_path="$datadir/$mysqld_pid_file_path" ;;
  esac
fi

case "$mode" in
  'start')
    # Start daemon

    # Safeguard (relative paths, core dumps..)
    cd $basedir

    echo $echo_n "Starting MySQL"
    if test -x $bindir/mysqld_safe
    then
      # Give extra arguments to mysqld with the my.cnf file. This script
      # may be overwritten at next upgrade.
      $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" $other_args >/dev/null 2>&1 &
      wait_for_pid created "$!" "$mysqld_pid_file_path"; return_value=$?

      # Make lock for RedHat / SuSE
      if test -w "$lockdir"
      then
        touch "$lock_file_path"
      fi

      exit $return_value
    else
      log_failure_msg "Couldn't find MySQL server ($bindir/mysqld_safe)"
    fi
    ;;

  'stop')
    # Stop daemon. We use a signal here to avoid having to know the
    # root password.

    if test -s "$mysqld_pid_file_path"
    then
      mysqld_pid=`cat "$mysqld_pid_file_path"`

      if (kill -0 $mysqld_pid 2>/dev/null)
      then
        echo $echo_n "Shutting down MySQL"
        kill $mysqld_pid
        # mysqld should remove the pid file when it exits, so wait for it.
        wait_for_pid removed "$mysqld_pid" "$mysqld_pid_file_path"; return_value=$?
      else
        log_failure_msg "MySQL server process #$mysqld_pid is not running!"
        rm "$mysqld_pid_file_path"
      fi

      # Delete lock for RedHat / SuSE
      if test -f "$lock_file_path"
      then
        rm -f "$lock_file_path"
      fi
      exit $return_value
    else
      log_failure_msg "MySQL server PID file could not be found!"
    fi
    ;;

  'restart')
    # Stop the service and regardless of whether it was
    # running or not, start it again.
    if $0 stop  $other_args; then
      $0 start $other_args
    else
      log_failure_msg "Failed to stop running server, so refusing to try to start."
      exit 1
    fi
    ;;

  'reload'|'force-reload')
    if test -s "$mysqld_pid_file_path" ; then
      read mysqld_pid < "$mysqld_pid_file_path"
      kill -HUP $mysqld_pid && log_success_msg "Reloading service MySQL"
      touch "$mysqld_pid_file_path"
    else
      log_failure_msg "MySQL PID file could not be found!"
      exit 1
    fi
    ;;

  'status')
    # First, check to see if pid file exists
    if test -s "$mysqld_pid_file_path" ; then
      read mysqld_pid < "$mysqld_pid_file_path"
      if kill -0 $mysqld_pid 2>/dev/null ; then
        log_success_msg "MySQL running ($mysqld_pid)"
        exit 0
      else
        log_failure_msg "MySQL is not running, but PID file exists"
        exit 1
      fi
    else
      # Try to find appropriate mysqld process
      mysqld_pid=`pidof $libexecdir/mysqld`

      # test if multiple pids exist
      pid_count=`echo $mysqld_pid | wc -w`
      if test $pid_count -gt 1 ; then
        log_failure_msg "Multiple MySQL running but PID file could not be found ($mysqld_pid)"
        exit 5
      elif test -z $mysqld_pid ; then
        if test -f "$lock_file_path" ; then
          log_failure_msg "MySQL is not running, but lock file ($lock_file_path) exists"
          exit 2
        fi
        log_failure_msg "MySQL is not running"
        exit 3
      else
        log_failure_msg "MySQL is running but PID file could not be found"
        exit 4
      fi
    fi
    ;;

  *)
    # usage
    basename=`basename "$0"`
    echo "Usage: $basename  {start|stop|restart|reload|force-reload|status}  [ MySQL server options ]"
    exit 1
    ;;
esac

exit 0
首先,定義相關參數
# Installation and data directories; left empty here so that the defaults
# chosen later in the script (or values from my.cnf) apply.
basedir=
datadir=

# Default value, in seconds, after which the script should timeout waiting
# for server start.
# Value here is overridden by value in my.cnf.
# 0 means don't wait at all
# Negative numbers mean to wait indefinitely
service_startup_timeout=900

# Lock directory for RedHat / SuSE.
lockdir='/var/lock/subsys'
lock_file_path="$lockdir/mysql"
其中,
basedir 指的是二進位壓縮包解壓後所在的目錄,譬如/usr/local/mysql。
datadir 指的是數據目錄
service_startup_timeout=900 定義mysql服務啟動的時間限制,如果在900s中沒有啟動成功,則該腳本會退出。
lockdir='/var/lock/subsys'
關於/var/lock/subsys,網上的解釋如下,後續會用到。
總的來說,系統關閉的過程(發出關閉信號,調用服務自身的進程)中會檢查/var/lock/subsys下的文件,逐一關閉每個服務,如果某一運行的服務在/var/lock/subsys下沒有相應的選項。在系統關閉的時候,會像殺死普通進程一樣殺死這個服務。 通過察看/etc/rc.d/init.d下的腳本,可以發現每個服務自己操縱時都會去查看/var/lock/subsys下相應的服務。 很多程式需要判斷是否當前已經有一個實例在運行,這個目錄就是讓程式判斷是否有實例運行的標誌,比如說xinetd,如果存在這個文件,表示已經有xinetd在運行了,否則就是沒有,當然程式裡面還要有相應的判斷措施來真正確定是否有實例在運行。通常與該目錄配套的還有/var/run目錄,用來存放對應實例的PID,如果你寫腳本的話,會發現這2個目錄結合起來可以很方便的判斷出許多服務是否在運行,運行的相關信息等等。
判斷basedir和datadir
# Set some defaults
# When basedir is empty everything is placed under /usr/local/mysql;
# otherwise the directories are derived from $basedir. A datadir that is
# already set is kept in both cases.
mysqld_pid_file_path=
if test -z "$basedir"
then
  basedir=/usr/local/mysql
  bindir=/usr/local/mysql/bin
  if test -z "$datadir"
  then
    datadir=/usr/local/mysql/data
  fi
  sbindir=/usr/local/mysql/bin
  libexecdir=/usr/local/mysql/bin
else
  bindir="$basedir/bin"
  if test -z "$datadir"
  then
    datadir="$basedir/data"
  fi
  sbindir="$basedir/sbin"
  libexecdir="$basedir/libexec"
fi
其中,
mysqld_pid_file_path 指定pid文件的路徑
-z string 判斷字元串是否為空
如果basedir沒有顯式設置,則預設為/usr/local/mysql,這也是為什麼很多mysql安裝教程都推薦將mysql相關文件放到/usr/local/mysql下。
如果datadir沒有顯式設置,則預設為$basedir/data。
定義log_success_msg()和log_failure_msg()函數
首先,判斷/lib/lsb/init-functions文件是否存在,如果存在,則使定義在init-functions文件中的所有shell函數在當前腳本中生效。
如果沒有,則定義兩個函數,一個用於列印成功日誌,一個是列印錯誤日誌。
在RHCS 6.7中,該文件並不存在,已被/etc/init.d/functions所替代。
#
# Use LSB init script functions for printing messages, if possible
#
# If the LSB function library is present it is sourced; otherwise minimal
# log_success_msg / log_failure_msg fallbacks that just echo are defined.
lsb_functions="/lib/lsb/init-functions"
if test -f $lsb_functions ; then
  . $lsb_functions
else
  log_success_msg()
  {
    echo " SUCCESS! $@"
  }
  log_failure_msg()
  {
    echo " ERROR! $@"
  }
fi
傳遞參數
將第一個參數傳遞給mode,剩下的參數傳遞給other_args
PATH="/sbin:/usr/sbin:/bin:/usr/bin:$basedir/bin"
export PATH

mode=$1    # start or stop

# Drop the mode word so that only the extra server options remain.
[ $# -ge 1 ] && shift

other_args="$*"   # uncommon, but needed when called from an RPM upgrade action
           # Expected: "--skip-networking --skip-grant-tables"
           # They are not checked here, intentionally, as it is the responsibility
           # of the "spec" file author to give correct arguments only.

# Detect how this shell's echo suppresses the trailing newline:
# either via a leading -n option or via a trailing \c escape.
case `echo "testing\c"`,`echo -n testing` in
    *c*,-n*) echo_n=   echo_c=     ;;
    *c*,*)   echo_n=-n echo_c=     ;;
    *)       echo_n=   echo_c='\c' ;;
esac
解析配置文件中的參數
這個函數在腳本後面會涉及到。
主要涉及如下參數:--basedir,--datadir,--pid-file,--service-startup-timeout。
# Scan the given option words and pick up --basedir, --datadir, --pid-file
# and --service-startup-timeout; every other option is ignored.
# Arguments: option words of the form --name=value
# Writes:    basedir, bindir, sbindir, libexecdir, datadir, datadir_set,
#            mysqld_pid_file_path, service_startup_timeout
parse_server_arguments() {
  for arg do
    case "$arg" in
      --basedir=*)  basedir=`echo "$arg" | sed -e 's/^[^=]*=//'`
                    bindir="$basedir/bin"
                    # Only derive datadir from basedir when --datadir has
                    # not been seen explicitly.
                    if test -z "$datadir_set"; then
                      datadir="$basedir/data"
                    fi
                    sbindir="$basedir/sbin"
                    libexecdir="$basedir/libexec"
        ;;
      --datadir=*)  datadir=`echo "$arg" | sed -e 's/^[^=]*=//'`
                    datadir_set=1
        ;;
      --pid-file=*) mysqld_pid_file_path=`echo "$arg" | sed -e 's/^[^=]*=//'` ;;
      --service-startup-timeout=*) service_startup_timeout=`echo "$arg" | sed -e 's/^[^=]*=//'` ;;
    esac
  done
}
判斷my_print_defaults的位置
首先,它判斷當前路徑下的bin目錄中是否存在該可執行文件,如果不存在,則再判斷$bindir(通常指的是$basedir/bin)目錄下是否存在。
如果還是沒有,則會判斷/etc/my.cnf是否存在並且可讀,如果是,則判斷該配置文件中是否指定了basedir參數,
如果指定了,則取出該參數的值,並判斷該值對應的目錄中是否存在bin/my_print_defaults可執行文件
最後一步,如果在上述目錄中實在沒發現my_print_defaults文件,
索性就將print_defaults設置為"my_print_defaults",寄希望於該命令在當前的PATH環境中。
# Get arguments from the my.cnf file,
# the only group, which is read from now on is [mysqld]
#
# Search order for the helper binary: ./bin, then $bindir (both the
# my_print_defaults and mysql_print_defaults names), then any basedir
# listed in /etc/my.cnf, and finally whatever is found through PATH.
if test -x ./bin/my_print_defaults
then
  print_defaults="./bin/my_print_defaults"
elif test -x $bindir/my_print_defaults
then
  print_defaults="$bindir/my_print_defaults"
elif test -x $bindir/mysql_print_defaults
then
  print_defaults="$bindir/mysql_print_defaults"
else
  # Try to find basedir in /etc/my.cnf
  conf=/etc/my.cnf
  print_defaults=
  if test -r $conf
  then
    subpat='^[^=]*basedir[^=]*=\(.*\)$'
    dirs=`sed -e "/$subpat/!d" -e 's//\1/' $conf`
    for d in $dirs
    do
      d=`echo $d | sed -e 's/[ ]//g'`
      if test -x "$d/bin/my_print_defaults"
      then
        print_defaults="$d/bin/my_print_defaults"
        break
      fi
      if test -x "$d/bin/mysql_print_defaults"
      then
        print_defaults="$d/bin/mysql_print_defaults"
        break
      fi
    done
  fi

  # Hope it's in the PATH ... but I doubt it
  test -z "$print_defaults" && print_defaults="my_print_defaults"
fi
查找預設的配置文件
-r file 如果文件可讀,則為真
#
# Read defaults file from 'basedir'. If there is no defaults file there
# check if it's in the old (deprecated) place (datadir) and read it from there
#

# extra_args stays empty when neither file is readable.
extra_args=""
if test -r "$basedir/my.cnf"
then
  extra_args="-e $basedir/my.cnf"
else
  if test -r "$datadir/my.cnf"
  then
    extra_args="-e $datadir/my.cnf"
  fi
fi
解析配置文件中的參數
my_print_defaults的用法如下:
my_print_defaults --defaults-file=example.cnf client mysql
即讀取配置文件中,client和mysql部分的參數配置,
具體在本腳本中,是讀取mysqld,server,mysql_server,mysql.server四個部分的配置參數。
# Run $print_defaults for the mysqld, server, mysql_server and mysql.server
# groups and pass the option words it prints to parse_server_arguments.
parse_server_arguments `$print_defaults $extra_args mysqld server mysql_server mysql.server`
設置pid file的路徑
-z string 判斷字元串是否為空
如果--pid-file沒有在讀取到的配置文件中設置或者腳本剛開始的mysqld_pid_file_path參數沒有設置,
則pid file預設設置在datadir下,以主機名.pid命名。
如果該參數設置了,還需要進一步判斷
如果該參數以斜杠開頭(即絕對路徑,對應case中的/*分支),則可直接使用。
否則(相對路徑或只是pid的文件名),則在其前面加上$datadir/,即將其設在datadir下。
#
# Set pid file if not given
#
# Default is <datadir>/<hostname>.pid. A configured value is used as-is when
# it starts with a slash and is otherwise prefixed with $datadir/.
if test -z "$mysqld_pid_file_path"
then
  mysqld_pid_file_path=$datadir/`hostname`.pid
else
  case "$mysqld_pid_file_path" in
    /* ) ;;
    * )  mysqld_pid_file_path="$datadir/$mysqld_pid_file_path" ;;
  esac
fi
服務腳本start選項
首先,切換到$basedir中
其次,判斷$basedir/bin中的mysqld_safe是否是可執行文件,如果是,則啟動mysqld實例,如果不是,則報錯退出。
那麼,啟動流程又是如何實現的呢?
首先,執行$bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" $other_args >/dev/null 2>&1 &命令,啟動mysqld實例。
注意到沒有,mysqld_safe其實是在basedir中執行的,包括mysql初始化腳本mysql_install_db,也建議在basedir中執行,具體可參考:
分析MariaDB初始化腳本mysql_install_db
然後通過wait_for_pid函數進行判斷,具體可見下文對於wait_for_pid函數的分析
判斷完畢後,
查看$lockdir目錄是否可寫,可寫的話,則在該目錄下創建鎖文件$lock_file_path。
case "$mode" in 'start') # Start daemon # Safeguard (relative paths, core dumps..) cd $basedir echo $echo_n "Starting MySQL" if test -x $bindir/mysqld_safe then # Give extra arguments to mysqld with the my.cnf file. This script # may be overwritten at next upgrade. $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" $other_args >/dev/null 2>&1 & wait_for_pid created "$!" "$mysqld_pid_file_path"; return_value=$? # Make lock for RedHat / SuSE if test -w "$lockdir" then touch "$lock_file_path" fi exit $return_value else log_failure_msg "Couldn't find MySQL server ($bindir/mysqld_safe)" fi ;;
wait_for_pid函數
在利用mysqld_safe啟動mysql實例後,會調用該函數
wait_for_pid created "$!" "$mysqld_pid_file_path"; return_value=$?
其中$!在shell中用於獲取最後運行的後臺Process的PID,具體在本例中,是mysqld_safe進程的pid。
因為第一個參數是created,所以會執行test -s "$pid_file_path" && i='' && break命令。
-s file 如果文件的長度不為零,則為真
該命令的意思是如果pid文件存在且內容不為空,則將變數i設置為空,並退出while迴圈。
然後執行如下判斷,
if test -z "$i" ; then log_success_msg return 0 else log_failure_msg return 1 fi
如果$i為空,則列印成功日誌,並從函數返回0(調用方隨後通過exit $return_value以該返回值退出腳本),很顯然,在pid文件存在的情況下,會將變數i設置為空。
再來看看pid文件不存在的情況
首先,會判斷$pid是否不為空(即if test -n "$pid")
如果不為空,則代表在執行完mysqld_safe後,已經捕捉到了該進程的pid。
在這種情況下,進一步通過kill -0 "$pid"確認該進程是否存在。
kill -0就是不發送任何信號,但是系統會進行錯誤檢查,所以經常用來檢查一個進程是否存在,當進程不存在時, kill -0 pid會返回錯誤
如果該進程存在,則不執行任何操作,直接跳到如下操作
echo $echo_n ".$echo_c" i=`expr $i + 1` sleep 1
將變數i加1,並sleep 1s。
然後,繼續while迴圈,之所以這樣做,是考慮到mysqld_safe已經執行,但是mysqld實例還在啟動過程中,還沒創建好pid文件。
一直到$i達到$service_startup_timeout定義的時長。
如果在while迴圈的過程中,通過kill -0 "$pid"判斷到進程已經不存在了,
則會再判斷一次,如果這次判斷的結果依舊是pid file不存在,且進程不存在,則會執行
log_failure_msg "The server quit without updating PID file ($pid_file_path)."
這就是大名鼎鼎的“The server quit without updating PID file”的由來。
wait_for_pid () { verb="$1" # created | removed pid="$2" # process ID of the program operating on the pid-file pid_file_path="$3" # path to the PID file. i=0 avoid_race_condition="by checking again" while test $i -ne $service_startup_timeout ; do case "$verb" in 'created') # wait for a PID-file to pop into existence. test -s "$pid_file_path" && i='' && break ;; 'removed') # wait for this PID-file to disappear test ! -s "$pid_file_path" && i='' && break ;; *) echo "wait_for_pid () usage: wait_for_pid created|removed pid pid_file_path" exit 1 ;; esac # if server isn't running, then pid-file will never be updated if test -n "$pid"; then if kill -0 "$pid" 2>/dev/null; then : # the server still runs else # The server may have exited between the last pid-file check and now. if test -n "$avoid_race_condition"; then avoid_race_condition="" continue # Check again. fi # there's nothing that will affect the file. log_failure_msg "The server quit without updating PID file ($pid_file_path)." return 1 # not waiting any more. fi fi echo $echo_n "