diff --git a/script/gs_dropnode b/script/gs_dropnode index 291bcd73739d73099a559685152af0a19c6bd84a..9f4635fe96ba8862a12462acdee179e5a3e4bbc5 100644 --- a/script/gs_dropnode +++ b/script/gs_dropnode @@ -137,6 +137,9 @@ General options: GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-G") if len(self.hostIpListForDel) == 0: GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-h") + # check if upgrade action is exist + if DefaultValue.isUnderUpgrade(self.user): + GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"]) try: pw_user = pwd.getpwnam(self.user) diff --git a/script/gs_expansion b/script/gs_expansion index 34bc040af230a41eade2e670393e4128a8e1ac75..f695a4e657360d0f260c99571b62c1620bb4e56c 100644 --- a/script/gs_expansion +++ b/script/gs_expansion @@ -21,6 +21,9 @@ import os import sys +import subprocess + +import socket package_path = os.path.dirname(os.path.realpath(__file__)) ld_path = package_path + "/gspylib/clib" if 'LD_LIBRARY_PATH' not in os.environ: @@ -61,6 +64,8 @@ class Expansion(ParallelBaseOM): self.newHostList = [] self.clusterInfoDict = {} self.backIpNameMap = {} + self.newHostCasRoleMap = {} + self.hostAzNameMap = {} self.packagepath = os.path.realpath( os.path.join(os.path.realpath(__file__), "../../")) @@ -89,7 +94,7 @@ General options: -V, --version Show version information. 
""" print(self.usage.__doc__) - + def parseCommandLine(self): """ parse parameter from command line @@ -123,50 +128,34 @@ General options: if (ParaDict.__contains__("nodename")): self.newHostList = ParaDict.get("nodename") - def checkParameters(self): """ function: Check parameter from command line input: NA output: NA """ - + # check user | group | xmlfile | node if len(self.user) == 0: - GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-U") + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-U") if len(self.group) == 0: - GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-G") + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-G") if len(self.xmlFile) == 0: - GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-X") + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-X") if len(self.newHostList) == 0: - GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-h") - - clusterInfo = ExpansipnClusterInfo() + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-h") + # check if upgrade action is exist + if DefaultValue.isUnderUpgrade(self.user): + GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"]) + + def _getClusterInfoDict(self): + clusterInfo = ExpansionClusterInfo() self.clusterInfo = clusterInfo hostNameIpDict = clusterInfo.initFromXml(self.xmlFile) clusterDict = clusterInfo.getClusterDirectorys() - backIpList = clusterInfo.getClusterBackIps() - nodeNameList = clusterInfo.getClusterNodeNames() + self.nodeNameList = clusterInfo.getClusterNodeNames() - # only support single az now. 
- azNames = clusterInfo.getazNames() - self.azName = "AZ1" - if len(azNames) > 0: - self.azName = azNames[0] - - self.localIp = backIpList[0] - self.nodeNameList = nodeNameList - self.backIpNameMap = {} - for backip in backIpList: - self.backIpNameMap[backip] = clusterInfo.getNodeNameByBackIp(backip) - - # check parameter node must in xml config file - for nodeid in self.newHostList: - if nodeid not in backIpList: - GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35702"] % \ - nodeid) - - # get corepath and toolpath from xml file + # get corepath and toolpath from xml file corePath = clusterInfo.readClustercorePath(self.xmlFile) toolPath = clusterInfo.getToolPath(self.xmlFile) # parse xml file and cache node info @@ -175,20 +164,16 @@ General options: clusterInfoDict["logPath"] = clusterDict["logPath"][0] clusterInfoDict["corePath"] = corePath clusterInfoDict["toolPath"] = toolPath - for nodeName in nodeNameList: + for nodeName in self.nodeNameList: hostInfo = hostNameIpDict[nodeName] ipList = hostInfo[0] portList = hostInfo[1] - backIp = "" - sshIp = "" - if len(ipList) == 1: - backIp = sshIp = ipList[0] - elif len(ipList) == 2: - backIp = ipList[0] - sshIp = ipList[1] + backIp = ipList[0] + sshIp = ipList[1] port = portList[0] cluster = clusterDict[nodeName] dataNode = cluster[2] + dbNode = clusterInfo.getDbNodeByName(nodeName) clusterInfoDict[nodeName] = { "backIp": backIp, "sshIp": sshIp, @@ -197,17 +182,17 @@ General options: "localservice": int(port) + 4, "heartBeatPort": int(port) + 3, "dataNode": dataNode, - "instanceType": -1 + "instanceType": -1, + "azPriority": dbNode.azPriority } - + nodeIdList = clusterInfo.getClusterNodeIds() for id in nodeIdList: insType = clusterInfo.getdataNodeInstanceType(id) hostName = clusterInfo.getHostNameByNodeId(id) clusterInfoDict[hostName]["instanceType"] = insType self.clusterInfoDict = clusterInfoDict - - + def initLogs(self): """ init log file @@ -223,12 +208,108 @@ General options: self.initLogger("gs_expansion") 
self.logger.ignoreErr = True + + def getExpansionInfo(self): + self._getClusterInfoDict() + self._getBackIpNameMap() + self._getHostAzNameMap() + self._getNewHostCasRoleMap() + + def checkXmlIncludeNewHost(self): + """ + check parameter node must in xml config file + """ + backIpList = self.clusterInfo.getClusterBackIps() + for nodeIp in self.newHostList: + if nodeIp not in backIpList: + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35702"] % \ + nodeIp) -class ExpansipnClusterInfo(dbClusterInfo): + def _getBackIpNameMap(self): + backIpList = self.clusterInfo.getClusterBackIps() + for backip in backIpList: + self.backIpNameMap[backip] = \ + self.clusterInfo.getNodeNameByBackIp(backip) + + def checkExecutingUser(self): + """ + check whether current user executing this command is root + """ + if os.getuid() != 0: + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50104"]) + + def checkExecutingHost(self): + """ + check whether current host is primary host + """ + currentHost = socket.gethostname() + primaryHost = "" + for nodeName in self.nodeNameList: + if self.clusterInfoDict[nodeName]["instanceType"] \ + == 0: + primaryHost = nodeName + break + if currentHost != primaryHost: + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50110"] % \ + (currentHost + ", which is not primary.")) + + def checkTrust(self, hostList = None): + """ + check trust between primary/current host and every host in hostList + """ + if hostList == None: + hostList = self.nodeNameList + rootSSHExceptionHosts = [] + individualSSHExceptionHosts = [] + sshTool = SshTool(hostList, timeout = 0) + retmap, output = sshTool.getSshStatusOutput("pwd") + for host in hostList: + # check root's trust + if retmap[host] != DefaultValue.SUCCESS: + rootSSHExceptionHosts.append(host) + try: + sshTool.clenSshResultFiles() + except Exception as e: + self.logger.debug(str(e)) + # check individual user's trust + checkUserTrustCmd = "su - %s -c 'ssh %s \"pwd\"'" % (self.user, host) + (status, output) = 
subprocess.getstatusoutput(checkUserTrustCmd) + if status != 0: + individualSSHExceptionHosts.append(host) + # output ssh exception info if ssh connect failed + if rootSSHExceptionHosts or individualSSHExceptionHosts: + sshExceptionInfo = "" + if rootSSHExceptionHosts: + sshExceptionInfo += "\n" + sshExceptionInfo += ", ".join(rootSSHExceptionHosts) + sshExceptionInfo += " by root" + if individualSSHExceptionHosts: + sshExceptionInfo += "\n" + sshExceptionInfo += ", ".join(individualSSHExceptionHosts) + sshExceptionInfo += " by individual user" + GaussLog.exitWithError(ErrorCode.GAUSS_511["GAUSS_51100"] % + sshExceptionInfo) + + def _getHostAzNameMap(self): + """ + get azName of all hosts + """ + for dbnode in self.clusterInfo.dbNodes: + self.hostAzNameMap[dbnode.backIps[0]] = dbnode.azName + + def _getNewHostCasRoleMap(self): + """ + get cascadeRole of newHosts + """ + for dbnode in self.clusterInfo.dbNodes: + if dbnode.backIps[0] in self.newHostList: + self.newHostCasRoleMap[dbnode.backIps[0]] = dbnode.cascadeRole + +class ExpansionClusterInfo(dbClusterInfo): def __init__(self): dbClusterInfo.__init__(self) - + def getToolPath(self, xmlFile): """ function : Read tool path from default xml file @@ -246,13 +327,18 @@ class ExpansipnClusterInfo(dbClusterInfo): checkPathVaild(toolPath) return toolPath + if __name__ == "__main__": """ """ expansion = Expansion() + expansion.checkExecutingUser() expansion.parseCommandLine() expansion.checkParameters() expansion.initLogs() + expansion.getExpansionInfo() + expansion.checkXmlIncludeNewHost() + expansion.checkExecutingHost() + expansion.checkTrust() expImpl = ExpansionImpl(expansion) - expImpl.run() - + expImpl.run() \ No newline at end of file diff --git a/script/gs_preinstall b/script/gs_preinstall index 2bcf81ed1e90119f89f358d00d59794ea918e916..5cfdd2bfeab319b83c90303f0b74118812be7b0c 100644 --- a/script/gs_preinstall +++ b/script/gs_preinstall @@ -258,19 +258,6 @@ General options: "be a directory." 
% self.mpprcFile) - try: - # check the user if exist - DefaultValue.getUserId(self.user) - except Exception as e: - mpprcFileTopPath = os.path.dirname(self.mpprcFile) - # the mpprc file can not be specified in the /home/user directory - userpath = "/home/%s/" % self.user - if (mpprcFilePath.startswith(userpath)): - GaussLog.exitWithError( - ErrorCode.GAUSS_500["GAUSS_50004"] % '-sep-env-file' + \ - " Environment variable separation file can not be " - "created under %s." % mpprcFileTopPath) - DefaultValue.checkMpprcFileChange(self.mpprcFile, "", self.mpprcFile) (checkstatus, checkoutput) = DefaultValue.checkEnvFile(self.mpprcFile) if (not checkstatus): diff --git a/script/gs_upgradectl b/script/gs_upgradectl index f4cddf5ac97f2ee7385b3adf54f591cf0fa998b7..7c4edd8d02e2386219dd8d51eb06e771473d3840 100644 --- a/script/gs_upgradectl +++ b/script/gs_upgradectl @@ -96,9 +96,7 @@ Usage: gs_upgradectl -t chose-strategy [-l LOGFILE] gs_upgradectl -t commit-upgrade -X XMLFILE [-l LOGFILE] - gs_upgradectl -t auto-upgrade -X XMLFILE [-l LOGFILE] - gs_upgradectl -t auto-upgrade -X XMLFILE --grey [-l LOGFILE] - {-h HOSTNAME | -g NODENUMBER | --continue} + gs_upgradectl -t auto-upgrade -X XMLFILE [-l LOGFILE] [--grey] gs_upgradectl -t auto-rollback -X XMLFILE [-l LOGFILE] [--force] General options: @@ -112,13 +110,8 @@ General options: later version cluster. --force Force to rollback when cluster status is not normal + --grey Use grey-binary-upgrade -Options for grey upgrade - -h Under grey upgrade, specified nodes name in - ssh mode. - -g Under grey upgrade, upgrade node numbers. - --continue Under grey upgrade, continue to upgrade - remain nodes. 
""" print(self.usage.__doc__) @@ -145,12 +138,6 @@ Options for grey upgrade if "grey" in ParaDict.keys(): self.is_grey_upgrade = True self.is_inplace_upgrade = False - if "nodename" in ParaDict.keys(): - self.nodeNames = ParaDict.get("nodename") - if "nodesNum" in ParaDict.keys(): - self.nodesNum = ParaDict.get("nodesNum") - if "continue" in ParaDict.keys(): - self.upgrade_remain = True if "force" in ParaDict.keys(): self.forceRollback = True @@ -203,14 +190,6 @@ Options for grey upgrade if not os.path.exists(self.xmlFile): raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % self.xmlFile) - - # check parameter base on different action - # check the param which input - if self.action == Const.ACTION_AUTO_UPGRADE: - if self.is_grey_upgrade: - self.checkCommandConflicts(inplace=False) - else: - self.checkCommandConflicts() # check mpprc file path # get mpprcFile by MPPDB_ENV_SEPARATE_PATH. Even if the return value # is "" or None, no need to pay attention @@ -285,30 +264,6 @@ Options for grey upgrade self.logger.debug("Retry distributing xml command, " "the {0} time.".format(count)) - def checkCommandConflicts(self, inplace=True): - """ - function: check the command line for conflict input - :return: - """ - conflictPara = 0 - if self.upgrade_remain: - conflictPara += 1 - if len(self.nodeNames) != 0: - conflictPara += 1 - if self.nodesNum != -1: - conflictPara += 1 - if inplace: - if conflictPara > 0: - raise Exception("The parameter %s should be used in grey " - "upgrade" % "'-continue, -h, -g'") - else: - if conflictPara > 1: - GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50005"] % ( - "-continue", "-h, -g")) - if conflictPara == 0: - GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % - "-continue, -h, -g") - if __name__ == '__main__': """ diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 7aaac20d0f320ea5d956a01f3e956068c3873b2c..6e3fdf04807665d9cef0cdcd3307ee7de56576a8 100644 --- 
a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -1453,7 +1453,7 @@ class dbClusterInfo(): dnInst.azName) if dnInst.localRole == "Primary": outText = outText + ( - "static_connections : %s\n\n" % + "static_connections : %s\n" % dnInst.staticConnections) outText = outText + ( "HA_state : %s\n" % @@ -1462,14 +1462,14 @@ class dbClusterInfo(): "instance_role : %s\n" % dnInst.localRole) if dnInst.localRole == "Primary": - outText = outText + "------------------------" \ + outText = outText + "\n------------------------" \ "---------------" \ "--------------------------------\n\n" continue for i_loop in syncInfo: - if i_loop[11] == '': - i_loop[11] = 'Unknown' if i_loop[0] == dnInst.listenIps[0]: + if i_loop[11] == '': + i_loop[11] = 'Unknown' outText = outText + ( "HA_state : %s\n" % i_loop[1]) @@ -1507,11 +1507,10 @@ class dbClusterInfo(): outText = outText + ( "upstream_nodeIp : %s\n" % i_loop[12]) - outText = outText + ("\n") - outText = outText + "------------------------" \ - "---------------" \ - "--------------------------------\n\n" break + outText = outText + "\n------------------------" \ + "---------------" \ + "--------------------------------\n\n" if nodeId != 0: break else: @@ -1861,7 +1860,6 @@ class dbClusterInfo(): "receiver_received_location, receiver_write_location," \ "receiver_flush_location, receiver_replay_location," \ "sync_percent, channel from pg_stat_get_wal_receiver();" - cascadeOutput = "" if dbNode.name != localHostName: cmd = "[need_replace_quotes] gsql -m -d postgres -p " \ "%s -A -t -c \"%s\"" % \ @@ -1872,7 +1870,7 @@ class dbClusterInfo(): "failed to connect") >= 0: continue else: - output = cascadeOutput.split('\n')[1:-1] + cascadeOutput = cascadeOutput.split('\n')[1:-1] else: cmd = "gsql -m -d postgres -p %s -A -t -c \"%s\"" % ( dnInst.port, subsql) @@ -1962,9 +1960,7 @@ class dbClusterInfo(): with open(fileName, "a") as fp: fp.write(content) fp.flush() - - else: - sys.stdout.write(content) + 
sys.stdout.write(content) def __checkOsUser(self, user): """ @@ -3188,36 +3184,24 @@ class dbClusterInfo(): "with cm and etcd") + errMsg) # create a dictionary nodeipport[dbNode.name] = [nodeips, nodeports] - # delete redundant records - self.__Deduplication(nodeports) - self.__Deduplication(nodeips) # check port and ip self.__checkPortandIP(nodeips, nodeports, dbNode.name) return nodeipport - def __Deduplication(self, currentlist): - """ - function : Delete the deduplication. - input : [] - output : NA - """ - currentlist.sort() - for i in range(len(currentlist) - 2, -1, -1): - if currentlist.count(currentlist[i]) > 1: - del currentlist[i] - def __checkPortandIP(self, ips, ports, name): """ function : Check port and IP. input : String,int,string output : NA """ - for port in ports: + ipsCopy = list(set(ips)) + portsCopy = list(set(ports)) + for port in portsCopy: if (not self.__isPortValid(port)): raise Exception(ErrorCode.GAUSS_512["GAUSS_51233"] % (port, name) + " Please check it.") - for ip in ips: + for ip in ipsCopy: if (not self.__isIpValid(ip)): raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] + \ "The IP address is: %s." % ip + " Please " diff --git a/script/gspylib/common/ErrorCode.py b/script/gspylib/common/ErrorCode.py index 4f5ca1c05f49270ebcb801a8a7eb50000f8529bd..ad38a224cd21c34b588b3b1089f4c3e8763663d5 100644 --- a/script/gspylib/common/ErrorCode.py +++ b/script/gspylib/common/ErrorCode.py @@ -1112,8 +1112,8 @@ class ErrorCode(): "detail.", "GAUSS_35704": "[GAUSS-35704] %s [%s] does not exist on node [%s].", "GAUSS_35705": "[GAUSS-35705] Error, the database version is " - "inconsistent in %s: %s" - + "inconsistent in %s: %s", + "GAUSS_35706": "[GAUSS-35706] All new hosts %s failed." 
} ########################################################################## diff --git a/script/gspylib/common/ParameterParsecheck.py b/script/gspylib/common/ParameterParsecheck.py index 1f2137e6ab004749b74d54b33d58213aa9dc356a..896609243ec00fb887994b3b730ea681211971ad 100644 --- a/script/gspylib/common/ParameterParsecheck.py +++ b/script/gspylib/common/ParameterParsecheck.py @@ -130,7 +130,7 @@ gs_upgradectl_chose_strategy = ["-t:", "-?", "--help", "-V", "--version", "-l:"] # auto-upgrade parameter lists gs_upgradectl_auto_upgrade = ["-t:", "-?", "--help", "-V", "--version", "-l:", - "-X:", "--grey", "-h:", "-g:", "--continue"] + "-X:", "--grey"] # auto-rollback parameter lists gs_upgradectl_auto_rollback = ["-t:", "-?", "--help", "-V", "--version", "-l:", "-X:", "--force"] diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index d2a85d94039ab79108352c4b549c016358c4600d..0fe15f1b2f6d3abdf8182af398c2df7910993ac5 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -129,7 +129,14 @@ class Kernel(BaseComponent): raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % "instance" + " Error: \n%s." 
% output) if output.find("No such process") > 0: - GaussLog.exitWithError(output) + cmd = "ps c -eo pid,euid,cmd | grep gaussdb | grep -v grep | " \ + "awk '{if($2 == curuid && $1!=\"-n\") " \ + "print \"/proc/\"$1\"/cwd\"}' curuid=`id -u`|" \ + " xargs ls -l |awk '{if ($NF==\"%s\") print $(NF-2)}' | " \ + "awk -F/ '{print $3 }'" % (self.instInfo.datadir) + (status, rightpid) = subprocess.getstatusoutput(cmd) + if rightpid or status != 0: + GaussLog.exitWithError(output) def isPidFileExist(self): pidFile = "%s/postmaster.pid" % self.instInfo.datadir diff --git a/script/gspylib/inspection/lib/checknetspeed/speed_test b/script/gspylib/inspection/lib/checknetspeed/speed_test index 1607f8f1ed6bd9f9097683c096020a4dabeff588..6505de566f1f03af1ee79e1af3b05353c9b917f2 100644 --- a/script/gspylib/inspection/lib/checknetspeed/speed_test +++ b/script/gspylib/inspection/lib/checknetspeed/speed_test @@ -25,45 +25,51 @@ listen_port = 31111 run_mode = 0 # 0:connect, 1:send, 2:recv def send_main(): - global listen_ip - global listen_port - buf = "this is a test !" * 512 # buf 8192 block - sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) - print(listen_ip+":"+listen_port) - while(sockets.connect_ex((listen_ip, int(listen_port))) != 0): - print("connect failed:%m\n") - time.sleep(1) - print("connect succeed, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp") - print("send satrt, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp") - i = 0 - while True: - i = i + 1 - n = sockets.send(buf.encode()) - if n == 0: - print("send failed:%m\n") - break - print("%d send:%s, len=%d\n", i, buf, n) + try: + global listen_ip + global listen_port + buf = "this is a test !" 
* 512 # buf 8192 block + sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) + print(listen_ip+":"+listen_port) + while(sockets.connect_ex((listen_ip, int(listen_port))) != 0): + print("connect failed:%m\n") + time.sleep(1) + print("connect succeed, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp") + print("send satrt, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp") + i = 0 + while True: + i = i + 1 + n = sockets.send(buf.encode()) + if n == 0: + print("send failed:%m\n") + break + print("%d send:%s, len=%d\n", i, buf, n) + except Exception as e: + print(str(e)) def recv_main(): - global listen_ip - global listen_port - sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sockets.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True) - sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) - sockets.bind((listen_ip, int(listen_port))) - sockets.listen(128) - while True: - client, addr = sockets.accept() - print('client:', client) - print('addr:', addr) + try: + global listen_ip + global listen_port + sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sockets.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True) + sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) + sockets.bind((listen_ip, int(listen_port))) + sockets.listen(128) while True: - data = client.recv(8192) - print(data.decode()) - if not data: - client.close() - break + client, addr = sockets.accept() + print('client:', client) + print('addr:', addr) + while True: + data = client.recv(8192) + print(data.decode()) + if not data: + client.close() + break + except Exception as e: + print(str(e)) def connect_main(): sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM) diff --git a/script/impl/expansion/ExpansionImpl.py b/script/impl/expansion/ExpansionImpl.py index 40375e63cacf10023076080dd427cdd782d4c0e5..495ec08b4dbd70ba0addaed76adc2a813e9bd163 100644 --- 
a/script/impl/expansion/ExpansionImpl.py +++ b/script/impl/expansion/ExpansionImpl.py @@ -46,10 +46,17 @@ DefaultValue.doConfigForParamiko() import paramiko -#mode +#boot/build mode MODE_PRIMARY = "primary" MODE_STANDBY = "standby" MODE_NORMAL = "normal" +MODE_CASCADE = "cascade_standby" + +# instance local_role +ROLE_NORMAL = "normal" +ROLE_PRIMARY = "primary" +ROLE_STANDBY = "standby" +ROLE_CASCADE = "cascade standby" #db state STAT_NORMAL = "normal" @@ -78,7 +85,10 @@ class ExpansionImpl(): self.user = self.context.user self.group = self.context.group - + self.existingHosts = [] + self.expansionSuccess = {} + for newHost in self.context.newHostList: + self.expansionSuccess[newHost] = False self.logger = self.context.logger envFile = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH") @@ -102,32 +112,26 @@ class ExpansionImpl(): """ create software dir and send it on each nodes """ - self.logger.debug("Start to send soft to each standby nodes.\n") - hostNames = self.context.newHostList - hostList = hostNames - - sshTool = SshTool(hostNames) - + self.logger.log("Start to send soft to each standby nodes.") srcFile = self.context.packagepath - targetDir = os.path.realpath( - os.path.join(srcFile, "../")) - - ## mkdir package dir and send package to remote nodes. 
- sshTool.executeCommand("mkdir -p %s" % srcFile , "", DefaultValue.SUCCESS, - hostList) - sshTool.scpFiles(srcFile, targetDir, hostList) - - ## change mode of package dir to set privileges for users + targetDir = os.path.realpath(os.path.join(srcFile, "../")) + # change mode of package dir to set privileges for users tPathList = os.path.split(targetDir) path2ChangeMode = targetDir if len(tPathList) > 2: path2ChangeMode = os.path.join(tPathList[0],tPathList[1]) - changeModCmd = "chmod -R a+x {srcFile}".format(user=self.user, - group=self.group,srcFile=path2ChangeMode) - sshTool.executeCommand(changeModCmd, "", DefaultValue.SUCCESS, - hostList) - self.logger.debug("End to send soft to each standby nodes.\n") - self.cleanSshToolFile(sshTool) + changeModCmd = "chmod -R a+x {srcFile}".format(user = self.user, + group = self.group, srcFile = path2ChangeMode) + for host in self.context.newHostList: + sshTool = SshTool([host], timeout = 300) + # mkdir package dir and send package to remote nodes. 
+ sshTool.executeCommand("mkdir -p %s" % srcFile , "", + DefaultValue.SUCCESS, [host]) + sshTool.scpFiles(srcFile, targetDir, [host]) + sshTool.executeCommand(changeModCmd, "", DefaultValue.SUCCESS, + [host]) + self.cleanSshToolFile(sshTool) + self.logger.log("End to send soft to each standby nodes.") def generateAndSendXmlFile(self): """ @@ -151,7 +155,7 @@ class ExpansionImpl(): fo.write( xmlContent ) fo.close() # send single deploy xml file to each standby node - sshTool = SshTool(host) + sshTool = SshTool([host]) retmap, output = sshTool.getSshStatusOutput("mkdir -p %s" % self.tempFileDir , [host], self.envFile) retmap, output = sshTool.getSshStatusOutput("chown %s:%s %s" % @@ -167,6 +171,7 @@ class ExpansionImpl(): """ nodeName = self.context.backIpNameMap[backIp] nodeInfo = self.context.clusterInfoDict[nodeName] + clusterName = self.context.clusterInfo.name backIp = nodeInfo["backIp"] sshIp = nodeInfo["sshIp"] @@ -181,12 +186,14 @@ class ExpansionImpl(): tmpMppdbPath = DefaultValue.getEnv("PGHOST") if tmpMppdbPath: mppdbconfig = '' % tmpMppdbPath + azName = self.context.hostAzNameMap[backIp] + azPriority = nodeInfo["azPriority"] xmlConfig = """\ - + @@ -197,10 +204,10 @@ class ExpansionImpl(): - + - + @@ -210,10 +217,10 @@ class ExpansionImpl(): - """.format(nodeName=nodeName,backIp=backIp,appPath=appPath, - logPath=logPath,toolPath=toolPath,corePath=corePath, - sshIp=sshIp,port=port,dataNode=dataNode,azName=self.context.azName, - mappdbConfig=mppdbconfig) + """.format(clusterName = clusterName, nodeName = nodeName, backIp = backIp, + appPath = appPath, logPath = logPath, toolPath = toolPath, corePath = corePath, + sshIp = sshIp, port = port, dataNode = dataNode, azName = azName, + azPriority = azPriority, mappdbConfig = mppdbconfig) return xmlConfig def changeUser(self): @@ -247,36 +254,62 @@ class ExpansionImpl(): self.logger.log("Authentication failed.") self.initSshConnect(host, user) + def hasNormalStandbyInAZOfCascade(self, cascadeIp, existingStandbys): 
+ # check whether there are normal standbies in hostAzNameMap[cascadeIp] azZone + hasStandbyWithSameAZ = False + hostAzNameMap = self.context.hostAzNameMap + for existingStandby in existingStandbys: + existingStandbyName = self.context.backIpNameMap[existingStandby] + existingStandbyDataNode = \ + self.context.clusterInfoDict[existingStandbyName]["dataNode"] + insType, dbStat = self.commonGsCtl.queryInstanceStatus( + existingStandby, existingStandbyDataNode, self.envFile) + if dbStat != STAT_NORMAL: + continue + if hostAzNameMap[cascadeIp] != hostAzNameMap[existingStandby]: + continue + hasStandbyWithSameAZ = True + break + return hasStandbyWithSameAZ + def installDatabaseOnHosts(self): """ install database on each standby node """ - hostList = self.context.newHostList - envfile = self.envFile + standbyHosts = self.context.newHostList tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir - installCmd = "source {envfile} ; gs_install -X {xmlfile} \ - 2>&1".format(envfile=envfile,xmlfile=tempXmlFile) - - statusArr = [] - - for newHost in hostList: - - self.logger.log("\ninstalling database on node %s:" % newHost) - self.logger.debug(installCmd) - + installCmd = "source {envFile} ; gs_install -X {xmlFile} "\ + "2>&1".format(envFile = self.envFile, xmlFile = tempXmlFile) + self.logger.debug(installCmd) + primaryHostName = self.getPrimaryHostName() + primaryHostIp = self.context.clusterInfoDict[primaryHostName]["backIp"] + existingStandbys = list(set(self.existingHosts) - (set([primaryHostIp]))) + failedInstallHosts = [] + notInstalledCascadeHosts = [] + for newHost in standbyHosts: + if not self.expansionSuccess[newHost]: + continue + self.logger.log("Installing database on node %s:" % newHost) hostName = self.context.backIpNameMap[newHost] sshIp = self.context.clusterInfoDict[hostName]["sshIp"] + if self.context.newHostCasRoleMap[newHost] == "on": + # check whether there are normal standbies in hostAzNameMap[host] azZone + hasStandbyWithSameAZ = 
self.hasNormalStandbyInAZOfCascade(newHost, + existingStandbys) + if not hasStandbyWithSameAZ: + notInstalledCascadeHosts.append(newHost) + self.expansionSuccess[newHost] = False + continue self.initSshConnect(sshIp, self.user) - stdin, stdout, stderr = self.sshClient.exec_command(installCmd, - get_pty=True) + get_pty=True) channel = stdout.channel echannel = stderr.channel while not channel.exit_status_ready(): try: recvOut = channel.recv(1024) - outDecode = recvOut.decode("utf-8"); + outDecode = recvOut.decode("utf-8") outStr = outDecode.strip() if(len(outStr) == 0): continue @@ -309,187 +342,200 @@ class ExpansionImpl(): not channel.recv_ready(): channel.close() break - stdout.close() stderr.close() - status = channel.recv_exit_status() - statusArr.append(status) - - isBothSuccess = True - for status in statusArr: - if status != 0: - isBothSuccess = False - break - if isBothSuccess: - self.logger.log("\nSuccessfully install database on node %s" % - hostList) - else: - sys.exit(1) + if channel.recv_exit_status() != 0: + self.expansionSuccess[newHost] = False + failedInstallHosts.append(newHost) + else: + if self.context.newHostCasRoleMap[newHost] == "off": + existingStandbys.append(newHost) + self.logger.log("%s install success." % newHost) + if notInstalledCascadeHosts: + self.logger.log("OpenGauss won't be installed on cascade_standby" + " %s, because there is no Normal standby in the same azZone." 
% + ", ".join(notInstalledCascadeHosts)) + if failedInstallHosts: + self.logger.log(ErrorCode.GAUSS_527["GAUSS_52707"] % + ", ".join(failedInstallHosts)) + self.logger.log("Finish to install database on all nodes.") + if self._isAllFailed(): + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "install") def preInstallOnHosts(self): """ execute preinstall step """ - self.logger.debug("Start to preinstall database step.\n") - newBackIps = self.context.newHostList - newHostNames = [] - for host in newBackIps: - newHostNames.append(self.context.backIpNameMap[host]) - envfile = self.envFile + self.logger.log("Start to preinstall database step.") tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir - userpath = pwd.getpwnam(self.user).pw_dir - mpprcFile = os.path.join(userpath, ".bashrc") - if envfile == mpprcFile: - preinstallCmd = "{softpath}/script/gs_preinstall -U {user} -G {group} \ - -X {xmlfile} --non-interactive 2>&1\ - ".format(softpath=self.context.packagepath,user=self.user, - group=self.group,xmlfile=tempXmlFile) + if not DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"): + preinstallCmd = "{softPath}/script/gs_preinstall -U {user} -G {group} "\ + "-X {xmlFile} --non-interactive 2>&1".format( + softPath = self.context.packagepath, user = self.user, + group = self.group, xmlFile = tempXmlFile) else: - preinstallCmd = "{softpath}/script/gs_preinstall -U {user} -G {group} \ - -X {xmlfile} --sep-env-file={envfile} \ - --non-interactive 2>&1\ - ".format(softpath=self.context.packagepath,user=self.user, - group=self.group,xmlfile=tempXmlFile,envfile=envfile) - - sshTool = SshTool(newHostNames) - - status, output = sshTool.getSshStatusOutput(preinstallCmd , [], envfile) - statusValues = status.values() - if STATUS_FAIL in statusValues: - GaussLog.exitWithError(output) - - self.logger.debug("End to preinstall database step.\n") - self.cleanSshToolFile(sshTool) - + preinstallCmd = "{softPath}/script/gs_preinstall -U {user} -G {group} "\ + "-X {xmlFile} 
--sep-env-file={envFile} --non-interactive 2>&1".format( + softPath = self.context.packagepath, user = self.user, + group = self.group, xmlFile = tempXmlFile, envFile = self.envFile) + + failedPreinstallHosts = [] + for host in self.context.newHostList: + sshTool = SshTool([host], timeout = 300) + resultMap, output = sshTool.getSshStatusOutput(preinstallCmd, [], self.envFile) + if resultMap[host] == DefaultValue.SUCCESS: + self.expansionSuccess[host] = True + self.logger.log("Preinstall %s success" % host) + else: + failedPreinstallHosts.append(host) + self.cleanSshToolFile(sshTool) + if failedPreinstallHosts: + self.logger.log("Failed to preinstall on: \n%s" % ", ".join(failedPreinstallHosts)) + self.logger.log("End to preinstall database step.") def buildStandbyRelation(self): """ func: after install single database on standby nodes. build the relation with primary and standby nodes. step: - 1. restart primary node with Primary Mode - (only used to Single-Node instance) - 2. set guc config to primary node - 3. restart standby node with Standby Mode - 4. set guc config to standby node - 5. generate cluster static file and send to each node. - """ - self.queryPrimaryClusterDetail() - self.setPrimaryGUCConfig() - self.setStandbyGUCConfig() - self.addTrustOnExistNodes() - self.restartSingleDbWithPrimaryMode() + 1. set all nodes' guc config parameter: replconninfo, available_zone(only for new) + 2. add trust on all hosts + 3. generate GRPC cert on new hosts, and primary if current cluster is single instance + 4. build new hosts : + (1) restart new instance with standby mode + (2) build new instances + 5. rollback guc config of existing hosts if build failed + 6. generate cluster static file and send to each node. + """ + self.setGucConfig() + self.addTrust() + self.generateGRPCCert() self.buildStandbyHosts() + self.rollback() self.generateClusterStaticFile() - def queryPrimaryClusterDetail(self): + def getExistingHosts(self): """ - get current cluster type. 
- single-node or primary-standby + get the exiting hosts """ - self.logger.debug("Query primary database instance mode.\n") - self.isSingleNodeInstance = True + self.logger.debug("Get the existing hosts.") primaryHost = self.getPrimaryHostName() - result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile) - instance = re.findall(r"node\s+node_ip\s+instance\s+state", result) - if len(instance) > 1: - self.isSingleNodeInstance = False - self.logger.debug("Original instance mode is %s" % - self.isSingleNodeInstance) + command = "" + if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"): + command = "su - %s -c 'source %s;gs_om -t status --detail'" % \ + (self.user, self.envFile) + else: + command = "su - %s -c 'source /etc/profile;source /home/%s/.bashrc;"\ + "gs_om -t status --detail'" % (self.user, self.user) + sshTool = SshTool([primaryHost]) + resultMap, outputCollect = sshTool.getSshStatusOutput(command, + [primaryHost], self.envFile) + self.logger.debug(outputCollect) + if resultMap[primaryHost] != DefaultValue.SUCCESS: + GaussLog.exitWithError("Unable to query current cluster state.") + instances = re.split('(?:\|)|(?:\n)', outputCollect) + self.existingHosts = [] + pattern = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*') + for inst in instances: + existingHosts = pattern.findall(inst) + if len(existingHosts) != 0: + self.existingHosts.append(existingHosts[0]) - def setPrimaryGUCConfig(self): + def setGucConfig(self): """ + set replconninfo on all hosts """ - self.logger.debug("Start to set primary node GUC config.\n") - primaryHost = self.getPrimaryHostName() + self.logger.debug("Start to set GUC config on all hosts.\n") + gucDict = self.getGUCConfig() + tempShFile = "%s/guc.sh" % self.tempFileDir + hostIpList = list(self.existingHosts) + for host in self.expansionSuccess: + hostIpList.append(host) - self.setGUCOnClusterHosts([primaryHost]) - self.addStandbyIpInPrimaryConf() - - - def setStandbyGUCConfig(self): - """ - set the expansion standby node 
db guc config - """ - self.logger.debug("Stat to set standby node GUC config.\n") - nodeList = self.context.nodeNameList - primaryHost = self.getPrimaryHostName() - standbyHostNames = list(set(nodeList).difference(set([primaryHost]))) - self.setGUCOnClusterHosts(standbyHostNames) - - def addTrustOnExistNodes(self): - """ - add host trust in pg_hba.conf on existing standby node. - """ - self.logger.debug("Start to set host trust on existing node.") - allNodeNames = self.context.nodeNameList - newNodeIps = self.context.newHostList - newNodeNames = [] - trustCmd = [] - for node in newNodeIps: - nodeName = self.context.backIpNameMap[node] - newNodeNames.append(nodeName) - cmd = 'host all all %s/32 trust' % node - trustCmd.append(cmd) - existNodes = list(set(allNodeNames).difference(set(newNodeNames))) - for node in existNodes: - dataNode = self.context.clusterInfoDict[node]["dataNode"] - cmd = "" - for trust in trustCmd: - cmd += "source %s; gs_guc set -D %s -h '%s';" % \ - (self.envFile, dataNode, trust) - sshTool = SshTool([node]) - resultMap, outputCollect = sshTool.getSshStatusOutput(cmd, - [node], self.envFile) + nodeDict = self.context.clusterInfoDict + backIpNameMap = self.context.backIpNameMap + hostAzNameMap = self.context.hostAzNameMap + for host in hostIpList: + hostName = backIpNameMap[host] + # set Available_zone for the new standby + if host in self.context.newHostList: + dataNode = nodeDict[hostName]["dataNode"] + gucDict[hostName] += """\ +gs_guc set -D {dn} -c "available_zone='{azName}'" + """.format(dn=dataNode, azName=hostAzNameMap[host]) + command = "source %s ; " % self.envFile + gucDict[hostName] + self.logger.debug("[%s] gucCommand:%s" % (host, command)) + + sshTool = SshTool([host]) + # create temporary dir to save guc command bashfile. 
+ mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \ + (self.tempFileDir, self.user, self.group, self.tempFileDir) + sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile) + subprocess.getstatusoutput("touch %s; cat /dev/null > %s" % + (tempShFile, tempShFile)) + with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT, + stat.S_IWUSR | stat.S_IRUSR), 'w') as fo: + fo.write("#bash\n") + fo.write(command) + fo.close() + + # send guc command bashfile to each host and execute it. + sshTool.scpFiles("%s" % tempShFile, "%s" % tempShFile, [host], + self.envFile) + resultMap, outputCollect = sshTool.getSshStatusOutput( + "sh %s" % tempShFile, [host], self.envFile) + + self.logger.debug(outputCollect) self.cleanSshToolFile(sshTool) - self.logger.debug("End to set host trust on existing node.") - - def restartSingleDbWithPrimaryMode(self): + + def addTrust(self): + """ + add authentication rules about new host ip in existing hosts and + add authentication rules about other all hosts ip in new hosts + """ + self.logger.debug("Start to set host trust on all node.") + allHosts = self.existingHosts + self.context.newHostList + for hostExec in allHosts: + hostExecName = self.context.backIpNameMap[hostExec] + dataNode = self.context.clusterInfoDict[hostExecName]["dataNode"] + cmd = "source %s;gs_guc set -D %s" % (self.envFile, dataNode) + if hostExec in self.existingHosts: + for hostParam in self.context.newHostList: + cmd += " -h 'host all all %s/32 trust'" % \ + hostParam + else: + for hostParam in allHosts: + if hostExec != hostParam: + cmd += " -h 'host all all %s/32 trust'" % \ + hostParam + self.logger.debug("[%s] trustCmd:%s" % (hostExec, cmd)) + sshTool = SshTool([hostExec]) + resultMap, outputCollect = sshTool.getSshStatusOutput(cmd, + [hostExec], self.envFile) + self.cleanSshToolFile(sshTool) + self.logger.debug("End to set host trust on all node.") + + def generateGRPCCert(self): """ + generate GRPC cert for single node """ primaryHost = 
self.getPrimaryHostName() dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"] - + needGRPCHosts = [] + for host in self.expansionSuccess: + if self.expansionSuccess[host]: + needGRPCHosts.append(host) insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost, - dataNode,self.envFile) + dataNode,self.envFile) if insType != MODE_PRIMARY: - self.commonGsCtl.stopInstance(primaryHost, dataNode, self.envFile) - self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode, - MODE_PRIMARY,self.envFile) - - # start db to primary state for three times max - start_retry_num = 1 - while start_retry_num <= 3: - insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost, - dataNode, self.envFile) - if insType == MODE_PRIMARY: - break - self.logger.debug("Start database as Primary mode failed, \ -retry for %s times" % start_retry_num) - self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode, - MODE_PRIMARY, self.envFile) - start_retry_num = start_retry_num + 1 - - def addStandbyIpInPrimaryConf(self): - """ - add standby hosts ip in primary node pg_hba.conf - """ - - standbyHosts = self.context.newHostList - primaryHost = self.getPrimaryHostName() - command = '' - for host in standbyHosts: - hostName = self.context.backIpNameMap[host] - dataNode = self.context.clusterInfoDict[hostName]["dataNode"] - command += ("source %s; gs_guc set -D %s -h 'host all all %s/32 " + \ - "trust';") % (self.envFile, dataNode, host) - self.logger.debug(command) - sshTool = SshTool([primaryHost]) - resultMap, outputCollect = sshTool.getSshStatusOutput(command, - [primaryHost], self.envFile) - self.logger.debug(outputCollect) - self.cleanSshToolFile(sshTool) + primaryHostIp = self.context.clusterInfoDict[primaryHost]["backIp"] + needGRPCHosts.append(primaryHostIp) + self.logger.debug("Start to generate GRPC cert.") + if needGRPCHosts: + self.context.initSshTool(needGRPCHosts) + self.context.createGrpcCa(needGRPCHosts) + self.logger.debug("End to generate GRPC cert.") 
def reloadPrimaryConf(self): """ @@ -500,7 +546,7 @@ retry for %s times" % start_retry_num) sshTool = SshTool([primaryHost]) self.logger.debug(command) resultMap, outputCollect = sshTool.getSshStatusOutput(command, - [primaryHost], self.envFile) + [primaryHost], self.envFile) self.logger.debug(outputCollect) self.cleanSshToolFile(sshTool) @@ -510,7 +556,7 @@ retry for %s times" % start_retry_num) primaryHost = "" for nodeName in self.context.nodeNameList: if self.context.clusterInfoDict[nodeName]["instanceType"] \ - == MASTER_INSTANCE: + == MASTER_INSTANCE: primaryHost = nodeName break return primaryHost @@ -520,56 +566,121 @@ retry for %s times" % start_retry_num) """ stop the new standby host`s database and build it as standby mode """ - self.logger.debug("start to build standby node...\n") + self.logger.debug("Start to build new nodes.") standbyHosts = self.context.newHostList + hostAzNameMap = self.context.hostAzNameMap + primaryHostName = self.getPrimaryHostName() + primaryHost = self.context.clusterInfoDict[primaryHostName]["backIp"] + existingStandbys = list(set(self.existingHosts).difference(set([primaryHost]))) + primaryDataNode = self.context.clusterInfoDict[primaryHostName]["dataNode"] + self.reloadPrimaryConf() + time.sleep(10) + insType, dbStat = self.commonGsCtl.queryInstanceStatus( + primaryHost, primaryDataNode, self.envFile) + primaryExceptionInfo = "" + if insType != ROLE_PRIMARY: + primaryExceptionInfo = "The server mode of primary host" \ + "is not primary." + if dbStat != STAT_NORMAL: + primaryExceptionInfo = "The primary is not in Normal state." 
+ if primaryExceptionInfo != "": + self.rollback() + GaussLog.exitWithError(primaryExceptionInfo) for host in standbyHosts: + if not self.expansionSuccess[host]: + continue hostName = self.context.backIpNameMap[host] dataNode = self.context.clusterInfoDict[hostName]["dataNode"] - + buildMode = "" + hostRole = "" + if self.context.newHostCasRoleMap[host] == "on": + buildMode = MODE_CASCADE + hostRole = ROLE_CASCADE + # check whether there are normal standbies in hostAzNameMap[host] azZone + hasStandbyWithSameAZ = self.hasNormalStandbyInAZOfCascade(host, + existingStandbys) + if not hasStandbyWithSameAZ: + self.logger.log("There is no Normal standby in %s" % + hostAzNameMap[host]) + self.expansionSuccess[host] = False + continue + else: + buildMode = MODE_STANDBY + hostRole = ROLE_STANDBY + self.logger.log("Start to build %s %s." % (hostRole, host)) self.checkTmpDir(hostName) - + # start new host as standby mode self.commonGsCtl.stopInstance(hostName, dataNode, self.envFile) - self.commonGsCtl.startInstanceWithMode(hostName, dataNode, - MODE_STANDBY, self.envFile) - - # start standby as standby mode for three times max. - start_retry_num = 1 - while start_retry_num <= 3: - insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName, - dataNode, self.envFile) - if insType != MODE_STANDBY: - self.logger.debug("Start database as Standby mode failed, \ -retry for %s times" % start_retry_num) - self.commonGsCtl.startInstanceWithMode(hostName, dataNode, - MODE_STANDBY, self.envFile) - start_retry_num = start_retry_num + 1 + result, output = self.commonGsCtl.startInstanceWithMode(host, + dataNode, MODE_STANDBY, self.envFile) + if result[host] != DefaultValue.SUCCESS: + if "uncompleted build is detected" not in output: + self.expansionSuccess[host] = False + self.logger.log("Failed to start %s as standby " + "before building." % host) else: + self.logger.debug("Uncompleted build is detected on %s." 
% + host) + else: + insType, dbStat = self.commonGsCtl.queryInstanceStatus( + hostName, dataNode, self.envFile) + if insType != ROLE_STANDBY: + self.logger.log("Build %s failed." % host) + self.expansionSuccess[host] = False + continue + + # build new host + sshTool = SshTool([host]) + tempShFile = "%s/buildStandby.sh" % self.tempFileDir + # create temporary dir to save gs_ctl build command bashfile. + mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \ + (self.tempFileDir, self.user, self.group, self.tempFileDir) + sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile) + subprocess.getstatusoutput("touch %s; cat /dev/null > %s" % + (tempShFile, tempShFile)) + buildCmd = "gs_ctl build -D %s -M %s" % (dataNode, buildMode) + gs_ctlBuildCmd = "source %s ;nohup " % self.envFile + buildCmd + " 1>/dev/null 2>/dev/null &" + self.logger.debug("[%s] gs_ctlBuildCmd: %s" % (host, gs_ctlBuildCmd)) + with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT, + stat.S_IWUSR | stat.S_IRUSR),'w') as fo: + fo.write("#bash\n") + fo.write(gs_ctlBuildCmd) + fo.close() + # send gs_ctlBuildCmd bashfile to the standby host and execute it. + sshTool.scpFiles(tempShFile, tempShFile, [host], self.envFile) + resultMap, outputCollect = sshTool.getSshStatusOutput("sh %s" % \ + tempShFile, [host], self.envFile) + if resultMap[host] != DefaultValue.SUCCESS: + self.expansionSuccess[host] = False + self.logger.debug("Failed to send gs_ctlBuildCmd bashfile " + "to %s." % host) + self.logger.log("Build %s %s failed." % (hostRole, host)) + continue + # check whether build process has finished + checkProcessExistCmd = "ps x" + while True: + resultMap, outputCollect = sshTool.getSshStatusOutput( + checkProcessExistCmd, [host]) + if buildCmd not in outputCollect: break - - # build standby node - self.addStandbyIpInPrimaryConf() - self.reloadPrimaryConf() - self.commonGsCtl.buildInstance(hostName, dataNode, MODE_STANDBY, - self.envFile) - - # if build failed first time. 
retry for three times. - start_retry_num = 1 - while start_retry_num <= 3: - insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName, - dataNode, self.envFile) - if dbStat != STAT_NORMAL: - self.logger.debug("Build standby instance failed, \ -retry for %s times" % start_retry_num) - self.addStandbyIpInPrimaryConf() - self.reloadPrimaryConf() - self.commonGsCtl.buildInstance(hostName, dataNode, - MODE_STANDBY, self.envFile) - start_retry_num = start_retry_num + 1 else: - break - + time.sleep(10) + # check build result after build process finished + insType, dbStat = self.commonGsCtl.queryInstanceStatus( + hostName, dataNode, self.envFile) + if insType == hostRole and dbStat == STAT_NORMAL: + if self.context.newHostCasRoleMap[host] == "off": + existingStandbys.append(host) + self.logger.log("Build %s %s success." % (hostRole, host)) + else: + self.expansionSuccess[host] = False + self.logger.log("Build %s %s failed." % (hostRole, host)) + if self._isAllFailed(): + self.rollback() + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "build") + def checkTmpDir(self, hostName): """ if the tmp dir id not exist, create it. @@ -590,7 +701,7 @@ retry for %s times" % start_retry_num) """ generate static_config_files and send to all hosts """ - self.logger.debug("Start to generate and send cluster static file.\n") + self.logger.log("Start to generate and send cluster static file.") primaryHost = self.getPrimaryHostName() result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile) @@ -599,9 +710,11 @@ retry for %s times" % start_retry_num) nodeIp = nodeInfo["backIp"] dataNode = nodeInfo["dataNode"] exist_reg = r"(.*)%s[\s]*%s(.*)%s(.*)" % (nodeName, nodeIp, dataNode) + dbNode = self.context.clusterInfo.getDbNodeByName(nodeName) if not re.search(exist_reg, result) and nodeIp not in self.context.newHostList: self.logger.debug("The node ip [%s] will not be added to cluster." 
% nodeIp) - dbNode = self.context.clusterInfo.getDbNodeByName(nodeName) + self.context.clusterInfo.dbNodes.remove(dbNode) + if nodeIp in self.context.newHostList and not self.expansionSuccess[nodeIp]: self.context.clusterInfo.dbNodes.remove(dbNode) toolPath = self.context.clusterInfoDict["toolPath"] @@ -610,7 +723,7 @@ retry for %s times" % start_retry_num) static_config_dir = "%s/script/static_config_files" % toolPath if not os.path.exists(static_config_dir): os.makedirs(static_config_dir) - + # valid if dynamic config file exists. dynamic_file = "%s/bin/cluster_dynamic_config" % appPath dynamic_file_exist = False @@ -632,104 +745,38 @@ retry for %s times" % start_retry_num) if dynamic_file_exist: refresh_cmd = "gs_om -t refreshconf" hostSsh.getSshStatusOutput(refresh_cmd, [hostName], self.envFile) - self.cleanSshToolFile(hostSsh) - - self.logger.debug("End to generate and send cluster static file.\n") - time.sleep(10) - - # Single-node database need start cluster after expansion - if self.isSingleNodeInstance: - primaryHost = self.getPrimaryHostName() - self.logger.debug("Single-Node instance need restart.\n") - self.commonGsCtl.queryOmCluster(primaryHost, self.envFile) - - # if primary database not normal, restart it - dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"] - insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost, - dataNode, self.envFile) - if insType != MODE_PRIMARY: - self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode, - MODE_PRIMARY, self.envFile) - # if stat if not normal,rebuild standby database - standbyHosts = self.context.newHostList - for host in standbyHosts: - hostName = self.context.backIpNameMap[host] - dataNode = self.context.clusterInfoDict[hostName]["dataNode"] - insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName, - dataNode, self.envFile) - if dbStat != STAT_NORMAL: - self.commonGsCtl.startInstanceWithMode(hostName, dataNode, - MODE_STANDBY, self.envFile) - - 
self.commonGsCtl.startOmCluster(primaryHost, self.envFile) - - def setGUCOnClusterHosts(self, hostNames=[]): - """ - guc config on all hosts - """ - - gucDict = self.getGUCConfig() + self.logger.log("End to generate and send cluster static file.\n") - tempShFile = "%s/guc.sh" % self.tempFileDir - - if len(hostNames) == 0: - hostNames = self.context.nodeNameList - - for host in hostNames: - - command = "source %s ; " % self.envFile + gucDict[host] - - self.logger.debug(command) - - sshTool = SshTool([host]) - - # create temporary dir to save guc command bashfile. - mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \ - (self.tempFileDir,self.user,self.group,self.tempFileDir) - retmap, output = sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile) - - subprocess.getstatusoutput("mkdir -m a+x -p %s; touch %s; \ - cat /dev/null > %s" % \ - (self.tempFileDir, tempShFile, tempShFile)) - with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT, - stat.S_IWUSR | stat.S_IRUSR),'w') as fo: - fo.write("#bash\n") - fo.write( command ) - fo.close() - - # send guc command bashfile to each host and execute it. 
- sshTool.scpFiles("%s" % tempShFile, "%s" % tempShFile, [host], - self.envFile) - - resultMap, outputCollect = sshTool.getSshStatusOutput("sh %s" % \ - tempShFile, [host], self.envFile) - - self.logger.debug(outputCollect) - self.cleanSshToolFile(sshTool) + self.logger.log("Expansion results:") + for newHost in self.context.newHostList: + if self.expansionSuccess[newHost]: + self.logger.log("%s:\tSuccess" % newHost) + else: + self.logger.log("%s:\tFailed" % newHost) def getGUCConfig(self): """ get guc config of each node: replconninfo[index] - remote_read_mode - replication_type """ - nodeDict = self.context.clusterInfoDict - hostNames = self.context.nodeNameList + clusterInfoDict = self.context.clusterInfoDict + hostIpList = list(self.existingHosts) + for host in self.expansionSuccess: + hostIpList.append(host) + hostNames = [] + for host in hostIpList: + hostNames.append(self.context.backIpNameMap[host]) gucDict = {} - for hostName in hostNames: - - localeHostInfo = nodeDict[hostName] + localeHostInfo = clusterInfoDict[hostName] index = 1 guc_tempate_str = "source %s; " % self.envFile for remoteHost in hostNames: - if(remoteHost == hostName): + if remoteHost == hostName: continue - remoteHostInfo = nodeDict[remoteHost] - + remoteHostInfo = clusterInfoDict[remoteHost] guc_repl_template = """\ gs_guc set -D {dn} -c "replconninfo{index}=\ 'localhost={localhost} localport={localport} \ @@ -749,86 +796,68 @@ remoteservice={remoteservice}'" remotePort=remoteHostInfo["localport"], remoteHeartPort=remoteHostInfo["heartBeatPort"], remoteservice=remoteHostInfo["localservice"]) - guc_tempate_str += guc_repl_template - index += 1 - guc_mode_type = """ - gs_guc set -D {dn} -c 'remote_read_mode=off'; - gs_guc set -D {dn} -c 'replication_type=1'; - """.format(dn=localeHostInfo["dataNode"]) - guc_tempate_str += guc_mode_type - gucDict[hostName] = guc_tempate_str return gucDict def checkLocalModeOnStandbyHosts(self): """ expansion the installed standby node. 
check standby database. - 1. if the database is normal - 2. if the databases version are same before existing and new + 1. if the database is installed correctly + 2. if the databases version are same before existing and new """ standbyHosts = self.context.newHostList - envfile = self.envFile - - self.logger.log("Checking the database with locale mode.") + envFile = self.envFile + for host in standbyHosts: + self.expansionSuccess[host] = True + self.logger.log("Checking if the database is installed correctly with local mode.") + getversioncmd = "source %s;gaussdb --version" % envFile + primaryHostName = self.getPrimaryHostName() + sshPrimary = SshTool([primaryHostName]) + resultMap, outputCollect = sshPrimary.getSshStatusOutput( + getversioncmd, [], envFile) + if resultMap[primaryHostName] != DefaultValue.SUCCESS: + GaussLog.exitWithError("Fail to check the version of primary.") + ipPattern = re.compile("\[.*\] (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):") + versionPattern = re.compile("gaussdb \((.*)\) .*") + primaryVersion = versionPattern.findall(outputCollect)[0] + notInstalledHosts = [] + wrongVersionHosts = [] for host in standbyHosts: hostName = self.context.backIpNameMap[host] dataNode = self.context.clusterInfoDict[hostName]["dataNode"] - insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName, - dataNode, self.envFile) - if insType not in (MODE_PRIMARY, MODE_STANDBY, MODE_NORMAL): - GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35703"] % - (hostName, self.user, dataNode, dataNode)) - - allHostIp = [] - allHostIp.append(self.context.localIp) - versionDic = {} - - for hostip in standbyHosts: - allHostIp.append(hostip) - sshTool= SshTool(allHostIp) - #get version in the nodes - getversioncmd = "gaussdb --version" - resultMap, outputCollect = sshTool.getSshStatusOutput(getversioncmd, - [], envfile) - self.cleanSshToolFile(sshTool) - versionLines = outputCollect.splitlines() - for verline in versionLines: - if verline[0:9] == '[SUCCESS]': - ipKey = 
verline[10:-1] - continue - else: - versionStr = "".join(verline) - preVersion = versionStr.split(' ') - versionInfo = preVersion[4] - versionDic[ipKey] = versionInfo[:-2] - for hostip in versionDic: - if hostip == self.context.localIp: - versionCompare = "" - versionCompare = versionDic[hostip] + sshTool = SshTool([host]) + resultMap, outputCollect = sshTool.getSshStatusOutput( + getversioncmd, [], envFile) + if resultMap[host] != DefaultValue.SUCCESS: + self.expansionSuccess[host] = False + notInstalledHosts.append(host) else: - if versionDic[hostip] == versionCompare: - continue - else: - GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35705"] \ - %(hostip, versionDic[hostip])) - - self.logger.log("Successfully checked the database with locale mode.") + version = versionPattern.findall(outputCollect)[0] + if version != primaryVersion: + self.expansionSuccess[host] = False + wrongVersionHosts.append(host) + if notInstalledHosts: + self.logger.log("In local mode, database is not installed " + "correctly on these nodes:\n%s" % ", ".join(notInstalledHosts)) + if wrongVersionHosts: + self.logger.log("In local mode, the database version is not same " + "with primary on these nodes:\n%s" % ", ".join(wrongVersionHosts)) + self.logger.log("End to check the database with locale mode.") def preInstall(self): """ preinstall on new hosts. 
""" - self.logger.log("Start to preinstall database on the new \ -standby nodes.") + self.logger.log("Start to preinstall database on new nodes.") self.sendSoftToHosts() self.generateAndSendXmlFile() self.preInstallOnHosts() - self.logger.log("Successfully preinstall database on the new \ -standby nodes.") - + self.logger.log("End to preinstall database on new nodes.\n") + if self._isAllFailed(): + GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "preinstall") def clearTmpFile(self): """ @@ -838,16 +867,15 @@ standby nodes.") clearCmd = "if [ -d '%s' ];then rm -rf %s;fi" % \ (self.tempFileDir, self.tempFileDir) hostNames = self.context.nodeNameList - for host in hostNames: - try: - sshTool = SshTool(hostNames) - result, output = sshTool.getSshStatusOutput(clearCmd, - hostNames, self.envFile) - self.logger.debug(output) - self.cleanSshToolFile(sshTool) - except Exception as e: - self.logger.debug(str(e)) - self.cleanSshToolFile(sshTool) + try: + sshTool = SshTool(hostNames) + result, output = sshTool.getSshStatusOutput(clearCmd, + hostNames, self.envFile) + self.logger.debug(output) + self.cleanSshToolFile(sshTool) + except Exception as e: + self.logger.debug(str(e)) + self.cleanSshToolFile(sshTool) def cleanSshToolFile(self, sshTool): @@ -874,11 +902,16 @@ standby nodes.") self.logger.debug("Start to check cluster status.\n") curHostName = socket.gethostname() - command = "su - %s -c 'source %s;gs_om -t status --detail'" % \ - (self.user, self.envFile) + command = "" + if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"): + command = "su - %s -c 'source %s;gs_om -t status --detail'" % \ + (self.user, self.envFile) + else: + command = "su - %s -c 'source /etc/profile;source /home/%s/.bashrc;"\ + "gs_om -t status --detail'" % (self.user, self.user) sshTool = SshTool([curHostName]) - resultMap, outputCollect = sshTool.getSshStatusOutput(command, - [curHostName], self.envFile) + resultMap, outputCollect = sshTool.getSshStatusOutput(command, + [curHostName], 
self.envFile) if outputCollect.find("Primary Normal") == -1: GaussLog.exitWithError("Unable to query current cluster status. " + \ "Please import environment variables or " +\ @@ -886,42 +919,47 @@ standby nodes.") self.logger.debug("The primary database is normal.\n") + + def _adjustOrderOfNewHostList(self): + """ + Adjust the order of hostlist so that + standby comes first and cascade standby comes last + """ + newHostList = self.context.newHostList + newHostCasRoleMap = self.context.newHostCasRoleMap + i, j = 0, len(newHostList) - 1 + while i < j: + while i < j and newHostCasRoleMap[newHostList[i]] == "off": + i += 1 + while i < j and newHostCasRoleMap[newHostList[j]] == "on": + j -= 1 + newHostList[i], newHostList[j] = newHostList[j], newHostList[i] + i += 1 + j -= 1 + def validNodeInStandbyList(self): """ check if the node has been installed in the cluster. """ - self.logger.debug("Start to check if the nodes in standby list\n") - - curHostName = socket.gethostname() - command = "su - %s -c 'source %s;gs_om -t status --detail'" % \ - (self.user, self.envFile) - sshTool = SshTool([curHostName]) - resultMap, outputCollect = sshTool.getSshStatusOutput(command, - [curHostName], self.envFile) - self.logger.debug(outputCollect) - - newHosts = self.context.newHostList - standbyHosts = [] - existHosts = [] - while len(newHosts) > 0: - hostIp = newHosts.pop() - nodeName = self.context.backIpNameMap[hostIp] - nodeInfo = self.context.clusterInfoDict[nodeName] - dataNode = nodeInfo["dataNode"] - exist_reg = r"(.*)%s[\s]*%s(.*)" % (nodeName, hostIp) - if not re.search(exist_reg, outputCollect): - standbyHosts.append(hostIp) - else: - existHosts.append(hostIp) - self.context.newHostList = standbyHosts - if len(existHosts) > 0: - self.logger.log("The nodes [%s] are already in the cluster. Skip expand these nodes." 
\ - % ",".join(existHosts)) - self.cleanSshToolFile(sshTool) - if len(standbyHosts) == 0: + self.logger.debug("Start to check if the nodes in standby list.") + self.getExistingHosts() + newHostList = self.context.newHostList + existedNewHosts = \ + [host for host in newHostList if host in self.existingHosts] + if existedNewHosts: + newHostList = \ + [host for host in newHostList if host not in existedNewHosts] + self.context.newHostList = newHostList + self.expansionSuccess = {} + for host in newHostList: + self.expansionSuccess[host] = False + self.logger.log("These nodes [%s] are already in the cluster. " + "Skip expand these nodes." % ",".join(existedNewHosts)) + if len(newHostList) == 0: self.logger.log("There is no node can be expanded.") sys.exit(0) - + self._adjustOrderOfNewHostList() + def checkXmlFileAccessToUser(self): """ Check if the xml config file has readable access to user. @@ -1020,19 +1058,63 @@ standby nodes.") self.changeUser() if not self.context.standbyLocalMode: - self.logger.log("\nStart to install database on the new \ -standby nodes.") + self.logger.log("Start to install database on new nodes.") self.installDatabaseOnHosts() else: - self.logger.log("\nStandby nodes is installed with locale mode.") self.checkLocalModeOnStandbyHosts() - self.logger.log("\nDatabase on standby nodes installed finished. 
\ -Start to establish the primary-standby relationship.") + self.logger.log("Database on standby nodes installed finished.\n") + self.logger.log("Start to establish the relationship.") self.buildStandbyRelation() # process success pvalue.value = 1 + def rollback(self): + """ + rollback all hosts' replconninfo about failed hosts + """ + existingHosts = self.existingHosts + failedHosts = [] + for host in self.expansionSuccess: + if self.expansionSuccess[host]: + existingHosts.append(host) + else: + failedHosts.append(host) + clusterInfoDict = self.context.clusterInfoDict + for failedHost in failedHosts: + self.logger.debug("Start to rollback replconninfo about %s" % failedHost) + for host in existingHosts: + hostName = self.context.backIpNameMap[host] + dataNode = clusterInfoDict[hostName]["dataNode"] + confFile = os.path.join(dataNode, "postgresql.conf") + rollbackReplconninfoCmd = "sed -i '/remotehost=%s/s/^/#&/' %s" \ + % (failedHost, confFile) + self.logger.debug("[%s] rollbackReplconninfoCmd:%s" % (host, + rollbackReplconninfoCmd)) + sshTool = SshTool([host]) + (statusMap, output) = sshTool.getSshStatusOutput(rollbackReplconninfoCmd, [host]) + pg_hbaFile = os.path.join(dataNode, "pg_hba.conf") + rollbackPg_hbaCmd = "sed -i '/%s/s/^/#&/' %s" \ + % (failedHost, pg_hbaFile) + self.logger.debug("[%s] rollbackPg_hbaCmd:%s" % (host, + rollbackPg_hbaCmd)) + (statusMap, output) = sshTool.getSshStatusOutput(rollbackPg_hbaCmd, [host]) + reloadGUCCommand = "source %s ; gs_ctl reload -D %s " % \ + (self.envFile, dataNode) + resultMap, outputCollect = sshTool.getSshStatusOutput( + reloadGUCCommand, [host], self.envFile) + self.logger.debug(outputCollect) + self.cleanSshToolFile(sshTool) + + def _isAllFailed(self): + """ + check whether all new hosts preinstall/install/build failed + """ + for host in self.expansionSuccess: + if self.expansionSuccess[host]: + return False + return True + def run(self): """ start expansion @@ -1043,8 +1125,7 @@ Start to establish the 
primary-standby relationship.") self.preInstall() self.installAndExpansion() - - self.logger.log("\nSuccess to expansion standby nodes.") + self.logger.log("Expansion Finish.") class GsCtlCommon: @@ -1103,6 +1184,7 @@ class GsCtlCommon: self.logger.debug(host) self.logger.debug(outputCollect) self.cleanSshToolTmpFile(sshTool) + return resultMap, outputCollect def buildInstance(self, host, datanode, mode, env): command = "source %s ; gs_ctl build -D %s -M %s" % (env, datanode, mode) @@ -1151,7 +1233,4 @@ class GsCtlCommon: try: sshTool.clenSshResultFiles() except Exception as e: - self.logger.debug(str(e)) - - - + self.logger.debug(str(e)) \ No newline at end of file diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index f4a81e87ca3ea32ad2539e2da08d968fe0e90c9d..44ae7dbc14009b95c808766d6ee73770e0d247d5 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -223,16 +223,20 @@ class OmImplOLAP(OmImpl): self.context.g_opts.security_mode) if self.dataDir != "": cmd += " -D %s" % self.dataDir - (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, hostList) + failedOutput = '' for nodeName in hostList: + (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, [nodeName]) if statusMap[nodeName] != 'Success': - raise Exception( - ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, output)) - if re.search("another server might be running", output): - self.logger.log(output) - if re.search("] WARNING:", output): - tmp = '\n'.join(re.findall(".*] WARNING:.*", output)) - self.logger.log(output[0:output.find(":")] + '\n' + tmp) + failedOutput += output + elif re.search("another server might be running", output): + self.logger.log(output) + elif re.search("] WARNING:", output): + tmp = '\n'.join(re.findall(".*] WARNING:.*", output)) + self.logger.log(output[0:output.find(":")] + '\n' + tmp) + if len(failedOutput): + self.logger.log("=========================================") + raise Exception( + 
ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, failedOutput)) if startType == "cluster": starttime = time.time() cluster_state = "" diff --git a/script/impl/om/OmImpl.py b/script/impl/om/OmImpl.py index ddc6e4a2316c0c9e8b4017f6a57f02b75c2c2ffa..4dcaa744cf44f18209d3588acce7c3b045ad5789 100644 --- a/script/impl/om/OmImpl.py +++ b/script/impl/om/OmImpl.py @@ -199,6 +199,8 @@ class OmImpl: cmd = queryCmd() if (self.context.g_opts.outFile != ""): cmd.outputFile = self.context.g_opts.outFile + else: + cmd.outputFile = self.logger.logFile if (self.context.g_opts.show_detail): if ( self.context.clusterInfo.clusterType diff --git a/script/impl/upgrade/UpgradeConst.py b/script/impl/upgrade/UpgradeConst.py index 680063e39e2ba0b0c7e229459ef1bf51caba289a..0280a1994fc4ba2862fd8c790273a96bf6928748 100644 --- a/script/impl/upgrade/UpgradeConst.py +++ b/script/impl/upgrade/UpgradeConst.py @@ -124,7 +124,9 @@ BACKUP_DIR_LIST = ['global', 'pg_clog', 'pg_xlog', 'pg_multixact', BACKUP_DIR_LIST_BASE = ['global', 'pg_clog', 'pg_csnlog'] BACKUP_DIR_LIST_64BIT_XID = ['pg_multixact', 'pg_replslot', 'pg_notify', 'pg_subtrans', 'pg_twophase'] - +VALUE_OFF = ["off", "false", "0", "no"] +VALUE_ON = ["on", "true", "1", "yes"] +DN_GUC = ["upgrade_mode", "enable_stream_replication"] FIRST_GREY_UPGRADE_NUM = 92 UPGRADE_PRECOMMIT_NUM = 0.001 @@ -156,3 +158,5 @@ COMBIN_NUM = 30 ON_INPLACE_UPGRADE = "IsInplaceUpgrade" MAX_APP_SIZE = 2000 UPGRADE_VERSION_64bit_xid = 91.208 +ENABLE_STREAM_REPLICATION_VERSION = "92.149" +ENABLE_STREAM_REPLICATION_NAME = "enable_stream_replication" diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py index 60d0eac9afad5bb37a04a2e8df2c46b7ad329c34..2a9f918f578f5410a0c9aada665564a7cdeb98d5 100644 --- a/script/impl/upgrade/UpgradeImpl.py +++ b/script/impl/upgrade/UpgradeImpl.py @@ -1067,7 +1067,7 @@ class UpgradeImpl: self.upgradeAgain() except Exception as e: errmsg = ErrorCode.GAUSS_529["GAUSS_52934"] + \ - "You can use -h to upgrade or manually 
rollback." + "You can use --grey to upgrade or manually rollback." self.context.logger.log(errmsg + str(e)) self.exitWithRetCode(self.context.action, False) else: @@ -1076,7 +1076,7 @@ class UpgradeImpl: def upgradeAgain(self): try: - self.context.logger.log( + self.context.logger.debug( "From this step, you can use -h to upgrade again if failed.") # we have guarantee specified nodes have same step, # so we only need to get one node step @@ -1108,8 +1108,9 @@ class UpgradeImpl: self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG) except Exception as e: - self.context.logger.log("Failed to upgrade, can use -h to " - "upgrade again. Error: %s" % str(e)) + self.context.logger.log("Failed to upgrade, can use --grey to " + "upgrade again after rollback. Error: " + "%s" % str(e)) self.context.logger.debug(traceback.format_exc()) self.exitWithRetCode(self.context.action, False, str(e)) self.context.logger.log( @@ -1407,44 +1408,8 @@ class UpgradeImpl: # when number and node names is empty self.context.logger.debug("Choose the nodes to be upgraded.") self.setClusterDetailInfo() - - if len(self.context.nodeNames) != 0 : - self.context.logger.log( - "Upgrade nodes %s." % self.context.nodeNames) - greyNodeNames = self.getUpgradedNodeNames() - checkH_nodes = \ - [val for val in greyNodeNames if val in self.context.nodeNames] - if len(checkH_nodes) > 0: - raise Exception("The nodes %s have been upgraded" % - checkH_nodes) - # confirm the nodesNum in checkParameter, 1 or specified by -g - elif self.context.upgrade_remain: - greyNodeNames = self.getUpgradedNodeNames() - otherNodeNames = [ - i for i in self.context.clusterNodes if i not in greyNodeNames] - self.context.nodeNames = otherNodeNames - self.context.logger.debug( - "Upgrade remain nodes %s." 
% self.context.nodeNames) - # specify the node num, try to find matched combination - else: - nodeTotalNum = len(self.context.clusterNodes) - if len(self.context.clusterNodes) == 1: - self.context.nodeNames.append( - self.context.clusterInfo.dbNodes[0].name) - self.context.logger.log( - "Upgrade one node '%s'." % self.context.nodeNames[0]) - # SinglePrimaryMultiStandbyCluster / MasterStandbyCluster with - # more than 1 node - elif self.context.nodesNum == nodeTotalNum: - self.context.nodeNames = self.context.clusterNodes - self.context.logger.log("Upgrade all nodes.") - elif self.context.nodesNum > nodeTotalNum: - raise Exception(ErrorCode.GAUSS_529["GAUSS_52906"]) - else: - self.context.nodeNames = self.findOneMatchedCombin( - self.context.clusterNodes) - self.context.logger.log( - "Upgrade nodes %s." % self.context.nodeNames) + self.context.nodeNames = self.context.clusterNodes + self.context.logger.log("Upgrade all nodes.") def getUpgradedNodeNames(self, step=GreyUpgradeStep.STEP_INIT_STATUS): """ @@ -1695,6 +1660,21 @@ class UpgradeImpl: # Normal and the database could be connected # if not, exit. 
# uninstall kerberos if it was already installed
# check whether kerberos was installed before the inplace upgrade action
DefaultValue.retryGetstatusoutput(cmd, 3, 5) + if status != 0: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50206"] % kerberosflagfile + + " Error: \n%s" % output) + self.context.logger.debug( + "Successful back up kerberos config file.") except Exception as e: self.context.logger.debug(traceback.format_exc()) self.exitWithRetCode(self.context.action, False, str(e)) @@ -4450,6 +4490,11 @@ class UpgradeImpl: self.context.logger.log("Failed to check upgrade environment.", "constant") raise Exception(str(e)) + if not self.context.forceRollback: + if self.context.oldClusterNumber >= \ + Const.ENABLE_STREAM_REPLICATION_VERSION: + self.check_gucval_is_inval_given( + Const.ENABLE_STREAM_REPLICATION_NAME, Const.VALUE_ON) try: if self.context.action == Const.ACTION_INPLACE_UPGRADE: self.context.logger.log( @@ -4466,6 +4511,19 @@ class UpgradeImpl: self.context.logger.log( "Successfully checked upgrade environment.", "constant") + def check_gucval_is_inval_given(self, guc_name, val_list): + """ + Checks whether a given parameter is a given value list in a + given instance list. + """ + self.context.logger.debug("checks whether the parameter:{0} is " + "the value:{1}.".format(guc_name, val_list)) + guc_str = "{0}:{1}".format(guc_name, ",".join(val_list)) + self.checkParam(guc_str) + self.context.logger.debug("Success to check the parameter:{0} value " + "is in the value:{1}.".format(guc_name, + val_list)) + def checkDifferentVersion(self): """ if the cluster has only one version. 
All nodes have been upgraded; no need to upgrade again.
-H|--host_ip intranet
== 0 ] + then + return 0 + else + return 1 + fi +} + +function expect_hostname() +{ + expect < expectFile + set timeout -1 + spawn $1 + expect { + "*yes/no" { send "yes\r"; exp_continue } + "*assword:" {send "$2\r"; exp_continue} + } +EOF + if [ $? == 0 ] + then + return 0 + else + return 1 + fi +} + + +function main() +{ + while [ $# -gt 0 ] + do + case "$1" in + -h|--help) + print_help + exit 1 + ;; + -U|--user_name) + if [ "$2"X = X ] + then + die "no cluster user values" + fi + USER_NAME=$2 + shift 2 + ;; + -G|--user_grp) + if [ "$2"X = X ] + then + die "no group values" + fi + USER_GROUP=$2 + shift 2 + ;; + -H|--host_ip) + if [ "$2"X = X ] + then + die "no intranet ip address of the host values" + fi + HOST_IPS=$2 + shift 2 + HOST_IPS_ARR=${HOST_IPS//,/ } + HOST_IPS_ARRAY=(${HOST_IPS_ARR}) + if [ ${#HOST_IPS_ARRAY[*]} != 2 ] + then + die "the current script can be installed only on two nodes, one active node and one standby node" + fi + ;; + -X|--xml_location) + if [ "$2"X = X ] + then + die "no cluster xml configuration file values" + fi + XML_DIR=$2 + shift 2 + ;; + -D|--install_location) + if [ "$2"X = X ] + then + die "no installation directory of the openGauss program values" + fi + INSTALL_PATH=$2 + shift 2 + ;; + -p|--port) + if [ "$2"X = X ] + then + die "the port number cannot be empty." + fi + PORT=$2 + shift 2 + ;; + -P|--password) + if [ "$2"X = X ] + then + die "the password cannot be empty." 
please input the right parameter; the following command may help you
+ fi + index=$[ ${index} + 1 ] + done + fi + rm -rf expectFile + HOST_NAMES="${HOST_NAMES_ARRAY[0]},${HOST_NAMES_ARRAY[1]}" + SYSTEM_ARCH=`uname -p` + SYSTEM_NAME=`cat /etc/*-release | grep '^ID=".*'|awk -F "[=\"]" '{print $3}'` + if [ "${SYSTEM_NAME}" == "openEuler" ] && [ "${SYSTEM_ARCH}" == "aarch64" ] + then + info "the current system environment is openEuler + arm" + elif [ "${SYSTEM_NAME}" == "openEuler" ] && [ "${SYSTEM_ARCH}" == "x86_64" ] + then + info "the current system environment is openEuler + x86" + elif [ "${SYSTEM_NAME}" == "centos" ] && [ "${SYSTEM_ARCH}" == "x86_64" ] + then + info "the current system environment is CentOS + x86" + elif [ "${SYSTEM_NAME}" == "redhat" ] && [ "${SYSTEM_ARCH}" == "x86_64" ] + then + info "the current system environment is redhat + x86" + elif [ "${SYSTEM_NAME}" == "redhat" ] && [ "${SYSTEM_ARCH}" == "aarch64" ] + then + info "the current system environment is redhat + arm" + elif [ "${SYSTEM_NAME}" == "kylin" ] && [ "${SYSTEM_ARCH}" == "x86_64" ] + then + info "the current system environment is kylin + x86" + elif [ "${SYSTEM_NAME}" == "kylin" ] && [ "${SYSTEM_ARCH}" == "aarch64" ] + then + info "the current system environment is kylin + arm" + else + warn "the current system environment is ${SYSTEM_NAME} + ${SYSTEM_ARCH}, \ + you are advised to use the centos, openEuler, redhat, or kylin system. because OpenGauss may not adapt to the current system." + fi + info "installation parameter verification completed." +} + +function checks() +{ + system_arch=`uname -p` + system_name=`cat /etc/*-release | grep '^ID=".*'|awk -F "[=\"]" '{print $3}'` + if [ ${system_arch} != "$8" -o ${system_name} != "$9" ] + then + warn "inconsistency between the system and the execution machine" + fi + + egrep "^$3" /etc/group >& /dev/null + if [ $? != 0 ];then + groupadd $3 + fi + egrep "^$4" /etc/passwd >& /dev/null + if [ $? != 0 ];then + useradd -g $3 -d /home/$4 -m -s /bin/bash $4 2>/dev/null + if [ $? 
!= 0 ] + then + die "failed to create the user on the node $2." + fi + expect_ssh "passwd $4" "$5" "passwd:" + if [ $? != 0 ] + then + die "an error occurred when setting the user password on the node $2" + fi + fi + + sed -i "s/SELINUX=.*/SELINUX=disabled/g" /etc/selinux/config && firewall-cmd --permanent --add-port="$6/tcp" && firewall-cmd --reload + if [ $? != 0 ] + then + warn "some errors occur during system environment setting on host $2" + fi + + INSTALL_PATH=$7 + if [ ! -e ${INSTALL_PATH} ] + then + mkdir -p ${INSTALL_PATH} + else + rm -rf ${INSTALL_PATH}/* + fi + chmod -R 755 ${INSTALL_PATH}/ + chown -R $4:$3 ${INSTALL_PATH}/ + if [ -f /${10} ] + then + mv /${10} $(eval echo ~$4)/ + fi + echo "check end" +} + +function pre_checks() +{ + if [ ${#HOST_IPS_ARRAY[*]} == 0 ] + then + die "the number of internal IP addresses of the host is incorrect." + fi + localips=`/sbin/ifconfig -a|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|tr -d "addr:"` + for ip in ${HOST_IPS_ARRAY[@]} + do + info "start to check the installation environment of host ${ip}." + sleep 2s + # standby node + if [[ $localips != *${ip}* ]] + then + sshcmd="scp ${SCRIPT_PATH}/${SCRIPT_NAME} root@${ip}:/" + expect_ssh "${sshcmd}" "${PASSWORD}" "100%" + if [ $? != 0 ] + then + die "an error occurred when copying the script to the target host ${ip}." + fi + sshcmd="ssh ${ip} \"sh /${SCRIPT_NAME} inner ${ip} ${USER_GROUP} ${USER_NAME} ${PASSWORD} ${PORT} ${INSTALL_PATH} ${SYSTEM_ARCH} ${SYSTEM_NAME} ${SCRIPT_NAME}\"" + expect_ssh "${sshcmd}" "${PASSWORD}" "check end" + if [ $? != 0 ] + then + die "an error occurred during the pre-installation check on the target host ${ip}." + fi + else + # local + checks "" ${ip} ${USER_GROUP} ${USER_NAME} ${PASSWORD} ${PORT} ${INSTALL_PATH} ${SYSTEM_ARCH} ${SYSTEM_NAME} ${SCRIPT_NAME} + if [ $? != 0 ] + then + die "an error occurred during the pre-installation check on the target host ${ip}." 
+ fi + fi + info "succeeded in checking the installation environment of host ${ip}." + done + return 0 +} + +function xmlconfig() +{ + info "start to automatically configure the installation file." + install_localtion=${INSTALL_PATH//\//\\\/} + if [ -e ${XML_DIR} ] + then + sed 's/@{nodeNames}/'${HOST_NAMES}'/g' ${XML_DIR} | + sed 's/@{backIpls}/'${HOST_IPS}'/g' | + sed 's/@{clusterName}/'${USER_NAME}'/g' | + sed 's/@{port}/'${PORT}'/g' | + sed 's/@{installPath}/'${install_localtion}'/g' | + sed 's/@{nodeName1}/'${HOST_NAMES_ARRAY[0]}'/g' | + sed 's/@{backIp1}/'${HOST_IPS_ARRAY[0]}'/g' | + sed 's/@{nodeName2}/'${HOST_NAMES_ARRAY[1]}'/g' | + sed 's/@{backIp2}/'${HOST_IPS_ARRAY[1]}'/g' > $(eval echo ~${USER_NAME})/one_master_one_slave.xml + else + die "cannot find one_master_one_slave_template.xml in ${XML_DIR}" + fi + cat $(eval echo ~${USER_NAME})/one_master_one_slave.xml + info "the installation file is automatically configured" + return 0 +} + +function install() +{ + info "preparing for preinstallation" + home_path=$(eval echo ~${USER_NAME}) + export LD_LIBRARY_PATH="${PACKAGE_PATH}/script/gspylib/clib:"$LD_LIBRARY_PATH + sshcmd="python3 "${PACKAGE_PATH}"/script/gs_preinstall -U "${USER_NAME}" \ + -G "${USER_GROUP}" -X "${home_path}"/one_master_one_slave.xml --sep-env-file="${home_path}"/env_master_slave" + info "cmd \"${sshcmd}\"" + expect_ssh "${sshcmd}" "${PASSWORD}" "Preinstallation succeeded" + if [ $? != 0 ] + then + die "preinstall failed." + fi + info "preinstallation succeeded." + chmod 755 ${home_path}'/one_master_one_slave.xml' + chown ${USER_NAME}:${USER_GROUP} ${home_path}'/one_master_one_slave.xml' + info "start the installation." + su - ${USER_NAME} -c"source ${home_path}/env_master_slave;gs_install -X ${home_path}/one_master_one_slave.xml;gs_om -t status --detail" + if [ $? -ne 0 ] + then + die "install failed." + else + info "install success." 
+ fi + exit 0 +} + +if [ $1 == "inner" ] +then + checks $@ +else + main $@ + pre_checks + xmlconfig + install +fi +exit 0 + diff --git a/simpleInstall/one_master_one_slave_template.xml b/simpleInstall/one_master_one_slave_template.xml new file mode 100644 index 0000000000000000000000000000000000000000..9735758dbb83d57ec2bf81d25307b1e8b0b7c593 --- /dev/null +++ b/simpleInstall/one_master_one_slave_template.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +