diff --git a/script/gs_dropnode b/script/gs_dropnode
index 291bcd73739d73099a559685152af0a19c6bd84a..9f4635fe96ba8862a12462acdee179e5a3e4bbc5 100644
--- a/script/gs_dropnode
+++ b/script/gs_dropnode
@@ -137,6 +137,9 @@ General options:
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-G")
if len(self.hostIpListForDel) == 0:
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-h")
+        # check whether an upgrade action is in progress
+ if DefaultValue.isUnderUpgrade(self.user):
+ GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])
try:
pw_user = pwd.getpwnam(self.user)
diff --git a/script/gs_expansion b/script/gs_expansion
index 34bc040af230a41eade2e670393e4128a8e1ac75..f695a4e657360d0f260c99571b62c1620bb4e56c 100644
--- a/script/gs_expansion
+++ b/script/gs_expansion
@@ -21,6 +21,9 @@
import os
import sys
+import subprocess
+
+import socket
package_path = os.path.dirname(os.path.realpath(__file__))
ld_path = package_path + "/gspylib/clib"
if 'LD_LIBRARY_PATH' not in os.environ:
@@ -61,6 +64,8 @@ class Expansion(ParallelBaseOM):
self.newHostList = []
self.clusterInfoDict = {}
self.backIpNameMap = {}
+ self.newHostCasRoleMap = {}
+ self.hostAzNameMap = {}
self.packagepath = os.path.realpath(
os.path.join(os.path.realpath(__file__), "../../"))
@@ -89,7 +94,7 @@ General options:
-V, --version Show version information.
"""
print(self.usage.__doc__)
-
+
def parseCommandLine(self):
"""
parse parameter from command line
@@ -123,50 +128,34 @@ General options:
if (ParaDict.__contains__("nodename")):
self.newHostList = ParaDict.get("nodename")
-
def checkParameters(self):
"""
function: Check parameter from command line
input: NA
output: NA
"""
-
+
# check user | group | xmlfile | node
if len(self.user) == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-U")
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-U")
if len(self.group) == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-G")
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-G")
if len(self.xmlFile) == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-X")
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-X")
if len(self.newHostList) == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-h")
-
- clusterInfo = ExpansipnClusterInfo()
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-h")
+        # check whether an upgrade action is in progress
+ if DefaultValue.isUnderUpgrade(self.user):
+ GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])
+
+ def _getClusterInfoDict(self):
+ clusterInfo = ExpansionClusterInfo()
self.clusterInfo = clusterInfo
hostNameIpDict = clusterInfo.initFromXml(self.xmlFile)
clusterDict = clusterInfo.getClusterDirectorys()
- backIpList = clusterInfo.getClusterBackIps()
- nodeNameList = clusterInfo.getClusterNodeNames()
+ self.nodeNameList = clusterInfo.getClusterNodeNames()
- # only support single az now.
- azNames = clusterInfo.getazNames()
- self.azName = "AZ1"
- if len(azNames) > 0:
- self.azName = azNames[0]
-
- self.localIp = backIpList[0]
- self.nodeNameList = nodeNameList
- self.backIpNameMap = {}
- for backip in backIpList:
- self.backIpNameMap[backip] = clusterInfo.getNodeNameByBackIp(backip)
-
- # check parameter node must in xml config file
- for nodeid in self.newHostList:
- if nodeid not in backIpList:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35702"] % \
- nodeid)
-
- # get corepath and toolpath from xml file
+ # get corepath and toolpath from xml file
corePath = clusterInfo.readClustercorePath(self.xmlFile)
toolPath = clusterInfo.getToolPath(self.xmlFile)
# parse xml file and cache node info
@@ -175,20 +164,16 @@ General options:
clusterInfoDict["logPath"] = clusterDict["logPath"][0]
clusterInfoDict["corePath"] = corePath
clusterInfoDict["toolPath"] = toolPath
- for nodeName in nodeNameList:
+ for nodeName in self.nodeNameList:
hostInfo = hostNameIpDict[nodeName]
ipList = hostInfo[0]
portList = hostInfo[1]
- backIp = ""
- sshIp = ""
- if len(ipList) == 1:
- backIp = sshIp = ipList[0]
- elif len(ipList) == 2:
- backIp = ipList[0]
- sshIp = ipList[1]
+ backIp = ipList[0]
+ sshIp = ipList[1]
port = portList[0]
cluster = clusterDict[nodeName]
dataNode = cluster[2]
+ dbNode = clusterInfo.getDbNodeByName(nodeName)
clusterInfoDict[nodeName] = {
"backIp": backIp,
"sshIp": sshIp,
@@ -197,17 +182,17 @@ General options:
"localservice": int(port) + 4,
"heartBeatPort": int(port) + 3,
"dataNode": dataNode,
- "instanceType": -1
+ "instanceType": -1,
+ "azPriority": dbNode.azPriority
}
-
+
nodeIdList = clusterInfo.getClusterNodeIds()
for id in nodeIdList:
insType = clusterInfo.getdataNodeInstanceType(id)
hostName = clusterInfo.getHostNameByNodeId(id)
clusterInfoDict[hostName]["instanceType"] = insType
self.clusterInfoDict = clusterInfoDict
-
-
+
def initLogs(self):
"""
init log file
@@ -223,12 +208,108 @@ General options:
self.initLogger("gs_expansion")
self.logger.ignoreErr = True
+
+ def getExpansionInfo(self):
+ self._getClusterInfoDict()
+ self._getBackIpNameMap()
+ self._getHostAzNameMap()
+ self._getNewHostCasRoleMap()
+
+ def checkXmlIncludeNewHost(self):
+ """
+        check that every node given on the command line exists in the xml config file
+ """
+ backIpList = self.clusterInfo.getClusterBackIps()
+ for nodeIp in self.newHostList:
+ if nodeIp not in backIpList:
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35702"] % \
+ nodeIp)
-class ExpansipnClusterInfo(dbClusterInfo):
+ def _getBackIpNameMap(self):
+ backIpList = self.clusterInfo.getClusterBackIps()
+ for backip in backIpList:
+ self.backIpNameMap[backip] = \
+ self.clusterInfo.getNodeNameByBackIp(backip)
+
+ def checkExecutingUser(self):
+ """
+ check whether current user executing this command is root
+ """
+ if os.getuid() != 0:
+ GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50104"])
+
+ def checkExecutingHost(self):
+ """
+ check whether current host is primary host
+ """
+ currentHost = socket.gethostname()
+ primaryHost = ""
+ for nodeName in self.nodeNameList:
+ if self.clusterInfoDict[nodeName]["instanceType"] \
+ == 0:
+ primaryHost = nodeName
+ break
+ if currentHost != primaryHost:
+ GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50110"] % \
+ (currentHost + ", which is not primary."))
+
+ def checkTrust(self, hostList = None):
+ """
+ check trust between primary/current host and every host in hostList
+ """
+ if hostList == None:
+ hostList = self.nodeNameList
+ rootSSHExceptionHosts = []
+ individualSSHExceptionHosts = []
+ sshTool = SshTool(hostList, timeout = 0)
+ retmap, output = sshTool.getSshStatusOutput("pwd")
+ for host in hostList:
+ # check root's trust
+ if retmap[host] != DefaultValue.SUCCESS:
+ rootSSHExceptionHosts.append(host)
+ try:
+ sshTool.clenSshResultFiles()
+ except Exception as e:
+ self.logger.debug(str(e))
+ # check individual user's trust
+ checkUserTrustCmd = "su - %s -c 'ssh %s \"pwd\"'" % (self.user, host)
+ (status, output) = subprocess.getstatusoutput(checkUserTrustCmd)
+ if status != 0:
+ individualSSHExceptionHosts.append(host)
+ # output ssh exception info if ssh connect failed
+ if rootSSHExceptionHosts or individualSSHExceptionHosts:
+ sshExceptionInfo = ""
+ if rootSSHExceptionHosts:
+ sshExceptionInfo += "\n"
+ sshExceptionInfo += ", ".join(rootSSHExceptionHosts)
+ sshExceptionInfo += " by root"
+ if individualSSHExceptionHosts:
+ sshExceptionInfo += "\n"
+ sshExceptionInfo += ", ".join(individualSSHExceptionHosts)
+ sshExceptionInfo += " by individual user"
+ GaussLog.exitWithError(ErrorCode.GAUSS_511["GAUSS_51100"] %
+ sshExceptionInfo)
+
+ def _getHostAzNameMap(self):
+ """
+ get azName of all hosts
+ """
+ for dbnode in self.clusterInfo.dbNodes:
+ self.hostAzNameMap[dbnode.backIps[0]] = dbnode.azName
+
+ def _getNewHostCasRoleMap(self):
+ """
+ get cascadeRole of newHosts
+ """
+ for dbnode in self.clusterInfo.dbNodes:
+ if dbnode.backIps[0] in self.newHostList:
+ self.newHostCasRoleMap[dbnode.backIps[0]] = dbnode.cascadeRole
+
+class ExpansionClusterInfo(dbClusterInfo):
def __init__(self):
dbClusterInfo.__init__(self)
-
+
def getToolPath(self, xmlFile):
"""
function : Read tool path from default xml file
@@ -246,13 +327,18 @@ class ExpansipnClusterInfo(dbClusterInfo):
checkPathVaild(toolPath)
return toolPath
+
if __name__ == "__main__":
"""
"""
expansion = Expansion()
+ expansion.checkExecutingUser()
expansion.parseCommandLine()
expansion.checkParameters()
expansion.initLogs()
+ expansion.getExpansionInfo()
+ expansion.checkXmlIncludeNewHost()
+ expansion.checkExecutingHost()
+ expansion.checkTrust()
expImpl = ExpansionImpl(expansion)
- expImpl.run()
-
+ expImpl.run()
\ No newline at end of file
diff --git a/script/gs_preinstall b/script/gs_preinstall
index 2bcf81ed1e90119f89f358d00d59794ea918e916..5cfdd2bfeab319b83c90303f0b74118812be7b0c 100644
--- a/script/gs_preinstall
+++ b/script/gs_preinstall
@@ -258,19 +258,6 @@ General options:
"be a directory."
% self.mpprcFile)
- try:
- # check the user if exist
- DefaultValue.getUserId(self.user)
- except Exception as e:
- mpprcFileTopPath = os.path.dirname(self.mpprcFile)
- # the mpprc file can not be specified in the /home/user directory
- userpath = "/home/%s/" % self.user
- if (mpprcFilePath.startswith(userpath)):
- GaussLog.exitWithError(
- ErrorCode.GAUSS_500["GAUSS_50004"] % '-sep-env-file' + \
- " Environment variable separation file can not be "
- "created under %s." % mpprcFileTopPath)
-
DefaultValue.checkMpprcFileChange(self.mpprcFile, "", self.mpprcFile)
(checkstatus, checkoutput) = DefaultValue.checkEnvFile(self.mpprcFile)
if (not checkstatus):
diff --git a/script/gs_upgradectl b/script/gs_upgradectl
index f4cddf5ac97f2ee7385b3adf54f591cf0fa998b7..7c4edd8d02e2386219dd8d51eb06e771473d3840 100644
--- a/script/gs_upgradectl
+++ b/script/gs_upgradectl
@@ -96,9 +96,7 @@ Usage:
gs_upgradectl -t chose-strategy [-l LOGFILE]
gs_upgradectl -t commit-upgrade -X XMLFILE [-l LOGFILE]
- gs_upgradectl -t auto-upgrade -X XMLFILE [-l LOGFILE]
- gs_upgradectl -t auto-upgrade -X XMLFILE --grey [-l LOGFILE]
- {-h HOSTNAME | -g NODENUMBER | --continue}
+ gs_upgradectl -t auto-upgrade -X XMLFILE [-l LOGFILE] [--grey]
gs_upgradectl -t auto-rollback -X XMLFILE [-l LOGFILE] [--force]
General options:
@@ -112,13 +110,8 @@ General options:
later version cluster.
--force Force to rollback when cluster status is
not normal
+ --grey Use grey-binary-upgrade
-Options for grey upgrade
- -h Under grey upgrade, specified nodes name in
- ssh mode.
- -g Under grey upgrade, upgrade node numbers.
- --continue Under grey upgrade, continue to upgrade
- remain nodes.
"""
print(self.usage.__doc__)
@@ -145,12 +138,6 @@ Options for grey upgrade
if "grey" in ParaDict.keys():
self.is_grey_upgrade = True
self.is_inplace_upgrade = False
- if "nodename" in ParaDict.keys():
- self.nodeNames = ParaDict.get("nodename")
- if "nodesNum" in ParaDict.keys():
- self.nodesNum = ParaDict.get("nodesNum")
- if "continue" in ParaDict.keys():
- self.upgrade_remain = True
if "force" in ParaDict.keys():
self.forceRollback = True
@@ -203,14 +190,6 @@ Options for grey upgrade
if not os.path.exists(self.xmlFile):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
self.xmlFile)
-
- # check parameter base on different action
- # check the param which input
- if self.action == Const.ACTION_AUTO_UPGRADE:
- if self.is_grey_upgrade:
- self.checkCommandConflicts(inplace=False)
- else:
- self.checkCommandConflicts()
# check mpprc file path
# get mpprcFile by MPPDB_ENV_SEPARATE_PATH. Even if the return value
# is "" or None, no need to pay attention
@@ -285,30 +264,6 @@ Options for grey upgrade
self.logger.debug("Retry distributing xml command, "
"the {0} time.".format(count))
- def checkCommandConflicts(self, inplace=True):
- """
- function: check the command line for conflict input
- :return:
- """
- conflictPara = 0
- if self.upgrade_remain:
- conflictPara += 1
- if len(self.nodeNames) != 0:
- conflictPara += 1
- if self.nodesNum != -1:
- conflictPara += 1
- if inplace:
- if conflictPara > 0:
- raise Exception("The parameter %s should be used in grey "
- "upgrade" % "'-continue, -h, -g'")
- else:
- if conflictPara > 1:
- GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50005"] % (
- "-continue", "-h, -g"))
- if conflictPara == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] %
- "-continue, -h, -g")
-
if __name__ == '__main__':
"""
diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py
index 7aaac20d0f320ea5d956a01f3e956068c3873b2c..6e3fdf04807665d9cef0cdcd3307ee7de56576a8 100644
--- a/script/gspylib/common/DbClusterInfo.py
+++ b/script/gspylib/common/DbClusterInfo.py
@@ -1453,7 +1453,7 @@ class dbClusterInfo():
dnInst.azName)
if dnInst.localRole == "Primary":
outText = outText + (
- "static_connections : %s\n\n" %
+ "static_connections : %s\n" %
dnInst.staticConnections)
outText = outText + (
"HA_state : %s\n" %
@@ -1462,14 +1462,14 @@ class dbClusterInfo():
"instance_role : %s\n" %
dnInst.localRole)
if dnInst.localRole == "Primary":
- outText = outText + "------------------------" \
+ outText = outText + "\n------------------------" \
"---------------" \
"--------------------------------\n\n"
continue
for i_loop in syncInfo:
- if i_loop[11] == '':
- i_loop[11] = 'Unknown'
if i_loop[0] == dnInst.listenIps[0]:
+ if i_loop[11] == '':
+ i_loop[11] = 'Unknown'
outText = outText + (
"HA_state : %s\n" %
i_loop[1])
@@ -1507,11 +1507,10 @@ class dbClusterInfo():
outText = outText + (
"upstream_nodeIp : %s\n" %
i_loop[12])
- outText = outText + ("\n")
- outText = outText + "------------------------" \
- "---------------" \
- "--------------------------------\n\n"
break
+ outText = outText + "\n------------------------" \
+ "---------------" \
+ "--------------------------------\n\n"
if nodeId != 0:
break
else:
@@ -1861,7 +1860,6 @@ class dbClusterInfo():
"receiver_received_location, receiver_write_location," \
"receiver_flush_location, receiver_replay_location," \
"sync_percent, channel from pg_stat_get_wal_receiver();"
- cascadeOutput = ""
if dbNode.name != localHostName:
cmd = "[need_replace_quotes] gsql -m -d postgres -p " \
"%s -A -t -c \"%s\"" % \
@@ -1872,7 +1870,7 @@ class dbClusterInfo():
"failed to connect") >= 0:
continue
else:
- output = cascadeOutput.split('\n')[1:-1]
+ cascadeOutput = cascadeOutput.split('\n')[1:-1]
else:
cmd = "gsql -m -d postgres -p %s -A -t -c \"%s\"" % (
dnInst.port, subsql)
@@ -1962,9 +1960,7 @@ class dbClusterInfo():
with open(fileName, "a") as fp:
fp.write(content)
fp.flush()
-
- else:
- sys.stdout.write(content)
+ sys.stdout.write(content)
def __checkOsUser(self, user):
"""
@@ -3188,36 +3184,24 @@ class dbClusterInfo():
"with cm and etcd") + errMsg)
# create a dictionary
nodeipport[dbNode.name] = [nodeips, nodeports]
- # delete redundant records
- self.__Deduplication(nodeports)
- self.__Deduplication(nodeips)
# check port and ip
self.__checkPortandIP(nodeips, nodeports, dbNode.name)
return nodeipport
- def __Deduplication(self, currentlist):
- """
- function : Delete the deduplication.
- input : []
- output : NA
- """
- currentlist.sort()
- for i in range(len(currentlist) - 2, -1, -1):
- if currentlist.count(currentlist[i]) > 1:
- del currentlist[i]
-
def __checkPortandIP(self, ips, ports, name):
"""
function : Check port and IP.
input : String,int,string
output : NA
"""
- for port in ports:
+ ipsCopy = list(set(ips))
+ portsCopy = list(set(ports))
+ for port in portsCopy:
if (not self.__isPortValid(port)):
raise Exception(ErrorCode.GAUSS_512["GAUSS_51233"]
% (port, name) + " Please check it.")
- for ip in ips:
+ for ip in ipsCopy:
if (not self.__isIpValid(ip)):
raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] + \
"The IP address is: %s." % ip + " Please "
diff --git a/script/gspylib/common/ErrorCode.py b/script/gspylib/common/ErrorCode.py
index 4f5ca1c05f49270ebcb801a8a7eb50000f8529bd..ad38a224cd21c34b588b3b1089f4c3e8763663d5 100644
--- a/script/gspylib/common/ErrorCode.py
+++ b/script/gspylib/common/ErrorCode.py
@@ -1112,8 +1112,8 @@ class ErrorCode():
"detail.",
"GAUSS_35704": "[GAUSS-35704] %s [%s] does not exist on node [%s].",
"GAUSS_35705": "[GAUSS-35705] Error, the database version is "
- "inconsistent in %s: %s"
-
+ "inconsistent in %s: %s",
+ "GAUSS_35706": "[GAUSS-35706] All new hosts %s failed."
}
##########################################################################
diff --git a/script/gspylib/common/ParameterParsecheck.py b/script/gspylib/common/ParameterParsecheck.py
index 1f2137e6ab004749b74d54b33d58213aa9dc356a..896609243ec00fb887994b3b730ea681211971ad 100644
--- a/script/gspylib/common/ParameterParsecheck.py
+++ b/script/gspylib/common/ParameterParsecheck.py
@@ -130,7 +130,7 @@ gs_upgradectl_chose_strategy = ["-t:", "-?", "--help", "-V", "--version",
"-l:"]
# auto-upgrade parameter lists
gs_upgradectl_auto_upgrade = ["-t:", "-?", "--help", "-V", "--version", "-l:",
- "-X:", "--grey", "-h:", "-g:", "--continue"]
+ "-X:", "--grey"]
# auto-rollback parameter lists
gs_upgradectl_auto_rollback = ["-t:", "-?", "--help", "-V", "--version",
"-l:", "-X:", "--force"]
diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py
index d2a85d94039ab79108352c4b549c016358c4600d..0fe15f1b2f6d3abdf8182af398c2df7910993ac5 100644
--- a/script/gspylib/component/Kernel/Kernel.py
+++ b/script/gspylib/component/Kernel/Kernel.py
@@ -129,7 +129,14 @@ class Kernel(BaseComponent):
raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] %
"instance" + " Error: \n%s." % output)
if output.find("No such process") > 0:
- GaussLog.exitWithError(output)
+ cmd = "ps c -eo pid,euid,cmd | grep gaussdb | grep -v grep | " \
+ "awk '{if($2 == curuid && $1!=\"-n\") " \
+ "print \"/proc/\"$1\"/cwd\"}' curuid=`id -u`|" \
+ " xargs ls -l |awk '{if ($NF==\"%s\") print $(NF-2)}' | " \
+ "awk -F/ '{print $3 }'" % (self.instInfo.datadir)
+ (status, rightpid) = subprocess.getstatusoutput(cmd)
+ if rightpid or status != 0:
+ GaussLog.exitWithError(output)
def isPidFileExist(self):
pidFile = "%s/postmaster.pid" % self.instInfo.datadir
diff --git a/script/gspylib/inspection/lib/checknetspeed/speed_test b/script/gspylib/inspection/lib/checknetspeed/speed_test
index 1607f8f1ed6bd9f9097683c096020a4dabeff588..6505de566f1f03af1ee79e1af3b05353c9b917f2 100644
--- a/script/gspylib/inspection/lib/checknetspeed/speed_test
+++ b/script/gspylib/inspection/lib/checknetspeed/speed_test
@@ -25,45 +25,51 @@ listen_port = 31111
run_mode = 0 # 0:connect, 1:send, 2:recv
def send_main():
- global listen_ip
- global listen_port
- buf = "this is a test !" * 512 # buf 8192 block
- sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
- print(listen_ip+":"+listen_port)
- while(sockets.connect_ex((listen_ip, int(listen_port))) != 0):
- print("connect failed:%m\n")
- time.sleep(1)
- print("connect succeed, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp")
- print("send satrt, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp")
- i = 0
- while True:
- i = i + 1
- n = sockets.send(buf.encode())
- if n == 0:
- print("send failed:%m\n")
- break
- print("%d send:%s, len=%d\n", i, buf, n)
+ try:
+ global listen_ip
+ global listen_port
+ buf = "this is a test !" * 512 # buf 8192 block
+ sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+ print(listen_ip+":"+listen_port)
+ while(sockets.connect_ex((listen_ip, int(listen_port))) != 0):
+ print("connect failed:%m\n")
+ time.sleep(1)
+ print("connect succeed, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp")
+ print("send satrt, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp")
+ i = 0
+ while True:
+ i = i + 1
+ n = sockets.send(buf.encode())
+ if n == 0:
+ print("send failed:%m\n")
+ break
+ print("%d send:%s, len=%d\n", i, buf, n)
+ except Exception as e:
+ print(str(e))
def recv_main():
- global listen_ip
- global listen_port
- sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- sockets.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)
- sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
- sockets.bind((listen_ip, int(listen_port)))
- sockets.listen(128)
- while True:
- client, addr = sockets.accept()
- print('client:', client)
- print('addr:', addr)
+ try:
+ global listen_ip
+ global listen_port
+ sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sockets.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)
+ sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+ sockets.bind((listen_ip, int(listen_port)))
+ sockets.listen(128)
while True:
- data = client.recv(8192)
- print(data.decode())
- if not data:
- client.close()
- break
+ client, addr = sockets.accept()
+ print('client:', client)
+ print('addr:', addr)
+ while True:
+ data = client.recv(8192)
+ print(data.decode())
+ if not data:
+ client.close()
+ break
+ except Exception as e:
+ print(str(e))
def connect_main():
sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
diff --git a/script/impl/expansion/ExpansionImpl.py b/script/impl/expansion/ExpansionImpl.py
index 40375e63cacf10023076080dd427cdd782d4c0e5..495ec08b4dbd70ba0addaed76adc2a813e9bd163 100644
--- a/script/impl/expansion/ExpansionImpl.py
+++ b/script/impl/expansion/ExpansionImpl.py
@@ -46,10 +46,17 @@ DefaultValue.doConfigForParamiko()
import paramiko
-#mode
+#boot/build mode
MODE_PRIMARY = "primary"
MODE_STANDBY = "standby"
MODE_NORMAL = "normal"
+MODE_CASCADE = "cascade_standby"
+
+# instance local_role
+ROLE_NORMAL = "normal"
+ROLE_PRIMARY = "primary"
+ROLE_STANDBY = "standby"
+ROLE_CASCADE = "cascade standby"
#db state
STAT_NORMAL = "normal"
@@ -78,7 +85,10 @@ class ExpansionImpl():
self.user = self.context.user
self.group = self.context.group
-
+ self.existingHosts = []
+ self.expansionSuccess = {}
+ for newHost in self.context.newHostList:
+ self.expansionSuccess[newHost] = False
self.logger = self.context.logger
envFile = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
@@ -102,32 +112,26 @@ class ExpansionImpl():
"""
create software dir and send it on each nodes
"""
- self.logger.debug("Start to send soft to each standby nodes.\n")
- hostNames = self.context.newHostList
- hostList = hostNames
-
- sshTool = SshTool(hostNames)
-
+ self.logger.log("Start to send soft to each standby nodes.")
srcFile = self.context.packagepath
- targetDir = os.path.realpath(
- os.path.join(srcFile, "../"))
-
- ## mkdir package dir and send package to remote nodes.
- sshTool.executeCommand("mkdir -p %s" % srcFile , "", DefaultValue.SUCCESS,
- hostList)
- sshTool.scpFiles(srcFile, targetDir, hostList)
-
- ## change mode of package dir to set privileges for users
+ targetDir = os.path.realpath(os.path.join(srcFile, "../"))
+ # change mode of package dir to set privileges for users
tPathList = os.path.split(targetDir)
path2ChangeMode = targetDir
if len(tPathList) > 2:
path2ChangeMode = os.path.join(tPathList[0],tPathList[1])
- changeModCmd = "chmod -R a+x {srcFile}".format(user=self.user,
- group=self.group,srcFile=path2ChangeMode)
- sshTool.executeCommand(changeModCmd, "", DefaultValue.SUCCESS,
- hostList)
- self.logger.debug("End to send soft to each standby nodes.\n")
- self.cleanSshToolFile(sshTool)
+ changeModCmd = "chmod -R a+x {srcFile}".format(user = self.user,
+ group = self.group, srcFile = path2ChangeMode)
+ for host in self.context.newHostList:
+ sshTool = SshTool([host], timeout = 300)
+ # mkdir package dir and send package to remote nodes.
+ sshTool.executeCommand("mkdir -p %s" % srcFile , "",
+ DefaultValue.SUCCESS, [host])
+ sshTool.scpFiles(srcFile, targetDir, [host])
+ sshTool.executeCommand(changeModCmd, "", DefaultValue.SUCCESS,
+ [host])
+ self.cleanSshToolFile(sshTool)
+ self.logger.log("End to send soft to each standby nodes.")
def generateAndSendXmlFile(self):
"""
@@ -151,7 +155,7 @@ class ExpansionImpl():
fo.write( xmlContent )
fo.close()
# send single deploy xml file to each standby node
- sshTool = SshTool(host)
+ sshTool = SshTool([host])
retmap, output = sshTool.getSshStatusOutput("mkdir -p %s" %
self.tempFileDir , [host], self.envFile)
retmap, output = sshTool.getSshStatusOutput("chown %s:%s %s" %
@@ -167,6 +171,7 @@ class ExpansionImpl():
"""
nodeName = self.context.backIpNameMap[backIp]
nodeInfo = self.context.clusterInfoDict[nodeName]
+ clusterName = self.context.clusterInfo.name
backIp = nodeInfo["backIp"]
sshIp = nodeInfo["sshIp"]
@@ -181,12 +186,14 @@ class ExpansionImpl():
tmpMppdbPath = DefaultValue.getEnv("PGHOST")
if tmpMppdbPath:
mppdbconfig = '' % tmpMppdbPath
+ azName = self.context.hostAzNameMap[backIp]
+ azPriority = nodeInfo["azPriority"]
xmlConfig = """\
-
+
@@ -197,10 +204,10 @@ class ExpansionImpl():
-
+
-
+
@@ -210,10 +217,10 @@ class ExpansionImpl():
- """.format(nodeName=nodeName,backIp=backIp,appPath=appPath,
- logPath=logPath,toolPath=toolPath,corePath=corePath,
- sshIp=sshIp,port=port,dataNode=dataNode,azName=self.context.azName,
- mappdbConfig=mppdbconfig)
+ """.format(clusterName = clusterName, nodeName = nodeName, backIp = backIp,
+ appPath = appPath, logPath = logPath, toolPath = toolPath, corePath = corePath,
+ sshIp = sshIp, port = port, dataNode = dataNode, azName = azName,
+ azPriority = azPriority, mappdbConfig = mppdbconfig)
return xmlConfig
def changeUser(self):
@@ -247,36 +254,62 @@ class ExpansionImpl():
self.logger.log("Authentication failed.")
self.initSshConnect(host, user)
+ def hasNormalStandbyInAZOfCascade(self, cascadeIp, existingStandbys):
+ # check whether there are normal standbies in hostAzNameMap[cascadeIp] azZone
+ hasStandbyWithSameAZ = False
+ hostAzNameMap = self.context.hostAzNameMap
+ for existingStandby in existingStandbys:
+ existingStandbyName = self.context.backIpNameMap[existingStandby]
+ existingStandbyDataNode = \
+ self.context.clusterInfoDict[existingStandbyName]["dataNode"]
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ existingStandby, existingStandbyDataNode, self.envFile)
+ if dbStat != STAT_NORMAL:
+ continue
+ if hostAzNameMap[cascadeIp] != hostAzNameMap[existingStandby]:
+ continue
+ hasStandbyWithSameAZ = True
+ break
+ return hasStandbyWithSameAZ
+
def installDatabaseOnHosts(self):
"""
install database on each standby node
"""
- hostList = self.context.newHostList
- envfile = self.envFile
+ standbyHosts = self.context.newHostList
tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir
- installCmd = "source {envfile} ; gs_install -X {xmlfile} \
- 2>&1".format(envfile=envfile,xmlfile=tempXmlFile)
-
- statusArr = []
-
- for newHost in hostList:
-
- self.logger.log("\ninstalling database on node %s:" % newHost)
- self.logger.debug(installCmd)
-
+ installCmd = "source {envFile} ; gs_install -X {xmlFile} "\
+ "2>&1".format(envFile = self.envFile, xmlFile = tempXmlFile)
+ self.logger.debug(installCmd)
+ primaryHostName = self.getPrimaryHostName()
+ primaryHostIp = self.context.clusterInfoDict[primaryHostName]["backIp"]
+ existingStandbys = list(set(self.existingHosts) - (set([primaryHostIp])))
+ failedInstallHosts = []
+ notInstalledCascadeHosts = []
+ for newHost in standbyHosts:
+ if not self.expansionSuccess[newHost]:
+ continue
+ self.logger.log("Installing database on node %s:" % newHost)
hostName = self.context.backIpNameMap[newHost]
sshIp = self.context.clusterInfoDict[hostName]["sshIp"]
+ if self.context.newHostCasRoleMap[newHost] == "on":
+ # check whether there are normal standbies in hostAzNameMap[host] azZone
+ hasStandbyWithSameAZ = self.hasNormalStandbyInAZOfCascade(newHost,
+ existingStandbys)
+ if not hasStandbyWithSameAZ:
+ notInstalledCascadeHosts.append(newHost)
+ self.expansionSuccess[newHost] = False
+ continue
self.initSshConnect(sshIp, self.user)
-
stdin, stdout, stderr = self.sshClient.exec_command(installCmd,
- get_pty=True)
+ get_pty=True)
channel = stdout.channel
echannel = stderr.channel
while not channel.exit_status_ready():
try:
recvOut = channel.recv(1024)
- outDecode = recvOut.decode("utf-8");
+ outDecode = recvOut.decode("utf-8")
outStr = outDecode.strip()
if(len(outStr) == 0):
continue
@@ -309,187 +342,200 @@ class ExpansionImpl():
not channel.recv_ready():
channel.close()
break
-
stdout.close()
stderr.close()
- status = channel.recv_exit_status()
- statusArr.append(status)
-
- isBothSuccess = True
- for status in statusArr:
- if status != 0:
- isBothSuccess = False
- break
- if isBothSuccess:
- self.logger.log("\nSuccessfully install database on node %s" %
- hostList)
- else:
- sys.exit(1)
+ if channel.recv_exit_status() != 0:
+ self.expansionSuccess[newHost] = False
+ failedInstallHosts.append(newHost)
+ else:
+ if self.context.newHostCasRoleMap[newHost] == "off":
+ existingStandbys.append(newHost)
+ self.logger.log("%s install success." % newHost)
+ if notInstalledCascadeHosts:
+ self.logger.log("OpenGauss won't be installed on cascade_standby"
+ " %s, because there is no Normal standby in the same azZone." %
+ ", ".join(notInstalledCascadeHosts))
+ if failedInstallHosts:
+ self.logger.log(ErrorCode.GAUSS_527["GAUSS_52707"] %
+ ", ".join(failedInstallHosts))
+ self.logger.log("Finish to install database on all nodes.")
+ if self._isAllFailed():
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "install")
def preInstallOnHosts(self):
"""
execute preinstall step
"""
- self.logger.debug("Start to preinstall database step.\n")
- newBackIps = self.context.newHostList
- newHostNames = []
- for host in newBackIps:
- newHostNames.append(self.context.backIpNameMap[host])
- envfile = self.envFile
+ self.logger.log("Start to preinstall database step.")
tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir
- userpath = pwd.getpwnam(self.user).pw_dir
- mpprcFile = os.path.join(userpath, ".bashrc")
- if envfile == mpprcFile:
- preinstallCmd = "{softpath}/script/gs_preinstall -U {user} -G {group} \
- -X {xmlfile} --non-interactive 2>&1\
- ".format(softpath=self.context.packagepath,user=self.user,
- group=self.group,xmlfile=tempXmlFile)
+ if not DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"):
+ preinstallCmd = "{softPath}/script/gs_preinstall -U {user} -G {group} "\
+ "-X {xmlFile} --non-interactive 2>&1".format(
+ softPath = self.context.packagepath, user = self.user,
+ group = self.group, xmlFile = tempXmlFile)
else:
- preinstallCmd = "{softpath}/script/gs_preinstall -U {user} -G {group} \
- -X {xmlfile} --sep-env-file={envfile} \
- --non-interactive 2>&1\
- ".format(softpath=self.context.packagepath,user=self.user,
- group=self.group,xmlfile=tempXmlFile,envfile=envfile)
-
- sshTool = SshTool(newHostNames)
-
- status, output = sshTool.getSshStatusOutput(preinstallCmd , [], envfile)
- statusValues = status.values()
- if STATUS_FAIL in statusValues:
- GaussLog.exitWithError(output)
-
- self.logger.debug("End to preinstall database step.\n")
- self.cleanSshToolFile(sshTool)
-
+ preinstallCmd = "{softPath}/script/gs_preinstall -U {user} -G {group} "\
+ "-X {xmlFile} --sep-env-file={envFile} --non-interactive 2>&1".format(
+ softPath = self.context.packagepath, user = self.user,
+ group = self.group, xmlFile = tempXmlFile, envFile = self.envFile)
+
+ failedPreinstallHosts = []
+ for host in self.context.newHostList:
+ sshTool = SshTool([host], timeout = 300)
+ resultMap, output = sshTool.getSshStatusOutput(preinstallCmd, [], self.envFile)
+ if resultMap[host] == DefaultValue.SUCCESS:
+ self.expansionSuccess[host] = True
+ self.logger.log("Preinstall %s success" % host)
+ else:
+ failedPreinstallHosts.append(host)
+ self.cleanSshToolFile(sshTool)
+ if failedPreinstallHosts:
+ self.logger.log("Failed to preinstall on: \n%s" % ", ".join(failedPreinstallHosts))
+ self.logger.log("End to preinstall database step.")
def buildStandbyRelation(self):
"""
func: after install single database on standby nodes.
build the relation with primary and standby nodes.
step:
- 1. restart primary node with Primary Mode
- (only used to Single-Node instance)
- 2. set guc config to primary node
- 3. restart standby node with Standby Mode
- 4. set guc config to standby node
- 5. generate cluster static file and send to each node.
- """
- self.queryPrimaryClusterDetail()
- self.setPrimaryGUCConfig()
- self.setStandbyGUCConfig()
- self.addTrustOnExistNodes()
- self.restartSingleDbWithPrimaryMode()
+ 1. set all nodes' guc config parameter: replconninfo, available_zone(only for new)
+ 2. add trust on all hosts
+ 3. generate GRPC cert on new hosts, and primary if current cluster is single instance
+ 4. build new hosts :
+ (1) restart new instance with standby mode
+ (2) build new instances
+ 5. rollback guc config of existing hosts if build failed
+ 6. generate cluster static file and send to each node.
+ """
+ self.setGucConfig()
+ self.addTrust()
+ self.generateGRPCCert()
self.buildStandbyHosts()
+ self.rollback()
self.generateClusterStaticFile()
- def queryPrimaryClusterDetail(self):
+ def getExistingHosts(self):
"""
- get current cluster type.
- single-node or primary-standby
+        get the existing hosts
"""
- self.logger.debug("Query primary database instance mode.\n")
- self.isSingleNodeInstance = True
+ self.logger.debug("Get the existing hosts.")
primaryHost = self.getPrimaryHostName()
- result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
- instance = re.findall(r"node\s+node_ip\s+instance\s+state", result)
- if len(instance) > 1:
- self.isSingleNodeInstance = False
- self.logger.debug("Original instance mode is %s" %
- self.isSingleNodeInstance)
+ command = ""
+ if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"):
+ command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
+ (self.user, self.envFile)
+ else:
+ command = "su - %s -c 'source /etc/profile;source /home/%s/.bashrc;"\
+ "gs_om -t status --detail'" % (self.user, self.user)
+ sshTool = SshTool([primaryHost])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [primaryHost], self.envFile)
+ self.logger.debug(outputCollect)
+ if resultMap[primaryHost] != DefaultValue.SUCCESS:
+ GaussLog.exitWithError("Unable to query current cluster state.")
+ instances = re.split('(?:\|)|(?:\n)', outputCollect)
+ self.existingHosts = []
+ pattern = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*')
+ for inst in instances:
+ existingHosts = pattern.findall(inst)
+ if len(existingHosts) != 0:
+ self.existingHosts.append(existingHosts[0])
- def setPrimaryGUCConfig(self):
+ def setGucConfig(self):
"""
+ set replconninfo on all hosts
"""
- self.logger.debug("Start to set primary node GUC config.\n")
- primaryHost = self.getPrimaryHostName()
+ self.logger.debug("Start to set GUC config on all hosts.\n")
+ gucDict = self.getGUCConfig()
+ tempShFile = "%s/guc.sh" % self.tempFileDir
+ hostIpList = list(self.existingHosts)
+ for host in self.expansionSuccess:
+ hostIpList.append(host)
- self.setGUCOnClusterHosts([primaryHost])
- self.addStandbyIpInPrimaryConf()
-
-
- def setStandbyGUCConfig(self):
- """
- set the expansion standby node db guc config
- """
- self.logger.debug("Stat to set standby node GUC config.\n")
- nodeList = self.context.nodeNameList
- primaryHost = self.getPrimaryHostName()
- standbyHostNames = list(set(nodeList).difference(set([primaryHost])))
- self.setGUCOnClusterHosts(standbyHostNames)
-
- def addTrustOnExistNodes(self):
- """
- add host trust in pg_hba.conf on existing standby node.
- """
- self.logger.debug("Start to set host trust on existing node.")
- allNodeNames = self.context.nodeNameList
- newNodeIps = self.context.newHostList
- newNodeNames = []
- trustCmd = []
- for node in newNodeIps:
- nodeName = self.context.backIpNameMap[node]
- newNodeNames.append(nodeName)
- cmd = 'host all all %s/32 trust' % node
- trustCmd.append(cmd)
- existNodes = list(set(allNodeNames).difference(set(newNodeNames)))
- for node in existNodes:
- dataNode = self.context.clusterInfoDict[node]["dataNode"]
- cmd = ""
- for trust in trustCmd:
- cmd += "source %s; gs_guc set -D %s -h '%s';" % \
- (self.envFile, dataNode, trust)
- sshTool = SshTool([node])
- resultMap, outputCollect = sshTool.getSshStatusOutput(cmd,
- [node], self.envFile)
+ nodeDict = self.context.clusterInfoDict
+ backIpNameMap = self.context.backIpNameMap
+ hostAzNameMap = self.context.hostAzNameMap
+ for host in hostIpList:
+ hostName = backIpNameMap[host]
+ # set Available_zone for the new standby
+ if host in self.context.newHostList:
+ dataNode = nodeDict[hostName]["dataNode"]
+ gucDict[hostName] += """\
+gs_guc set -D {dn} -c "available_zone='{azName}'"
+ """.format(dn=dataNode, azName=hostAzNameMap[host])
+ command = "source %s ; " % self.envFile + gucDict[hostName]
+ self.logger.debug("[%s] gucCommand:%s" % (host, command))
+
+ sshTool = SshTool([host])
+ # create temporary dir to save guc command bashfile.
+ mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
+ (self.tempFileDir, self.user, self.group, self.tempFileDir)
+ sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile)
+ subprocess.getstatusoutput("touch %s; cat /dev/null > %s" %
+ (tempShFile, tempShFile))
+ with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT,
+ stat.S_IWUSR | stat.S_IRUSR), 'w') as fo:
+ fo.write("#bash\n")
+ fo.write(command)
+ fo.close()
+
+ # send guc command bashfile to each host and execute it.
+ sshTool.scpFiles("%s" % tempShFile, "%s" % tempShFile, [host],
+ self.envFile)
+ resultMap, outputCollect = sshTool.getSshStatusOutput(
+ "sh %s" % tempShFile, [host], self.envFile)
+
+ self.logger.debug(outputCollect)
self.cleanSshToolFile(sshTool)
- self.logger.debug("End to set host trust on existing node.")
-
- def restartSingleDbWithPrimaryMode(self):
+
+ def addTrust(self):
+ """
+ add authentication rules about new host ip in existing hosts and
+ add authentication rules about other all hosts ip in new hosts
+ """
+ self.logger.debug("Start to set host trust on all node.")
+ allHosts = self.existingHosts + self.context.newHostList
+ for hostExec in allHosts:
+ hostExecName = self.context.backIpNameMap[hostExec]
+ dataNode = self.context.clusterInfoDict[hostExecName]["dataNode"]
+ cmd = "source %s;gs_guc set -D %s" % (self.envFile, dataNode)
+ if hostExec in self.existingHosts:
+ for hostParam in self.context.newHostList:
+ cmd += " -h 'host all all %s/32 trust'" % \
+ hostParam
+ else:
+ for hostParam in allHosts:
+ if hostExec != hostParam:
+ cmd += " -h 'host all all %s/32 trust'" % \
+ hostParam
+ self.logger.debug("[%s] trustCmd:%s" % (hostExec, cmd))
+ sshTool = SshTool([hostExec])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(cmd,
+ [hostExec], self.envFile)
+ self.cleanSshToolFile(sshTool)
+ self.logger.debug("End to set host trust on all node.")
+
+ def generateGRPCCert(self):
"""
+        generate GRPC cert for new hosts (and for primary if it is not yet in primary mode)
"""
primaryHost = self.getPrimaryHostName()
dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
-
+ needGRPCHosts = []
+ for host in self.expansionSuccess:
+ if self.expansionSuccess[host]:
+ needGRPCHosts.append(host)
insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
- dataNode,self.envFile)
+ dataNode,self.envFile)
if insType != MODE_PRIMARY:
- self.commonGsCtl.stopInstance(primaryHost, dataNode, self.envFile)
- self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode,
- MODE_PRIMARY,self.envFile)
-
- # start db to primary state for three times max
- start_retry_num = 1
- while start_retry_num <= 3:
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
- dataNode, self.envFile)
- if insType == MODE_PRIMARY:
- break
- self.logger.debug("Start database as Primary mode failed, \
-retry for %s times" % start_retry_num)
- self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode,
- MODE_PRIMARY, self.envFile)
- start_retry_num = start_retry_num + 1
-
- def addStandbyIpInPrimaryConf(self):
- """
- add standby hosts ip in primary node pg_hba.conf
- """
-
- standbyHosts = self.context.newHostList
- primaryHost = self.getPrimaryHostName()
- command = ''
- for host in standbyHosts:
- hostName = self.context.backIpNameMap[host]
- dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
- command += ("source %s; gs_guc set -D %s -h 'host all all %s/32 " + \
- "trust';") % (self.envFile, dataNode, host)
- self.logger.debug(command)
- sshTool = SshTool([primaryHost])
- resultMap, outputCollect = sshTool.getSshStatusOutput(command,
- [primaryHost], self.envFile)
- self.logger.debug(outputCollect)
- self.cleanSshToolFile(sshTool)
+ primaryHostIp = self.context.clusterInfoDict[primaryHost]["backIp"]
+ needGRPCHosts.append(primaryHostIp)
+ self.logger.debug("Start to generate GRPC cert.")
+ if needGRPCHosts:
+ self.context.initSshTool(needGRPCHosts)
+ self.context.createGrpcCa(needGRPCHosts)
+ self.logger.debug("End to generate GRPC cert.")
def reloadPrimaryConf(self):
"""
@@ -500,7 +546,7 @@ retry for %s times" % start_retry_num)
sshTool = SshTool([primaryHost])
self.logger.debug(command)
resultMap, outputCollect = sshTool.getSshStatusOutput(command,
- [primaryHost], self.envFile)
+ [primaryHost], self.envFile)
self.logger.debug(outputCollect)
self.cleanSshToolFile(sshTool)
@@ -510,7 +556,7 @@ retry for %s times" % start_retry_num)
primaryHost = ""
for nodeName in self.context.nodeNameList:
if self.context.clusterInfoDict[nodeName]["instanceType"] \
- == MASTER_INSTANCE:
+ == MASTER_INSTANCE:
primaryHost = nodeName
break
return primaryHost
@@ -520,56 +566,121 @@ retry for %s times" % start_retry_num)
"""
stop the new standby host`s database and build it as standby mode
"""
- self.logger.debug("start to build standby node...\n")
+ self.logger.debug("Start to build new nodes.")
standbyHosts = self.context.newHostList
+ hostAzNameMap = self.context.hostAzNameMap
+ primaryHostName = self.getPrimaryHostName()
+ primaryHost = self.context.clusterInfoDict[primaryHostName]["backIp"]
+ existingStandbys = list(set(self.existingHosts).difference(set([primaryHost])))
+ primaryDataNode = self.context.clusterInfoDict[primaryHostName]["dataNode"]
+ self.reloadPrimaryConf()
+ time.sleep(10)
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ primaryHost, primaryDataNode, self.envFile)
+ primaryExceptionInfo = ""
+ if insType != ROLE_PRIMARY:
+            primaryExceptionInfo = "The server mode of primary host " \
+ "is not primary."
+ if dbStat != STAT_NORMAL:
+ primaryExceptionInfo = "The primary is not in Normal state."
+ if primaryExceptionInfo != "":
+ self.rollback()
+ GaussLog.exitWithError(primaryExceptionInfo)
for host in standbyHosts:
+ if not self.expansionSuccess[host]:
+ continue
hostName = self.context.backIpNameMap[host]
dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
-
+ buildMode = ""
+ hostRole = ""
+ if self.context.newHostCasRoleMap[host] == "on":
+ buildMode = MODE_CASCADE
+ hostRole = ROLE_CASCADE
+ # check whether there are normal standbies in hostAzNameMap[host] azZone
+ hasStandbyWithSameAZ = self.hasNormalStandbyInAZOfCascade(host,
+ existingStandbys)
+ if not hasStandbyWithSameAZ:
+ self.logger.log("There is no Normal standby in %s" %
+ hostAzNameMap[host])
+ self.expansionSuccess[host] = False
+ continue
+ else:
+ buildMode = MODE_STANDBY
+ hostRole = ROLE_STANDBY
+ self.logger.log("Start to build %s %s." % (hostRole, host))
self.checkTmpDir(hostName)
-
+ # start new host as standby mode
self.commonGsCtl.stopInstance(hostName, dataNode, self.envFile)
- self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
-
- # start standby as standby mode for three times max.
- start_retry_num = 1
- while start_retry_num <= 3:
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
- if insType != MODE_STANDBY:
- self.logger.debug("Start database as Standby mode failed, \
-retry for %s times" % start_retry_num)
- self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
- start_retry_num = start_retry_num + 1
+ result, output = self.commonGsCtl.startInstanceWithMode(host,
+ dataNode, MODE_STANDBY, self.envFile)
+ if result[host] != DefaultValue.SUCCESS:
+ if "uncompleted build is detected" not in output:
+ self.expansionSuccess[host] = False
+ self.logger.log("Failed to start %s as standby "
+ "before building." % host)
else:
+ self.logger.debug("Uncompleted build is detected on %s." %
+ host)
+ else:
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ hostName, dataNode, self.envFile)
+ if insType != ROLE_STANDBY:
+ self.logger.log("Build %s failed." % host)
+ self.expansionSuccess[host] = False
+ continue
+
+ # build new host
+ sshTool = SshTool([host])
+ tempShFile = "%s/buildStandby.sh" % self.tempFileDir
+ # create temporary dir to save gs_ctl build command bashfile.
+ mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
+ (self.tempFileDir, self.user, self.group, self.tempFileDir)
+ sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile)
+ subprocess.getstatusoutput("touch %s; cat /dev/null > %s" %
+ (tempShFile, tempShFile))
+ buildCmd = "gs_ctl build -D %s -M %s" % (dataNode, buildMode)
+ gs_ctlBuildCmd = "source %s ;nohup " % self.envFile + buildCmd + " 1>/dev/null 2>/dev/null &"
+ self.logger.debug("[%s] gs_ctlBuildCmd: %s" % (host, gs_ctlBuildCmd))
+ with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT,
+ stat.S_IWUSR | stat.S_IRUSR),'w') as fo:
+ fo.write("#bash\n")
+ fo.write(gs_ctlBuildCmd)
+ fo.close()
+ # send gs_ctlBuildCmd bashfile to the standby host and execute it.
+ sshTool.scpFiles(tempShFile, tempShFile, [host], self.envFile)
+ resultMap, outputCollect = sshTool.getSshStatusOutput("sh %s" % \
+ tempShFile, [host], self.envFile)
+ if resultMap[host] != DefaultValue.SUCCESS:
+ self.expansionSuccess[host] = False
+ self.logger.debug("Failed to send gs_ctlBuildCmd bashfile "
+ "to %s." % host)
+ self.logger.log("Build %s %s failed." % (hostRole, host))
+ continue
+ # check whether build process has finished
+ checkProcessExistCmd = "ps x"
+ while True:
+ resultMap, outputCollect = sshTool.getSshStatusOutput(
+ checkProcessExistCmd, [host])
+ if buildCmd not in outputCollect:
break
-
- # build standby node
- self.addStandbyIpInPrimaryConf()
- self.reloadPrimaryConf()
- self.commonGsCtl.buildInstance(hostName, dataNode, MODE_STANDBY,
- self.envFile)
-
- # if build failed first time. retry for three times.
- start_retry_num = 1
- while start_retry_num <= 3:
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
- if dbStat != STAT_NORMAL:
- self.logger.debug("Build standby instance failed, \
-retry for %s times" % start_retry_num)
- self.addStandbyIpInPrimaryConf()
- self.reloadPrimaryConf()
- self.commonGsCtl.buildInstance(hostName, dataNode,
- MODE_STANDBY, self.envFile)
- start_retry_num = start_retry_num + 1
else:
- break
-
+ time.sleep(10)
+ # check build result after build process finished
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ hostName, dataNode, self.envFile)
+ if insType == hostRole and dbStat == STAT_NORMAL:
+ if self.context.newHostCasRoleMap[host] == "off":
+ existingStandbys.append(host)
+ self.logger.log("Build %s %s success." % (hostRole, host))
+ else:
+ self.expansionSuccess[host] = False
+ self.logger.log("Build %s %s failed." % (hostRole, host))
+ if self._isAllFailed():
+ self.rollback()
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "build")
+
def checkTmpDir(self, hostName):
"""
if the tmp dir id not exist, create it.
@@ -590,7 +701,7 @@ retry for %s times" % start_retry_num)
"""
generate static_config_files and send to all hosts
"""
- self.logger.debug("Start to generate and send cluster static file.\n")
+ self.logger.log("Start to generate and send cluster static file.")
primaryHost = self.getPrimaryHostName()
result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
@@ -599,9 +710,11 @@ retry for %s times" % start_retry_num)
nodeIp = nodeInfo["backIp"]
dataNode = nodeInfo["dataNode"]
exist_reg = r"(.*)%s[\s]*%s(.*)%s(.*)" % (nodeName, nodeIp, dataNode)
+ dbNode = self.context.clusterInfo.getDbNodeByName(nodeName)
if not re.search(exist_reg, result) and nodeIp not in self.context.newHostList:
self.logger.debug("The node ip [%s] will not be added to cluster." % nodeIp)
- dbNode = self.context.clusterInfo.getDbNodeByName(nodeName)
+ self.context.clusterInfo.dbNodes.remove(dbNode)
+ if nodeIp in self.context.newHostList and not self.expansionSuccess[nodeIp]:
self.context.clusterInfo.dbNodes.remove(dbNode)
toolPath = self.context.clusterInfoDict["toolPath"]
@@ -610,7 +723,7 @@ retry for %s times" % start_retry_num)
static_config_dir = "%s/script/static_config_files" % toolPath
if not os.path.exists(static_config_dir):
os.makedirs(static_config_dir)
-
+
# valid if dynamic config file exists.
dynamic_file = "%s/bin/cluster_dynamic_config" % appPath
dynamic_file_exist = False
@@ -632,104 +745,38 @@ retry for %s times" % start_retry_num)
if dynamic_file_exist:
refresh_cmd = "gs_om -t refreshconf"
hostSsh.getSshStatusOutput(refresh_cmd, [hostName], self.envFile)
-
self.cleanSshToolFile(hostSsh)
-
- self.logger.debug("End to generate and send cluster static file.\n")
- time.sleep(10)
-
- # Single-node database need start cluster after expansion
- if self.isSingleNodeInstance:
- primaryHost = self.getPrimaryHostName()
- self.logger.debug("Single-Node instance need restart.\n")
- self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
-
- # if primary database not normal, restart it
- dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
- dataNode, self.envFile)
- if insType != MODE_PRIMARY:
- self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode,
- MODE_PRIMARY, self.envFile)
- # if stat if not normal,rebuild standby database
- standbyHosts = self.context.newHostList
- for host in standbyHosts:
- hostName = self.context.backIpNameMap[host]
- dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
- if dbStat != STAT_NORMAL:
- self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
-
- self.commonGsCtl.startOmCluster(primaryHost, self.envFile)
-
- def setGUCOnClusterHosts(self, hostNames=[]):
- """
- guc config on all hosts
- """
-
- gucDict = self.getGUCConfig()
+ self.logger.log("End to generate and send cluster static file.\n")
- tempShFile = "%s/guc.sh" % self.tempFileDir
-
- if len(hostNames) == 0:
- hostNames = self.context.nodeNameList
-
- for host in hostNames:
-
- command = "source %s ; " % self.envFile + gucDict[host]
-
- self.logger.debug(command)
-
- sshTool = SshTool([host])
-
- # create temporary dir to save guc command bashfile.
- mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
- (self.tempFileDir,self.user,self.group,self.tempFileDir)
- retmap, output = sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile)
-
- subprocess.getstatusoutput("mkdir -m a+x -p %s; touch %s; \
- cat /dev/null > %s" % \
- (self.tempFileDir, tempShFile, tempShFile))
- with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT,
- stat.S_IWUSR | stat.S_IRUSR),'w') as fo:
- fo.write("#bash\n")
- fo.write( command )
- fo.close()
-
- # send guc command bashfile to each host and execute it.
- sshTool.scpFiles("%s" % tempShFile, "%s" % tempShFile, [host],
- self.envFile)
-
- resultMap, outputCollect = sshTool.getSshStatusOutput("sh %s" % \
- tempShFile, [host], self.envFile)
-
- self.logger.debug(outputCollect)
- self.cleanSshToolFile(sshTool)
+ self.logger.log("Expansion results:")
+ for newHost in self.context.newHostList:
+ if self.expansionSuccess[newHost]:
+ self.logger.log("%s:\tSuccess" % newHost)
+ else:
+ self.logger.log("%s:\tFailed" % newHost)
def getGUCConfig(self):
"""
get guc config of each node:
replconninfo[index]
- remote_read_mode
- replication_type
"""
- nodeDict = self.context.clusterInfoDict
- hostNames = self.context.nodeNameList
+ clusterInfoDict = self.context.clusterInfoDict
+ hostIpList = list(self.existingHosts)
+ for host in self.expansionSuccess:
+ hostIpList.append(host)
+ hostNames = []
+ for host in hostIpList:
+ hostNames.append(self.context.backIpNameMap[host])
gucDict = {}
-
for hostName in hostNames:
-
- localeHostInfo = nodeDict[hostName]
+ localeHostInfo = clusterInfoDict[hostName]
index = 1
guc_tempate_str = "source %s; " % self.envFile
for remoteHost in hostNames:
- if(remoteHost == hostName):
+ if remoteHost == hostName:
continue
- remoteHostInfo = nodeDict[remoteHost]
-
+ remoteHostInfo = clusterInfoDict[remoteHost]
guc_repl_template = """\
gs_guc set -D {dn} -c "replconninfo{index}=\
'localhost={localhost} localport={localport} \
@@ -749,86 +796,68 @@ remoteservice={remoteservice}'"
remotePort=remoteHostInfo["localport"],
remoteHeartPort=remoteHostInfo["heartBeatPort"],
remoteservice=remoteHostInfo["localservice"])
-
guc_tempate_str += guc_repl_template
-
index += 1
- guc_mode_type = """
- gs_guc set -D {dn} -c 'remote_read_mode=off';
- gs_guc set -D {dn} -c 'replication_type=1';
- """.format(dn=localeHostInfo["dataNode"])
- guc_tempate_str += guc_mode_type
-
gucDict[hostName] = guc_tempate_str
return gucDict
def checkLocalModeOnStandbyHosts(self):
"""
expansion the installed standby node. check standby database.
- 1. if the database is normal
- 2. if the databases version are same before existing and new
+ 1. if the database is installed correctly
+ 2. if the databases version are same before existing and new
"""
standbyHosts = self.context.newHostList
- envfile = self.envFile
-
- self.logger.log("Checking the database with locale mode.")
+ envFile = self.envFile
+ for host in standbyHosts:
+ self.expansionSuccess[host] = True
+ self.logger.log("Checking if the database is installed correctly with local mode.")
+ getversioncmd = "source %s;gaussdb --version" % envFile
+ primaryHostName = self.getPrimaryHostName()
+ sshPrimary = SshTool([primaryHostName])
+ resultMap, outputCollect = sshPrimary.getSshStatusOutput(
+ getversioncmd, [], envFile)
+ if resultMap[primaryHostName] != DefaultValue.SUCCESS:
+ GaussLog.exitWithError("Fail to check the version of primary.")
+ ipPattern = re.compile("\[.*\] (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):")
+ versionPattern = re.compile("gaussdb \((.*)\) .*")
+ primaryVersion = versionPattern.findall(outputCollect)[0]
+ notInstalledHosts = []
+ wrongVersionHosts = []
for host in standbyHosts:
hostName = self.context.backIpNameMap[host]
dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
- if insType not in (MODE_PRIMARY, MODE_STANDBY, MODE_NORMAL):
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35703"] %
- (hostName, self.user, dataNode, dataNode))
-
- allHostIp = []
- allHostIp.append(self.context.localIp)
- versionDic = {}
-
- for hostip in standbyHosts:
- allHostIp.append(hostip)
- sshTool= SshTool(allHostIp)
- #get version in the nodes
- getversioncmd = "gaussdb --version"
- resultMap, outputCollect = sshTool.getSshStatusOutput(getversioncmd,
- [], envfile)
- self.cleanSshToolFile(sshTool)
- versionLines = outputCollect.splitlines()
- for verline in versionLines:
- if verline[0:9] == '[SUCCESS]':
- ipKey = verline[10:-1]
- continue
- else:
- versionStr = "".join(verline)
- preVersion = versionStr.split(' ')
- versionInfo = preVersion[4]
- versionDic[ipKey] = versionInfo[:-2]
- for hostip in versionDic:
- if hostip == self.context.localIp:
- versionCompare = ""
- versionCompare = versionDic[hostip]
+ sshTool = SshTool([host])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(
+ getversioncmd, [], envFile)
+ if resultMap[host] != DefaultValue.SUCCESS:
+ self.expansionSuccess[host] = False
+ notInstalledHosts.append(host)
else:
- if versionDic[hostip] == versionCompare:
- continue
- else:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35705"] \
- %(hostip, versionDic[hostip]))
-
- self.logger.log("Successfully checked the database with locale mode.")
+ version = versionPattern.findall(outputCollect)[0]
+ if version != primaryVersion:
+ self.expansionSuccess[host] = False
+ wrongVersionHosts.append(host)
+ if notInstalledHosts:
+ self.logger.log("In local mode, database is not installed "
+ "correctly on these nodes:\n%s" % ", ".join(notInstalledHosts))
+ if wrongVersionHosts:
+ self.logger.log("In local mode, the database version is not same "
+ "with primary on these nodes:\n%s" % ", ".join(wrongVersionHosts))
+        self.logger.log("End to check the database with local mode.")
def preInstall(self):
"""
preinstall on new hosts.
"""
- self.logger.log("Start to preinstall database on the new \
-standby nodes.")
+ self.logger.log("Start to preinstall database on new nodes.")
self.sendSoftToHosts()
self.generateAndSendXmlFile()
self.preInstallOnHosts()
- self.logger.log("Successfully preinstall database on the new \
-standby nodes.")
-
+ self.logger.log("End to preinstall database on new nodes.\n")
+ if self._isAllFailed():
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "preinstall")
def clearTmpFile(self):
"""
@@ -838,16 +867,15 @@ standby nodes.")
clearCmd = "if [ -d '%s' ];then rm -rf %s;fi" % \
(self.tempFileDir, self.tempFileDir)
hostNames = self.context.nodeNameList
- for host in hostNames:
- try:
- sshTool = SshTool(hostNames)
- result, output = sshTool.getSshStatusOutput(clearCmd,
- hostNames, self.envFile)
- self.logger.debug(output)
- self.cleanSshToolFile(sshTool)
- except Exception as e:
- self.logger.debug(str(e))
- self.cleanSshToolFile(sshTool)
+ try:
+ sshTool = SshTool(hostNames)
+ result, output = sshTool.getSshStatusOutput(clearCmd,
+ hostNames, self.envFile)
+ self.logger.debug(output)
+ self.cleanSshToolFile(sshTool)
+ except Exception as e:
+ self.logger.debug(str(e))
+ self.cleanSshToolFile(sshTool)
def cleanSshToolFile(self, sshTool):
@@ -874,11 +902,16 @@ standby nodes.")
self.logger.debug("Start to check cluster status.\n")
curHostName = socket.gethostname()
- command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
- (self.user, self.envFile)
+ command = ""
+ if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"):
+ command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
+ (self.user, self.envFile)
+ else:
+ command = "su - %s -c 'source /etc/profile;source /home/%s/.bashrc;"\
+ "gs_om -t status --detail'" % (self.user, self.user)
sshTool = SshTool([curHostName])
- resultMap, outputCollect = sshTool.getSshStatusOutput(command,
- [curHostName], self.envFile)
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [curHostName], self.envFile)
if outputCollect.find("Primary Normal") == -1:
GaussLog.exitWithError("Unable to query current cluster status. " + \
"Please import environment variables or " +\
@@ -886,42 +919,47 @@ standby nodes.")
self.logger.debug("The primary database is normal.\n")
+
+ def _adjustOrderOfNewHostList(self):
+ """
+ Adjust the order of hostlist so that
+ standby comes first and cascade standby comes last
+ """
+ newHostList = self.context.newHostList
+ newHostCasRoleMap = self.context.newHostCasRoleMap
+ i, j = 0, len(newHostList) - 1
+ while i < j:
+ while i < j and newHostCasRoleMap[newHostList[i]] == "off":
+ i += 1
+ while i < j and newHostCasRoleMap[newHostList[j]] == "on":
+ j -= 1
+ newHostList[i], newHostList[j] = newHostList[j], newHostList[i]
+ i += 1
+ j -= 1
+
def validNodeInStandbyList(self):
"""
check if the node has been installed in the cluster.
"""
- self.logger.debug("Start to check if the nodes in standby list\n")
-
- curHostName = socket.gethostname()
- command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
- (self.user, self.envFile)
- sshTool = SshTool([curHostName])
- resultMap, outputCollect = sshTool.getSshStatusOutput(command,
- [curHostName], self.envFile)
- self.logger.debug(outputCollect)
-
- newHosts = self.context.newHostList
- standbyHosts = []
- existHosts = []
- while len(newHosts) > 0:
- hostIp = newHosts.pop()
- nodeName = self.context.backIpNameMap[hostIp]
- nodeInfo = self.context.clusterInfoDict[nodeName]
- dataNode = nodeInfo["dataNode"]
- exist_reg = r"(.*)%s[\s]*%s(.*)" % (nodeName, hostIp)
- if not re.search(exist_reg, outputCollect):
- standbyHosts.append(hostIp)
- else:
- existHosts.append(hostIp)
- self.context.newHostList = standbyHosts
- if len(existHosts) > 0:
- self.logger.log("The nodes [%s] are already in the cluster. Skip expand these nodes." \
- % ",".join(existHosts))
- self.cleanSshToolFile(sshTool)
- if len(standbyHosts) == 0:
+ self.logger.debug("Start to check if the nodes in standby list.")
+ self.getExistingHosts()
+ newHostList = self.context.newHostList
+ existedNewHosts = \
+ [host for host in newHostList if host in self.existingHosts]
+ if existedNewHosts:
+ newHostList = \
+ [host for host in newHostList if host not in existedNewHosts]
+ self.context.newHostList = newHostList
+ self.expansionSuccess = {}
+ for host in newHostList:
+ self.expansionSuccess[host] = False
+ self.logger.log("These nodes [%s] are already in the cluster. "
+ "Skip expand these nodes." % ",".join(existedNewHosts))
+ if len(newHostList) == 0:
self.logger.log("There is no node can be expanded.")
sys.exit(0)
-
+ self._adjustOrderOfNewHostList()
+
def checkXmlFileAccessToUser(self):
"""
Check if the xml config file has readable access to user.
@@ -1020,19 +1058,63 @@ standby nodes.")
self.changeUser()
if not self.context.standbyLocalMode:
- self.logger.log("\nStart to install database on the new \
-standby nodes.")
+ self.logger.log("Start to install database on new nodes.")
self.installDatabaseOnHosts()
else:
- self.logger.log("\nStandby nodes is installed with locale mode.")
self.checkLocalModeOnStandbyHosts()
- self.logger.log("\nDatabase on standby nodes installed finished. \
-Start to establish the primary-standby relationship.")
+ self.logger.log("Database on standby nodes installed finished.\n")
+ self.logger.log("Start to establish the relationship.")
self.buildStandbyRelation()
# process success
pvalue.value = 1
+ def rollback(self):
+ """
+ rollback all hosts' replconninfo about failed hosts
+ """
+ existingHosts = self.existingHosts
+ failedHosts = []
+ for host in self.expansionSuccess:
+ if self.expansionSuccess[host]:
+ existingHosts.append(host)
+ else:
+ failedHosts.append(host)
+ clusterInfoDict = self.context.clusterInfoDict
+ for failedHost in failedHosts:
+ self.logger.debug("Start to rollback replconninfo about %s" % failedHost)
+ for host in existingHosts:
+ hostName = self.context.backIpNameMap[host]
+ dataNode = clusterInfoDict[hostName]["dataNode"]
+ confFile = os.path.join(dataNode, "postgresql.conf")
+ rollbackReplconninfoCmd = "sed -i '/remotehost=%s/s/^/#&/' %s" \
+ % (failedHost, confFile)
+ self.logger.debug("[%s] rollbackReplconninfoCmd:%s" % (host,
+ rollbackReplconninfoCmd))
+ sshTool = SshTool([host])
+ (statusMap, output) = sshTool.getSshStatusOutput(rollbackReplconninfoCmd, [host])
+ pg_hbaFile = os.path.join(dataNode, "pg_hba.conf")
+ rollbackPg_hbaCmd = "sed -i '/%s/s/^/#&/' %s" \
+ % (failedHost, pg_hbaFile)
+ self.logger.debug("[%s] rollbackPg_hbaCmd:%s" % (host,
+ rollbackPg_hbaCmd))
+ (statusMap, output) = sshTool.getSshStatusOutput(rollbackPg_hbaCmd, [host])
+ reloadGUCCommand = "source %s ; gs_ctl reload -D %s " % \
+ (self.envFile, dataNode)
+ resultMap, outputCollect = sshTool.getSshStatusOutput(
+ reloadGUCCommand, [host], self.envFile)
+ self.logger.debug(outputCollect)
+ self.cleanSshToolFile(sshTool)
+
+ def _isAllFailed(self):
+ """
+ check whether all new hosts preinstall/install/build failed
+ """
+ for host in self.expansionSuccess:
+ if self.expansionSuccess[host]:
+ return False
+ return True
+
def run(self):
"""
start expansion
@@ -1043,8 +1125,7 @@ Start to establish the primary-standby relationship.")
self.preInstall()
self.installAndExpansion()
-
- self.logger.log("\nSuccess to expansion standby nodes.")
+ self.logger.log("Expansion Finish.")
class GsCtlCommon:
@@ -1103,6 +1184,7 @@ class GsCtlCommon:
self.logger.debug(host)
self.logger.debug(outputCollect)
self.cleanSshToolTmpFile(sshTool)
+ return resultMap, outputCollect
def buildInstance(self, host, datanode, mode, env):
command = "source %s ; gs_ctl build -D %s -M %s" % (env, datanode, mode)
@@ -1151,7 +1233,4 @@ class GsCtlCommon:
try:
sshTool.clenSshResultFiles()
except Exception as e:
- self.logger.debug(str(e))
-
-
-
+ self.logger.debug(str(e))
\ No newline at end of file
diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py
index f4a81e87ca3ea32ad2539e2da08d968fe0e90c9d..44ae7dbc14009b95c808766d6ee73770e0d247d5 100644
--- a/script/impl/om/OLAP/OmImplOLAP.py
+++ b/script/impl/om/OLAP/OmImplOLAP.py
@@ -223,16 +223,20 @@ class OmImplOLAP(OmImpl):
self.context.g_opts.security_mode)
if self.dataDir != "":
cmd += " -D %s" % self.dataDir
- (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, hostList)
+ failedOutput = ''
for nodeName in hostList:
+ (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, [nodeName])
if statusMap[nodeName] != 'Success':
- raise Exception(
- ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, output))
- if re.search("another server might be running", output):
- self.logger.log(output)
- if re.search("] WARNING:", output):
- tmp = '\n'.join(re.findall(".*] WARNING:.*", output))
- self.logger.log(output[0:output.find(":")] + '\n' + tmp)
+ failedOutput += output
+ elif re.search("another server might be running", output):
+ self.logger.log(output)
+ elif re.search("] WARNING:", output):
+ tmp = '\n'.join(re.findall(".*] WARNING:.*", output))
+ self.logger.log(output[0:output.find(":")] + '\n' + tmp)
+ if len(failedOutput):
+ self.logger.log("=========================================")
+ raise Exception(
+ ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, failedOutput))
if startType == "cluster":
starttime = time.time()
cluster_state = ""
diff --git a/script/impl/om/OmImpl.py b/script/impl/om/OmImpl.py
index ddc6e4a2316c0c9e8b4017f6a57f02b75c2c2ffa..4dcaa744cf44f18209d3588acce7c3b045ad5789 100644
--- a/script/impl/om/OmImpl.py
+++ b/script/impl/om/OmImpl.py
@@ -199,6 +199,8 @@ class OmImpl:
cmd = queryCmd()
if (self.context.g_opts.outFile != ""):
cmd.outputFile = self.context.g_opts.outFile
+ else:
+ cmd.outputFile = self.logger.logFile
if (self.context.g_opts.show_detail):
if (
self.context.clusterInfo.clusterType
diff --git a/script/impl/upgrade/UpgradeConst.py b/script/impl/upgrade/UpgradeConst.py
index 680063e39e2ba0b0c7e229459ef1bf51caba289a..0280a1994fc4ba2862fd8c790273a96bf6928748 100644
--- a/script/impl/upgrade/UpgradeConst.py
+++ b/script/impl/upgrade/UpgradeConst.py
@@ -124,7 +124,9 @@ BACKUP_DIR_LIST = ['global', 'pg_clog', 'pg_xlog', 'pg_multixact',
BACKUP_DIR_LIST_BASE = ['global', 'pg_clog', 'pg_csnlog']
BACKUP_DIR_LIST_64BIT_XID = ['pg_multixact', 'pg_replslot', 'pg_notify',
'pg_subtrans', 'pg_twophase']
-
# Accepted spellings for a boolean GUC set to "off"/"on"; used to normalize a
# user-supplied value before comparing it against what postgresql.conf holds.
VALUE_OFF = ["off", "false", "0", "no"]
VALUE_ON = ["on", "true", "1", "yes"]
# GUC names that are checked at datanode scope (postgresql.conf).
DN_GUC = ["upgrade_mode", "enable_stream_replication"]
FIRST_GREY_UPGRADE_NUM = 92
UPGRADE_PRECOMMIT_NUM = 0.001
@@ -156,3 +158,5 @@ COMBIN_NUM = 30
ON_INPLACE_UPGRADE = "IsInplaceUpgrade"
MAX_APP_SIZE = 2000
UPGRADE_VERSION_64bit_xid = 91.208
# First cluster version that supports enable_stream_replication.  Stored as a
# float so it can be ordered against the numeric cluster version (compare the
# float UPGRADE_VERSION_64bit_xid above); in Python 3, keeping this as the
# string "92.149" would raise TypeError on a ">=" comparison with a number.
ENABLE_STREAM_REPLICATION_VERSION = 92.149
# GUC name whose value is validated before upgrading past that version.
ENABLE_STREAM_REPLICATION_NAME = "enable_stream_replication"
diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py
index 60d0eac9afad5bb37a04a2e8df2c46b7ad329c34..2a9f918f578f5410a0c9aada665564a7cdeb98d5 100644
--- a/script/impl/upgrade/UpgradeImpl.py
+++ b/script/impl/upgrade/UpgradeImpl.py
@@ -1067,7 +1067,7 @@ class UpgradeImpl:
self.upgradeAgain()
except Exception as e:
errmsg = ErrorCode.GAUSS_529["GAUSS_52934"] + \
- "You can use -h to upgrade or manually rollback."
+ "You can use --grey to upgrade or manually rollback."
self.context.logger.log(errmsg + str(e))
self.exitWithRetCode(self.context.action, False)
else:
@@ -1076,7 +1076,7 @@ class UpgradeImpl:
def upgradeAgain(self):
try:
- self.context.logger.log(
+ self.context.logger.debug(
"From this step, you can use -h to upgrade again if failed.")
# we have guarantee specified nodes have same step,
# so we only need to get one node step
@@ -1108,8 +1108,9 @@ class UpgradeImpl:
self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG)
except Exception as e:
- self.context.logger.log("Failed to upgrade, can use -h to "
- "upgrade again. Error: %s" % str(e))
+ self.context.logger.log("Failed to upgrade, can use --grey to "
+ "upgrade again after rollback. Error: "
+ "%s" % str(e))
self.context.logger.debug(traceback.format_exc())
self.exitWithRetCode(self.context.action, False, str(e))
self.context.logger.log(
@@ -1407,44 +1408,8 @@ class UpgradeImpl:
# when number and node names is empty
self.context.logger.debug("Choose the nodes to be upgraded.")
self.setClusterDetailInfo()
-
- if len(self.context.nodeNames) != 0 :
- self.context.logger.log(
- "Upgrade nodes %s." % self.context.nodeNames)
- greyNodeNames = self.getUpgradedNodeNames()
- checkH_nodes = \
- [val for val in greyNodeNames if val in self.context.nodeNames]
- if len(checkH_nodes) > 0:
- raise Exception("The nodes %s have been upgraded" %
- checkH_nodes)
- # confirm the nodesNum in checkParameter, 1 or specified by -g
- elif self.context.upgrade_remain:
- greyNodeNames = self.getUpgradedNodeNames()
- otherNodeNames = [
- i for i in self.context.clusterNodes if i not in greyNodeNames]
- self.context.nodeNames = otherNodeNames
- self.context.logger.debug(
- "Upgrade remain nodes %s." % self.context.nodeNames)
- # specify the node num, try to find matched combination
- else:
- nodeTotalNum = len(self.context.clusterNodes)
- if len(self.context.clusterNodes) == 1:
- self.context.nodeNames.append(
- self.context.clusterInfo.dbNodes[0].name)
- self.context.logger.log(
- "Upgrade one node '%s'." % self.context.nodeNames[0])
- # SinglePrimaryMultiStandbyCluster / MasterStandbyCluster with
- # more than 1 node
- elif self.context.nodesNum == nodeTotalNum:
- self.context.nodeNames = self.context.clusterNodes
- self.context.logger.log("Upgrade all nodes.")
- elif self.context.nodesNum > nodeTotalNum:
- raise Exception(ErrorCode.GAUSS_529["GAUSS_52906"])
- else:
- self.context.nodeNames = self.findOneMatchedCombin(
- self.context.clusterNodes)
- self.context.logger.log(
- "Upgrade nodes %s." % self.context.nodeNames)
+ self.context.nodeNames = self.context.clusterNodes
+ self.context.logger.log("Upgrade all nodes.")
def getUpgradedNodeNames(self, step=GreyUpgradeStep.STEP_INIT_STATUS):
"""
@@ -1695,6 +1660,21 @@ class UpgradeImpl:
# Normal and the database could be connected
# if not, exit.
self.startCluster()
+
+ # uninstall kerberos if has already installed
+ pghost_path = DefaultValue.getEnvironmentParameterValue(
+ 'PGHOST', self.context.user)
+ kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
+ if os.path.exists(kerberosflagfile):
+ self.stopCluster()
+ self.context.logger.log("Starting uninstall Kerberos.",
+ "addStep")
+ cmd = "source %s && " % self.context.userProfile
+ cmd += "%s -m uninstall -U %s" % (OMCommand.getLocalScript(
+ "Local_Kerberos"), self.context.user)
+ self.context.sshTool.executeCommand(cmd, "")
+ self.context.logger.log("Successfully uninstall Kerberos.")
+ self.startCluster()
if self.unSetClusterReadOnlyMode() != 0:
raise Exception("NOTICE: "
+ ErrorCode.GAUSS_529["GAUSS_52907"])
@@ -1902,9 +1882,43 @@ class UpgradeImpl:
self.stopCluster()
self.startCluster()
+ # install Kerberos
+ self.install_kerberos()
self.context.logger.log("Commit binary upgrade succeeded.")
self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, True)
def install_kerberos(self):
    """
    install kerberos after upgrade
    :return:NA

    Re-installs Kerberos when the flag file left before the upgrade exists:
    stops the cluster, installs the krb server locally and the krb client on
    all nodes, restarts the cluster, then removes the flag file.
    """
    pghost_path = DefaultValue.getEnvironmentParameterValue(
        'PGHOST', self.context.user)
    # Flag file written during pre-upgrade checks when Kerberos was found
    # installed; its presence means Kerberos must be re-installed now.
    kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
    if os.path.exists(kerberosflagfile):
        # install kerberos
        # Stop the cluster and install the Kerberos server on this node;
        # the whole command is retried up to 3 times at 5s intervals.
        cmd = "source %s &&" % self.context.userProfile
        cmd += "gs_om -t stop && "
        cmd += "%s -m install -U %s --krb-server" % (
            OMCommand.getLocalScript("Local_Kerberos"),
            self.context.user)
        (status, output) = DefaultValue.retryGetstatusoutput(cmd, 3, 5)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                            "Command:%s. Error:\n%s" % (cmd, output))
        # Install the Kerberos client on every node of the cluster.
        cmd = "source %s && " % self.context.userProfile
        cmd += "%s -m install -U %s --krb-client " % (
            OMCommand.getLocalScript("Local_Kerberos"), self.context.user)
        self.context.sshTool.executeCommand(
            cmd, "", hostList=self.context.clusterNodes)
        self.context.logger.log("Successfully install Kerberos.")
        # Restart the cluster; a start failure is fatal unless instance
        # failures are explicitly ignored via ignoreInstance.
        cmd = "source %s && gs_om -t start" % self.context.userProfile
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0 and not self.context.ignoreInstance:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                            "Command:%s. Error:\n%s" % (cmd, output))
        # Remove the flag only after a fully successful re-install.
        os.remove(kerberosflagfile)
+
def refresh_dynamic_config_file(self):
"""
refresh dynamic config file
@@ -3613,6 +3627,8 @@ class UpgradeImpl:
ErrorCode.GAUSS_529["GAUSS_52907"])
self.cleanBinaryUpgradeBakFiles(True)
self.cleanInstallPath(Const.NEW)
+ # install kerberos
+ self.install_kerberos()
except Exception as e:
self.context.logger.error(str(e))
self.context.logger.log("Rollback failed.")
@@ -4138,6 +4154,30 @@ class UpgradeImpl:
else:
raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % 't' +
" Value: %s" % self.context.action)
+
+ # judgment has installed kerberos before action_inplace_upgrade
+ self.context.logger.debug(
+ "judgment has installed kerberos before action_inplace_upgrade")
+ xmlfile = os.path.join(os.path.dirname(self.context.userProfile),
+ DefaultValue.FI_KRB_XML)
+ if os.path.exists(xmlfile) and \
+ self.context.action == Const.ACTION_AUTO_UPGRADE \
+ and self.context.is_grey_upgrade:
+ raise Exception(ErrorCode.GAUSS_502["GAUSS_50200"] % "kerberos")
+ if os.path.exists(xmlfile) and self.context.is_inplace_upgrade:
+ pghost_path = DefaultValue.getEnvironmentParameterValue(
+ 'PGHOST', self.context.user)
+ destfile = "%s/krb5.conf" % os.path.dirname(
+ self.context.userProfile)
+ kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
+ cmd = "cp -rf %s %s " % (destfile, kerberosflagfile)
+ (status, output) = DefaultValue.retryGetstatusoutput(cmd, 3, 5)
+ if status != 0:
+ raise Exception(
+ ErrorCode.GAUSS_502["GAUSS_50206"] % kerberosflagfile
+ + " Error: \n%s" % output)
+ self.context.logger.debug(
+ "Successful back up kerberos config file.")
except Exception as e:
self.context.logger.debug(traceback.format_exc())
self.exitWithRetCode(self.context.action, False, str(e))
@@ -4450,6 +4490,11 @@ class UpgradeImpl:
self.context.logger.log("Failed to check upgrade environment.",
"constant")
raise Exception(str(e))
+ if not self.context.forceRollback:
+ if self.context.oldClusterNumber >= \
+ Const.ENABLE_STREAM_REPLICATION_VERSION:
+ self.check_gucval_is_inval_given(
+ Const.ENABLE_STREAM_REPLICATION_NAME, Const.VALUE_ON)
try:
if self.context.action == Const.ACTION_INPLACE_UPGRADE:
self.context.logger.log(
@@ -4466,6 +4511,19 @@ class UpgradeImpl:
self.context.logger.log(
"Successfully checked upgrade environment.", "constant")
def check_gucval_is_inval_given(self, guc_name, val_list):
    """
    Verify that the GUC *guc_name* currently holds one of the values in
    *val_list* on every relevant instance, delegating the actual
    per-instance comparison to self.checkParam.
    """
    logger = self.context.logger
    logger.debug("checks whether the parameter:{0} is "
                 "the value:{1}.".format(guc_name, val_list))
    joined_values = ",".join(val_list)
    self.checkParam("{0}:{1}".format(guc_name, joined_values))
    logger.debug("Success to check the parameter:{0} value "
                 "is in the value:{1}.".format(guc_name,
                                               val_list))
+
def checkDifferentVersion(self):
"""
if the cluster has only one version. no need to check
@@ -4509,6 +4567,8 @@ class UpgradeImpl:
we can upgrade again
:return:
"""
+ if self.context.is_grey_upgrade:
+ self.check_option_grey()
if len(self.context.nodeNames) != 0:
self.checkOptionH()
elif self.context.upgrade_remain:
@@ -4516,6 +4576,24 @@ class UpgradeImpl:
else:
self.checkOptionG()
def check_option_grey(self):
    """
    if nodes have been upgraded, no need to use --grey to upgrade again
    :return:
    """
    stepFile = os.path.join(
        self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE)
    if not os.path.isfile(stepFile):
        self.context.logger.debug(
            "File %s does not exists. No need to check." %
            Const.GREY_UPGRADE_STEP_FILE)
        return
    grey_node_names = self.getUpgradedNodeNames()
    if grey_node_names:
        self.context.logger.log(
            "All nodes have been upgrade, no need to upgrade again.")
        # Bug fix: the action lives on the context object, not on self
        # (every other call site uses self.context.action); self.action
        # would raise AttributeError here.
        self.exitWithRetCode(self.context.action, True)
+
def checkOptionH(self):
self.checkNodeNames()
stepFile = os.path.join(
diff --git a/script/local/UpgradeUtility.py b/script/local/UpgradeUtility.py
index f8f242cecb8df3ff9044a06749960c7e74a56ee5..5d610254eac683d73d926e27b3379acc9d8dbcd0 100644
--- a/script/local/UpgradeUtility.py
+++ b/script/local/UpgradeUtility.py
@@ -1813,12 +1813,11 @@ def checkGucValue():
"""
key = g_opts.gucStr.split(':')[0].strip()
value = g_opts.gucStr.split(':')[1].strip()
- if key == "upgrade_from":
- instances = g_dbNode.cmagents
- fileName = "cm_agent.conf"
- elif key == "upgrade_mode":
- #instances = g_dbNode.coordinators
- #instances.extend(g_dbNode.datanodes)
+ if value in const.VALUE_OFF:
+ value = const.VALUE_OFF
+ if value in const.VALUE_ON:
+ value = const.VALUE_ON
+ if key in const.DN_GUC:
instances = g_dbNode.datanodes
fileName = "postgresql.conf"
else:
@@ -1849,7 +1848,7 @@ def checkGucValue():
realValue = realValue.split('#')[0].strip()
g_logger.debug("[key:%s]: Realvalue %s, ExpectValue %s" % (
key, str(realValue), str(value)))
- if str(value) != str(realValue):
+ if str(realValue) not in str(value):
raise Exception(
ErrorCode.GAUSS_521["GAUSS_52102"] % key
+ " Real value %s, expect value %s"
diff --git a/simpleInstall/one_master_one_slave.sh b/simpleInstall/one_master_one_slave.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8c2b416182729f0c58aeae16b0280730c6c79645
--- /dev/null
+++ b/simpleInstall/one_master_one_slave.sh
@@ -0,0 +1,400 @@
#!/bin/bash
# One-click deployment of a one-master-one-slave openGauss cluster: checks
# both nodes, renders the XML template, then runs gs_preinstall/gs_install.
# Must run as root: it creates the OS user/group and edits SELinux/firewall.

if [ `id -u` -ne 0 ];then
    echo "only a user with the root permission can run this script."
    exit 1
fi

# Script location and the package root it ships inside.
declare -r SCRIPT_PATH=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
declare -r SCRIPT_NAME=$0
echo "SCRIPT_PATH : ${SCRIPT_PATH}"
declare PACKAGE_PATH=`dirname ${SCRIPT_PATH}`
# Installation parameters; filled in by main() from the command line.
declare USER_NAME=""
declare HOST_IPS=""
declare HOST_IPS_ARR=""
declare HOST_IPS_ARRAY=""
declare HOST_NAMES=""
declare HOST_NAMES_ARRAY=
declare USER_GROUP="dbgrp"
declare PORT="20050"
declare XML_DIR=${SCRIPT_PATH}/one_master_one_slave_template.xml
declare INSTALL_PATH=""
declare SYSTEM_ARCH=""
declare SYSTEM_NAME=""
declare PASSWORD=""
+
function print_help()
{
    # Print usage.  Option letters must match the case branches in main():
    # -h is help (not host_ip), the host list is -H, and -P exists.
    # The previous text advertised "-?" (unhandled) and "-h|--host_ip".
    echo "Usage: $0 [OPTION]
    -h|--help                 show help information
    -U|--user_name            cluster user
    -H|--host_ip              intranet ip address of the host in the backend storage network(host1,host2)
    -G|--user_grp             group of the cluster user(default value dbgrp)
    -p|--port                 database server port(default value 20050)
    -P|--password             password of the root user and the cluster user(the two passwords must be the same)
    -D|--install_location     installation directory of the openGauss program(default value /home/<user_name>)
    -X|--xml_location         cluster xml configuration file path
    "
}
+
function die()
{
    # Print a red "error:" message and abort the installation.
    local msg="$1"
    echo -e "\033[31merror:\033[0m ${msg}"
    exit 1
}
+
function warn()
{
    # Print a yellow warning and pause briefly so the user can read it.
    local msg="$1"
    echo -e "\033[33mwarnning:\033[0m ${msg}"
    sleep 2s
}
+
function info()
{
    # Print a green informational message.
    local msg="$1"
    echo -e "\033[32minfo:\033[0m ${msg}"
}
+
function expect_ssh()
{
    # Run command $1 under expect, auto-answering the ssh host-key prompt
    # with "yes" and any password prompt with $2.  $3 is a success-marker
    # string: when it appears in the output the session exits early.
    # Returns 0 if expect itself exited successfully, 1 otherwise.
    /usr/bin/expect <<-EOF
    set timeout -1
    spawn $1
    expect {
        "*yes/no" { send "yes\r"; exp_continue }
        "*assword:" { send "$2\r"; exp_continue }
        "*$3*" { exit }
    }
    expect eof
EOF
    if [ $? == 0 ]
    then
        return 0
    else
        return 1
    fi
}
+
function expect_hostname()
{
    # Run $1 (an ssh ... hostname command) under expect, answering the
    # host-key prompt with "yes" and the password prompt with $2, and
    # capture the session output into the local file "expectFile"
    # (pre_checks later parses it with tail/head).
    # Bug fix: the body previously read "expect < expectFile", which would
    # feed the not-yet-existing expectFile to expect as its script and let
    # the following lines execute as shell commands; the stranded EOF
    # terminator shows a heredoc redirected to expectFile was intended.
    expect <<EOF >expectFile
    set timeout -1
    spawn $1
    expect {
        "*yes/no" { send "yes\r"; exp_continue }
        "*assword:" {send "$2\r"; exp_continue}
    }
EOF
    if [ $? == 0 ]
    then
        return 0
    else
        return 1
    fi
}
+
+
function main()
{
    # Parse and validate command-line options, prompt for the password if
    # needed, resolve host names from the given back IPs, and verify the
    # OS/arch combination is supported.
    while [ $# -gt 0 ]
    do
        case "$1" in
            -h|--help)
                print_help
                exit 1
                ;;
            -U|--user_name)
                if [ "$2"X = X ]
                then
                    die "no cluster user values"
                fi
                USER_NAME=$2
                shift 2
                ;;
            -G|--user_grp)
                if [ "$2"X = X ]
                then
                    die "no group values"
                fi
                USER_GROUP=$2
                shift 2
                ;;
            -H|--host_ip)
                if [ "$2"X = X ]
                then
                    die "no intranet ip address of the host values"
                fi
                HOST_IPS=$2
                shift 2
                # Split the comma-separated IP list; exactly two nodes
                # (one primary and one standby) are supported.
                HOST_IPS_ARR=${HOST_IPS//,/ }
                HOST_IPS_ARRAY=(${HOST_IPS_ARR})
                if [ ${#HOST_IPS_ARRAY[*]} != 2 ]
                then
                    die "the current script can be installed only on two nodes, one active node and one standby node"
                fi
                ;;
            -X|--xml_location)
                if [ "$2"X = X ]
                then
                    die "no cluster xml configuration file values"
                fi
                XML_DIR=$2
                shift 2
                ;;
            -D|--install_location)
                if [ "$2"X = X ]
                then
                    die "no installation directory of the openGauss program values"
                fi
                INSTALL_PATH=$2
                shift 2
                ;;
            -p|--port)
                if [ "$2"X = X ]
                then
                    die "the port number cannot be empty."
                fi
                PORT=$2
                shift 2
                ;;
            -P|--password)
                if [ "$2"X = X ]
                then
                    die "the password cannot be empty."
                fi
                PASSWORD=$2
                shift 2
                ;;
            *)
                # NOTE(review): the hint below names active_standby_nodes_install.sh
                # while this file is one_master_one_slave.sh -- confirm intended name.
                echo "Internal Error: option processing error" 1>&2
                echo "please input right paramtenter, the following command may help you"
                echo "sh active_standby_nodes_install.sh --help or sh active_standby_nodes_install.sh -h"
                exit 1
        esac
    done

    if [ "${USER_NAME}"X == X ]
    then
        die "no cluster user values"
    fi

    # Default install location is the cluster user's home directory.
    if [ -z ${INSTALL_PATH} ]
    then
        INSTALL_PATH="/home/${USER_NAME}"
    fi

    # Prompt for the shared root/cluster-user password when -P was not given.
    if [ "${PASSWORD}"X == X ]
    then
        echo "please enter the password of the root user&the password of a common user(the two passwords must be the same)"
        echo -n "password:"
        read PASSWORD
        while [ -z ${PASSWORD} ]
        do
            echo "the value cannot be null, please enter the password again"
            echo -n "password:"
            read PASSWORD
        done
    fi

    # Resolve each back IP to its hostname over ssh; expect_hostname leaves
    # the captured session output in the temporary file "expectFile".
    if [ "${HOST_IPS}"X == X ]
    then
        die "no intranet ip address values"
    else
        len=${#HOST_IPS_ARRAY[*]}
        index=0
        while [ ${index} -lt ${len} ]
        do
            expect_hostname "ssh ${HOST_IPS_ARRAY[${index}]} hostname" ${PASSWORD}
            if [ $? == 0 ]
            then
                expectResult=`tail -1 expectFile|head -1| tr -d "\r"| tr -d "\n"`
                if [ -z ${expectResult} ]
                then
                    die "failed to obtain the hostname based on the ip address of ${HOST_IPS_ARRAY[${index}]}."
                fi
                HOST_NAMES_ARRAY[${index}]=${expectResult}
            else
                die "failed to obtain the hostname based on the ip address of ${HOST_IPS_ARRAY[${index}]}."
            fi
            index=$[ ${index} + 1 ]
        done
    fi
    rm -rf expectFile
    HOST_NAMES="${HOST_NAMES_ARRAY[0]},${HOST_NAMES_ARRAY[1]}"
    # Detect OS and architecture; combinations outside the known-good
    # matrix only produce a warning, not a hard failure.
    SYSTEM_ARCH=`uname -p`
    SYSTEM_NAME=`cat /etc/*-release | grep '^ID=".*'|awk -F "[=\"]" '{print $3}'`
    if [ "${SYSTEM_NAME}" == "openEuler" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
    then
        info "the current system environment is openEuler + arm"
    elif [ "${SYSTEM_NAME}" == "openEuler" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
    then
        info "the current system environment is openEuler + x86"
    elif [ "${SYSTEM_NAME}" == "centos" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
    then
        info "the current system environment is CentOS + x86"
    elif [ "${SYSTEM_NAME}" == "redhat" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
    then
        info "the current system environment is redhat + x86"
    elif [ "${SYSTEM_NAME}" == "redhat" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
    then
        info "the current system environment is redhat + arm"
    elif [ "${SYSTEM_NAME}" == "kylin" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
    then
        info "the current system environment is kylin + x86"
    elif [ "${SYSTEM_NAME}" == "kylin" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
    then
        info "the current system environment is kylin + arm"
    else
        warn "the current system environment is ${SYSTEM_NAME} + ${SYSTEM_ARCH}, \
    you are advised to use the centos, openEuler, redhat, or kylin system. because OpenGauss may not adapt to the current system."
    fi
    info "installation parameter verification completed."
}
+
function checks()
{
    # Per-node pre-installation setup.  Positional parameters, as passed by
    # pre_checks (locally, or via `sh /<script> inner ...` over ssh):
    #   $1  "inner" marker ("" for the local call)
    #   $2  node ip          $3  user group        $4  user name
    #   $5  password         $6  port              $7  install path
    #   $8  expected arch    $9  expected os name  $10 script name
    system_arch=`uname -p`
    system_name=`cat /etc/*-release | grep '^ID=".*'|awk -F "[=\"]" '{print $3}'`
    if [ ${system_arch} != "$8" -o ${system_name} != "$9" ]
    then
        warn "inconsistency between the system and the execution machine"
    fi

    # Create the group/user if missing and set the user's password.
    egrep "^$3" /etc/group >& /dev/null
    if [ $? != 0 ];then
        groupadd $3
    fi
    egrep "^$4" /etc/passwd >& /dev/null
    if [ $? != 0 ];then
        useradd -g $3 -d /home/$4 -m -s /bin/bash $4 2>/dev/null
        if [ $? != 0 ]
        then
            die "failed to create the user on the node $2."
        fi
        expect_ssh "passwd $4" "$5" "passwd:"
        if [ $? != 0 ]
        then
            die "an error occurred when setting the user password on the node $2"
        fi
    fi

    # Disable SELinux (effective after reboot) and open the database port.
    sed -i "s/SELINUX=.*/SELINUX=disabled/g" /etc/selinux/config && firewall-cmd --permanent --add-port="$6/tcp" && firewall-cmd --reload
    if [ $? != 0 ]
    then
        warn "some errors occur during system environment setting on host $2"
    fi

    # Prepare an empty install directory owned by the cluster user.
    INSTALL_PATH=$7
    if [ ! -e ${INSTALL_PATH} ]
    then
        mkdir -p ${INSTALL_PATH}
    else
        rm -rf ${INSTALL_PATH}/*
    fi
    chmod -R 755 ${INSTALL_PATH}/
    chown -R $4:$3 ${INSTALL_PATH}/
    # When run remotely the script was copied to /; move it to the user's home.
    if [ -f /${10} ]
    then
        mv /${10} $(eval echo ~$4)/
    fi
    # "check end" is the success marker expect_ssh waits for in pre_checks.
    echo "check end"
}
+
function pre_checks()
{
    # Run checks() on every node: for remote peers, copy this script over
    # and execute it with the "inner" marker; for the local host call
    # checks() directly.
    if [ ${#HOST_IPS_ARRAY[*]} == 0 ]
    then
        die "the number of internal IP addresses of the host is incorrect."
    fi
    localips=`/sbin/ifconfig -a|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|tr -d "addr:"`
    for ip in ${HOST_IPS_ARRAY[@]}
    do
        info "start to check the installation environment of host ${ip}."
        sleep 2s
        # standby node
        if [[ $localips != *${ip}* ]]
        then
            # NOTE(review): SCRIPT_NAME is $0 and may itself contain a
            # path component; confirm the script is always invoked by
            # bare file name, otherwise this scp source path is wrong.
            sshcmd="scp ${SCRIPT_PATH}/${SCRIPT_NAME} root@${ip}:/"
            expect_ssh "${sshcmd}" "${PASSWORD}" "100%"
            if [ $? != 0 ]
            then
                die "an error occurred when copying the script to the target host ${ip}."
            fi
            # Forward all parameters positionally; checks() documents them.
            sshcmd="ssh ${ip} \"sh /${SCRIPT_NAME} inner ${ip} ${USER_GROUP} ${USER_NAME} ${PASSWORD} ${PORT} ${INSTALL_PATH} ${SYSTEM_ARCH} ${SYSTEM_NAME} ${SCRIPT_NAME}\""
            expect_ssh "${sshcmd}" "${PASSWORD}" "check end"
            if [ $? != 0 ]
            then
                die "an error occurred during the pre-installation check on the target host ${ip}."
            fi
        else
            # local
            checks "" ${ip} ${USER_GROUP} ${USER_NAME} ${PASSWORD} ${PORT} ${INSTALL_PATH} ${SYSTEM_ARCH} ${SYSTEM_NAME} ${SCRIPT_NAME}
            if [ $? != 0 ]
            then
                die "an error occurred during the pre-installation check on the target host ${ip}."
            fi
        fi
        info "succeeded in checking the installation environment of host ${ip}."
    done
    return 0
}
+
function xmlconfig()
{
    # Render the cluster XML from the template: substitute every
    # @{placeholder} token and write the result into the cluster user's
    # home directory as one_master_one_slave.xml.
    info "start to automatically configure the installation file."
    local install_location=${INSTALL_PATH//\//\\\/}
    local target_xml=$(eval echo ~${USER_NAME})/one_master_one_slave.xml
    if [ -e ${XML_DIR} ]
    then
        sed 's/@{nodeNames}/'${HOST_NAMES}'/g' ${XML_DIR} |
        sed 's/@{backIpls}/'${HOST_IPS}'/g' |
        sed 's/@{clusterName}/'${USER_NAME}'/g' |
        sed 's/@{port}/'${PORT}'/g' |
        sed 's/@{installPath}/'${install_location}'/g' |
        sed 's/@{nodeName1}/'${HOST_NAMES_ARRAY[0]}'/g' |
        sed 's/@{backIp1}/'${HOST_IPS_ARRAY[0]}'/g' |
        sed 's/@{nodeName2}/'${HOST_NAMES_ARRAY[1]}'/g' |
        sed 's/@{backIp2}/'${HOST_IPS_ARRAY[1]}'/g' > ${target_xml}
    else
        die "cannot find one_master_one_slave_template.xml in ${XML_DIR}"
    fi
    cat ${target_xml}
    info "the installation file is automatically configured"
    return 0
}
+
function install()
{
    # Run gs_preinstall as root (expect answers the password prompts),
    # then gs_install plus a status query as the cluster user.
    info "preparing for preinstallation"
    home_path=$(eval echo ~${USER_NAME})
    export LD_LIBRARY_PATH="${PACKAGE_PATH}/script/gspylib/clib:"$LD_LIBRARY_PATH
    sshcmd="python3 "${PACKAGE_PATH}"/script/gs_preinstall -U "${USER_NAME}" \
    -G "${USER_GROUP}" -X "${home_path}"/one_master_one_slave.xml --sep-env-file="${home_path}"/env_master_slave"
    info "cmd \"${sshcmd}\""
    expect_ssh "${sshcmd}" "${PASSWORD}" "Preinstallation succeeded"
    if [ $? != 0 ]
    then
        die "preinstall failed."
    fi
    info "preinstallation succeeded."
    chmod 755 ${home_path}'/one_master_one_slave.xml'
    chown ${USER_NAME}:${USER_GROUP} ${home_path}'/one_master_one_slave.xml'
    info "start the installation."
    # NOTE(review): "-c" has its argument attached with no space; su
    # accepts the attached form, so behavior is unchanged -- kept as-is.
    su - ${USER_NAME} -c"source ${home_path}/env_master_slave;gs_install -X ${home_path}/one_master_one_slave.xml;gs_om -t status --detail"
    if [ $? -ne 0 ]
    then
        die "install failed."
    else
        info "install success."
    fi
    exit 0
}
+
# Entry point: with "inner" as the first argument (remote invocation by
# pre_checks) run only the environment checks; otherwise run the full
# local install flow.  $1 and $@ are quoted so that running the script
# with no arguments no longer makes the [ ] test fail with a syntax
# error, and forwarded arguments are not re-split on whitespace.
if [ "$1" == "inner" ]
then
    checks "$@"
else
    main "$@"
    pre_checks
    xmlconfig
    install
fi
exit 0
+
diff --git a/simpleInstall/one_master_one_slave_template.xml b/simpleInstall/one_master_one_slave_template.xml
new file mode 100644
index 0000000000000000000000000000000000000000..9735758dbb83d57ec2bf81d25307b1e8b0b7c593
--- /dev/null
+++ b/simpleInstall/one_master_one_slave_template.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+