Základní soubor (soubor výpisu paměti, anglicky „core“) se vygeneruje vždy, když dojde k selhání služby u brány. Diagnostické svazky vygenerované v nástroji Orchestrator je vhodné shromáždit co nejdříve po vygenerování základního souboru, aby bylo možné stáhnout základní soubor a předat související protokoly podpoře VMware.
Následuje vzorový skript psaný v Pythonu, který lze použít pro vyhledání nedávných základních souborů:
#! /usr/bin/env python
import subprocess, traceback, os, os.path,glob,datetime,time,sys,re
from pynag.Plugins import PluginHelper,ok,warning,critical,unknown
from subprocess import Popen, PIPE
import time
import os
import commands
import json
# Plugin helper object; every status(), add_summary() and exit() call in the
# script below goes through this single instance.
helper = PluginHelper()
# Let the helper parse its arguments (presumably the plugin's command-line
# options -- confirm against the pynag PluginHelper documentation).
helper.parse_arguments()
def diag_check(log_path="/var/log/mgd.log"):
    """Report whether the log records the upload of a diagnostic bundle.

    A line counts as a hit when it contains ``Uploading diag-<YYYY>-``
    preceded by whitespace, where ``<YYYY>`` is any year of the form 20xx.
    The earlier pattern only accepted ``201[0-9]`` and therefore stopped
    recognising bundles created from 2020 onwards.

    The file is scanned directly instead of spawning ``grep`` through a
    shell, and every line is checked (previously only the first ``grep``
    hit was tested against the stricter pattern).

    Args:
        log_path: Log file to scan. Defaults to ``/var/log/mgd.log``, the
            path that was hard-coded before.

    Returns:
        True if at least one matching line is found. False otherwise,
        including when the log file is missing or unreadable (the old
        ``grep`` call produced no output in that case as well).
    """
    # Bytes pattern + binary read: undecodable bytes in the log cannot raise,
    # and the code behaves the same on Python 2 and Python 3.
    upload_re = re.compile(br"\s+Uploading diag-20[0-9]{2}-")
    try:
        with open(log_path, "rb") as log:
            return any(upload_re.search(entry) for entry in log)
    except (IOError, OSError):
        return False
def vco_vcg_version(info_path="/opt/vc/.gateway.info"):
    """Return ``(gateway name, build number, primary VCO)`` for this gateway.

    The gateway name and the primary management-plane proxy are read from
    the JSON document at ``info_path``. The build number is taken from the
    third whitespace-separated token of the ``gwd -v`` output lines that
    contain ``rev``.

    Previously ``build_number`` was only assigned when that command
    succeeded, so any failure ended in ``UnboundLocalError``. Now the value
    falls back to ``gatewayInfo["buildNumber"]`` from the JSON document
    (NOTE(review): that key is only implied by earlier commented-out code --
    confirm it exists), and finally to ``"unknown"``.

    Args:
        info_path: Path of the gateway info JSON file. Defaults to
            ``/opt/vc/.gateway.info``, the path that was hard-coded before.

    Returns:
        A 3-tuple ``(vcg, build_number, vco)``.

    Raises:
        IOError/OSError: ``info_path`` cannot be opened.
        ValueError: the file does not contain valid JSON.
        KeyError: a required key is missing from the JSON document.
    """
    with open(info_path) as data:
        d = json.loads(data.read())
    gateway_info = d["gatewayInfo"]
    vcg = gateway_info["name"]
    vco = d["configuration"]["managementPlane"]["data"]["managementPlaneProxy"]["primary"]

    # Safe default so the function always has something to return.
    build_number = gateway_info.get("buildNumber", "unknown")

    try:
        # "-n" keeps sudo from ever prompting for a password; stderr is folded
        # into stdout exactly like the former "2>&1".
        proc = subprocess.Popen(
            ["sudo", "-n", "/opt/vc/sbin/gwd", "-v"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        gwd_output = proc.communicate()[0]
    except OSError:
        # sudo itself is not available; keep the fallback value.
        gwd_output = ""

    # Equivalent of "| grep rev" followed by output.split()[2], but guarded
    # against output that has fewer than three tokens.
    rev_tokens = " ".join(
        text for text in gwd_output.splitlines() if "rev" in text
    ).split()
    if len(rev_tokens) > 2:
        build_number = rev_tokens[2]

    return vcg, build_number, vco
# --- State files shared between plugin runs --------------------------------
# coredump_status_file holds "1" while a freshly reported core is being
# replayed as CRITICAL; warning_file holds "1" while the diag-bundle reminder
# is pending.
status_file = "/tmp/coredump_status_file"
warning_file = "/tmp/warning_file"

# Create each flag file on first use and hand it to the nagios user.
# Each path is now tested against itself: the warning_file check used to test
# the readability of status_file, so warning_file was practically never
# created here.
for _flag_path in (status_file, warning_file):
    if not os.path.isfile(_flag_path) and not os.access(_flag_path, os.R_OK):
        os.system("touch " + _flag_path)
        os.system("chown nagios:nagios " + _flag_path)

# /tmp/crashlist.txt is deliberately NOT created here. The core scan further
# down seeds it with the cores already on disk when the file is missing;
# an empty pre-created list would make every existing core look new on the
# first run. (The former third block carried a copy-pasted warning_file guard
# and never fired in practice.)

# Shell commands used later to read the two flag files.
command = "cat /tmp/coredump_status_file"
command1 = "cat /tmp/warning_file"

# Any state file that ended up owned by root (uid 0), e.g. after a manual run
# as root, is given back to nagios so later runs can rewrite it.
files = ["crashlist.txt", "warning_file", "coredump_status_file", "coredump_message"]
for item in files:
    state_path = "/tmp/" + item
    if os.path.isfile(state_path) and os.stat(state_path).st_uid == 0:
        commands.getstatusoutput("sudo chown nagios:nagios " + state_path)
# --- Replay a previously reported core as CRITICAL -------------------------
# While the status flag reads "1" the stored message is reported again
# without rescanning. Once the flag file is at least 300 seconds old it is
# reset to "0", so this is the last replay.
status, output = commands.getstatusoutput(command)
if output == "1":
    os.system("chown nagios:nagios /tmp/coredump_message")
    with open("/tmp/coredump_message", "r") as data:
        status_message = "".join(data.readlines())

    flag_age = int(time.time()) - int(os.path.getmtime("/tmp/coredump_status_file"))
    if flag_age >= 300:
        os.system('echo -n "0" > /tmp/coredump_status_file')

    helper.status(critical)
    helper.add_summary(status_message)
    helper.exit()
    # Backstop in case helper.exit() returns instead of terminating.
    sys.exit(0)
# --- Scan /velocloud/core for core archives --------------------------------
# Results consumed by the code that follows:
#   status_message - text accumulated for the plugin summary
#   newcore        - number of archives not yet present in the crash list
#   corecount      - number of archives currently matching the glob
status_message = ""
newcore = 0
try:
    crashlistpath = '/tmp/crashlist.txt'
    # First run: seed the crash list with the archives already on disk so
    # they are not reported as new. The pattern is quoted so the shell cannot
    # expand it against the current directory before find sees it.
    if not os.path.isfile(crashlistpath) and not os.access(crashlistpath, os.R_OK):
        os.system("find /velocloud/core/ -name '*core.tgz' > /tmp/crashlist.txt")

    # Pulls the "#..." frames of "Thread 1" out of an extracted text file.
    # The file name is passed as "$1" so it is never parsed by the shell.
    bt_pipeline = (
        "awk '/^Thread 1 /,/^----/' \"$1\" | grep -E '^#'"
        " | sed 's/ 0x0.* in //' | sed 's/ (.*/ /'"
    )

    with open(crashlistpath, "a+") as f:
        # "a+" may start positioned at end-of-file (it always does on
        # Python 3), which made read() return "" and every core look new.
        f.seek(0)
        oldcrashlist = f.read()
        f.seek(0, 2)  # back to the end before appending

        corelist = glob.glob("/velocloud/core/*core.tgz")
        corecount = len(corelist)
        if corecount > 0:
            retention = datetime.timedelta(hours=42 * 24)  # 42 days
            for line in corelist:
                file_modified = datetime.datetime.fromtimestamp(os.path.getmtime(line))
                expired = datetime.datetime.now() - file_modified > retention

                if line not in oldcrashlist:
                    newcore += 1
                    status_message += '\n' + "Core:" + str(newcore) + " " + line.rsplit('/', 1)[1] + " "
                    f.write(line + '\n')

                    # Extract the *.txt members into /tmp; tar -v prints one
                    # extracted member name per line.
                    crash = subprocess.Popen(
                        ["tar", "-xvf", line, "-C", "/tmp",
                         "--wildcards", "--no-anchored", "*.txt"],
                        stdout=subprocess.PIPE,
                        universal_newlines=True,
                    )
                    # communicate() drains the pipe while waiting; wait()
                    # followed by reading stdout can deadlock on large output.
                    extracted = crash.communicate()[0]
                    for line1 in extracted.splitlines():
                        bt = subprocess.Popen(
                            ["sh", "-c", bt_pipeline, "sh", "/tmp/" + line1],
                            stdout=subprocess.PIPE,
                            universal_newlines=True,
                        )
                        status_message += '\n' + bt.communicate()[0]

                # Expired archives are removed only after the backtrace has
                # been captured; previously tar ran on an already deleted file.
                if expired:
                    os.remove(line)
        else:
            helper.status(ok)
            status_message = "No Core file"
except Exception as e:
    # Top-level boundary of the plugin: report UNKNOWN instead of dying with
    # a bare traceback.
    traceback.print_exc()
    helper.exit(summary="Nagios check could not complete", long_output=str(e), exit_code=unknown, perfdata='')
# --- Turn the scan result into a plugin status -----------------------------
if corecount and not newcore:
    # Only archives that were already listed: report OK and clear the flag.
    helper.status(ok)
    status_message = str(corecount) + " old core file found in /velocloud/core"
    os.system('echo -n "0" > /tmp/coredump_status_file')
elif newcore > 0:
    # At least one new archive: build the CRITICAL message, store it for the
    # replay phase and raise both flags.
    try:
        output = vco_vcg_version()
        vcg_data = "%s; VCG_Build_Number:%s; VCO:%s\n" % tuple(output)
    except Exception:
        # The new cores are already recorded in the crash list, so a failure
        # here would otherwise lose the alert for good. Report without the
        # gateway details instead of aborting.
        traceback.print_exc()
        vcg_data = "Gateway/VCO details unavailable\n"
    status_message = vcg_data + str(newcore) + " New Core\n" + status_message
    with open("/tmp/coredump_message", "w") as data:
        data.write(status_message)
    os.system('echo -n "1" > /tmp/warning_file')
    os.system('echo -n "1" > /tmp/coredump_status_file')
    helper.status(critical)
    helper.add_summary(status_message)
    helper.exit()
    # Backstop in case helper.exit() returns -- NOTE(review): confirm against
    # pynag whether this line is reachable at all.
    sys.exit(0)
# --- Diag-bundle reminder ---------------------------------------------------
# While warning_file reads "1" the plugin stays in WARNING. The reminder ends
# when diag_check() sees a bundle upload in the log, or 10800 seconds
# (3 hours) after the reminder timer was started.
status, output_warn = commands.getstatusoutput(command1)
if output_warn == "1":
    start_time_file = "/tmp/coredump_start_time"
    helper.status(warning)
    status_message = "Please generate gateway diag bundle from the VCO if required"
    result = diag_check()
    if not result:
        end_time = time.time()
        try:
            with open(start_time_file) as data:
                start_time = float(data.read())
        except (IOError, OSError, ValueError):
            # Timer file missing, unreadable or empty: (re)start the timer now
            # instead of crashing on float() of garbage.
            start_time = end_time
            with open(start_time_file, "w") as data:
                data.write(str(start_time))
            os.system("chown nagios:nagios /tmp/coredump_start_time")
        total_time = end_time - start_time
        if total_time > 10800:
            result = True
    if result:
        os.system('echo -n "0" > /tmp/warning_file')
        # The timer file only exists if an earlier run had to wait; removing
        # it unconditionally raised OSError when the upload was seen first.
        if os.path.isfile(start_time_file):
            os.remove(start_time_file)
        helper.status(warning)
        status_message = "Please generate the diagbundle for the last crash. if it is taken already, please ignore this message"

# Final report for every path that has not exited earlier.
helper.add_summary(status_message)
helper.exit()