Κάθε φορά που διακόπτεται η λειτουργία μιας υπηρεσίας στην πύλη, δημιουργείται ένα αρχείο πυρήνα. Οι διαγνωστικές δέσμες που δημιουργούνται από το Orchestrator θα πρέπει να ανακτηθούν το συντομότερο δυνατό μετά τη δημιουργία ενός αρχείου πυρήνα, για τη λήψη του αρχείου πυρήνα και για την παροχή των σχετικών αρχείων καταγραφής στην Υποστήριξη του VMware.
Το παρακάτω παράδειγμα παρουσιάζει μια δέσμη ενεργειών Python που ελέγχει αν υπάρχουν πρόσφατα αρχεία πυρήνα:
#! /usr/bin/env python
"""Nagios plugin that reports new gateway core files and diag-bundle reminders.

Flow, in order:

1. While /tmp/coredump_status_file contains "1", the CRITICAL message stored
   in /tmp/coredump_message is reported again; once the flag file is at
   least 300 seconds old it is reset to "0".
2. /velocloud/core/*core.tgz is compared with /tmp/crashlist.txt.  Unknown
   archives are recorded, their "Thread 1" backtrace is extracted and a
   CRITICAL result is raised.
3. While /tmp/warning_file contains "1", a WARNING reminds the operator to
   generate a diag bundle, until an upload shows up in /var/log/mgd.log or
   three hours have passed.

NOTE: this script targets Python 2 (it depends on the ``commands`` module).
NOTE(review): the block structure was reconstructed from a copy of the
script whose line breaks and indentation were lost; confirm the nesting
against the original before deploying.
"""
import commands
import datetime
import glob
import json
import os
import os.path
import re
import subprocess
import sys
import time
import traceback
from subprocess import Popen, PIPE

from pynag.Plugins import PluginHelper, ok, warning, critical, unknown

helper = PluginHelper()
helper.parse_arguments()

# State files shared between consecutive runs of the check.
status_file = "/tmp/coredump_status_file"
warning_file = "/tmp/warning_file"
message_file = "/tmp/coredump_message"
crashlist_file = "/tmp/crashlist.txt"
start_time_file = "/tmp/coredump_start_time"
files = ["crashlist.txt", "warning_file", "coredump_status_file", "coredump_message"]

CORE_GLOB = "/velocloud/core/*core.tgz"
CRITICAL_HOLD_SECONDS = 300       # how long a CRITICAL keeps being repeated
DIAG_WAIT_SECONDS = 10800         # give up waiting for a diag upload after 3 h
CORE_MAX_AGE = datetime.timedelta(hours=42 * 24)


def diag_check():
    """Return True when /var/log/mgd.log contains a diag bundle upload line."""
    # The year used to be matched with "201[0-9]", which can never match a
    # bundle created in 2020 or later; any 20xx year is accepted now.
    upload_re = re.compile(r"^.*\s+Uploading diag-20[0-9][0-9]-.*")
    cmd = 'grep "Uploading diag-20[0-9][0-9]" /var/log/mgd.log'
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, shell=True)
    stdout_value, _ = proc.communicate()
    return upload_re.search(stdout_value) is not None


def vco_vcg_version():
    """Return ``(gateway name, gwd build revision, primary Orchestrator)``.

    The build revision is the third whitespace-separated field of the
    ``rev`` line printed by ``gwd -v``; it is "unknown" when that command
    fails or prints something unexpected.

    Raises:
        IOError, ValueError, KeyError: /opt/vc/.gateway.info is missing,
            is not valid JSON, or lacks the expected keys.
    """
    with open("/opt/vc/.gateway.info") as data:
        info = json.load(data)
    vcg = info["gatewayInfo"]["name"]

    # Default first: previously build_number stayed unbound when gwd failed.
    build_number = "unknown"
    status, output = commands.getstatusoutput(
        "sudo /opt/vc/sbin/gwd -v 2>&1 | grep rev")
    fields = output.split()
    if status == 0 and len(fields) > 2:
        build_number = fields[2]

    vco = info["configuration"]["managementPlane"]["data"]["managementPlaneProxy"]["primary"]
    return vcg, build_number, vco


def _ensure_nagios_file(path):
    """Create *path* and hand it to the nagios user if it does not exist."""
    if not os.path.isfile(path) and not os.access(path, os.R_OK):
        os.system("touch " + path)
        os.system("chown nagios:nagios " + path)


def _read_flag(path):
    """Return the text of *path* without trailing newlines, "" if unreadable."""
    try:
        with open(path) as handle:
            return handle.read().rstrip("\n")
    except (IOError, OSError):
        return ""


def _set_flag(path, value):
    """Overwrite *path* with *value*; best effort through the shell."""
    os.system('echo -n "%s" > %s' % (value, path))


def _reclaim_root_owned_files():
    """Give state files owned by root (uid 0) back to the nagios user."""
    for item in files:
        path = "/tmp/" + item
        if os.path.isfile(path) and os.stat(path).st_uid == 0:
            commands.getstatusoutput("sudo chown nagios:nagios " + path)


def _exit_critical(summary):
    """Report CRITICAL with *summary* and leave the script."""
    helper.status(critical)
    helper.add_summary(summary)
    helper.exit()
    # Kept from the original as a fallback; helper.exit() is expected to
    # terminate the process already.
    sys.exit(0)


def _repeat_pending_critical():
    """Repeat the stored CRITICAL while the status flag still reads "1"."""
    if _read_flag(status_file) != "1":
        return
    os.system("chown nagios:nagios " + message_file)
    try:
        with open(message_file, "r") as data:
            status_message = data.read()
    except (IOError, OSError):
        # Still raise the alarm even if the stored details are unavailable.
        status_message = "New core detected, but %s could not be read" % message_file
    flag_age = int(time.time()) - int(os.path.getmtime(status_file))
    if flag_age >= CRITICAL_HOLD_SECONDS:
        _set_flag(status_file, "0")
    _exit_critical(status_message)


def _extract_backtraces(core_path):
    """Unpack the *.txt members of *core_path* into /tmp and return the
    frame lines ("#...") of their "Thread 1" section."""
    tar_cmd = "tar -xvf " + core_path + " -C /tmp --wildcards --no-anchored '*.txt' "
    tar = subprocess.Popen(tar_cmd, shell=True, stdout=subprocess.PIPE)
    # communicate() drains the pipe while waiting; wait() followed by a read
    # can block forever once the pipe buffer fills up.
    listing = tar.communicate()[0]
    text = ""
    for member in listing.splitlines():
        btcmd = "awk '/^Thread 1 /,/^----/' /tmp/" + member + " | egrep '^#' | sed 's/ 0x0.* in //' | sed 's/ (.*/ /'"
        bt = subprocess.Popen(btcmd, shell=True, stdout=subprocess.PIPE)
        text += '\n' + bt.communicate()[0]
    return text


def _scan_cores():
    """Compare the core directory with the recorded crash list.

    Returns:
        ``(corecount, newcore, status_message)``: number of core archives
        found, number of those not seen before, and the text describing the
        new ones ("No Core file" when the directory holds none).
    """
    newcore = 0
    status_message = ""

    # First run: seed the list with the cores that already exist so they are
    # not reported as new.  The pattern is quoted so the shell cannot expand
    # it against the current directory.
    if not os.path.isfile(crashlist_file) and not os.access(crashlist_file, os.R_OK):
        os.system("find /velocloud/core/ -name '*core.tgz' > " + crashlist_file)

    with open(crashlist_file, "a+") as f:
        f.seek(0)  # "a+" does not guarantee that reads start at offset 0
        known = set(f.read().splitlines())
        corelist = glob.glob(CORE_GLOB)
        corecount = len(corelist)
        if corecount == 0:
            helper.status(ok)
            status_message = "No Core file"
        for core_path in corelist:
            modified = datetime.datetime.fromtimestamp(os.path.getmtime(core_path))
            if datetime.datetime.now() - modified > CORE_MAX_AGE:
                os.remove(core_path)
                # The archive is gone; do not try to unpack or report it.
                continue
            if core_path in known:
                continue
            newcore += 1
            status_message += '\n' + "Core:" + str(newcore) + " " + core_path.rsplit('/', 1)[1] + " "
            f.write(core_path + '\n')
            status_message += _extract_backtraces(core_path)
    return corecount, newcore, status_message


def _diag_wait_started():
    """Return the epoch time at which waiting for a diag upload began,
    starting (or restarting) the timer file when it is missing or corrupt."""
    if os.path.isfile(start_time_file):
        try:
            return float(_read_flag(start_time_file))
        except ValueError:
            pass  # empty or corrupt timer file: start over below
    os.system("touch " + start_time_file)
    os.system("chown nagios:nagios " + start_time_file)
    now = time.time()
    with open(start_time_file, "w") as data:
        data.write(str(now))
    return now


def _diag_reminder():
    """Set WARNING and return the reminder text for a recent crash."""
    helper.status(warning)
    status_message = "Please generate gateway diag bundle from the VCO if required"
    result = diag_check()
    if not result:
        if time.time() - _diag_wait_started() > DIAG_WAIT_SECONDS:
            result = True
    if result:
        _set_flag(warning_file, "0")
        # The timer file only exists if an earlier run had to wait.
        if os.path.isfile(start_time_file):
            os.remove(start_time_file)
        # NOTE(review): placing the final message inside this branch is a
        # reconstruction; it is the only nesting where both texts are used.
        helper.status(warning)
        status_message = "Please generate the diagbundle for the last crash. if it is taken already, please ignore this message"
    return status_message


def main():
    """Run the check once and exit with the matching Nagios status."""
    _ensure_nagios_file(status_file)
    # This guard used to test status_file and therefore never fired.
    _ensure_nagios_file(warning_file)
    # crashlist.txt is deliberately NOT pre-created: an empty list would skip
    # the first-run seeding in _scan_cores() and flag every old core as new.
    _reclaim_root_owned_files()

    _repeat_pending_critical()

    corecount = 0
    newcore = 0
    status_message = ""
    try:
        corecount, newcore, status_message = _scan_cores()
    except Exception as e:
        traceback.print_exc()
        helper.exit(summary="Nagios check could not complete",
                    long_output=str(e), exit_code=unknown, perfdata='')

    if corecount and not newcore:
        helper.status(ok)
        status_message = str(corecount) + " old core file found in /velocloud/core"
        _set_flag(status_file, "0")
    elif newcore > 0:
        try:
            output = vco_vcg_version()
        except Exception:
            # The core is already recorded in the crash list, so failing here
            # would lose the alert for good; report with placeholders.
            traceback.print_exc()
            output = ("unknown", "unknown", "unknown")
        vcg_data = "%s; VCG_Build_Number:%s; VCO:%s\n" % output
        status_message = vcg_data + str(newcore) + " New Core\n" + status_message
        with open(message_file, "w") as data:
            data.write(status_message)
        _set_flag(warning_file, "1")
        _set_flag(status_file, "1")
        _exit_critical(status_message)

    if _read_flag(warning_file) == "1":
        status_message = _diag_reminder()
    helper.add_summary(status_message)
    helper.exit()


if __name__ == "__main__":
    main()