Friday, November 22, 2013

Java Garbage Collection Log File Scraper

'''
 
Created by Deniz Turan
 
Oracle Java (tested on 7), young generation garbage collection activity scraper. 
Extracts following fields from GC log file and save to a csv file.

Count,LogTime,logGCOffsetTime,logGCOffsetTime2, 
YGPreSize,YGPostSize,YGTotalSize, YGElapsedTime,     # Young generation
OLDPreSize,OLDPostSize,OLDTotalSize,OLDElapsedTime   # Old generation 

 
'''
from subprocess import call
import glob
import os
 
logDir="C:\\temp\\gc\\"
finalResultFileName=logDir+"finalResults.csv"
filterExtension="*.log";
 
def getLogFileList(search_dir):
        files = filter(os.path.isfile, glob.glob(search_dir + filterExtension))
        files.sort(key=lambda x: os.path.getmtime(x))       
        return files
 
def openResultFile():
    print "Creating result file : %s"% (finalResultFileName)
    # remove previous file
    call("rm "+finalResultFileName,shell=True)
    resultFileFD = open( finalResultFileName ,"a")
    ## create header
    resultFileFD.write("Count,LogTime,logGCOffsetTime,logGCOffsetTime2,")
    resultFileFD.write("YGPreSize,YGPostSize,YGTotalSize, YGElapsedTime,")
    resultFileFD.write("OLDPreSize,OLDPostSize,OLDTotalSize,OLDElapsedTime\n")
    return resultFileFD
       
def closeResultFile(resultFileFD):   
    print "Closing result file "
    resultFileFD.close();
 
def getFieldValue(strVal):
    index=strVal.index("K")
    index2=strVal.index("K", index+1)
    index3=strVal.index("K", index2+1)
    
    part1=strVal[:index]
    part2=strVal[index+3:index2]
    part3=strVal[index2+2:index3]
    return (part1,part2,part3)
 
#####################################################
# Main
#####################################################
if __name__ == '__main__':
    # prepare result file   
    resultFileFD=openResultFile ()
    
    print "Started to process log files"
    logFileList=getLogFileList(logDir)
    count=0
    for f in logFileList:
        print "Processing GC Log file %s"%f
        logFD = open(f)
        line = logFD.readline()
        while (line != "" ):           
            if "ParNew" in line :
                    count=count+1                   
                    fields=line.split(" ")
                    logTime=fields[0]
                    logGCOffsetTime=fields[1]
                    logGCOffsetTime2=fields[3]
                    res=getFieldValue(fields[5])
                    YGPreSize,YGPostSize,YGTotalSize=res
                    YGElapsedTime=fields[6]
                    res=getFieldValue(fields[8])
                    OLDPreSize,OLDPostSize,OLDTotalSize=res
                    OLDElapsedTime=fields[9]
                    print line
                   
                    print "%d %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(count,logTime,logGCOffsetTime,logGCOffsetTime2, \
                                                                   YGPreSize,YGPostSize,YGTotalSize, YGElapsedTime,\
                                                                   OLDPreSize,OLDPostSize,OLDTotalSize,OLDElapsedTime)
                    # print to file as CSV now
                    resultFileFD.write("%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(count,logTime,logGCOffsetTime,logGCOffsetTime2, \
                                                                   YGPreSize,YGPostSize,YGTotalSize, YGElapsedTime,\
                                                                   OLDPreSize,OLDPostSize,OLDTotalSize,OLDElapsedTime))                   
                    
            line = logFD.readline()
            continue
        logFD.close();
    closeResultFile(resultFileFD);           
    print "finished processing log files"
    pass

No comments: