00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 import getopt, os, re, string, sys
00032
# Size units, in bytes.
KBYTE = 1024
DEFAULT_BLOCKSIZE = KBYTE * 256

# Run-time settings; commandLine() overwrites them from sys.argv.
debug = 0
infile = outfile = title = None
00041
def error(msg):
    """Report a fatal problem: print msg on stderr, then exit with status 1."""
    text = msg + "\n"
    sys.stderr.write(text)
    raise SystemExit(1)
00045
def warn(msg):
    """Print msg followed by a newline on stderr and keep running."""
    stream = sys.stderr
    stream.write(msg + "\n")
00048
def usage():
    """Print the command-line synopsis on stderr and exit with status 1.

    The option list mirrors the getopt string "do:t:" used by
    commandLine(), including the -d debug switch.
    """
    sys.stderr.write("Usage: plot-md5.py <options> infile\n"
                     "\t-d (debug: print gnuplot commands to stdout)\n"
                     "\t-o <output file>\n"
                     "\t-t <title>\n")
    sys.exit(1)
00055
def commandLine():
    """Parse sys.argv into the module-level settings.

    Recognized options:
        -d          set debug to 1
        -o <file>   store <file> in outfile
        -t <title>  store <title> in title

    Exactly one positional argument is required; it is stored in infile.
    An unknown option, an option missing its argument, or a wrong number
    of positional arguments is reported via usage().
    """
    global debug, infile, outfile, title

    try:
        opts, args = getopt.getopt(sys.argv[1:], "do:t:")
    except getopt.GetoptError as exc:
        # Show what was wrong with the options, then the synopsis,
        # instead of letting the exception surface as a traceback.
        sys.stderr.write(str(exc) + "\n")
        usage()
        return

    for opt, val in opts:
        if opt == "-d":
            debug = 1
        elif opt == "-o":
            outfile = val
        elif opt == "-t":
            title = val

    if len(args) != 1:
        usage()
        return
    infile = args[0]
00072
def readHashes(path):
    """Count how often each MD5 value occurs in the file at path.

    Every non-blank line must consist of exactly two whitespace-separated
    fields: an identifier followed by an MD5 value. Only the second field
    is used. Blank lines are skipped.

    Returns a dict mapping each MD5 string to its number of occurrences.
    Raises ValueError if a non-blank line does not have exactly two fields.
    """
    hashes = {}
    # open() replaces the Python-2-only file() builtin; the with block
    # closes the file even when a malformed line raises.
    with open(path, "r") as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # A blank line (e.g. at the end of the file) is not a record.
                continue
            _ident, md5 = fields
            hashes[md5] = hashes.get(md5, 0) + 1
    return hashes
00085
def byCount(left, right):
    """cmp-style comparator that orders (md5, count) pairs by count.

    left, right -- two-element sequences; only the second element of each
                   is compared.

    Returns -1, 0 or 1 when left's count is smaller than, equal to or
    larger than right's. Written without tuple parameters and cmp() so it
    is valid on both Python 2 and Python 3; on Python 3 wrap it with
    functools.cmp_to_key() to use it for sorting.
    """
    _m1, c1 = left
    _m2, c2 = right
    return (c1 > c2) - (c1 < c2)
00088
def _gnuplotScript(items, output, title):
    """Return the gnuplot commands plus inline data for plot() as one string."""
    lines = [
        "set title \"%s\"\n" % title,
        "set nokey\n",
        "set terminal %s\n" % output,
        "set xzeroaxis\n",
        "set xlabel \"MD5 values\"\n",
        "set ylabel \"Blocks covered\"\n",
        "set xrange [0:]\n",
        "plot \"-\" using ($0 + 1):1 with linespoints\n",
    ]
    cum = 0
    for _md5, count in items:
        assert count > 0
        # Each data line is the running total of the counts seen so far.
        cum += count
        lines.append("%u\n" % cum)
    return "".join(lines)


def plot(hashes, outfile, title):
    """Plot the cumulative occurrence count of the MD5 values with gnuplot.

    hashes  -- dict mapping an MD5 value to the number of times it occurs
    outfile -- file that receives gnuplot's PNG output, or None to use the
               x11 terminal with "gnuplot -persist" (gnuplot's stdout then
               goes to /dev/null)
    title   -- plot title, or None to use the module-level infile

    The entries are ordered by ascending count and the running total of
    the counts is plotted against the 1-based position in that order.
    When the module-level debug flag is set, the gnuplot script is written
    to sys.stdout and gnuplot is not started.
    """
    # Local import: subprocess is not among the file's top-level imports.
    import subprocess

    if outfile is None:
        outfile = "/dev/null"
        persist = 1
        output = "x11"
    else:
        persist = 0
        output = "png"

    if title is None:
        title = infile

    # A key function (instead of a cmp function passed to list.sort) works
    # on both Python 2 and Python 3; the sort is stable either way.
    items = sorted(hashes.items(), key=lambda item: item[1])
    script = _gnuplotScript(items, output, title)

    if debug:
        # Write only; sys.stdout is deliberately left open for the caller.
        sys.stdout.write(script)
        return

    command = ["gnuplot"]
    if persist:
        command.append("-persist")

    # An argument list plus an explicitly opened output file keeps outfile
    # away from the shell, so paths with spaces or shell metacharacters work.
    with open(outfile, "wb") as out:
        gnuplot = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=out,
                                   universal_newlines=True)
        # communicate() feeds the script, closes stdin and waits for exit.
        gnuplot.communicate(script)
00126
def main():
    """Parse the command line, load the hash counts from infile and plot them."""
    commandLine()
    counts = readHashes(infile)
    plot(counts, outfile, title)
00131
# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main()