# coding: utf-8
"""Summarise the largest directories listed in a ``files_cms.html`` dump.

The input is read line by line; every line containing ``"pnfs"`` is treated
as a ``|``-separated record and loaded into a pandas DataFrame with the
columns ``size``, ``depth``, ``access``, ``modify`` and ``path``.  A handful
of "largest entries" views are then printed.
"""

import pandas

# Download this file with wget from
# http://ganglia.lcg.cscs.ch/ganglia/files_cms.html
INPUT_PATH = "/Users/joosep/files_cms.html"

# Leading part of every path that is stripped to keep the tables readable.
PATH_PREFIX = "/pnfs/lcg.cscs.ch/"

COLUMNS = ["size", "depth", "access", "modify", "path"]


def parse_line(line):
    """Convert one ``|``-separated record into a row for the table.

    Fields used (0-based): 0 = size, 1 = depth, 3 = access time,
    4 = modification time, 5 = path.  Field 2 is ignored.

    The size is divided by 1024 twice.
    NOTE(review): the unit of the raw size is not visible here -- confirm
    against the producer of the file before labelling the result.

    Raises ``IndexError``/``ValueError`` on records that do not have the
    expected shape, so malformed input is never silently dropped.
    """
    fields = [field.strip() for field in line.strip().split("|")]
    return [
        float(fields[0]) / 1024.0 / 1024.0,
        int(fields[1]),
        pandas.to_datetime(fields[3]),
        pandas.to_datetime(fields[4]),
        fields[5].replace(PATH_PREFIX, ""),
    ]


def load_table(filename=INPUT_PATH):
    """Read *filename* and return its ``pnfs`` records as a DataFrame."""
    # ``with`` guarantees the handle is released even if parsing fails.
    with open(filename) as fi:
        # A list (not a lazy ``filter``/``map`` object) so the result can be
        # handed to pandas on both Python 2 and Python 3.
        rows = [parse_line(line) for line in fi if "pnfs" in line]
    return pandas.DataFrame(rows, columns=COLUMNS)


def largest(table, pattern, depth, count, min_size=None):
    """Return the *count* biggest entries matching the given selection.

    table    -- DataFrame as produced by ``load_table``.
    pattern  -- plain substring (not a regex) that ``path`` must contain.
    depth    -- required value of the ``depth`` column.
    count    -- maximum number of rows returned.
    min_size -- if given, keep only rows whose ``size`` is strictly larger.

    The result has the columns ``size`` and ``path`` and is sorted by
    descending ``size``.
    """
    # Vectorised boolean mask; combining a ``map`` object with a Series via
    # ``&`` does not work on Python 3.
    mask = table["path"].str.contains(pattern, regex=False)
    mask = mask & (table["depth"] == depth)
    if min_size is not None:
        mask = mask & (table["size"] > min_size)
    ranked = table[mask].sort_values(by="size", ascending=False)
    return ranked[:count][["size", "path"]]


def print_report(table):
    """Print the views that the original notebook cells displayed."""
    data_top = largest(table, "/data/", 8, 20)
    print(data_top)
    print(data_top["size"].sum())

    print(largest(table, "/mc/", 7, 10))

    user_top = largest(table, "/user/", 6, 20, min_size=10.0)
    print(user_top)
    print(user_top[["size"]].sum())

    print(largest(table, "user/jpata/tth", 8, 10))


if __name__ == "__main__":
    # ``table`` stays a module-level name so interactive sessions
    # (``python -i``, ``%run``) can keep exploring it afterwards.
    table = load_table()
    print_report(table)