import json
def id_key(val):
    """Return ``val`` rendered as a string via ``str.format``.

    Used to normalise identifiers so that values of different types
    (for example an int and its decimal string) compare equal as text.
    """
    template = '{0}'
    return template.format(val)
def to_key(val):
    """Join the first two items of ``val`` into a ``'<first>_<second>'`` string.

    Only ``val[0]`` and ``val[1]`` are read; any further items are ignored.
    """
    first = val[0]
    second = val[1]
    return '{0}_{1}'.format(first, second)
def from_key(key):
    """Parse a ``'<int>_<int>'`` string back into a two-element list of ints.

    The string is split on underscores and only the first two pieces are
    converted; any additional pieces are ignored.
    """
    pieces = key.split('_')
    # Convert piece 0 first, then piece 1, so failures surface in that order.
    return [int(pieces[position]) for position in (0, 1)]
def remap_list(l):
    """Turn a list of hashable items into a dict of zeroed pairs.

    Every element of ``l`` becomes a key whose value is its own fresh
    ``[0, 0]`` list (the lists are not shared between keys).  Duplicate
    elements collapse into a single key.
    """
    return {item: [0, 0] for item in l}
# ---------------------------------------------------------------------------
# Aggregate clicks and impressions per match-score bucket.
#
# Reads one JSON record per line from `fname`, keeps the records whose URL
# starts with `domain_to_retain`, and for every id present in both the
# record's 'match_scores' and its 'paid'/'free' entries adds that entry's
# (clicks, imps) pair to the bucket selected by the score.  Totals and the
# per-bucket click-through rate are written to 'output.txt'.
# ---------------------------------------------------------------------------

# True: aggregate record['paid']; False: aggregate record['free'].
use_paid = False
# Records whose URL does not start with this prefix are skipped.  A falsy
# value (e.g. '') disables the filter.
domain_to_retain = 'http://www.website.com'
# Input file, one JSON document per line.
fname = './tracker_output2.txt'


def _score_bucket(score):
    """Map a score to one of the ten bucket indices (0-9).

    The raw index is ``int(float(score) * 10)``.  It is clamped to the valid
    range so that a score of exactly 1.0 lands in the last bucket instead of
    raising IndexError, and a negative score lands in the first bucket
    instead of silently wrapping around through negative list indexing.
    """
    return max(0, min(9, int(float(score) * 10)))


num_processed_lines = 0  # every line read, including the skipped ones
num_skipped = 0          # lines rejected by the domain filter
num_errorline = 0        # lines whose entries arrived as a list, not a dict
# NOTE(review): num_skipped and num_errorline are tallied but not reported
# anywhere in this section.
total_imps = [0] * 10
total_clicks = [0] * 10

# Both files are opened up front (as before) but are now closed reliably,
# even if a record fails to parse.
with open(fname) as f, open('output.txt', 'w') as out_f:
    for content in f:
        num_processed_lines += 1
        record = json.loads(content)
        scores = record['match_scores']
        url = record['url']

        if domain_to_retain and not url.startswith(domain_to_retain):
            num_skipped += 1
            continue

        # Progress is only reported for lines that pass the domain filter.
        # The parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        if num_processed_lines % 1000 == 0:
            print('num_processed_lines={}'.format(num_processed_lines))

        if use_paid:
            count_field, entries_field = 'num_paid', 'paid'
        else:
            count_field, entries_field = 'num_free', 'free'
        if not (record[count_field] > 0):
            continue
        tuples = record[entries_field]

        if isinstance(tuples, list):
            # A bare list carries ids only: give every id a zeroed pair so
            # the lookup below still works, and count the line as an error.
            tuples = remap_list(tuples)
            num_errorline += 1

        # Index the entries by stringified id once per record so each score
        # is matched with a dict lookup instead of a scan over all entries.
        entries_by_id = {id_key(key): val for key, val in tuples.items()}

        for key, val in scores.items():
            score_id = id_key(key)
            if score_id not in entries_by_id:
                continue
            # The to_key/from_key round trip coerces the pair to two ints:
            # item 0 is accumulated as clicks, item 1 as impressions.
            val_click_imp = from_key(to_key(entries_by_id[score_id]))
            # The score goes through id_key (string form) before float(),
            # exactly as the bucket was derived previously.
            score_bucket = _score_bucket(id_key(val))
            total_clicks[score_bucket] += val_click_imp[0]
            total_imps[score_bucket] += val_click_imp[1]

    # Click-through rate per bucket; buckets without impressions report 0.
    ctr = [
        0 if imps == 0 else float(clicks) / imps
        for clicks, imps in zip(total_clicks, total_imps)
    ]

    out_f.write('clicks={}\n'.format(total_clicks))
    out_f.write('imps={}\n'.format(total_imps))
    out_f.write('ctr={}'.format(ctr))