Wednesday, July 25, 2012

[Python] sample code

import json

def id_key(val):
    """Render *val* as text using its default ``str.format`` representation."""
    template = '{}'
    return template.format(val)

def to_key(val):
    """Join the first two items of *val* into a ``'<first>_<second>'`` string.

    Items after the second one are ignored.
    """
    first = val[0]
    second = val[1]
    return '{}_{}'.format(first, second)

def from_key(key):
    """Parse a ``'<a>_<b>'`` string back into the list ``[int(a), int(b)]``.

    Only the first two underscore-separated fields are read; any further
    fields are ignored.
    """
    fields = key.split('_')
    # Convert one field at a time so a malformed first field is reported
    # before a missing second field is noticed.
    first = int(fields[0])
    second = int(fields[1])
    return [first, second]



def remap_list(l):
    """Build a dict mapping every element of *l* to its own ``[0, 0]`` list."""
    return {entry: [0, 0] for entry in l}
       

# Aggregate clicks and impressions into ten match-score buckets and report
# the click-through rate (clicks / impressions) of every bucket.
#
# Input:  `fname`, holding one JSON record per line.  The fields read from a
#         record are 'url', 'match_scores' and, depending on `use_paid`,
#         either 'num_paid'/'paid' or 'num_free'/'free'.
# Output: 'output.txt' with the per-bucket clicks, impressions and CTR lists.

use_paid = False
domain_to_retain = 'http://www.website.com'
fname = './tracker_output2.txt'

num_processed_lines = 0
num_skipped = 0
num_errorline = 0

total_imps = [0] * 10
total_clicks = [0] * 10
last_bucket = len(total_clicks) - 1

# Both files are still opened up front, but the `with` block guarantees they
# are closed (and the output flushed) even when a record raises part-way.
with open(fname) as f, open('output.txt', 'w') as out_f:
    for content in f:
        num_processed_lines += 1

        record = json.loads(content)
        scores = record['match_scores']
        url = record['url']

        # An empty `domain_to_retain` disables the URL filter.
        if domain_to_retain and not url.startswith(domain_to_retain):
            num_skipped += 1
            continue

        # Progress is only reported for records that passed the URL filter.
        if num_processed_lines % 1000 == 0:
            print('num_processed_lines={}'.format(num_processed_lines))

        if use_paid:
            count_field, tuples_field = 'num_paid', 'paid'
        else:
            count_field, tuples_field = 'num_free', 'free'

        if not record[count_field] > 0:
            continue
        tuples = record[tuples_field]

        # A bare list carries ids without counts: give every id a zeroed
        # [clicks, imps] pair and count the record as irregular.
        if isinstance(tuples, list):
            tuples = remap_list(tuples)
            num_errorline += 1

        # Index the count pairs by their string id once, so each score is
        # matched with a dict lookup instead of a scan over every tuple.
        counts_by_id = {}
        for item_id, counts in tuples.items():
            counts_by_id.setdefault(id_key(item_id), []).append(counts)

        for item_id, score in scores.items():
            matches = counts_by_id.get(id_key(item_id))
            if not matches:
                continue

            # A score of exactly 1.0 would select index 10 and raise
            # IndexError, and a score below -0.1 would wrap around through
            # negative indexing; clamp both into the valid bucket range.
            raw_bucket = int(float(id_key(score)) * 10)
            score_bucket = min(max(raw_bucket, 0), last_bucket)

            for counts in matches:
                # The to_key/from_key round trip coerces both counts to int.
                clicks, imps = from_key(to_key(counts))
                total_clicks[score_bucket] += clicks
                total_imps[score_bucket] += imps

    # Buckets without impressions get a CTR of 0 rather than dividing by zero.
    ctr = [
        float(bucket_clicks) / bucket_imps if bucket_imps != 0 else 0
        for bucket_clicks, bucket_imps in zip(total_clicks, total_imps)
    ]

    out_f.write('clicks={}\n'.format(total_clicks))
    out_f.write('imps={}\n'.format(total_imps))
    out_f.write('ctr={}'.format(ctr))
                                                               

No comments:

Post a Comment