'''
Created on 2011/7/1

@author: RUKAWA22
'''

import sys
import csv
import math

def normalize( list ):
    '''Min-max scale a sequence of numeric values into the range [0, 1].

    Every item is converted with float() *before* the minimum and maximum
    are taken, so numeric strings (e.g. cells read by csv.reader) are
    compared by value rather than lexicographically.

    list -- iterable of numbers or numeric strings.

    Returns a new list of floats in the same order as the input.  An empty
    input yields an empty list.  When every value is identical there is no
    spread to scale by, so each value maps to 0.0.

    Raises ValueError if an item cannot be converted to float.
    '''
    values = [float(item) for item in list]
    if not values:
        return []

    minValue = min(values)
    diff = max(values) - minValue
    if diff == 0:
        # Constant column: avoid dividing by zero.
        return [0.0] * len(values)

    return [(value - minValue) / diff for value in values]

def distrTypes( list, l):
    '''Count how many data points share the same combination of feature bins.

    list -- sequence of columns, each a sequence of numbers (or numeric
            strings) expected to lie in [0, 1].  list[0] is only used to
            determine the number of data points; columns 1..len(list)-1 are
            treated as 'feature1', 'feature2', ...
    l    -- non-negative integer; [0, 1] is split into 2**l equal-width bins.

    For each data point a dict {'featureJ': bin_index} is built, where
    bin_index = floor(value * 2**l) for 0 <= value < 1, and the last bin
    (2**l - 1) for value == 1.  Values outside [0, 1] get no entry.

    Returns a dict mapping str() of that per-point dict (entries in feature
    order) to the number of data points that produced it.
    '''
    bins = 2 ** l
    countDict = {}
    for idx in range( len(list[0]) ): # numbers of data points
        typeDict = {}
        for j in range(1, len(list)):
            value = float(list[j][idx])
            if value == 1:
                # The upper edge belongs to the last bin.
                typeDict['feature' + str(j)] = bins - 1
            elif 0 <= value < 1:
                # bins is a power of two, so the product is exact and
                # truncation gives the half-open bin [k/bins, (k+1)/bins).
                typeDict['feature' + str(j)] = int(value * bins)

        typeKey = str(typeDict)
        countDict[typeKey] = countDict.get(typeKey, 0) + 1
    return countDict
        
def cal_l( d ):
    '''Return the largest exponent e with 2**e <= d, or -1 when d < 1.'''
    exponent = -1
    power = 1  # always equals 2 ** (exponent + 1)
    while power <= d:
        power *= 2
        exponent += 1
    return exponent

            
if __name__ == '__main__':
    # Usage: <script> <csv file>
    # Every cell of the CSV must be numeric; there is no header row handling.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <csv file>' % sys.argv[0])
    filepath = sys.argv[1]

    # The with-block guarantees the file is closed once all rows are read.
    with open(filepath) as csvFile:
        data = [row for row in csv.reader(csvFile)]

    if not data or not data[0]:
        sys.exit('error: %s contains no data' % filepath)

    # Transpose rows into columns; the width is taken from the first row.
    columns = [[row[x] for row in data] for x in range(len(data[0]))]

    # Scale every column into [0, 1] (column 0 included, although
    # distrTypes only bins columns 1 and up as features).
    columns = [normalize(column) for column in columns]

    # l = floor(log2(number of data points)); 2**l bins per feature.
    l = cal_l(len(columns[0]))
    if l < 1:
        # With one data point l is 0 and log(1 / 2**0) == 0, so the ratio
        # below would divide by zero.
        sys.exit('error: at least two data rows are required')

    output = distrTypes( columns, l )

    # Sum of squared occupancy counts over all observed bin combinations.
    cfv = sum(count ** 2 for count in output.values())

    cfv = abs( math.log(cfv) / math.log( 1 / 2 ** float(l) ) )
    print('\ncfv = ' + str(cfv))
    



    