@diyer22
2016-11-28T13:53:24.000000Z
字数 2768
阅读 2516
未分类
from __future__ import unicode_literalsdef log(d):show_len = 40if isinstance(d,list):for i in d:strr = str(i)if len(strr) > show_len:print strr[:show_len] + '...'else:print strrreturnif isinstance(d,dict):for i in d:strr = str(i)+' = '+str(d[i])if len(strr) > show_len:print strr[:show_len] + '...'else:print strrreturnprint dMAX_L = 22def p_tree(t,level = 0):if level > MAX_L:returnfor i in t:if not isinstance(i,int):continueprint level*'| '+'%d :%d'%(i, t[i]['c'])p_tree(t[i],level+1)min_sup = 0.01min_conf = 0.5path = './retail2.dat'f = open(path, 'r')db = f.readlines()f.close()db = map(lambda strr:map(lambda x:int(x), strr[:-1].split(' ')[:-1]), db)min_sup *= len(db)dic = {} # 记录it 频率dicc = {} # 记录所有频繁项集min_f = [] # 记录最小边缘频繁项集a_r = [] # 记录边缘强关系for items in db:for item in items:dic[item] = 1 if item not in dic else dic[item] + 1# 删除小于 min_sup 的item[dic.pop(k) if not dic[k]>= min_sup else None for k in dic.keys()]root = {}record = {}def add_tree(its, tree, count, record, dic):'''将 count个 items 加入 tree如果record_flag为True 则记录tree 的结构s selff fatherc count[\d] child'''its = filter(lambda x: x in dic, its)its.sort(lambda x, y: 1 if dic[x] < dic[y] else -1 )for it in its:if it not in tree:tree[it] = {}tree[it]['f'] = treetree[it]['s'] = itrecord[it]=[tree[it]] if it not in record else record[it]+[tree[it]]tree = tree[it]tree['c'] = count if 'c' not in tree else tree['c'] + countfor items in db:add_tree(items, root, 1, record, dic)#print 'dic',#log(dic)print '\nrecord',log('')#p_tree(root)fp_l = dic.items()fp_l.sort(lambda x, y: 1 if x[1]<y[1] else -1)def creat_its(t):l=[]while 1:t = t['f']if 'f' not in t:breakl += [t['s']]return l[::-1]dicc ={}def is_singl_tree(t):def c_n(n, listt, l, b, r): # 递归生成全排列''' n 剩余选择数量listt 全排列目标b 起始值l 临时记录'''if n == 0: # 生成关系并计算支持度r.append(tuple(l))returnfor i in range(b, len(listt)-n+1):l += [listt[i]]c_n(n-1, listt, l, i+1,r)l.pop()returndef combination(its): # 对边缘频繁项集 递归生成排列n = len(its)l = []if n == 1:return [its]for lenth in range(1, n+1):c_n(lenth, its, [],0,l)print 'its',its,lreturn ll = []root = twhile 1:tmp = filter(lambda x:isinstance(x, int), t.keys())n = len(tmp)if n != 1 and n != 0:# print l,p_tree(root)return Noneif n == 0 :if 'f' in t:# print 'tmp',l# print tmp,root.keys(),l,combination(l)return [list(i) for i in combination(l)]return Nonel += tmpt=t[tmp[0]]def header_to_db(fp_list, record):l = []# print 'fp',fp_listfor it in fp_list[::-1]:it = it[0]db = []for t in record[it]:db += [(creat_its(t), t['c'])]# print 'db',dbdic = {}for items in db:for item in items[0]:dic[item] = items[1] if item not in dic else dic[item] + items[1]# 删除小于 min_sup 的item[dic.pop(k) if not dic[k]>= min_sup else None for k in dic.keys()]t = {'s':'root'}header = {}for its in db:add_tree(its[0], t, its[1], header, dic)r = is_singl_tree(t)# print 'it',it,r,dicif r != None: # 若已经是 single 了 则不必继续递归# print 'r',rl += [i+[it] for i in r]+[[it]]# print 'add',[i+[it] for i in r]continuefp_l = dic.items()fp_l.sort(lambda x, y: 1 if x[1]<y[1] else -1)r = header_to_db(fp_l, header)l += [i+[it] for i in r]+[[it]]return l#for it in fp_l[::-1]:# f(it[0])r = header_to_db(fp_l, record)#print 'resoult',rprint 'frequent itemset', len(r)