# -*- coding: utf-8 -*-
"""
Created on Mon Nov 05 22:50:13 2018

@author: ZhuChaochao
"""


def loadDataSet(path="F:/Python CODE/zcc/1.txt"):
    """Read a transaction file and return it as a list of integer lists.

    Each non-empty line of the file is one transaction whose items are
    integers separated by whitespace.

    Args:
        path: Location of the data file. Defaults to the path that was
            previously hard-coded in this function.

    Returns:
        A list with one ``list[int]`` per non-empty line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field is not a valid integer.
    """
    dataset = []
    # The context manager closes the file even if parsing raises.
    with open(path, "r") as f:  # input data file
        for line in f:
            # split() without arguments tolerates repeated spaces, tabs and
            # the trailing newline; blank lines yield no fields and are skipped.
            fields = line.split()
            if not fields:
                continue
            dataset.append([int(field) for field in fields])
    return dataset
def createC1(dataset):
    """Build the candidate 1-itemsets from a collection of transactions.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list of single-element ``frozenset`` objects, one per distinct
        item, ordered by ascending item value. A concrete list is returned
        (rather than a lazy ``map`` object) so callers can iterate over the
        candidates more than once.
    """
    # A set gives O(1) de-duplication instead of scanning a list per item.
    unique_items = {item for transaction in dataset for item in transaction}
    return [frozenset([item]) for item in sorted(unique_items)]
def scanD(D, CK, minSupport):
    """Count candidate itemsets in the transactions and keep the frequent ones.

    Args:
        D: Iterable of transactions (each an iterable of items). It may be a
            one-shot iterator; it is materialised before use.
        CK: Iterable of candidate itemsets (``frozenset`` objects). It may
            also be a one-shot iterator.
        minSupport: Minimum fraction of transactions that must contain a
            candidate for it to be kept.

    Returns:
        A tuple ``(retList, supportData)``. ``retList`` holds the candidates
        whose support is at least ``minSupport``, in reverse order of first
        occurrence. ``supportData`` maps each candidate that occurred at
        least once to its support.
    """
    # Materialise both inputs first: each is traversed more than once below,
    # and the transaction count must reflect everything that was scanned.
    transactions = [set(tid) for tid in D]
    candidates = list(CK)

    ssCnt = {}
    for tid in transactions:
        for can in candidates:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    # ssCnt is non-empty only if at least one transaction exists, so the
    # division below can never be by zero.
    numItems = float(len(transactions))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        # NOTE(review): the original indentation was lost; support is
        # recorded for every counted candidate, as in the textbook version
        # of this routine -- confirm against the intended behaviour.
        supportData[key] = support
    # Keep the historical ordering (equivalent to insert(0, key) per item)
    # without the quadratic cost of repeated front insertion.
    retList.reverse()
    return retList, supportData
def aprioriGen(Lk, k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are joined when their first ``k - 2`` items, taken in
    sorted order, are identical.

    Args:
        Lk: Indexable sequence of itemsets (``frozenset`` objects), each of
            size ``k - 1``.
        k: Size of the itemsets to generate.

    Returns:
        A list containing the union of every joinable pair, in pair order.
    """
    # Sort before slicing: a frozenset has no defined iteration order, so
    # slicing first would compare arbitrary items and miss valid joins.
    # Computing each prefix once also avoids re-sorting inside the pair loop.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    lenLk = len(Lk)
    retList = []
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList
def apriori(dataSet, minSupport = 0.5):
    """Run the Apriori algorithm and return all frequent itemsets.

    Args:
        dataSet: Iterable of transactions, each an iterable of items.
        minSupport: Minimum support (fraction of transactions) an itemset
            needs in order to be considered frequent.

    Returns:
        A tuple ``(L, supportData)``. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; the last entry is always an empty list,
        which is what terminates the search. ``supportData`` is the
        accumulated support mapping returned by the ``scanD`` calls.
    """
    # Both collections are scanned once per level, so they must be concrete
    # lists; a bare map() object would be exhausted after the first pass.
    C1 = list(createC1(dataSet))
    D = list(map(set, dataSet))
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # Grow itemsets one item at a time until a level has no frequent sets.
    while len(L[k - 2]) > 0:
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData
# Original article: https://www.cnblogs.com/USTC-ZCC/p/9982134.html