-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathApriori.java
369 lines (329 loc) · 15.1 KB
/
Apriori.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
/**
*
* @author Nathan Magnus, under the supervision of Howard Hamilton
* Copyright: University of Regina, Nathan Magnus and Su Yibin, June 2009.
* No reproduction in whole or part without maintaining this copyright notice
* and imposing this condition on any subsequent users.
*
* File:
* Input files needed:
* 1. config.txt - three lines, each line is an integer
* line 1 - number of items per transaction
* line 2 - number of transactions
* line 3 - minsup
* 2. transa.txt - transaction file, each line is a transaction, items are separated by a space
*/
package apriori;
import java.io.*;
import java.util.*;
public class Apriori {
public static void main(String[] args) {
AprioriCalculation ap = new AprioriCalculation();
ap.aprioriProcess();
}
}
/******************************************************************************
* Class Name : AprioriCalculation
* Purpose : generate Apriori itemsets
*****************************************************************************/
class AprioriCalculation
{
Vector<String> candidates=new Vector<String>(); //the current candidates
String configFile="config.txt"; //configuration file
String transaFile="transa.txt"; //transaction file
String outputFile="apriori-output.txt";//output file
int numItems; //number of items per transaction
int numTransactions; //number of transactions
double minSup; //minimum support for a frequent itemset
String oneVal[]; //array of value per column that will be treated as a '1'
String itemSep = " "; //the separator value for items in the database
/************************************************************************
* Method Name : aprioriProcess
* Purpose : Generate the apriori itemsets
* Parameters : None
* Return : None
*************************************************************************/
public void aprioriProcess()
{
Date d; //date object for timing purposes
long start, end; //start and end time
int itemsetNumber=0; //the current itemset being looked at
//get config
getConfig();
System.out.println("Apriori algorithm has started.\n");
//start timer
d = new Date();
start = d.getTime();
//while not complete
do
{
//increase the itemset that is being looked at
itemsetNumber++;
//generate the candidates
generateCandidates(itemsetNumber);
//determine and display frequent itemsets
calculateFrequentItemsets(itemsetNumber);
if(candidates.size()!=0)
{
System.out.println("Frequent " + itemsetNumber + "-itemsets");
System.out.println(candidates);
}
//if there are <=1 frequent items, then its the end. This prevents reading through the database again. When there is only one frequent itemset.
}while(candidates.size()>1);
//end timer
d = new Date();
end = d.getTime();
//display the execution time
System.out.println("Execution time is: "+((double)(end-start)/1000) + " seconds.");
}
/************************************************************************
* Method Name : getInput
* Purpose : get user input from System.in
* Parameters : None
* Return : String value of the users input
*************************************************************************/
public static String getInput()
{
String input="";
//read from System.in
BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
//try to get users input, if there is an error print the message
try
{
input = reader.readLine();
}
catch (Exception e)
{
System.out.println(e);
}
return input;
}
/************************************************************************
* Method Name : getConfig
* Purpose : get the configuration information (config filename, transaction filename)
* : configFile and transaFile will be change appropriately
* Parameters : None
* Return : None
*************************************************************************/
private void getConfig()
{
FileWriter fw;
BufferedWriter file_out;
String input="";
//ask if want to change the config
System.out.println("Default Configuration: ");
System.out.println("\tRegular transaction file with '" + itemSep + "' item separator.");
System.out.println("\tConfig File: " + configFile);
System.out.println("\tTransa File: " + transaFile);
System.out.println("\tOutput File: " + outputFile);
System.out.println("\nPress 'C' to change the item separator, configuration file and transaction files");
System.out.print("or any other key to continue. ");
input=getInput();
if(input.compareToIgnoreCase("c")==0)
{
System.out.print("Enter new transaction filename (return for '"+transaFile+"'): ");
input=getInput();
if(input.compareToIgnoreCase("")!=0)
transaFile=input;
System.out.print("Enter new configuration filename (return for '"+configFile+"'): ");
input=getInput();
if(input.compareToIgnoreCase("")!=0)
configFile=input;
System.out.print("Enter new output filename (return for '"+outputFile+"'): ");
input=getInput();
if(input.compareToIgnoreCase("")!=0)
outputFile=input;
System.out.println("Filenames changed");
System.out.print("Enter the separating character(s) for items (return for '"+itemSep+"'): ");
input=getInput();
if(input.compareToIgnoreCase("")!=0)
itemSep=input;
}
try
{
FileInputStream file_in = new FileInputStream(configFile);
BufferedReader data_in = new BufferedReader(new InputStreamReader(file_in));
//number of items
numItems=Integer.valueOf(data_in.readLine()).intValue();
//number of transactions
numTransactions=Integer.valueOf(data_in.readLine()).intValue();
//minsup
minSup=(Double.valueOf(data_in.readLine()).doubleValue());
//output config info to the user
System.out.print("\nInput configuration: "+numItems+" items, "+numTransactions+" transactions, ");
System.out.println("minsup = "+minSup+"%");
System.out.println();
minSup/=100.0;
oneVal = new String[numItems];
System.out.print("Enter 'y' to change the value each row recognizes as a '1':");
if(getInput().compareToIgnoreCase("y")==0)
{
for(int i=0; i<oneVal.length; i++)
{
System.out.print("Enter value for column #" + (i+1) + ": ");
oneVal[i] = getInput();
}
}
else
for(int i=0; i<oneVal.length; i++)
oneVal[i]="1";
//create the output file
fw= new FileWriter(outputFile);
file_out = new BufferedWriter(fw);
//put the number of transactions into the output file
file_out.write(numTransactions + "\n");
file_out.write(numItems + "\n******\n");
file_out.close();
}
//if there is an error, print the message
catch(IOException e)
{
System.out.println(e);
}
}
/************************************************************************
* Method Name : generateCandidates
* Purpose : Generate all possible candidates for the n-th itemsets
* : these candidates are stored in the candidates class vector
* Parameters : n - integer value representing the current itemsets to be created
* Return : None
*************************************************************************/
private void generateCandidates(int n)
{
Vector<String> tempCandidates = new Vector<String>(); //temporary candidate string vector
String str1, str2; //strings that will be used for comparisons
StringTokenizer st1, st2; //string tokenizers for the two itemsets being compared
//if its the first set, candidates are just the numbers
if(n==1)
{
for(int i=1; i<=numItems; i++)
{
tempCandidates.add(Integer.toString(i));
}
}
else if(n==2) //second itemset is just all combinations of itemset 1
{
//add each itemset from the previous frequent itemsets together
for(int i=0; i<candidates.size(); i++)
{
st1 = new StringTokenizer(candidates.get(i));
str1 = st1.nextToken();
for(int j=i+1; j<candidates.size(); j++)
{
st2 = new StringTokenizer(candidates.elementAt(j));
str2 = st2.nextToken();
tempCandidates.add(str1 + " " + str2);
}
}
}
else
{
//for each itemset
for(int i=0; i<candidates.size(); i++)
{
//compare to the next itemset
for(int j=i+1; j<candidates.size(); j++)
{
//create the strigns
str1 = new String();
str2 = new String();
//create the tokenizers
st1 = new StringTokenizer(candidates.get(i));
st2 = new StringTokenizer(candidates.get(j));
//make a string of the first n-2 tokens of the strings
for(int s=0; s<n-2; s++)
{
str1 = str1 + " " + st1.nextToken();
str2 = str2 + " " + st2.nextToken();
}
//if they have the same n-2 tokens, add them together
if(str2.compareToIgnoreCase(str1)==0)
tempCandidates.add((str1 + " " + st1.nextToken() + " " + st2.nextToken()).trim());
}
}
}
//clear the old candidates
candidates.clear();
//set the new ones
candidates = new Vector<String>(tempCandidates);
tempCandidates.clear();
}
/************************************************************************
* Method Name : calculateFrequentItemsets
* Purpose : Determine which candidates are frequent in the n-th itemsets
* : from all possible candidates
* Parameters : n - iteger representing the current itemsets being evaluated
* Return : None
*************************************************************************/
private void calculateFrequentItemsets(int n)
{
Vector<String> frequentCandidates = new Vector<String>(); //the frequent candidates for the current itemset
FileInputStream file_in; //file input stream
BufferedReader data_in; //data input stream
FileWriter fw;
BufferedWriter file_out;
StringTokenizer st, stFile; //tokenizer for candidate and transaction
boolean match; //whether the transaction has all the items in an itemset
boolean trans[] = new boolean[numItems]; //array to hold a transaction so that can be checked
int count[] = new int[candidates.size()]; //the number of successful matches
try
{
//output file
fw= new FileWriter(outputFile, true);
file_out = new BufferedWriter(fw);
//load the transaction file
file_in = new FileInputStream(transaFile);
data_in = new BufferedReader(new InputStreamReader(file_in));
//for each transaction
for(int i=0; i<numTransactions; i++)
{
//System.out.println("Got here " + i + " times"); //useful to debug files that you are unsure of the number of line
stFile = new StringTokenizer(data_in.readLine(), itemSep); //read a line from the file to the tokenizer
//put the contents of that line into the transaction array
for(int j=0; j<numItems; j++)
{
trans[j]=(stFile.nextToken().compareToIgnoreCase(oneVal[j])==0); //if it is not a 0, assign the value to true
}
//check each candidate
for(int c=0; c<candidates.size(); c++)
{
match = false; //reset match to false
//tokenize the candidate so that we know what items need to be present for a match
st = new StringTokenizer(candidates.get(c));
//check each item in the itemset to see if it is present in the transaction
while(st.hasMoreTokens())
{
match = (trans[Integer.valueOf(st.nextToken())-1]);
if(!match) //if it is not present in the transaction stop checking
break;
}
if(match) //if at this point it is a match, increase the count
count[c]++;
}
}
for(int i=0; i<candidates.size(); i++)
{
// System.out.println("Candidate: " + candidates.get(c) + " with count: " + count + " % is: " + (count/(double)numItems));
//if the count% is larger than the minSup%, add to the candidate to the frequent candidates
if((count[i]/(double)numTransactions)>=minSup)
{
frequentCandidates.add(candidates.get(i));
//put the frequent itemset into the output file
file_out.write(candidates.get(i) + "," + count[i]/(double)numTransactions + "\n");
}
}
file_out.write("-\n");
file_out.close();
}
//if error at all in this process, catch it and print the error messate
catch(IOException e)
{
System.out.println(e);
}
//clear old candidates
candidates.clear();
//new candidates are the old frequent candidates
candidates = new Vector<String>(frequentCandidates);
frequentCandidates.clear();
}
}