00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #define DEBUG
00039
00040 #include <iostream>
00041 #include <fstream>
00042 #include <time.h>
00043 #include <stdio.h>
00044 #include <list>
00045 #include <vector>
00046 #include <algorithm>
00047 #include <string>
00048 #include <assert.h>
00049 #include <math.h>
00050 #include <map>
00051 #include "Bitmap.h"
00052 #include "TreeNode.h"
00053 #include "Transaction.h"
00054 #include "ItemsetOutput.h"
00055
00056 using namespace std;
00057
00058
00059
00060
00061 typedef vector<TreeNode *> NodeList;
00062 typedef vector<TreeNode *> BranchList;
00063 typedef vector<Bitmap *> BitmapList;
00064 typedef vector<BaseBitmap *> BaseBitmapList;
00065 typedef vector<int> ItemSet;
00066 typedef map<long, ItemSet *> HashTable;
00067
00068
00069 class SubtreeEstimate {
00070 public:
00071 int Count;
00072 int Sum;
00073 SubtreeEstimate () {
00074 Count = 0;
00075 Sum = 0;
00076 }
00077 };
00078
00079
00080 class TailElement {
00081 public:
00082 int Count;
00083 int Item;
00084
00085 TailElement () {
00086 Count = 0;
00087 Item = 0;
00088 }
00089
00090 bool operator < (const TailElement& rhs) const {
00091 return this->Count < rhs.Count;
00092 };
00093 };
00094
00095
00096
00097
00098 string method;
00099 char* outFilename;
00100 ItemsetOutput *outFile;
00101 bool outputMFI = false;
00102 bool MethodIsFI = false;
00103 bool MethodIsFCI = false;
00104 int ItemCount;
00105 int TransCount;
00106 double MSF;
00107 int MS;
00108 int VerScale = 1;
00109 int HorScale = 1;
00110 bool GoFHUT = true;
00111 bool HUTMFI = true;
00112 bool PEPrune = true;
00113 bool Reorder = true;
00114
00115
00116
00117
00118
00119
00120 int CountFHUT = 0;
00121 int CountNodes = 0;
00122 int CountCounts = 0;
00123 int CountAnds = 0;
00124 int CountSmallAnds = 0;
00125 int CountPEPrunes = 0;
00126 int CountCheckPosition = 0;
00127 int CountHUTMFI = 0;
00128 int CountHUTMFISuccess = 0;
00129 int CountRebuilds;
00130
00131
00132
00133
00134
00135
00136 int maxtail = 0;
00137 int MFISize = 0;
00138 int MFIDepth = 0;
00139 int F1size = 0;
00140 int FullF1size = 0;
00141 int k = 50;
00142 int MAX_compID = 1;
00143 int projectDepth = -1;
00144 int EstimateSize;
00145 int EstimateDiv = 5;
00146 int maxItemsetSize = 0;
00147
00148
00149
00150
00151
00152
00153 NodeList F1;
00154 BitmapList TransBuffy;
00155 BaseBitmapList NameBuffy;
00156 NodeList NodeBuffy;
00157 TreeNode *Root;
00158 TailElement *gTail;
00159 TailElement *TailBuffy;
00160 Bitmap *NullTrans;
00161 int *ItemMap;
00162 int *ReverseItemMap;
00163 BaseBitmapList MFI;
00164 HashTable HT;
00165 vector <int> SupportCountList;
00166 BaseBitmap* TempName;
00167 SubtreeEstimate* EstimateBuffy;
00168 int *MFIBySizes;
00169 int *ItemsetBuffy;
00170
00171
00172
00173
00174
00175
00176 time_t total_start, total_finish;
00177 double total_time;
00178 time_t read_start, read_finish;
00179 double read_time;
00180 time_t algorithm_start, algorithm_finish;
00181 double algorithm_time;
00182 time_t print_start, print_finish;
00183 double print_time;
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200 void AddToF1(TreeNode *newNode) {
00201 if (F1.empty())
00202 F1.push_back(newNode);
00203 else {
00204
00205 NodeList::iterator noli = F1.begin();
00206 while (noli != F1.end()) {
00207 if ((*noli)->Trans->_count >= newNode->Trans->_count) {
00208 F1.insert(noli, newNode);
00209 return ;
00210 }
00211 noli++;
00212 }
00213
00214
00215 F1.push_back(newNode);
00216 }
00217 }
00218
00219
00220
00221
00222
00223
00224 void F1UsingProb(double P) {
00225 int blah = 0;
00226
00227
00228
00229 for (int i = 0; i < ItemCount / HorScale; i++) {
00230
00231 Bitmap *trans = new Bitmap(TransCount);
00232 trans->FillRand(P);
00233
00234
00235 if (trans->Count(blah) >= MS) {
00236 TreeNode *node = new TreeNode(NULL, trans, 1, -1, i, -1, 0);
00237 for (int r = 0; r < HorScale; r++)
00238 AddToF1(node);
00239 } else
00240 delete trans;
00241 }
00242 }
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254 void F1FromFile(char *filename, bool isAsciiFile) {
00255 TransCount = 0;
00256 int MAXitemID = -1;
00257 int *Counters = new int[MAX_NUM_ITEMS];
00258 int *F1IndexList = new int[MAX_NUM_ITEMS];
00259 int *InvF1IndexList = new int[MAX_NUM_ITEMS];
00260 int itemIndex = 0;
00261
00262
00263 for (int ct = 0; ct < MAX_NUM_ITEMS; ++ct) {
00264 Counters[ct] = 0;
00265 F1IndexList[ct] = -1;
00266 InvF1IndexList[ct] = -1;
00267 }
00268
00269 time(&read_start);
00270 BitmapList Trans;
00271
00272 int *itemlist = new int[MAX_NUM_ITEMS];
00273 InputData *inData = new InputData(filename, itemlist, isAsciiFile);
00274 if (!inData->isOpen()) {
00275 cerr << "Input file not found!" << endl;
00276 exit(1);
00277 }
00278
00279 Transaction *newTransaction = inData->getNextTransaction();
00280 while (newTransaction != NULL) {
00281 for (itemIndex = 0; itemIndex < newTransaction->length; itemIndex++) {
00282
00283 if (newTransaction->itemlist[itemIndex] >= MAX_NUM_ITEMS) {
00284 cerr << "Read item_id=" << newTransaction->itemlist[itemIndex]
00285 << " which is more than the max item_id allowed (" << MAX_NUM_ITEMS << ")";
00286 exit(1);
00287 }
00288
00289 if (newTransaction->itemlist[itemIndex] > MAXitemID)
00290 MAXitemID = newTransaction->itemlist[itemIndex];
00291
00292 Counters[newTransaction->itemlist[itemIndex]]++;
00293 }
00294
00295 TransCount++;
00296 delete newTransaction;
00297 newTransaction = inData->getNextTransaction();
00298 }
00299
00300 delete newTransaction;
00301 delete inData;
00302
00303 MS = (int)ceil(MSF * (double)TransCount);
00304
00305
00306 ItemCount = MAXitemID + 1;
00307
00308 int F1items = 0;
00309
00310 for (itemIndex = 0; itemIndex <= MAXitemID; itemIndex++) {
00311 if (Counters[itemIndex] >= MS) {
00312 F1IndexList[F1items++] = itemIndex;
00313 InvF1IndexList[itemIndex] = F1items - 1;
00314 Bitmap *trans = new Bitmap(TransCount);
00315 trans->_count = Counters[itemIndex];
00316 Trans.push_back(trans);
00317 }
00318 }
00319
00320 int transIndex = 0;
00321 InputData *inData2 = new InputData(filename, itemlist, isAsciiFile);
00322 newTransaction = inData2->getNextTransaction();
00323 while (newTransaction != NULL) {
00324 for (int itemIndex = 0; itemIndex < newTransaction->length; itemIndex++) {
00325 if (InvF1IndexList[newTransaction->itemlist[itemIndex]] != -1) {
00326 Trans[InvF1IndexList[newTransaction->itemlist[itemIndex]]]->FillEmptyPosition(transIndex);
00327 }
00328 }
00329
00330 transIndex++;
00331 delete newTransaction;
00332 newTransaction = inData2->getNextTransaction();
00333 }
00334
00335 delete newTransaction;
00336 delete inData2;
00337 delete [] itemlist;
00338
00339 time(&read_finish);
00340 read_time = difftime(read_finish, read_start);
00341 printf("Reading input time: %.2f seconds.\n", read_time);
00342
00343
00344 BitmapList::iterator bli = Trans.begin();
00345 itemIndex = 0;
00346
00347 while (bli != Trans.end()) {
00348 TreeNode *node = new TreeNode(
00349 NULL,
00350 (*bli),
00351 1,
00352 -1,
00353 F1IndexList[itemIndex],
00354 -1,
00355 0);
00356 AddToF1(node);
00357 bli++;
00358 itemIndex++;
00359 }
00360
00361 delete [] Counters;
00362 delete [] F1IndexList;
00363 delete [] InvF1IndexList;
00364 }
00365
00366
00367
00368
00369
00370
00371 void PrintMFI() {
00372
00373 outFile = new ItemsetOutput(outFilename);
00374 if (!outFile->isOpen()) {
00375 cerr << "Output file not open!" << endl;
00376 exit(1);
00377 }
00378
00379 if (FullF1size != 0) {
00380
00381 int* ITEMS = new int[FullF1size];
00382 for (int i = 0; i < MFISize; i++) {
00383 int j = 0;
00384 for (int cc = 0; cc < FullF1size; cc++) {
00385 if (MFI[i]->CheckPosition(cc, CountCheckPosition) > 0) {
00386 ITEMS[j] = ItemMap[cc];
00387 j++;
00388 }
00389 }
00390
00391 outFile->printSet(MFI[i]->_count, ITEMS, SupportCountList[i]);
00392 }
00393 delete [] ITEMS;
00394 } else {
00395 outFile->printSet(0, NULL, TransCount);
00396 }
00397
00398 delete outFile;
00399 }
00400
00401 void cerrMFI() {
00402 if (FullF1size != 0) {
00403
00404 int* ITEMS = new int[FullF1size];
00405 for (int i = 0; i < MFISize; i++) {
00406 int j = 0;
00407 cerr << "MFI i=" << i << ":";
00408 for (int cc = 0; cc < FullF1size; cc++) {
00409 if (MFI[i]->CheckPosition(cc, CountCheckPosition) > 0) {
00410 cerr << " " << cc;
00411 j++;
00412 }
00413 }
00414
00415 cerr << endl;
00416 }
00417 delete [] ITEMS;
00418 }
00419 }
00420
00421 void PrintItemset(BaseBitmap *printSet) {
00422 if (FullF1size != 0) {
00423 for (int cc = 0; cc < FullF1size; cc++) {
00424 if (printSet->CheckPosition(cc, CountCheckPosition) > 0) {
00425 cerr << cc << " ";
00426 }
00427 }
00428 cerr << endl;
00429 }
00430 }
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449 bool LMFISuperSet(TreeNode* location) {
00450 return (location->rEnd > location->rBegin);
00451 }
00452
00453
00454
00455
00456
00457
00458 void AddToFI(TreeNode *C) {
00459 int itemsetIndex = 0;
00460 for (int cc = 0; cc < FullF1size; cc++) {
00461 if (C->Name->CheckPosition(cc, CountCheckPosition) > 0) {
00462 ItemsetBuffy[itemsetIndex] = ItemMap[cc];
00463 itemsetIndex++;
00464 }
00465 }
00466
00467 MFIBySizes[C->Name->_count]++;
00468 if (C->Name->_count > maxItemsetSize)
00469 maxItemsetSize = C->Name->_count;
00470
00471 if (outputMFI)
00472 outFile->printSet(C->Name->_count, ItemsetBuffy, C->Trans->_count);
00473
00474
00475 MFIDepth += C->Name->_count;
00476 MFISize++;
00477 }
00478
00479
00480
00481
00482
00483
00484 void AddToMFI(TreeNode *C) {
00485
00486 BaseBitmap *name = new BaseBitmap(*C->Name);
00487
00488
00489 MFI.push_back(name);
00490
00491
00492 C->rEnd++;
00493
00494
00495 MFIDepth += C->Name->_count;
00496 MFISize++;
00497
00498 SupportCountList.push_back(C->Trans->_count);
00499 MFIBySizes[name->_count]++;
00500 if (name->_count > maxItemsetSize)
00501 maxItemsetSize = name->_count;
00502 }
00503
00504
00505
00506
00507
00508
00509 void AddToFCI(TreeNode *C) {
00510
00511 HashTable::iterator h = HT.find(C->Trans->_count);
00512
00513
00514 if (h == HT.end()) {
00515
00516 ItemSet* newList = new ItemSet();
00517 newList->reserve(500);
00518 newList->push_back(MFI.size());
00519 HT.insert(HashTable::value_type(C->Trans->_count, newList));
00520
00521
00522 } else {
00523 for (ItemSet::reverse_iterator goli = (*h).second->rbegin();
00524 goli != (*h).second->rend();
00525 goli++)
00526 if (MFI[*goli]->Superset(C->Name))
00527 return ;
00528
00529
00530 (*h).second->push_back(MFI.size());
00531 }
00532
00533
00534 BaseBitmap *name = new BaseBitmap(*C->Name);
00535
00536
00537 MFI.push_back(name);
00538
00539
00540 MFIDepth += C->Name->_count;
00541 MFISize++;
00542
00543 SupportCountList.push_back(C->Trans->_count);
00544 MFIBySizes[name->_count]++;
00545 if (name->_count > maxItemsetSize)
00546 maxItemsetSize = name->_count;
00547 }
00548
00549 int SortLMFI(int rBegin, int rEnd, int sortBy) {
00550 int o = rBegin;
00551 int p = rEnd - 1;
00552 while ( o <= p) {
00553 while (o < rEnd && !MFI[o]->CheckPosition(sortBy, CountCheckPosition))
00554 o++;
00555 while (p >= rBegin && MFI[p]->CheckPosition(sortBy, CountCheckPosition))
00556 p--;
00557 if (o < p) {
00558
00559
00560 BaseBitmap* tempBitmap = MFI[o];
00561 MFI[o] = MFI[p];
00562 MFI[p] = tempBitmap;
00563 tempBitmap = NULL;
00564 o++;
00565 p--;
00566 }
00567 }
00568
00569 return o;
00570 }
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580 bool CheckHUTMFI(TreeNode *C, int iTAIL) {
00581
00582
00583
00584 int rBegin = C->rBegin;
00585 int rEnd = C->rEnd;
00586 for (; iTAIL < C->tEnd; iTAIL++) {
00587 rBegin = SortLMFI(rBegin, rEnd, F1[gTail[iTAIL].Item]->Prefix);
00588
00589 if (rEnd <= rBegin)
00590 return false;
00591 }
00592
00593 return true;
00594 }
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608 void ReorderTail(
00609 TreeNode *C,
00610 int &iTAIL,
00611 bool useComp,
00612 bool &NoChild,
00613 bool &AllFreq) {
00614
00615 int tailIndex = 0;
00616 int lol = 0;
00617
00618 for (lol = iTAIL; lol < C->tEnd; lol++) {
00619 Bitmap *trans = TransBuffy[C->Depth];
00620 int theCount = 0;
00621
00622
00623 if (useComp && (F1[gTail[lol].Item]->compID != C->compID)) {
00624 F1[gTail[lol].Item]->Trans->BuildRelComp(
00625 *TransBuffy[projectDepth]);
00626 F1[gTail[lol].Item]->compID = C->compID;
00627 CountRebuilds++;
00628 }
00629
00630
00631 if (useComp) {
00632
00633 trans->AndCompOnly(
00634 *C->Trans,
00635 *F1[gTail[lol].Item]->Trans,
00636 CountSmallAnds);
00637 theCount = trans->SmallCount(CountCounts);
00638
00639
00640 } else {
00641
00642 trans->AndOnly(*C->Trans, *F1[gTail[lol].Item]->Trans, CountAnds);
00643 theCount = trans->Count(CountCounts);
00644 }
00645
00646
00647 if (theCount >= MS) {
00648
00649 if (PEPrune && (trans->_count == C->Trans->_count)) {
00650
00651 C->Name->Or(*C->Name, *F1[gTail[lol].Item]->Name);
00652 CountPEPrunes++;
00653
00654
00655 } else {
00656 NoChild = false;
00657
00658
00659 TailBuffy[tailIndex].Count = theCount;
00660 TailBuffy[tailIndex].Item = gTail[lol].Item;
00661 tailIndex++;
00662 }
00663 } else
00664 AllFreq = false;
00665 }
00666
00667 sort(TailBuffy, TailBuffy + tailIndex);
00668
00669
00670 iTAIL = C->tEnd;
00671 C->tEnd = iTAIL + tailIndex;
00672 C->tBegin = iTAIL;
00673 int r = 0;
00674
00675
00676 for (lol = iTAIL; lol < C->tEnd; lol++) {
00677 gTail[lol] = TailBuffy[r];
00678 r++;
00679 }
00680 }
00681
00682
00683
00684
00685
00686
00687
00688
00689 void NoorderTail(
00690 TreeNode *C,
00691 int &iTAIL) {
00692
00693
00694 iTAIL = C->tEnd;
00695 C->tEnd = C->tEnd - C->tBegin + C->tEnd;
00696 C->tBegin = iTAIL;
00697 int r = 0;
00698
00699
00700 for (int lol = iTAIL; lol < C->tEnd; lol++) {
00701 gTail[lol] = gTail[lol - C->tEnd + C->tBegin];
00702 r++;
00703 }
00704 }
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715 void MAFIA(TreeNode *C, bool HUT, bool &FHUT, bool useComp) {
00716 CountNodes++;
00717 int iTAIL = C->tBegin;
00718 bool NoChild = true;
00719 bool AllFreq = true;
00720 FHUT = false;
00721 int beforeCountNodes = CountNodes;
00722 int frequentTailSize = C->tEnd - iTAIL;
00723
00724 if (iTAIL < C->tEnd) {
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735 if (C != Root)
00736 if (Reorder)
00737 ReorderTail(C, iTAIL, useComp, NoChild, AllFreq);
00738 else
00739 NoorderTail(C, iTAIL);
00740
00741 frequentTailSize = C->tEnd - iTAIL;
00742
00743 int estimateTail = (int)(frequentTailSize / (double)EstimateDiv);
00744 if (estimateTail > 0 && C->Trans->_count != TransCount) {
00745 double estimateSubTree = EstimateBuffy[estimateTail].Sum / (double)EstimateBuffy[estimateTail].Count;
00746 double support = C->Trans->_count / (double) TransCount;
00747 double factor = 11.597 - 29.914 * (support - .52392) * (support - .52392);
00748 double cost = factor * frequentTailSize / (1 - support);
00749
00750
00751 if ((!useComp) && (estimateSubTree > cost)) {
00752
00753
00754 C->Trans->BuildSource();
00755
00756
00757 projectDepth = C->Depth - 1;
00758
00759
00760 C->compID = MAX_compID;
00761 MAX_compID++;
00762 useComp = true;
00763 }
00764 }
00765
00766
00767
00768
00769
00770 while (iTAIL < C->tEnd) {
00771
00772 Bitmap *trans = TransBuffy[C->Depth];
00773 BaseBitmap *name = NameBuffy[C->Depth];
00774 TreeNode *node = NodeBuffy[C->Depth];
00775
00776
00777 name->Or(*C->Name, *F1[gTail[iTAIL].Item]->Name);
00778
00779
00780 if (useComp && (F1[gTail[iTAIL].Item]->compID != C->compID)) {
00781
00782 F1[gTail[iTAIL].Item]->Trans->BuildRelComp(
00783 *TransBuffy[projectDepth]);
00784 F1[gTail[iTAIL].Item]->compID = C->compID;
00785 CountRebuilds++;
00786 }
00787
00788 int theCount = 0;
00789
00790
00791 if (useComp) {
00792
00793 trans->AndCompOnly(
00794 *C->Trans,
00795 *F1[gTail[iTAIL].Item]->Trans,
00796 CountSmallAnds);
00797
00798 if (Reorder)
00799 trans->_count = gTail[iTAIL].Count;
00800 else
00801 theCount = trans->SmallCount(CountCounts);
00802 } else {
00803
00804 trans->AndOnly(
00805 *C->Trans,
00806 *F1[gTail[iTAIL].Item]->Trans,
00807 CountAnds);
00808
00809 if (Reorder)
00810 trans->_count = gTail[iTAIL].Count;
00811 else
00812 theCount = trans->Count(CountCounts);
00813 }
00814 if (!Reorder && PEPrune && (theCount == C->Trans->_count)) {
00815 CountPEPrunes++;
00816 C->Name->Or(*C->Name, *F1[gTail[iTAIL].Item]->Name);
00817 iTAIL++;
00818 continue;
00819 }
00820
00821
00822
00823 if ((iTAIL != C->tBegin) && (C != Root))
00824 HUT = 0;
00825 else
00826 HUT = 1;
00827
00828 if (!AllFreq)
00829 HUT = 0;
00830
00831 if (Reorder || (theCount >= MS)) {
00832
00833 node->setTreeNode(name,
00834 trans,
00835 C->Depth + 1,
00836 C->compID,
00837 F1[gTail[iTAIL].Item]->Prefix,
00838 iTAIL + 1,
00839 C->tEnd);
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849 node->rEnd = C->rEnd;
00850 node->rBegin = SortLMFI(C->rBegin, C->rEnd, node->Prefix);
00851
00852
00853 if (HUTMFI && node->tBegin != node->tEnd && !HUT) {
00854 CountHUTMFI++;
00855
00856 if (CheckHUTMFI(node, node->tBegin)) {
00857
00858 CountHUTMFISuccess++;
00859 AllFreq = false;
00860 break;
00861 }
00862 }
00863
00864 NoChild = false;
00865
00866 MAFIA(node, HUT, FHUT, useComp);
00867
00868
00869
00870
00871 C->rEnd = node->rEnd;
00872 } else
00873 AllFreq = false;
00874
00875
00876 if (FHUT) {
00877
00878 if (HUT) {
00879 return ;
00880
00881
00882
00883 } else {
00884 FHUT = false;
00885 break;
00886 }
00887 }
00888
00889
00890 iTAIL++;
00891 }
00892 }
00893
00894
00895 if (HUT && AllFreq && GoFHUT) {
00896 FHUT = true;
00897 CountFHUT++;
00898 }
00899
00900
00901 if (MethodIsFI)
00902 AddToFI(C);
00903 else if (MethodIsFCI && C != Root)
00904 AddToFCI(C);
00905 else if (NoChild && !LMFISuperSet(C)) {
00906 AddToMFI(C);
00907 }
00908
00909 int subtreeSize = CountNodes - beforeCountNodes + 1;
00910 int estimateTail = (int)(frequentTailSize / (double)EstimateDiv);
00911 if (estimateTail > 0 && C->Trans->_count != TransCount) {
00912 EstimateBuffy[estimateTail].Count++;
00913 EstimateBuffy[estimateTail].Sum += subtreeSize;
00914 }
00915 }
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925 void MergeRepeatedItemsets() {
00926 NodeList::iterator bali = F1.begin();
00927 Bitmap out(*(*bali)->Trans);
00928 int blah = 0;
00929
00930
00931 while (bali != F1.end()) {
00932 out.FillOnes();
00933 NodeList::iterator noli = bali;
00934 noli++;
00935
00936
00937 while (noli != F1.end()) {
00938
00939 if ((*bali)->Trans->_count != (*noli)->Trans->_count)
00940 break;
00941 else {
00942
00943 out.AndOnly(*(*bali)->Trans, *(*noli)->Trans, CountAnds);
00944 out.Count(blah);
00945
00946
00947 if (out._count == (*noli)->Trans->_count) {
00948 (*bali)->Name->Or(*(*bali)->Name, *(*noli)->Name);
00949 F1.erase(noli);
00950 } else
00951 noli++;
00952 }
00953 }
00954 bali++;
00955 }
00956 }
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970 int main(int argc, char **argv) {
00971
00972
00973 if (argc < 5) {
00974 cerr << "Usage: " << argv[0] << " [-mfi/-fci/-fi] [min sup (percent)] " << endl;
00975 cerr << "\t[-ascii/-binary] [input filename] " << endl;
00976 cerr << "\t[output filename (optional)]" << endl;
00977 cerr << "Ex: " << argv[0] << " -mfi .5 -ascii connect4.ascii mfi.txt" << endl;
00978 cerr << "Ex: " << argv[0] << " -mfi .3 -binary chess.binary" << endl;
00979 exit(0);
00980 }
00981
00982
00983 time(&total_start);
00984
00985
00986 method = argv[1];
00987
00988
00989 MSF = atof(argv[2]);
00990 if (strcmp(argv[3], "-ascii") == 0)
00991 F1FromFile(argv[4], true);
00992 else if (strcmp(argv[3], "-binary") == 0)
00993 F1FromFile(argv[4], false);
00994 else {
00995 cerr << "File format must be -ascii or -binary" << endl;
00996 exit(1);
00997 }
00998
00999 if (argc == 6) {
01000 outputMFI = true;
01001 outFilename = argv[5];
01002 }
01003
01004
01005 NullTrans = new Bitmap(TransCount);
01006 NullTrans->FillOnes();
01007 NullTrans->_count = TransCount;
01008 ItemSet NullList;
01009
01010 MFIBySizes = new int[MAX_ITEMSET_SIZE];
01011 for (int h = 0; h < MAX_ITEMSET_SIZE; h++) {
01012 MFIBySizes[h] = 0;
01013 }
01014
01015
01016 FullF1size = F1size = F1.size();
01017
01018
01019 if (FullF1size != 0) {
01020
01021 BaseBitmap *NullName = new BaseBitmap(FullF1size);
01022 MFI.reserve(100000);
01023
01024 int p = 0;
01025 ItemMap = new int[FullF1size];
01026 ItemsetBuffy = new int[FullF1size];
01027
01028
01029 for (NodeList::iterator nli = F1.begin(); nli != F1.end(); nli++) {
01030
01031 ItemMap[p] = (*nli)->Prefix;
01032
01033
01034 (*nli)->Prefix = p;
01035
01036
01037 (*nli)->Name = new BaseBitmap(FullF1size);
01038 (*nli)->Name->FillEmptyPosition(p);
01039 (*nli)->Name->_count = 1;
01040 p++;
01041 }
01042
01043
01044
01045 MergeRepeatedItemsets();
01046 F1size = F1.size();
01047
01048
01049 maxtail = F1size * (F1size + 1) / 2;
01050 gTail = new TailElement[maxtail];
01051
01052
01053 TailBuffy = new TailElement[F1size];
01054
01055
01056 EstimateSize = (int)ceil(F1size / (double)EstimateDiv);
01057 EstimateBuffy = new SubtreeEstimate[EstimateSize];
01058 for (int estimateIndex = 0; estimateIndex < EstimateSize; estimateIndex++) {
01059 EstimateBuffy[estimateIndex].Count = 1;
01060 EstimateBuffy[estimateIndex].Sum = estimateIndex * EstimateDiv * estimateIndex * EstimateDiv / 2;
01061 }
01062
01063
01064 int uu;
01065 for (uu = 0; uu < maxtail; uu++) {
01066 gTail[uu].Item = -1;
01067 gTail[uu].Count = 0;
01068 }
01069
01070
01071 for (uu = 0; uu < F1size; uu++) {
01072 gTail[uu].Item = uu;
01073 gTail[uu].Count = F1[uu]->Trans->_count;
01074
01075
01076 F1[uu]->tBegin = uu + 1;
01077 if (uu == F1size - 1)
01078 F1[uu]->tBegin = -1;
01079
01080 TempName = new BaseBitmap(FullF1size);
01081
01082
01083 BaseBitmap *name = new BaseBitmap(FullF1size);
01084 NameBuffy.push_back(name);
01085 Bitmap *buff = new Bitmap(TransCount);
01086 TransBuffy.push_back(buff);
01087 TreeNode *newNode = new TreeNode();
01088 NodeBuffy.push_back(newNode);
01089 }
01090
01091 srand(666);
01092 bool FHUT;
01093
01094
01095 clock_t start, finish;
01096 double duration = -1;
01097 start = clock();
01098
01099 time(&algorithm_start);
01100
01101
01102 Root = new TreeNode(NullName, NullTrans, 0, 0, -1, 0, F1size);
01103
01104
01105 Root->rBegin = 0;
01106 Root->rEnd = 0;
01107
01108
01109 if (method.compare("-fci") == 0) {
01110
01111
01112 GoFHUT = false;
01113 HUTMFI = false;
01114 PEPrune = true;
01115 Reorder = true;
01116 MethodIsFCI = true;
01117
01118 MAFIA(Root, false, FHUT, false);
01119 } else if (method.compare("-mfi") == 0) {
01120
01121 GoFHUT = true;
01122 HUTMFI = true;
01123 PEPrune = false;
01124 Reorder = true;
01125
01126 MAFIA(Root, true, FHUT, false);
01127 } else if (method.compare("-fi") == 0) {
01128 if (outputMFI) {
01129
01130 outFile = new ItemsetOutput(outFilename);
01131 if (!outFile->isOpen()) {
01132 cerr << "Output file not open!" << endl;
01133 exit(1);
01134 }
01135 }
01136
01137
01138 GoFHUT = false;
01139 HUTMFI = false;
01140 PEPrune = false;
01141 Reorder = false;
01142 MethodIsFI = true;
01143
01144 MAFIA(Root, false, FHUT, false);
01145
01146 if (outputMFI) {
01147 delete outFile;
01148 }
01149 } else {
01150 cerr << "Invalid algorithm option!" << endl;
01151 exit(0);
01152 }
01153
01154 finish = clock();
01155 duration = (finish - start) / (double)CLOCKS_PER_SEC;
01156
01157 printf( "Algorithm CPU time: %.3f seconds.\n", duration );
01158
01159 time(&algorithm_finish);
01160 algorithm_time = difftime(algorithm_finish, algorithm_start);
01161 printf("Algorithm time: %.2f seconds.\n", algorithm_time);
01162 } else {
01163 MFIBySizes[0]++;
01164 }
01165
01166 #ifdef DEBUG
01167
01168 cout << "F1size: " << FullF1size << endl;
01169 cout << "MFISize: " << MFI.size() << endl;
01170 cout << "MFIAvg: " << MFIDepth / (double) MFISize << endl;
01171 cout << "CountNodes: " << CountNodes << endl;
01172 cout << "CountAnds: " << CountAnds << endl;
01173 cout << "CountSmallAnds: " << CountSmallAnds << endl;
01174 cout << "CountRebuilds: " << CountRebuilds << endl;
01175 cout << "CountCounts: " << CountCounts << endl << endl;
01176 cout << "CountFHUT: " << CountFHUT << endl;
01177 cout << "CountHUTMFI: " << CountHUTMFI << endl;
01178 cout << "CountHUTMFISuccess: " << CountHUTMFISuccess << endl;
01179 cout << "CountCheckPosition: " << CountCheckPosition << endl;
01180 cout << "CountPEPrunes: " << CountPEPrunes << endl;
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205 #endif
01206
01207
01208
01209
01210
01211 if (outputMFI && !MethodIsFI) {
01212
01213
01214 time(&print_start);
01215
01216 PrintMFI();
01217
01218 time(&print_finish);
01219 print_time = difftime(print_finish, print_start);
01220 printf("Printing output time: %.2f seconds.\n", print_time);
01221 }
01222
01223 time(&total_finish);
01224 total_time = difftime(total_finish, total_start);
01225 printf("Total time: %.2f seconds.\n\n\n", total_time);
01226
01227 return 0;
01228 }
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370