#include <iostream>
#include <fstream>
#include <time.h>
#include <stdio.h>
#include <list>
#include <vector>
#include <algorithm>
#include <string>
#include <assert.h>
#include <math.h>
#include <map>
#include "Bitmap.h"
#include "TreeNode.h"
#include "Transaction.h"
#include "ItemsetOutput.h"
Include dependency graph for Mafia.good.cpp:
Data Structures | |
class | SubtreeEstimate |
Simple class for storing subtree size estimates. More... | |
class | TailElement |
Simple class for storing tail elements of each node of the tree. More... | |
Defines | |
#define | DEBUG |
Typedefs | |
typedef vector< TreeNode * > | NodeList |
typedef vector< TreeNode * > | BranchList |
typedef vector< Bitmap * > | BitmapList |
typedef vector< BaseBitmap * > | BaseBitmapList |
typedef vector< int > | ItemSet |
typedef map< long, ItemSet * > | HashTable |
Functions | |
void | AddToF1 (TreeNode *newNode) |
Insert pointer to node in order of increasing support. | |
void | F1UsingProb (double P) |
Create bitmaps filled randomly with probability p. | |
void | F1FromFile (char *filename, bool isAsciiFile) |
Read transaction data from file and build item bitmaps. | |
void | PrintMFI () |
To print the MFI data out to an ASCII file with each entry having the format: [list of items in MFI entry...] [(support)]. | |
void | cerrMFI () |
void | PrintItemset (BaseBitmap *printSet) |
bool | LMFISuperSet (TreeNode *location) |
Check for an existing superset of name in the MFI. | |
void | AddToFI (TreeNode *C) |
Output itemset (don't need to save bitmaps for FI). | |
void | AddToMFI (TreeNode *C) |
Add this node's name bitmap to the MFI list. | |
void | AddToFCI (TreeNode *C) |
Add this node's name bitmap to the FCI list. | |
int | SortLMFI (int rBegin, int rEnd, int sortBy) |
bool | CheckHUTMFI (TreeNode *C, int iTAIL) |
Determine whether a HUTMFI is true. | |
void | ReorderTail (TreeNode *C, int &iTAIL, bool useComp, bool &NoChild, bool &AllFreq) |
Dynamically reorder the elements in the tail by increasing support - Expand all children and sort by increasing support - Remove infrequent children. | |
void | NoorderTail (TreeNode *C, int &iTAIL) |
Simply copy over the tail without expanding any of the children for pure DFS (no expansion of all children). | |
void | MAFIA (TreeNode *C, bool HUT, bool &FHUT, bool useComp) |
The main MAFIA algorithm function. | |
void | MergeRepeatedItemsets () |
Merge repeated itemsets into one combined itemset - e.g. | |
int | main (int argc, char **argv) |
Variables | |
string | method |
either -mfi or -fci or -fi | |
char * | outFilename |
filename for output | |
ItemsetOutput * | outFile |
file for ouput | |
bool | outputMFI = false |
true if MFI should be saved to a file | |
bool | MethodIsFI = false |
true if the method is -fi | |
bool | MethodIsFCI = false |
true if the method is -fci | |
int | ItemCount |
# of items in the file | |
int | TransCount |
# of transactions in the file | |
double | MSF |
user-defined min sup as percentage | |
int | MS |
min sup as a transaction count | |
int | VerScale = 1 |
Scaling factor for transactions. | |
int | HorScale = 1 |
Scaling factor for items. | |
bool | GoFHUT = true |
FHUT flag. | |
bool | HUTMFI = true |
HUTMFI flag. | |
bool | PEPrune = true |
PEPrune flag -- parent equivalent pruning. | |
bool | Reorder = true |
Reorder flag. | |
int | CountFHUT = 0 |
# of times FHUT was successful | |
int | CountNodes = 0 |
# of frequent nodes in the tree | |
int | CountCounts = 0 |
# of Counts or all nodes in the tree | |
int | CountAnds = 0 |
# of ANDs of normal bitmaps | |
int | CountSmallAnds = 0 |
# of compressed bitmap ANDs | |
int | CountPEPrunes = 0 |
# of PEPruning | |
int | CountCheckPosition = 0 |
# of CheckPosition calls | |
int | CountHUTMFI = 0 |
# of HUTMFI attempts | |
int | CountHUTMFISuccess = 0 |
# of HUTMFI successes | |
int | CountRebuilds |
# of Rebuilds | |
int | maxtail = 0 |
int | MFISize = 0 |
MFI size before pruning. | |
int | MFIDepth = 0 |
The aggregated depth of the all MFI elements. | |
int | F1size = 0 |
# of frequent 1-itemsets after merging repeats | |
int | FullF1size = 0 |
# of frequent 1-itemsets | |
int | k = 50 |
# of items checked for a MFI lookup | |
int | MAX_compID = 1 |
max compression ID | |
int | projectDepth = -1 |
depth of the bitmap you're projecting from | |
int | EstimateSize |
size of subtree estimation buffer | |
int | EstimateDiv = 5 |
bucket size by frequent tail length | |
int | maxItemsetSize = 0 |
max size of a frequent itemset | |
NodeList | F1 |
List of frequent 1-itemsets. | |
BitmapList | TransBuffy |
Buffer of transaction bitmaps. | |
BaseBitmapList | NameBuffy |
Buffer of name bitmaps. | |
NodeList | NodeBuffy |
Buffer of tree nodess. | |
TreeNode * | Root |
The root (the nullset). | |
TailElement * | gTail |
global tail pointer | |
TailElement * | TailBuffy |
Common Buffer for tail elements. | |
Bitmap * | NullTrans |
A transaction bitmap filled with ones. | |
int * | ItemMap |
For renaming items after sorting by support. | |
int * | ReverseItemMap |
For remembering the renaming... | |
BaseBitmapList | MFI |
List of Maximally Frequent Itemsets. | |
HashTable | HT |
Hash table of transaction supports. | |
vector< int > | SupportCountList |
List that stores support count. | |
BaseBitmap * | TempName |
Temporary buffer for one name bitmap. | |
SubtreeEstimate * | EstimateBuffy |
Buffer of subtree estimates. | |
int * | MFIBySizes |
Buffer for counting MFI by itemset size. | |
int * | ItemsetBuffy |
Buffer for writing itemsets to file output. | |
time_t | total_start |
time_t | total_finish |
double | total_time |
time_t | read_start |
time_t | read_finish |
double | read_time |
time_t | algorithm_start |
time_t | algorithm_finish |
double | algorithm_time |
time_t | print_start |
time_t | print_finish |
double | print_time |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Insert pointer to node in order of increasing support.
|
|
Add this node's name bitmap to the FCI list.
|
|
Output itemset (don't need to save bitmaps for FI).
|
|
Add this node's name bitmap to the MFI list.
|
|
Determine whether a HUTMFI is true.
|
|
Read transaction data from file and build item bitmaps. If the data is in ascii form: [itemid_1] [itemid_2] ... [itemid_n] If the data is in binary form: [custid] [transid] [number of items] [itemid_1] [itemid_2] ... [itemid_n]
|
|
Create bitmaps filled randomly with probability p.
|
|
Check for an existing superset of name in the MFI.
|
|
The main MAFIA algorithm function.
|
|
|
|
Merge repeated itemsets into one combined itemset - e.g. if (transaction set of item 4) AND (transaction set item 5) = (transaction set item 5), then item 5 is a duplicate of item 4 due to increasing support |
|
Simply copy over the tail without expanding any of the children for pure DFS (no expansion of all children).
|
|
To print the MFI data out to an ASCII file with each entry having the format: [list of items in MFI entry...] [(support)].
|
|
Dynamically reorder the elements in the tail by increasing support - Expand all children and sort by increasing support - Remove infrequent children.
|
|
|
|
|
|
|
|
|
|
# of ANDs of normal bitmaps
|
|
# of CheckPosition calls
|
|
# of Counts or all nodes in the tree
|
|
# of times FHUT was successful
|
|
# of HUTMFI attempts
|
|
# of HUTMFI successes
|
|
# of frequent nodes in the tree
|
|
# of PEPruning
|
|
# of Rebuilds
|
|
# of compressed bitmap ANDs
|
|
Buffer of subtree estimates.
|
|
bucket size by frequent tail length
|
|
size of subtree estimation buffer
|
|
List of frequent 1-itemsets.
|
|
# of frequent 1-itemsets after merging repeats
|
|
# of frequent 1-itemsets
|
|
FHUT flag.
|
|
global tail pointer
|
|
Scaling factor for items.
|
|
Hash table of transaction supports.
|
|
HUTMFI flag.
|
|
# of items in the file
|
|
For renaming items after sorting by support.
|
|
Buffer for writing itemsets to file output.
|
|
# of items checked for a MFI lookup
|
|
max compression ID
|
|
max size of a frequent itemset
|
|
|
|
either -mfi or -fci or -fi
|
|
true if the method is -fci
|
|
true if the method is -fi
|
|
List of Maximally Frequent Itemsets.
|
|
Buffer for counting MFI by itemset size.
|
|
The aggregated depth of the all MFI elements.
|
|
MFI size before pruning.
|
|
min sup as a transaction count
|
|
user-defined min sup as percentage
|
|
Buffer of name bitmaps.
|
|
Buffer of tree nodess.
|
|
A transaction bitmap filled with ones.
|
|
file for ouput
|
|
filename for output
|
|
true if MFI should be saved to a file
|
|
PEPrune flag -- parent equivalent pruning.
|
|
|
|
|
|
|
|
depth of the bitmap you're projecting from
|
|
|
|
|
|
|
|
Reorder flag.
|
|
For remembering the renaming...
|
|
The root (the nullset).
|
|
List that stores support count.
|
|
Common Buffer for tail elements.
|
|
Temporary buffer for one name bitmap.
|
|
|
|
|
|
|
|
Buffer of transaction bitmaps.
|
|
# of transactions in the file
|
|
Scaling factor for transactions.
|