Main Page   Modules   Namespace List   Class Hierarchy   Data Structures   File List   Data Fields   Globals  

Mafia.good.cpp File Reference

#include <iostream>
#include <fstream>
#include <time.h>
#include <stdio.h>
#include <list>
#include <vector>
#include <algorithm>
#include <string>
#include <assert.h>
#include <math.h>
#include <map>
#include "Bitmap.h"
#include "TreeNode.h"
#include "Transaction.h"
#include "ItemsetOutput.h"

Include dependency graph for Mafia.good.cpp:

Include dependency graph

Data Structures

class  SubtreeEstimate
 Simple class for storing subtree size estimates. More...

class  TailElement
 Simple class for storing tail elements of each node of the tree. More...


Defines

#define DEBUG

Typedefs

typedef vector< TreeNode * > NodeList
typedef vector< TreeNode * > BranchList
typedef vector< Bitmap * > BitmapList
typedef vector< BaseBitmap * > BaseBitmapList
typedef vector< int > ItemSet
typedef map< long, ItemSet * > HashTable

Functions

void AddToF1 (TreeNode *newNode)
 Insert pointer to node in order of increasing support.

void F1UsingProb (double P)
 Create bitmaps filled randomly with probability p.

void F1FromFile (char *filename, bool isAsciiFile)
 Read transaction data from file and build item bitmaps.

void PrintMFI ()
 To print the MFI data out to an ASCII file with each entry having the format: [list of items in MFI entry...] [(support)].

void cerrMFI ()
void PrintItemset (BaseBitmap *printSet)
bool LMFISuperSet (TreeNode *location)
 Check for an existing superset of name in the MFI.

void AddToFI (TreeNode *C)
 Output itemset (don't need to save bitmaps for FI).

void AddToMFI (TreeNode *C)
 Add this node's name bitmap to the MFI list.

void AddToFCI (TreeNode *C)
 Add this node's name bitmap to the FCI list.

int SortLMFI (int rBegin, int rEnd, int sortBy)
bool CheckHUTMFI (TreeNode *C, int iTAIL)
 Determine whether a HUTMFI is true.

void ReorderTail (TreeNode *C, int &iTAIL, bool useComp, bool &NoChild, bool &AllFreq)
 Dynamically reorder the elements in the tail by increasing support - Expand all children and sort by increasing support - Remove infrequent children.

void NoorderTail (TreeNode *C, int &iTAIL)
 Simply copy over the tail without expanding any of the children for pure DFS (no expansion of all children).

void MAFIA (TreeNode *C, bool HUT, bool &FHUT, bool useComp)
 The main MAFIA algorithm function.

void MergeRepeatedItemsets ()
 Merge repeated itemsets into one combined itemset - e.g.

int main (int argc, char **argv)

Variables

string method
 either -mfi or -fci or -fi

char * outFilename
 filename for output

ItemsetOutputoutFile
 file for ouput

bool outputMFI = false
 true if MFI should be saved to a file

bool MethodIsFI = false
 true if the method is -fi

bool MethodIsFCI = false
 true if the method is -fci

int ItemCount
 # of items in the file

int TransCount
 # of transactions in the file

double MSF
 user-defined min sup as percentage

int MS
 min sup as a transaction count

int VerScale = 1
 Scaling factor for transactions.

int HorScale = 1
 Scaling factor for items.

bool GoFHUT = true
 FHUT flag.

bool HUTMFI = true
 HUTMFI flag.

bool PEPrune = true
 PEPrune flag -- parent equivalent pruning.

bool Reorder = true
 Reorder flag.

int CountFHUT = 0
 # of times FHUT was successful

int CountNodes = 0
 # of frequent nodes in the tree

int CountCounts = 0
 # of Counts or all nodes in the tree

int CountAnds = 0
 # of ANDs of normal bitmaps

int CountSmallAnds = 0
 # of compressed bitmap ANDs

int CountPEPrunes = 0
 # of PEPruning

int CountCheckPosition = 0
 # of CheckPosition calls

int CountHUTMFI = 0
 # of HUTMFI attempts

int CountHUTMFISuccess = 0
 # of HUTMFI successes

int CountRebuilds
 # of Rebuilds

int maxtail = 0
int MFISize = 0
 MFI size before pruning.

int MFIDepth = 0
 The aggregated depth of the all MFI elements.

int F1size = 0
 # of frequent 1-itemsets after merging repeats

int FullF1size = 0
 # of frequent 1-itemsets

int k = 50
 # of items checked for a MFI lookup

int MAX_compID = 1
 max compression ID

int projectDepth = -1
 depth of the bitmap you're projecting from

int EstimateSize
 size of subtree estimation buffer

int EstimateDiv = 5
 bucket size by frequent tail length

int maxItemsetSize = 0
 max size of a frequent itemset

NodeList F1
 List of frequent 1-itemsets.

BitmapList TransBuffy
 Buffer of transaction bitmaps.

BaseBitmapList NameBuffy
 Buffer of name bitmaps.

NodeList NodeBuffy
 Buffer of tree nodess.

TreeNodeRoot
 The root (the nullset).

TailElementgTail
 global tail pointer

TailElementTailBuffy
 Common Buffer for tail elements.

BitmapNullTrans
 A transaction bitmap filled with ones.

int * ItemMap
 For renaming items after sorting by support.

int * ReverseItemMap
 For remembering the renaming...

BaseBitmapList MFI
 List of Maximally Frequent Itemsets.

HashTable HT
 Hash table of transaction supports.

vector< int > SupportCountList
 List that stores support count.

BaseBitmapTempName
 Temporary buffer for one name bitmap.

SubtreeEstimateEstimateBuffy
 Buffer of subtree estimates.

int * MFIBySizes
 Buffer for counting MFI by itemset size.

int * ItemsetBuffy
 Buffer for writing itemsets to file output.

time_t total_start
time_t total_finish
double total_time
time_t read_start
time_t read_finish
double read_time
time_t algorithm_start
time_t algorithm_finish
double algorithm_time
time_t print_start
time_t print_finish
double print_time

Define Documentation

#define DEBUG
 


Typedef Documentation

typedef vector<BaseBitmap *> BaseBitmapList
 

typedef vector<Bitmap *> BitmapList
 

typedef vector<TreeNode *> BranchList
 

typedef map<long, ItemSet *> HashTable
 

typedef vector<int> ItemSet
 

typedef vector<TreeNode *> NodeList
 


Function Documentation

void AddToF1 TreeNode   newNode
 

Insert pointer to node in order of increasing support.

Parameters:
newNode  item node to add to F1

void AddToFCI TreeNode   C
 

Add this node's name bitmap to the FCI list.

Parameters:
C  the current node

void AddToFI TreeNode   C
 

Output itemset (don't need to save bitmaps for FI).

Parameters:
C  the current node

void AddToMFI TreeNode   C
 

Add this node's name bitmap to the MFI list.

Parameters:
C  the current node

bool CheckHUTMFI TreeNode   C,
int    iTAIL
 

Determine whether a HUTMFI is true.

  • if HUT is in MFI, then HUT is frequent and the subtree rooted at this node can be pruned
Returns:
True if HUT is in the MFI

void F1FromFile char *    filename,
bool    isAsciiFile
 

Read transaction data from file and build item bitmaps.

If the data is in ascii form: [itemid_1] [itemid_2] ... [itemid_n] If the data is in binary form: [custid] [transid] [number of items] [itemid_1] [itemid_2] ... [itemid_n]

Parameters:
filename  name of file to be read in
isAsciiFile  true for ASCII input, false for binary

void F1UsingProb double    P
 

Create bitmaps filled randomly with probability p.

Parameters:
P  - probability of a bit p being set

bool LMFISuperSet TreeNode   location
 

Check for an existing superset of name in the MFI.

Parameters:
location  The node we're at. Use this to examine only the relevant portion of the MFI
Returns:
True - if superset found. False - if no superset.

void MAFIA TreeNode   C,
bool    HUT,
bool &    FHUT,
bool    useComp
 

The main MAFIA algorithm function.

Parameters:
C  the current node
HUT  whether this is a HUT check (left most branch)
FHUT  [output] whether the HUT is frequent
useComp  if compression has been switched on

int main int    argc,
char **    argv
 

void MergeRepeatedItemsets  
 

Merge repeated itemsets into one combined itemset - e.g.

if (transaction set of item 4) AND (transaction set item 5) = (transaction set item 5), then item 5 is a duplicate of item 4 due to increasing support

void NoorderTail TreeNode   C,
int &    iTAIL
 

Simply copy over the tail without expanding any of the children for pure DFS (no expansion of all children).

Parameters:
C  current node
iTAIL  index into tail of current node

void PrintMFI  
 

To print the MFI data out to an ASCII file with each entry having the format: [list of items in MFI entry...] [(support)].

void ReorderTail TreeNode   C,
int &    iTAIL,
bool    useComp,
bool &    NoChild,
bool &    AllFreq
 

Dynamically reorder the elements in the tail by increasing support - Expand all children and sort by increasing support - Remove infrequent children.

Parameters:
C  current node
iTAIL  index into tail of current node
useComp  whether compressed bitmaps should be used
NoChild  whether C has any frequent children
AllFreq  whether all children are frequent

int SortLMFI int    rBegin,
int    rEnd,
int    sortBy
 


Variable Documentation

time_t algorithm_finish
 

time_t algorithm_start
 

double algorithm_time
 

int CountAnds = 0
 

# of ANDs of normal bitmaps

int CountCheckPosition = 0
 

# of CheckPosition calls

int CountCounts = 0
 

# of Counts or all nodes in the tree

int CountFHUT = 0
 

# of times FHUT was successful

int CountHUTMFI = 0
 

# of HUTMFI attempts

int CountHUTMFISuccess = 0
 

# of HUTMFI successes

int CountNodes = 0
 

# of frequent nodes in the tree

int CountPEPrunes = 0
 

# of PEPruning

int CountRebuilds
 

# of Rebuilds

int CountSmallAnds = 0
 

# of compressed bitmap ANDs

SubtreeEstimate* EstimateBuffy
 

Buffer of subtree estimates.

int EstimateDiv = 5
 

bucket size by frequent tail length

int EstimateSize
 

size of subtree estimation buffer

NodeList F1
 

List of frequent 1-itemsets.

int F1size = 0
 

# of frequent 1-itemsets after merging repeats

int FullF1size = 0
 

# of frequent 1-itemsets

bool GoFHUT = true
 

FHUT flag.

TailElement* gTail
 

global tail pointer

int HorScale = 1
 

Scaling factor for items.

HashTable HT
 

Hash table of transaction supports.

bool HUTMFI = true
 

HUTMFI flag.

int ItemCount
 

# of items in the file

int* ItemMap
 

For renaming items after sorting by support.

int* ItemsetBuffy
 

Buffer for writing itemsets to file output.

int k = 50
 

# of items checked for a MFI lookup

int MAX_compID = 1
 

max compression ID

int maxItemsetSize = 0
 

max size of a frequent itemset

int maxtail = 0
 

string method
 

either -mfi or -fci or -fi

bool MethodIsFCI = false
 

true if the method is -fci

bool MethodIsFI = false
 

true if the method is -fi

BaseBitmapList MFI
 

List of Maximally Frequent Itemsets.

int* MFIBySizes
 

Buffer for counting MFI by itemset size.

int MFIDepth = 0
 

The aggregated depth of the all MFI elements.

int MFISize = 0
 

MFI size before pruning.

int MS
 

min sup as a transaction count

double MSF
 

user-defined min sup as percentage

BaseBitmapList NameBuffy
 

Buffer of name bitmaps.

NodeList NodeBuffy
 

Buffer of tree nodess.

Bitmap* NullTrans
 

A transaction bitmap filled with ones.

ItemsetOutput* outFile
 

file for ouput

char* outFilename
 

filename for output

bool outputMFI = false
 

true if MFI should be saved to a file

bool PEPrune = true
 

PEPrune flag -- parent equivalent pruning.

time_t print_finish
 

time_t print_start
 

double print_time
 

int projectDepth = -1
 

depth of the bitmap you're projecting from

time_t read_finish
 

time_t read_start
 

double read_time
 

bool Reorder = true
 

Reorder flag.

int* ReverseItemMap
 

For remembering the renaming...

TreeNode* Root
 

The root (the nullset).

vector<int> SupportCountList
 

List that stores support count.

TailElement* TailBuffy
 

Common Buffer for tail elements.

BaseBitmap* TempName
 

Temporary buffer for one name bitmap.

time_t total_finish
 

time_t total_start
 

double total_time
 

BitmapList TransBuffy
 

Buffer of transaction bitmaps.

int TransCount
 

# of transactions in the file

int VerScale = 1
 

Scaling factor for transactions.


Generated on Thu Dec 4 13:52:18 2003 for MAFIA by doxygen1.2.18