#include "header.h"

/*
 * ID3 - recursively build a decision tree, based on Quinlan's ID3 algorithm.
 *
 * matrix  Training data: matrix->data[j][i] is attribute i of sample j,
 *         with matrix->width attributes and matrix->height samples.
 * parent  Node this one hangs from, or NULL when building the root.
 * target  Column index of the attribute being predicted; it is never
 *         considered as a split attribute.
 * state   ON or OFF: which branch of 'parent' this node is attached to.
 *         Ignored when 'parent' is NULL.
 *
 * Returns the newly allocated node. Interior nodes carry the chosen split
 * in 'idx'/'threshold' and own two recursively built children in 'on' and
 * 'off'. End-of-branch nodes have on == off == NULL and store the status
 * reported by negentropy() in 'idx'.
 *
 * Allocation failure, or a matrix that offers nothing to split on, is
 * reported through err_exit().
 */
NODE* ID3 ( MATRIX *matrix, NODE* parent, UINT target, UINT state)
{
    NEGENTROPY negentropy_struct;
    NODE *node;
    UINT n_vars = matrix->width, n_samples = matrix->height, i, j;
    UINT split = 0;
    REAL **data = matrix->data;
    REAL best_threshold = 0;
    REAL min_negentropy, trial_negentropy;
    int have_split = 0;

    /* With no samples, or no attribute other than the target, the search
     * below would never evaluate a partition and the results read after
     * it would be indeterminate. Report it instead of continuing.
     * NOTE(review): err_exit() is presumed not to return; the 'return'
     * is only a safety net in case it does. */
    if (n_samples == 0 || n_vars == 0 || (n_vars == 1 && target == 0))
    {
        err_exit (__FILE__, __LINE__);
        return NULL;
    }

    /* Allocate memory for this node */
    node = malloc (sizeof *node);
    if (!node)
        err_exit (__FILE__, __LINE__);

    /* Set up links in decision tree. This is done before the search
     * because negentropy() is handed this node while it is still being
     * built.
     * NOTE(review): negentropy() presumably follows the parent links to
     * decide which samples reach this node - confirm before reordering. */
    node->parent = parent;  /* Set address of parent node */

    if (parent != NULL)     /* parent to child; not relevant for root node */
    {
        /* Pass address of this node to the parent node */
        if (state == ON)
            parent->on = node;
        else if (state == OFF)
            parent->off = node;
    }

    /* Select attribute with lowest negentropy for splitting. Scan through
     * ALL attributes (except target) and ALL data samples. This is
     * inefficient for data sets with repeated values, but will do for
     * illustrative purposes. */
    min_negentropy = 1.0;

    for (i = 0; i < n_vars; i++)
    {
        if (i == target)
            continue;       /* never split on the attribute being predicted */

        for (j = 0; j < n_samples; j++)
        {
            /* Set trial values for this node... */
            node->idx = i;
            node->threshold = data[j][i];

            /* ...and calculate the negentropy of this partition */
            negentropy_struct = negentropy (data, n_samples, node, target);
            trial_negentropy = negentropy_struct.ne;

            /* Retain the index and threshold of the lowest negentropy
             * seen so far. The first trial is always accepted so that
             * 'split' and 'best_threshold' are defined even when no
             * partition scores below the initial bound of 1.0; ties
             * keep the earliest candidate. */
            if (!have_split || trial_negentropy < min_negentropy)
            {
                min_negentropy = trial_negentropy;
                split = i;
                best_threshold = data[j][i];
                have_split = 1;
            }
        }
    }

    /* Save the final choice of attribute and threshold */
    node->idx = split;
    node->threshold = best_threshold;

    /* If the negentropy routine found itself at an end-of-branch
     * for the decision tree, the 'status' flag in 'negentropy_struct'
     * is set to ON or OFF and the node labelled accordingly. Otherwise,
     * ID3 continues to call itself until all end-of-branch nodes are found.
     * NOTE(review): 'negentropy_struct' here is the result of the LAST
     * trial evaluated, not of the winning split; this is only correct if
     * 'status' does not depend on the trial attribute/threshold - confirm
     * against negentropy(). */
    if (negentropy_struct.status != INACTIVE)
    {
        node->on = node->off = NULL;
        node->idx = negentropy_struct.status;
    }
    else
    {
        node->on  = ID3 (matrix, node, target, ON);
        node->off = ID3 (matrix, node, target, OFF);
    }

    return node;
}