""" Utility code for drawing decision trees Author: Nathan Sprague Version: 1/26/2020 """ from matplotlib import pyplot as plt import numpy as np def draw_tree(X, y, tree): """Plot training data and...

Python programming assignment about creating a decision tree. Details are in the screenshot and in decision_tree.py.


""" Utility code for drawing decision trees Author: Nathan Sprague Version: 1/26/2020 """ from matplotlib import pyplot as plt import numpy as np def draw_tree(X, y, tree): """Plot training data and node boundaries for a 2d decision tree with at most three classes. In order for this code to work, the provided tree must have: - A _root attribute containing a root node - a get_depth() method Nodes must have: - left and right attributes - a split attribute containing a Split object """ shapes = ['r*', 'bo', 'gs'] for i, cl in enumerate(sorted(set(y))): indices = y == cl plt.plot(X[indices,0], X[indices,1], shapes[i], markersize=10) range_x_0 = np.max(X[:, 0]) - np.min(X[:, 0]) range_x_1 = np.max(X[:, 1]) - np.min(X[:, 1]) bounds = [np.min(X[:, 0]) - .05 * range_x_0, np.max(X[:, 0]) + .05 * range_x_0, np.min(X[:, 1]) - .05 * range_x_1, np.max(X[:, 1]) + .05 * range_x_1,] _draw_tree(tree._root, bounds, 0, tree.get_depth()) plt.xlabel("$x_0$") plt.ylabel("$x_1$") plt.axis(bounds) plt.show() def _draw_tree(node, bounds, cur_depth, max_depth): """ Recursive helper method. """ linewidth = ((max_depth - cur_depth) / max_depth) * 3 + .1 if node.left is not None: if node.split.dim == 0: plt.plot([node.split.pos, node.split.pos], [bounds[2], bounds[3]], '--',linewidth=linewidth, color='black') left_bounds = [bounds[0], node.split.pos, bounds[2], bounds[3]] _draw_tree(node.left, left_bounds, cur_depth + 1, max_depth) right_bounds = [node.split.pos, bounds[1], bounds[2], bounds[3]] _draw_tree(node.right, right_bounds, cur_depth + 1, max_depth) if node.split.dim == 1: plt.plot([bounds[0], bounds[1]], [node.split.pos, node.split.pos],'--', linewidth=linewidth, color='black') left_bounds = [bounds[0], bounds[1], bounds[2], node.split.pos] _draw_tree(node.left, left_bounds, cur_depth + 1, max_depth) right_bounds = [bounds[0], bounds[1], node.split.pos, bounds[3]] _draw_tree(node.right, right_bounds, cur_depth + 1, max_depth) """Pure Python Decision Tree Classifier and Regressor. 
Simple binary decision tree classifier and regressor. Splits for classification are based on Gini impurity. Splits for regression are based on variance. Author: CS445 Instructor and ??? Version: """ from collections import namedtuple, Counter import numpy as np from abc import ABC # Named tuple is a quick way to create a simple wrapper class... Split_ = namedtuple('Split', ['dim', 'pos', 'X_left', 'y_left', 'counts_left', 'X_right', 'y_right', 'counts_right']) class Split(Split_): """ Represents a possible split point during the decision tree creation process. Attributes: dim (int): the dimension along which to split pos (float): the position of the split X_left (ndarray): all X entries that are <= to the split position y_left (ndarray): labels corresponding to X_left counts_left (Counter): label counts X_right (ndarray): all X entries that are > the split position y_right (ndarray): labels corresponding to X_right counts_right (Counter): label counts """ def __repr__(self): result = "Split(dim={}, pos={},\nX_left=\n".format(self.dim, self.pos) result += repr(self.X_left) + ",\ny_left=" result += repr(self.y_left) + ",\ncounts_left=" result += repr(self.counts_left) + ",\nX_right=\n" result += repr(self.X_right) + ",\ny_right=" result += repr(self.y_right) + ",\ncounts_right=" result += repr(self.counts_right) + ")" return result def split_generator(X, y, keep_counts=True): """ Utility method for generating all possible splits of a data set for the decision tree construction algorithm. :param X: Numpy array with shape (num_samples, num_features) :param y: Numpy array with length num_samples :param keep_counts: Maintain counters (only useful for classification.) :return: A generator for Split objects that will yield all possible splits of the data """ # Loop over all of the dimensions. 
for dim in range(X.shape[1]): if np.issubdtype(y.dtype, np.integer): counts_left = Counter() counts_right = Counter(y) else: counts_left = None counts_right = None # Get the indices in sorted order so we can sort both data and labels ind = np.argsort(X[:, dim]) # Copy the data and the labels in sorted order X_sort = X[ind, :] y_sort = y[ind] last_split = 0 # Loop through the midpoints between each point in the # current dimension for index in range(1, X_sort.shape[0]): # don't try to split between equal points. if X_sort[index - 1, dim] != X_sort[index, dim]: pos = (X_sort[index - 1, dim] + X_sort[index, dim]) / 2.0 if np.issubdtype(y.dtype, np.integer): flipped_counts = Counter(y_sort[last_split:index]) counts_left = counts_left + flipped_counts counts_right = counts_right - flipped_counts last_split = index # Yield a possible split. Note that the slicing here does # not make a copy, so this should be relatively fast. yield Split(dim, pos, X_sort[0:index, :], y_sort[0:index], counts_left,
Sep 14, 2021
SOLUTION.PDF

Get Answer To This Question

Related Questions & Answers

More Questions »

Submit New Assignment

Copy and Paste Your Assignment Here