# Toy example: train a scikit-learn decision tree on five hand-written
# observations, print two predictions and the learned rules, and render the
# tree with Graphviz.
#
# Setup: pip install scikit-learn graphviz
# (the PyPI package is "scikit-learn"; it is imported as "sklearn").
#
# References:
#   https://en.wikipedia.org/wiki/Decision_tree
#   https://en.wikipedia.org/wiki/Decision_tree_learning
#   https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
#
# Background notes:
#   - CART = Classification and Regression Trees.
#   - n observations = rows; m features = columns.
#   - Hyper-parameters that limit tree growth: max_depth, min_samples_leaf, ...
#
# Illustrative symptom table (three features plus a FLU label):
#
#      FEVER  COUGHING  HEADACHE  FLU
#   0    1       0         0       1
#   1    0       1         1       1
#   2    0       1         1       0
#   3    0       1         0       0
#   4    1       1         0       1
#
# NOTE: the table above is only illustrative. The training data below uses
# FLU as a fourth *feature* column and a separate label vector Y, and its
# last row differs from the table.
#
# As with other classifiers, DecisionTreeClassifier takes as input two arrays:
#   an array X, sparse or dense, of shape (n_samples, n_features) holding the
#   training samples, and an array Y of integer values, shape (n_samples,),
#   holding the class labels for the training samples.

# pickle, StringIO, dump and load are not used by the statements below; they
# are kept in case later parts of the file rely on them.
import pickle
from io import StringIO

from joblib import dump, load
from sklearn import tree
from sklearn.tree import export_text

# Column names for X, shared by the text export and the Graphviz export.
# NOTE(review): 'flue' looks like a typo for 'flu'; left unchanged because it
# appears verbatim in the printed/rendered output.
FEATURE_NAMES = ['fever', 'cough', 'head', 'flue']

# Training samples, one row per observation.
# Columns: FEVER COUGHING HEADACHE FLU
X = [
    [1, 0, 0, 1],
    [0, 1, 1, 1],
    [0, 1, 1, 0],
    [0, 1, 0, 0],
    [0, 1, 0, 1],
]

# "COCO" virus result for each row of X: 1 = yes, 0 = no.
Y = [1, 0, 0, 0, 1]

# https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
# With the default settings no maximums (max_depth, min_samples_leaf, ...) are
# set, so the tree keeps splitting until its leaves are pure, which overfits.
#
# random_state is fixed because scikit-learn randomly permutes the features at
# every split, even with splitter="best". For this data 'head' and 'flue' are
# equally good first splits, so without a seed the fitted tree (and the
# exports below) can differ from run to run.
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(X, Y)

# predict() expects a 2-D input of shape (n_samples, n_features), hence the two
# square brackets [[...]] for a single sample. scikit-learn's own hint:
# "Reshape your data either using array.reshape(-1, 1) if your data has a
# single feature or array.reshape(1, -1) if it contains a single sample."
print(clf.predict([[1, 1, 1, 0]]))
print(clf.predict([[0, 0, 0, 1]]))

# Human-readable dump of the learned decision rules.
r = export_text(clf, feature_names=FEATURE_NAMES)
print(r)

# Output recorded by the original author (needs head = 0 and flu = 1 for a
# positive result --- very overfit model):
# [1]
# [1]
# |--- head <= 0.50
# |   |--- flue <= 0.50
# |   |   |--- class: 0
# |   |--- flue >  0.50
# |   |   |--- class: 1
# |--- head >  0.50
# |   |--- class: 0
# NOTE(review): with the current X/Y the first prediction appears to be [0]
# (head = 1 and flu = 0 both lead to class 0), so the recorded "[1]" looks
# stale -- TODO confirm by running. The tree shown is one of the two
# equally-good trees; which one random_state=0 selects is not verified here.

# Imported here rather than at the top so that the text output above is still
# produced when the graphviz package is not installed.
import graphviz

# With out_file=None, export_graphviz returns the DOT source as a string.
dotfile = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=FEATURE_NAMES,
    class_names=None,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dotfile)
# Writes the DOT source to "iris" and the rendered drawing next to it.
# NOTE(review): the name "iris" is presumably left over from the scikit-learn
# documentation example; kept so existing output paths do not change.
graph.render("iris")

# Persistence sketch (not executed). Only unpickle data you trust.
#s = pickle.dumps(clf)
#clf2 = pickle.loads(s)
#clf2.predict(X[0:1])
#y[0]
#x columns can be a series of