#pip install scikit-learn   (the "sklearn" name on PyPI is a deprecated alias)
#https://en.wikipedia.org/wiki/Decision_tree
#https://en.wikipedia.org/wiki/Decision_tree_learning
#https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
#key parameters for limiting tree growth: max_depth, min_samples_leaf
#Classification and Regression Trees
#n observations = rows
#m = features or columns
#	FEVER	COUGHING	HEADACHE	FLU
#0	1	0	0	1
#1	0	1	1	1
#2	0	1	1	0
#3	0	1	0	0
#4	0	1	0	1
#As with other classifiers, DecisionTreeClassifier takes as input two arrays: 
# an array X, sparse or dense, of shape (n_samples, n_features) holding the training samples, 
# and an array Y of integer values, shape (n_samples,), holding the class labels for the training samples:
from sklearn import tree
from joblib import dump, load
import pickle

from io import StringIO


# Toy training set. Each record pairs one observation's feature row with its
# class label so the two cannot drift out of step.
# Feature column order: FEVER, COUGHING, HEADACHE, FLU
_LABELLED_ROWS = [
    ([1, 0, 0, 1], 1),
    ([0, 1, 1, 1], 0),
    ([0, 1, 1, 0], 0),
    ([0, 1, 0, 0], 0),
    ([0, 1, 0, 1], 1),
]

# X: list of feature rows, shape (n_samples, n_features) = (5, 4).
X = [features for features, _ in _LABELLED_ROWS]
# Y: one label per row of X, in the same order; 1 = yes.
# The original note calls the target "COCO virus" -- presumably COVID; TODO confirm.
Y = [label for _, label in _LABELLED_ROWS]
# Reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
# No max_depth / min_samples_leaf is set, so the tree is grown without a size
# limit; on a data set this small that means it simply memorises the rows.
# random_state is pinned because scikit-learn permutes the features at every
# split and breaks ties between equally good splits at random. Without a fixed
# seed the tree that gets printed/rendered can differ from run to run.
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(X, Y)

# predict() expects a 2-D input of shape (n_samples, n_features), so a single
# sample still has to be wrapped in an outer list -- hence the double brackets.
print(clf.predict([[1, 1, 1, 0]]))
print(clf.predict([[0, 0, 0, 1]]))

from sklearn.tree import export_text

# Labels shown for the four feature columns, in the same order as the columns
# of the training rows.
feature_labels = ['fever', 'cough', 'head', 'flue']

# Text rendering of the fitted tree's decision rules, printed to stdout.
r = export_text(clf, feature_names=feature_labels)
print(r)
 
#class 1 needs head = 0 and flu = 1 --- very overfit model
#[0]
#[1]
#|--- head <= 0.50
#|   |--- flue <= 0.50
#|   |   |--- class: 0
#|   |--- flue >  0.50
#|   |   |--- class: 1
#|--- head >  0.50
#|   |--- class: 0


import graphviz

# Export the fitted tree as DOT source. out_file=None makes export_graphviz
# return the DOT text as a string instead of writing it to a file.
dotfile = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=['fever', 'cough', 'head', 'flue'],
    class_names=None,
    filled=True,
    rounded=True,
    special_characters=True,
)

# Wrap the DOT text and render it to disk under the base name "iris".
# NOTE(review): "iris" looks like a leftover from the scikit-learn iris
# example rather than a name for this data set -- confirm before renaming.
graph = graphviz.Source(dotfile)
graph.render("iris")
#s = pickle.dumps(clf)
#clf2 = pickle.loads(s)
#clf2.predict(X[0:1])

#y[0]
#x columns can be a series of