#pip install scikit-learn   (the PyPI name "sklearn" is a deprecated alias; the import name is still `sklearn`)
#https://en.wikipedia.org/wiki/Decision_tree
#https://en.wikipedia.org/wiki/Decision_tree_learning
#https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
# Useful DecisionTreeClassifier hyperparameters to tune: max_depth, min_samples_leaf
#Classification and Regression Trees
#n observations = rows
#m = features or columns
#	FEVER	COUGHING	HEADACHE	FLU
#0	1	0	0	1
#1	0	1	1	1
#2	0	1	1	0
#3	0	1	0	0
#4	1	1	0	1
#As with other classifiers, DecisionTreeClassifier takes as input two arrays: 
# an array X, sparse or dense, of shape (n_samples, n_features) holding the training samples, 
# and an array Y of integer values, shape (n_samples,), holding the class labels for the training samples:
from sklearn import tree
from joblib import dump, load
import pickle

from io import StringIO


# Training samples, one row per observation.
# Column order: FEVER, COUGHING, HEADACHE, FLU
# (presumably 1 = yes / 0 = no, matching the label encoding below).
X = [
    [1, 0, 0, 1],
    [0, 1, 1, 1],
    [0, 1, 1, 0],
    [0, 1, 0, 0],
    [0, 1, 0, 1],
]
# Class labels, one per row of X: COCO virus result, 1 = yes, 0 = no.
Y = [1, 0, 0, 0, 1]
from sklearn.ensemble import RandomForestClassifier

# A random forest bootstraps the rows and randomises the splits, so without a
# fixed seed the probabilities printed below change from run to run. Pinning
# random_state makes the script's output reproducible.
clf = RandomForestClassifier(n_estimators=10, random_state=0)
clf = clf.fit(X, Y)

# predict_proba expects a 2-D array of shape (n_samples, n_features); a single
# sample therefore has to be wrapped in an outer list, hence the double
# brackets [[...]] (equivalent to array.reshape(1, -1)).
# Each output row holds one probability per class, ordered as in clf.classes_.
print(clf.predict_proba([[0, 1, 1, 0]]))
print(clf.predict_proba([[0, 0, 0, 1]]))
# Instead of a hard class label, predict_proba returns class probabilities.
# For a single decision tree this is the fraction of training samples of each
# class in the leaf the sample lands in; a random forest reports the mean of
# those per-tree probabilities across all of its trees.