import numpy as np
import pandas as pd


def train(sample, target):
    """Find-S: return the most specific hypothesis consistent with the
    positive examples.

    Parameters:
        sample: 2-D sequence of attribute rows.
        target: sequence of class labels; "Yes" marks a positive example.

    Returns:
        The maximally specific hypothesis as a list, with '?' for any
        attribute that varies across positive examples.

    Raises:
        ValueError: if no positive example exists to seed the hypothesis
        (the original crashed with UnboundLocalError in that case).
    """
    # Seed with the first positive example.
    specific_hypothesis = None
    for i in range(len(target)):
        if target[i] == "Yes":
            specific_hypothesis = list(sample[i])
            break
    if specific_hypothesis is None:
        raise ValueError("no positive example found; cannot seed Find-S")

    # Generalize: any attribute that disagrees with a later positive
    # example becomes the wildcard '?'.
    for i in range(len(sample)):
        if target[i] == "Yes":
            for j in range(len(specific_hypothesis)):
                if sample[i][j] != specific_hypothesis[j] and specific_hypothesis[j] != '?':
                    specific_hypothesis[j] = '?'
    return specific_hypothesis


if __name__ == "__main__":
    data = pd.read_csv("dataset1.csv")
    # pandas read_csv already consumed the header row, so do NOT slice
    # with [1:] — the original silently dropped the first training example.
    sample = np.array(data)[:, :-1]   # every column except the class label
    target = np.array(data)[:, -1]    # the class-label column
    specific_hypothesis = train(sample, target)
    print("Specific Hypothesis: ", specific_hypothesis)
Candidate Elimination
import numpy as np
import pandas as pd


def train(sample, target):
    """Simplified Candidate Elimination.

    Parameters:
        sample: 2-D sequence of attribute rows.
        target: sequence of class labels; "Yes" marks a positive example.

    Returns:
        (specific_hypothesis, generic_hypothesis): the maximally specific
        boundary S as a list, and G as a list of maximally general
        hypotheses — one single-attribute row per attribute that
        distinguishes a negative example, all-'?' rows dropped.

    Raises:
        ValueError: if there is no positive example. The original seeded S
        from sample[0] unconditionally, which corrupts both boundaries
        whenever row 0 is negative.
    """
    # Seed S from the first POSITIVE example.
    specific_hypothesis = None
    for row, label in zip(sample, target):
        if label == "Yes":
            specific_hypothesis = list(row)
            break
    if specific_hypothesis is None:
        raise ValueError("no positive example found; cannot seed S")

    num_atb = len(specific_hypothesis)
    generic_hypothesis = [['?'] * num_atb for _ in range(num_atb)]

    for i in range(len(sample)):
        if target[i] == "Yes":
            if specific_hypothesis != list(sample[i]):
                for j in range(num_atb):
                    if sample[i][j] != specific_hypothesis[j] and specific_hypothesis[j] != '?':
                        # Generalize S; a generalized attribute can no
                        # longer specialize G.
                        specific_hypothesis[j] = '?'
                        generic_hypothesis[j][j] = '?'
        else:
            for j in range(num_atb):
                if sample[i][j] != specific_hypothesis[j]:
                    # Attribute j distinguishes the negative example:
                    # specialize G on S's value for it.
                    generic_hypothesis[j][j] = specific_hypothesis[j]
                else:
                    generic_hypothesis[j][j] = '?'

    # Discard rows that never specialized (all wildcards).
    all_wild = ['?'] * num_atb
    generic_hypothesis = [g for g in generic_hypothesis if g != all_wild]
    return specific_hypothesis, generic_hypothesis


if __name__ == "__main__":
    data = pd.read_csv("dataset1.csv")
    # pandas read_csv already consumed the header row; the original [1:]
    # slice dropped the first training example by mistake.
    sample = np.array(data)[:, :-1]
    target = np.array(data)[:, -1]
    specific_hypothesis, generic_hypothesis = train(sample, target)
    print(f"Specific Hypothesis: {specific_hypothesis}\nGeneric Hypothesis: {generic_hypothesis}")
Linear Regression
import numpy as np
import pandas as pd


def fit_line(x, y):
    """Least-squares simple linear regression.

    Parameters:
        x, y: equal-length sequences of numbers.

    Returns:
        (slope, intercept) such that y ≈ slope * x + intercept.

    Raises:
        ZeroDivisionError: if x is empty or has no variance.
    """
    mean_x = sum(x) / len(x)
    mean_y = sum(y) / len(y)
    dx = [xi - mean_x for xi in x]
    dy = [yi - mean_y for yi in y]
    # slope = covariance(x, y) / variance(x)
    slope = sum(a * b for a, b in zip(dx, dy)) / sum(a * a for a in dx)
    intercept = mean_y - slope * mean_x
    return slope, intercept


if __name__ == "__main__":
    # matplotlib imported lazily so the module stays importable without it.
    import matplotlib.pyplot as plt

    data = pd.read_csv("dataset.csv")
    x = list(np.array(data)[:, 0])
    y = list(np.array(data)[:, -1])
    slope, intercept = fit_line(x, y)
    # Draw the fitted line across the observed x-range instead of the
    # hard-coded range(-1, 20) the original assumed.
    X = [min(x) - 1, max(x) + 1]
    Y = [xi * slope + intercept for xi in X]
    plt.scatter(x, y)
    plt.plot(X, Y)
    plt.show()
EM (Expectation–Maximization): K-Means vs Gaussian Mixture clustering
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics as sm  # unused here; retained from original
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.mixture import GaussianMixture


def main():
    """Plot iris petal measurements coloured three ways, side by side:
    true species labels, K-Means cluster labels, and Gaussian-Mixture
    (EM) cluster labels on standardized features."""
    dataset = load_iris()
    X = pd.DataFrame(dataset.data,
                     columns=['Sepal_Length', 'Sepal_Width',
                              'Petal_Length', 'Petal_Width'])
    y = pd.DataFrame(dataset.target, columns=['Targets'])

    plt.figure(figsize=(14, 7))
    colormap = np.array(['red', 'lime', 'black'])

    # REAL PLOT — ground-truth species labels.
    plt.subplot(1, 3, 1)
    plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
    plt.title('Real')

    # K-PLOT — cluster labels used directly; the original passed them
    # through np.choose(labels, [0, 1, 2]), an identity remap.
    plt.subplot(1, 3, 2)
    model = KMeans(n_clusters=3)
    model.fit(X)
    predY = model.labels_.astype(np.int64)
    plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[predY], s=40)
    plt.title('KMeans')

    # GMM PLOT — EM clustering on standardized features.
    scaler = preprocessing.StandardScaler()
    scaler.fit(X)
    xs = pd.DataFrame(scaler.transform(X), columns=X.columns)
    gmm = GaussianMixture(n_components=3)
    gmm.fit(xs)
    y_cluster_gmm = gmm.predict(xs)
    plt.subplot(1, 3, 3)
    plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
    plt.title('GMM Classification')

    # The original never called plt.show(), so no window appeared when
    # run as a script.
    plt.show()


if __name__ == "__main__":
    main()
K-Nearest Neighbor (KNN)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as kn


def main():
    """Train a K-Nearest-Neighbour classifier (k=3) on iris and print the
    label mapping and the test-set accuracy."""
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.1)

    # Show the label-index -> species-name mapping.
    for i, name in enumerate(iris.target_names):
        print("Label ", i, '- ', str(name))

    k = 3
    classifier = kn(n_neighbors=k)
    classifier.fit(x_train, y_train)
    # The original computed y_pred = classifier.predict(x_test) and never
    # used it; score() already predicts internally.
    print("Accuracy for k:", k, "is ", classifier.score(x_test, y_test))


if __name__ == "__main__":
    main()
Naive Bayesian Classifier
# import necessary libraries
import pandas as pd
from sklearn import tree  # unused here; retained from original
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder


def main():
    """Train Gaussian Naive Bayes on the play-tennis data set: label-encode
    the categorical columns, split 80/20, fit, and print the accuracy."""
    # load data from CSV
    data = pd.read_csv('tennisdata.csv')
    print("THe first 5 values of data is :\n", data.head())

    # obtain train data and train output
    X = data.iloc[:, :-1]
    print("\nThe First 5 values of train data is\n", X.head())
    y = data.iloc[:, -1]
    print("\nThe first 5 values of Train output is\n", y.head())

    # Label-encode each categorical feature column — one encoder per
    # column, replacing four copies of identical boilerplate.
    for column in ['Outlook', 'Temperature', 'Humidity', 'Windy']:
        X[column] = LabelEncoder().fit_transform(X[column])
    print("\nNow the Train data is :\n", X.head())

    y = LabelEncoder().fit_transform(y)
    print("\nNow the Train output is\n", y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)

    # accuracy_score's contract is (y_true, y_pred); the original swapped
    # them (numerically harmless for accuracy, but wrong by contract).
    print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))


if __name__ == "__main__":
    main()