import numpy as np
import pandas as pd


def train(sample, target):
    """Find-S: return the most specific hypothesis consistent with the
    positive examples.

    Parameters:
        sample: 2-D sequence of attribute rows.
        target: sequence of class labels; "Yes" marks a positive example.

    Returns:
        The maximally specific hypothesis as a list, with '?' for any
        attribute that varies across positive examples.

    Raises:
        ValueError: if no positive example exists to seed the hypothesis
        (the original crashed with UnboundLocalError in that case).
    """
    # Seed with the first positive example.
    specific_hypothesis = None
    for i in range(len(target)):
        if target[i] == "Yes":
            specific_hypothesis = list(sample[i])
            break
    if specific_hypothesis is None:
        raise ValueError("no positive example found; cannot seed Find-S")

    # Generalize: any attribute that disagrees with a later positive
    # example becomes the wildcard '?'.
    for i in range(len(sample)):
        if target[i] == "Yes":
            for j in range(len(specific_hypothesis)):
                if sample[i][j] != specific_hypothesis[j] and specific_hypothesis[j] != '?':
                    specific_hypothesis[j] = '?'
    return specific_hypothesis


if __name__ == "__main__":
    data = pd.read_csv("dataset1.csv")
    # pandas read_csv already consumed the header row, so do NOT slice
    # with [1:] — the original silently dropped the first training example.
    sample = np.array(data)[:, :-1]   # every column except the class label
    target = np.array(data)[:, -1]    # the class-label column
    specific_hypothesis = train(sample, target)
    print("Specific Hypothesis: ", specific_hypothesis)
Candidate Elimination
import numpy as np
import pandas as pd


def train(sample, target):
    """Simplified Candidate Elimination.

    Parameters:
        sample: 2-D sequence of attribute rows.
        target: sequence of class labels; "Yes" marks a positive example.

    Returns:
        (specific_hypothesis, generic_hypothesis): the maximally specific
        boundary S as a list, and G as a list of maximally general
        hypotheses — one single-attribute row per attribute that
        distinguishes a negative example, all-'?' rows dropped.

    Raises:
        ValueError: if there is no positive example. The original seeded S
        from sample[0] unconditionally, which corrupts both boundaries
        whenever row 0 is negative.
    """
    # Seed S from the first POSITIVE example.
    specific_hypothesis = None
    for row, label in zip(sample, target):
        if label == "Yes":
            specific_hypothesis = list(row)
            break
    if specific_hypothesis is None:
        raise ValueError("no positive example found; cannot seed S")

    num_atb = len(specific_hypothesis)
    generic_hypothesis = [['?'] * num_atb for _ in range(num_atb)]

    for i in range(len(sample)):
        if target[i] == "Yes":
            if specific_hypothesis != list(sample[i]):
                for j in range(num_atb):
                    if sample[i][j] != specific_hypothesis[j] and specific_hypothesis[j] != '?':
                        # Generalize S; a generalized attribute can no
                        # longer specialize G.
                        specific_hypothesis[j] = '?'
                        generic_hypothesis[j][j] = '?'
        else:
            for j in range(num_atb):
                if sample[i][j] != specific_hypothesis[j]:
                    # Attribute j distinguishes the negative example:
                    # specialize G on S's value for it.
                    generic_hypothesis[j][j] = specific_hypothesis[j]
                else:
                    generic_hypothesis[j][j] = '?'

    # Discard rows that never specialized (all wildcards).
    all_wild = ['?'] * num_atb
    generic_hypothesis = [g for g in generic_hypothesis if g != all_wild]
    return specific_hypothesis, generic_hypothesis


if __name__ == "__main__":
    data = pd.read_csv("dataset1.csv")
    # pandas read_csv already consumed the header row; the original [1:]
    # slice dropped the first training example by mistake.
    sample = np.array(data)[:, :-1]
    target = np.array(data)[:, -1]
    specific_hypothesis, generic_hypothesis = train(sample, target)
    print(f"Specific Hypothesis: {specific_hypothesis}\nGeneric Hypothesis: {generic_hypothesis}")
Linear Regression
import numpy as np
import pandas as pd


def fit_line(x, y):
    """Least-squares simple linear regression.

    Parameters:
        x, y: equal-length sequences of numbers.

    Returns:
        (slope, intercept) such that y ≈ slope * x + intercept.

    Raises:
        ZeroDivisionError: if x is empty or has no variance.
    """
    mean_x = sum(x) / len(x)
    mean_y = sum(y) / len(y)
    dx = [xi - mean_x for xi in x]
    dy = [yi - mean_y for yi in y]
    # slope = covariance(x, y) / variance(x)
    slope = sum(a * b for a, b in zip(dx, dy)) / sum(a * a for a in dx)
    intercept = mean_y - slope * mean_x
    return slope, intercept


if __name__ == "__main__":
    # matplotlib imported lazily so the module stays importable without it.
    import matplotlib.pyplot as plt

    data = pd.read_csv("dataset.csv")
    x = list(np.array(data)[:, 0])
    y = list(np.array(data)[:, -1])
    slope, intercept = fit_line(x, y)
    # Draw the fitted line across the observed x-range instead of the
    # hard-coded range(-1, 20) the original assumed.
    X = [min(x) - 1, max(x) + 1]
    Y = [xi * slope + intercept for xi in X]
    plt.scatter(x, y)
    plt.plot(X, Y)
    plt.show()
EM (Expectation–Maximization): K-Means vs Gaussian Mixture clustering
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics as sm  # unused here; retained from original
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.mixture import GaussianMixture


def main():
    """Plot iris petal measurements coloured three ways, side by side:
    true species labels, K-Means cluster labels, and Gaussian-Mixture
    (EM) cluster labels on standardized features."""
    dataset = load_iris()
    X = pd.DataFrame(dataset.data,
                     columns=['Sepal_Length', 'Sepal_Width',
                              'Petal_Length', 'Petal_Width'])
    y = pd.DataFrame(dataset.target, columns=['Targets'])

    plt.figure(figsize=(14, 7))
    colormap = np.array(['red', 'lime', 'black'])

    # REAL PLOT — ground-truth species labels.
    plt.subplot(1, 3, 1)
    plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
    plt.title('Real')

    # K-PLOT — cluster labels used directly; the original passed them
    # through np.choose(labels, [0, 1, 2]), an identity remap.
    plt.subplot(1, 3, 2)
    model = KMeans(n_clusters=3)
    model.fit(X)
    predY = model.labels_.astype(np.int64)
    plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[predY], s=40)
    plt.title('KMeans')

    # GMM PLOT — EM clustering on standardized features.
    scaler = preprocessing.StandardScaler()
    scaler.fit(X)
    xs = pd.DataFrame(scaler.transform(X), columns=X.columns)
    gmm = GaussianMixture(n_components=3)
    gmm.fit(xs)
    y_cluster_gmm = gmm.predict(xs)
    plt.subplot(1, 3, 3)
    plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
    plt.title('GMM Classification')

    # The original never called plt.show(), so no window appeared when
    # run as a script.
    plt.show()


if __name__ == "__main__":
    main()
K-Nearest Neighbor (KNN)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as kn


def main():
    """Train a K-Nearest-Neighbour classifier (k=3) on iris and print the
    label mapping and the test-set accuracy."""
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.1)

    # Show the label-index -> species-name mapping.
    for i, name in enumerate(iris.target_names):
        print("Label ", i, '- ', str(name))

    k = 3
    classifier = kn(n_neighbors=k)
    classifier.fit(x_train, y_train)
    # The original computed y_pred = classifier.predict(x_test) and never
    # used it; score() already predicts internally.
    print("Accuracy for k:", k, "is ", classifier.score(x_test, y_test))


if __name__ == "__main__":
    main()
Naive Bayesian Classifier
# import necessary libraries
import pandas as pd
from sklearn import tree  # unused here; retained from original
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder


def main():
    """Train Gaussian Naive Bayes on the play-tennis data set: label-encode
    the categorical columns, split 80/20, fit, and print the accuracy."""
    # load data from CSV
    data = pd.read_csv('tennisdata.csv')
    print("THe first 5 values of data is :\n", data.head())

    # obtain train data and train output
    X = data.iloc[:, :-1]
    print("\nThe First 5 values of train data is\n", X.head())
    y = data.iloc[:, -1]
    print("\nThe first 5 values of Train output is\n", y.head())

    # Label-encode each categorical feature column — one encoder per
    # column, replacing four copies of identical boilerplate.
    for column in ['Outlook', 'Temperature', 'Humidity', 'Windy']:
        X[column] = LabelEncoder().fit_transform(X[column])
    print("\nNow the Train data is :\n", X.head())

    y = LabelEncoder().fit_transform(y)
    print("\nNow the Train output is\n", y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)

    # accuracy_score's contract is (y_true, y_pred); the original swapped
    # them (numerically harmless for accuracy, but wrong by contract).
    print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))


if __name__ == "__main__":
    main()