Data Science: Regression on JavaScript

Background

Data Preprocessing

# Label-encode every column of combineddf so each distinct value becomes an
# integer code. The encoded frame is only previewed here.
encoder = LabelEncoder()
df_LE = combineddf.apply(encoder.fit_transform)
print('Replacing categories by numerical labels: ')
print(df_LE.head())

# Keep only the rows that have a yearly_salary value. The explicit copy makes
# filtered_df an independent frame, so the replacement below never operates on
# a selection derived from combineddf (the pattern pandas may flag with
# SettingWithCopyWarning).
filtered_df = combineddf[combineddf["yearly_salary"].notna()].copy()

# Ordinal encoding of the salary brackets: a larger code means a higher bracket.
cleanup_nums = {
    "yearly_salary": {
        "work_for_free": 0,
        "0_10": 1,
        "10_30": 2,
        "30_50": 3,
        "50_100": 4,
        "100_200": 5,
        "more_than_200": 6,
    }
}

# Rebind the result instead of mutating in place.
filtered_df = filtered_df.replace(cleanup_nums)
print(filtered_df)

Logistic regression — linear model

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import seaborn as sns
from sklearn import metrics

# Linear logistic regression: predict the 'angular' column from four
# features, using only the first 1000 rows of filtered_df.
data_req = filtered_df[:1000]

feature_cols_react = [
    'yearly_salary',
    'years_of_experience',
    'backend_proficiency',
    'css_proficiency',
]
y = data_req['angular']  # Target variable
X = data_req[feature_cols_react]  # Features

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# fit() returns the estimator itself, so construction and training chain.
logreg = LogisticRegression(solver='lbfgs', C=1e5).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = logreg.predict(X_test)
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
print("confusion matrix \n", cnf_matrix)
for label, scorer in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision :", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(label, scorer(y_test, y_pred))

Logistic regression — polynomial model

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import seaborn as sns
from sklearn import metrics
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

# Polynomial logistic regression: predict the 'angular' column from the
# salary bracket and CSS proficiency, using every row of filtered_df.
data_req = filtered_df

feature_cols_react = ['yearly_salary', 'css_proficiency']
X = data_req[feature_cols_react]  # Features
y = data_req['angular']  # Target variable

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Degree-2 expansion without the constant column: for two inputs this yields
# x1, x2, x1^2, x1*x2 and x2^2.
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X_train)

# NOTE(review): `multi_class` is reported as deprecated in recent scikit-learn
# releases; confirm against the installed version before upgrading.
logreg = LogisticRegression(solver='liblinear', multi_class='ovr')
logreg.fit(X_poly, y_train)

# Reuse the expansion fitted on the training rows; predict exactly once.
X_poly_test = poly.transform(X_test)
y_pred = logreg.predict(X_poly_test)

cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
print("confusion matrix \n", cnf_matrix)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("Precision :", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))

CODER | BLOGGER | ARTIST | GHOST