import pandas as pd
import numpy as np
from ISLP import load_data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# from tensorflow.keras.optimizers import SGD  # alternative optimizer, not used here
from tensorflow.keras.optimizers import Adam
# === Setup ===
# Load and preprocess Hitters data
Hitters = load_data('Hitters').dropna()
# Convert target to binary classification (Salary >= 500, in thousands of dollars, counts as good income)
print(Hitters[["Salary"]].describe())
y = np.where(Hitters['Salary'] >= 500, 1, 0)
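# Optional sanity check: the mean of a 0/1 label vector is the fraction of players
# labelled as "good income" (assumes the 500 threshold gives reasonably balanced classes).
print(f"Fraction of positive labels: {y.mean():.2f}")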
# One-hot encode the categorical variables (League, Division, NewLeague) and drop the target column
Hitters = pd.get_dummies(Hitters.drop(columns=['Salary']), drop_first=True)
# Extract feature matrix after one-hot encoding
X = Hitters
# Split into training and testing sets before scaling, so the scaler never sees the test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize the features (fit on the training set only, then apply the same transform to the test set)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
## Build the Neural Network
model = Sequential([
    Dense(units=64, input_shape=(X_train.shape[1],), activation='relu'),  # input + first hidden layer
    Dense(units=32, activation='relu'),                                    # second hidden layer
    Dense(units=1, activation='sigmoid')                                   # output layer: P(Salary >= 500)
])
## Compile the Model (Adam optimizer and binary_crossentropy loss)
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
## Train the Model
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.1, verbose=1)
## Evaluate the Model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")
## Visualize Training Progress
import matplotlib.pyplot as plt
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
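# Optional: the same history object also records the loss curves, which can reveal
# overfitting even when accuracy looks flat.
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Binary Cross-Entropy')
plt.legend()
plt.show()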