"""Binary classification of MLB salaries (Hitters dataset) with a small Keras MLP.

Pipeline: load Hitters, binarize Salary at 500, one-hot encode categoricals,
split train/test, standardize (fit on train only), train a 64-32-1 sigmoid
network, evaluate, and plot training/validation accuracy.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ISLP import load_data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# === Setup ===
# Load the Hitters data and drop rows with missing values (Salary has NaNs).
Hitters = load_data('Hitters').dropna()

# Convert the target to binary classification (Salary >= 500 counts as "good income").
print(Hitters[["Salary"]].describe())
y = np.where(Hitters['Salary'] >= 500, 1, 0)

# One-hot encode categorical predictors; drop_first avoids the dummy-variable trap.
Hitters = pd.get_dummies(Hitters.drop(columns=['Salary']), drop_first=True)

# Feature matrix after encoding.
X = Hitters

# Split BEFORE scaling so test-set statistics never leak into the scaler.
# stratify=y keeps the class ratio identical in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize: fit on the training set only, then apply the same
# transformation to the test set (fixes the original train/test leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# --- Build the neural network ---
model = Sequential([
    Dense(units=64, input_shape=(X_train.shape[1],), activation='relu'),  # input + first hidden layer
    Dense(units=32, activation='relu'),                                   # second hidden layer
    Dense(units=1, activation='sigmoid'),                                 # output: P(Salary >= 500)
])

# --- Compile (Adam optimizer, binary cross-entropy for a sigmoid output) ---
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# --- Train; hold out 10% of the training data for validation ---
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
)

# --- Evaluate on the held-out test set ---
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

# --- Visualize training progress ---
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()