econometrics-and-machine-le…/Machine Learning for Economics and Finance/python-exercises/randomly_split_train_test_NOTE.txt

import numpy as np

# Random 70/30 train/test split of `default_data`.
# `default_data` must already be defined and support len() and positional
# `.iloc` indexing (presumably a pandas DataFrame -- confirm against the
# code that loads it).

# Fix the global NumPy seed so the shuffle below is reproducible.
np.random.seed(1)

# Draw a random ordering of the row positions 0 .. n-1.
n = len(default_data)
indices = np.random.permutation(n)

# Training sample size: 70% of the rows, truncated to an integer.
nT = int(n * 0.7)

# NOTE: despite the "n_" prefix, these are arrays of row positions, not
# counts. The first nT shuffled positions form the training sample and the
# remaining ones form the test sample.
n_train, n_test = indices[:nT], indices[nT:]

# Select rows by position to build the two datasets.
train_data, test_data = default_data.iloc[n_train], default_data.iloc[n_test]