Files
econometrics-and-machine-le…/Machine Learning for Economics and Finance/python-exercises/randomly_split_train_test_NOTE.txt

22 lines
558 B
Plaintext
Executable File

import numpy as np
# Reproducible random 70/30 split of `default_data` into a training set and a
# test set. `default_data` is defined elsewhere; it must support len() and
# positional `.iloc` indexing (presumably a pandas DataFrame -- confirm).

# Seed NumPy's global random generator so the shuffle below is repeatable
np.random.seed(1)
# Number of observations in the dataset
n = len(default_data)
# Randomly shuffle the row positions 0..n-1
indices = np.random.permutation(n)
# Training sample size: floor(0.7 * n), computed in integer arithmetic.
# int(0.7 * n) can come out one too small because 0.7 is not exactly
# representable in binary floating point (e.g. 0.7 * 90 evaluates to
# 62.99999999999999, which int() truncates to 62 instead of 63).
nT = (7 * n) // 10
# Split the shuffled positions. NOTE: despite the `n_` prefix, these two
# names hold arrays of row positions, not counts.
n_train = indices[:nT]  # First 70% of the shuffled positions -> training
n_test = indices[nT:]  # Remaining 30% of the shuffled positions -> test
# Create the training and test datasets by positional row selection
train_data = default_data.iloc[n_train]
test_data = default_data.iloc[n_test]