# Assemble the feature matrix and the target, then split into train/test sets.

## Numerical features: an independent copy of three columns of the original data.
X_num = data.loc[:, ['age', 'bmi', 'children']].copy()

## Append the separately prepared `region`, `sex` and `smoker` frames
## (presumably the encoded categorical features -- confirm upstream) as extra
## columns. NOTE(review): concat along columns aligns on the index, so these
## frames are assumed to share `data`'s index -- TODO confirm.
X_final = pd.concat(
    [X_num, region, sex, smoker],
    axis='columns',
)

# Target: the "charges" column, kept as a one-column DataFrame (not a Series).
y_final = data.loc[:, ['charges']].copy()

# Train/test split: test_size=0.33 and a fixed random_state so the split is
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.33,
    random_state=0,
)