Commit 40af08ef authored by bozza
First commit
parent d401402e
# coding: utf-8
from __future__ import print_function
import numpy as np
import os
from network_models import train_neural_network
from network_models import CHECKPOINT_FOLDER_PATH, DirectionNet, DirectionNetShared
from data_loaders import direction_net_data_generator, metadata_generator, get_n_iterations
from data_files import get_train_validation_test_files, get_multi_data_files
from pickle import dump
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
from tqdm import tqdm
# Paths to Weights Files
TZ_FILE = './model/model_regression_directions/renamed/tz_net_regression_64_100_regression_cosz_renamed.hdf5'
TX_FILE = './model/model_regression_directions/renamed/tx_net_regression_dirx_v1_64_100_regression_cosx_renamed.hdf5'
TY_FILE = './model/model_regression_directions/renamed/ty_net_regression_diry_v1_64_100_regression_cosy_renamed.hdf5'
loss_weights = (1, 1, 1, 1, 1)
loss_weights_label = '_'.join(map(str, loss_weights))
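# Five loss weights, presumably one per model output: the three direction
# cosines (dirx, diry, dirz) plus the two auxiliary heads that are unpacked at
# inference time below (eu_pred and sumsq_pred). Equal weights give every loss
# term the same say in the combined objective.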
#model = DirectionNet(loss_weights=loss_weights)
model = DirectionNetShared(loss_weights=loss_weights)
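# DirectionNetShared presumably shares a single convolutional trunk across the
# three direction branches, while DirectionNet (commented out above) keeps a
# separate branch per component; hence the "shared_weights" task name below.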
model.summary()
# Initialise Weights
#print('Pre-loading Weights for Three Branches')
#model.load_weights(TX_FILE, by_name=True)
#model.load_weights(TY_FILE, by_name=True)
#model.load_weights(TZ_FILE, by_name=True)
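# Keras load_weights(..., by_name=True) copies weights only into layers whose
# names match those stored in the HDF5 file; the "renamed" files above were
# presumably produced by renaming each single-branch net's layers to line up
# with the corresponding branch of this model.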
N_FILES = 100
BATCH_SIZE = 64
MAIN_DATA_FOLDER_NAME = 'km3net'
DATA_FOLDER_NAME = 'multi_target_directions'
TASK_NAME = 'regression_directions_shared_weights'
TASK_FOLDER_PATH = os.path.join(CHECKPOINT_FOLDER_PATH, TASK_NAME)
if not os.path.exists(TASK_FOLDER_PATH):
    os.makedirs(TASK_FOLDER_PATH)
TRAINING_WEIGHTS_FILEPATH = os.path.join(TASK_FOLDER_PATH,
                                         '{}_weights_training_{}_lw_{}.hdf5'.format(model.name, TASK_NAME,
                                                                                    loss_weights_label))
HISTORY_FILEPATH = os.path.join(TASK_FOLDER_PATH,
                                '{}_history_{}_lw_{}.pkl'.format(model.name, TASK_NAME, loss_weights_label))
MODEL_JSON_FILEPATH = os.path.join(TASK_FOLDER_PATH, '{}_{}.json'.format(model.name, loss_weights_label))
print('TRAINING_WEIGHTS: ', TRAINING_WEIGHTS_FILEPATH)
print('NET HISTORY: ', HISTORY_FILEPATH)
multi_data_folder = os.path.join('/', 'data', MAIN_DATA_FOLDER_NAME, 'Xy_multi_data_files')
train_test_dir = os.path.join(multi_data_folder, 'train_test_files', DATA_FOLDER_NAME)
fnames_train, fnames_val, fnames_test, index_filelist = get_train_validation_test_files(train_test_dir,
                                                                                         n_files=N_FILES)
steps_per_epoch, n_events = get_n_iterations(fnames_train[:N_FILES], batch_size=BATCH_SIZE, target_key='dirz')
print(steps_per_epoch, n_events)
validation_steps, n_evts_val = get_n_iterations(fnames_val[:N_FILES], batch_size=BATCH_SIZE, target_key='dirz')
print(validation_steps, n_evts_val)
prediction_steps, n_evts_test = get_n_iterations(fnames_test[:N_FILES], batch_size=BATCH_SIZE, target_key='dirz')
print(prediction_steps, n_evts_test)
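# get_n_iterations presumably returns (ceil(n_events / batch_size), n_events),
# i.e. how many generator batches cover every event once per epoch; e.g. 6400
# events at BATCH_SIZE = 64 give 100 steps. (A hedged reading, inferred from how
# the values feed the steps_per_epoch/validation_steps arguments below.)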
training_generator = direction_net_data_generator(fnames_train[:N_FILES], batch_size=BATCH_SIZE)
validation_generator = direction_net_data_generator(fnames_val[:N_FILES], batch_size=BATCH_SIZE)
training_history = train_neural_network(model, training_generator, steps_per_epoch,
                                        validation_generator,
                                        validation_steps, batch_size=BATCH_SIZE,
                                        log_suffix="{}_lw_{}".format(TASK_NAME, loss_weights_label),
                                        checkpoint_folder=TASK_FOLDER_PATH)
# Dump of Training History
print('Saving Model (JSON), Training History & Weights...', end='')
model_json_str = model.to_json()
with open(MODEL_JSON_FILEPATH, 'w') as model_json_f:
    model_json_f.write(model_json_str)
history_filepath = HISTORY_FILEPATH
dump(training_history.history, open(history_filepath, 'wb'))
model.save_weights(TRAINING_WEIGHTS_FILEPATH)
print('...Done!')
# Inference
print('INFERENCE STEP')
xy_filelist = get_multi_data_files(multi_data_folder, n_files=N_FILES)
metadata_keylist = ["E", "dirx", "diry", "dirz", "posx", "posy", "posz", "dist"]
dirx_true_l = list()
dirx_pred_l = list()
diry_true_l = list()
diry_pred_l = list()
dirz_true_l = list()
dirz_pred_l = list()
sumsq_pred_l = list()
metadata = None
metadata_gen = metadata_generator(index_filelist, xy_filelist, metadata_keylist)
data_gen = direction_net_data_generator(fnames_test[:N_FILES], batch_size=BATCH_SIZE)
for i in tqdm(range(prediction_steps)):
    X_batch, Y_batch_true = next(data_gen)
    metadata_batch = next(metadata_gen)
    if metadata is None:
        metadata = metadata_batch
    else:
        metadata = pd.concat((metadata, metadata_batch))
    Y_batch_pred = model.predict_on_batch(X_batch)
    dirx_true, diry_true, dirz_true, _, _ = Y_batch_true
    dirx_pred, diry_pred, dirz_pred, eu_pred, sumsq_pred = Y_batch_pred
    dirx_pred = dirx_pred.ravel()
    diry_pred = diry_pred.ravel()
    dirz_pred = dirz_pred.ravel()
    sumsq_pred = sumsq_pred.ravel()
    dirx_true_l.append(dirx_true)
    dirx_pred_l.append(dirx_pred)
    diry_true_l.append(diry_true)
    diry_pred_l.append(diry_pred)
    dirz_true_l.append(dirz_true)
    dirz_pred_l.append(dirz_pred)
    sumsq_pred_l.append(sumsq_pred)
dirx_true_l = np.hstack(dirx_true_l)
dirx_pred_l = np.hstack(dirx_pred_l)
diry_true_l = np.hstack(diry_true_l)
diry_pred_l = np.hstack(diry_pred_l)
dirz_true_l = np.hstack(dirz_true_l)
dirz_pred_l = np.hstack(dirz_pred_l)
sumsq_pred_l = np.hstack(sumsq_pred_l)
print('MSE (dirx): ', mean_squared_error(dirx_true_l, dirx_pred_l))
print('R2 Score (dirx): ', r2_score(dirx_true_l, dirx_pred_l))
print('MSE (diry): ', mean_squared_error(diry_true_l, diry_pred_l))
print('R2 Score (diry): ', r2_score(diry_true_l, diry_pred_l))
print('MSE (dirz): ', mean_squared_error(dirz_true_l, dirz_pred_l))
print('R2 Score (dirz): ', r2_score(dirz_true_l, dirz_pred_l))
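# The target direction is a unit vector, so the sum-of-squares head should
# predict exactly 1 for every event; comparing sumsq_pred_l against a vector of
# ones measures how well the network honours that constraint. Note r2_score
# degenerates for a constant reference, so the MSE line is the informative one.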
print('MSE Sum of Squares: ', mean_squared_error(np.ones(shape=sumsq_pred_l.shape), sumsq_pred_l))
print('R2 Sum of Squares: ', r2_score(np.ones(shape=sumsq_pred_l.shape), sumsq_pred_l))
import numpy as np
import os
import pandas as pd
from network_models import train_neural_network, inference_step
from network_models import TXYZnet
model = TXYZnet(num_classes=2)
model.summary()
from data_loaders import data_generator, metadata_generator, get_n_iterations, get_class_weights
train_test_dir = os.path.join("train_test_files","cosz")
fnames_train =[os.path.join(train_test_dir, "Xy_train{}_sel5_doms.npz".format(i+1)) for i in range(100)]
fnames_test =[os.path.join(train_test_dir, "Xy_test{}_sel5_doms.npz".format(i+1)) for i in range(100)]
fnames_val =[os.path.join(train_test_dir, "Xy_val{}_sel5_doms.npz".format(i+1)) for i in range(100)]
# In[9]:
n_files=100
batch_size = 32
steps_per_epoch, n_events = get_n_iterations(fnames_train[:n_files], batch_size=batch_size)
print(steps_per_epoch, n_events)
validation_steps, n_evts_val = get_n_iterations(fnames_val[:n_files], batch_size=batch_size)
print(validation_steps, n_evts_val)
prediction_steps, n_evts_test = get_n_iterations(fnames_test[:n_files], batch_size=batch_size)
print(prediction_steps, n_evts_test)
cls_weights = {i: v for i, v in enumerate(get_class_weights(fnames_train[:n_files]))}
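# get_class_weights presumably returns one weight per class (inverse to class
# frequency); Keras wants them as a {class_index: weight} dict, hence the
# enumerate comprehension. E.g. a 70/30 split would give roughly
# {0: 0.71, 1: 1.67}, so the rarer class contributes more to the loss.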
from keras.utils import to_categorical
def process_cosz(y):
    y[y > 0] = 1
    y[y <= 0] = 0
    return to_categorical(y)
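# For example (values illustrative): process_cosz(np.array([0.7, -0.2, 0.0]))
# binarizes cos(zenith) to classes [1, 0, 0] and one-hot encodes them as
# [[0, 1], [1, 0], [1, 0]]: class 1 for cos(zenith) > 0, class 0 otherwise.
# Note the thresholding mutates y in place before the encoding.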
def add_channel_dim(X):
    return X[:, :, np.newaxis, ...]
training_generator = data_generator(fnames_train[:n_files], batch_size=batch_size,
                                    fdata=add_channel_dim, ftarget=process_cosz)
validation_generator = data_generator(fnames_val[:n_files], batch_size=batch_size,
                                      fdata=add_channel_dim, ftarget=process_cosz)
train_neural_network(model, training_generator, steps_per_epoch, validation_generator, validation_steps,
                     batch_size=batch_size, class_weights=cls_weights, log_suffix="updown")
# coding: utf-8
from __future__ import print_function
import numpy as np
import os
import pandas as pd
from network_models import train_neural_network
from network_models import TZXY_regression_logE_relu_psigmoid, TZXY_regression_logE_relu_tanh
from network_models import TZXY_regression_logE_vgg
from network_models import CHECKPOINT_FOLDER_PATH
from keras import backend as K
from data_loaders import data_generator, metadata_generator
from data_loaders import get_n_iterations
from pickle import dump
from tqdm import tqdm
from data_files import get_train_validation_test_files, get_multi_data_files
from sklearn.metrics import mean_squared_error, r2_score
N_FILES = 100
BATCH_SIZE = 64
model_build_func = TZXY_regression_logE_vgg
model = model_build_func()
model.summary()
TRAINING_WEIGHTS_FILEPATH = os.path.join(CHECKPOINT_FOLDER_PATH,
                                         '{}_net_weights_training.hdf5'.format(model.name))
HISTORY_FILEPATH = os.path.join(CHECKPOINT_FOLDER_PATH,
                                '{}_net_history.pkl'.format(model.name))
print('TRAINING_WEIGHTS: ', TRAINING_WEIGHTS_FILEPATH)
print('NET HISTORY: ', HISTORY_FILEPATH)
multi_data_folder = os.path.join('/', 'data', 'km3net', 'Xy_multi_data_files')
train_test_dir = os.path.join(multi_data_folder, 'train_test_files', 'log_energies_stratified')
fnames_train, fnames_val, fnames_test, index_filelist = get_train_validation_test_files(train_test_dir,
                                                                                         n_files=N_FILES)
steps_per_epoch, n_events = get_n_iterations(fnames_train[:N_FILES], batch_size=BATCH_SIZE)
print(steps_per_epoch, n_events)
validation_steps, n_evts_val = get_n_iterations(fnames_val[:N_FILES], batch_size=BATCH_SIZE)
print(validation_steps, n_evts_val)
prediction_steps, n_evts_test = get_n_iterations(fnames_test[:N_FILES], batch_size=BATCH_SIZE)
print(prediction_steps, n_evts_test)
def get_TZXY_data(X):
    TZ = np.sum(X, axis=(2, 3))
    XY = np.sum(X, axis=(1, 4))
    if K.image_data_format() == "channels_first":
        TZ = TZ[:, np.newaxis, ...]
        XY = XY[:, np.newaxis, ...]
    else:
        TZ = TZ[..., np.newaxis]
        XY = XY[..., np.newaxis]
    return [TZ, XY]
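# A shape sketch (the (batch, t, x, y, z) layout and sizes are assumptions): for
# X of shape (N, T, Xb, Yb, Zb), summing over axes (2, 3) collapses x and y into
# a (N, T, Zb) time-depth image, and summing over (1, 4) collapses t and z into
# an (N, Xb, Yb) footprint; the np.newaxis lines then add the channel axis where
# the backend expects it (axis 1 for channels_first, the last axis otherwise).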
training_generator = data_generator(fnames_train[:N_FILES], batch_size=BATCH_SIZE,
                                    fdata=get_TZXY_data, ftarget=lambda y: y)
validation_generator = data_generator(fnames_val[:N_FILES], batch_size=BATCH_SIZE,
                                      fdata=get_TZXY_data, ftarget=lambda y: y)
training_history = train_neural_network(model, training_generator, steps_per_epoch, validation_generator,
                                        validation_steps, batch_size=BATCH_SIZE,
                                        log_suffix="regression_logE")
# Dump of Training History
print('Saving Training History & Weights...', end='')
history_filepath = HISTORY_FILEPATH
dump(training_history.history, open(history_filepath, 'wb'))
model.save_weights(TRAINING_WEIGHTS_FILEPATH)
print('...Done!')
# Inference
print('INFERENCE STEP')
xy_filelist = get_multi_data_files(multi_data_folder, n_files=N_FILES)
metadata_keylist = ["E", "dirx", "diry", "dirz", "posx", "posy", "posz", "dist"]
y_true = list()
y_pred = list()
metadata = None
predict_steps, n_test_events = get_n_iterations(fnames_test[:N_FILES], batch_size=BATCH_SIZE)
print(predict_steps, n_test_events)
metadata_gen = metadata_generator(index_filelist, xy_filelist, metadata_keylist)
data_gen = data_generator(fnames_test[:N_FILES], batch_size=BATCH_SIZE,
                          fdata=get_TZXY_data, ftarget=lambda y: y)
for i in tqdm(range(predict_steps)):
    [ZT_batch, XY_batch], y_batch_true = next(data_gen)
    metadata_batch = next(metadata_gen)
    if metadata is None:
        metadata = metadata_batch
    else:
        metadata = pd.concat((metadata, metadata_batch))
    y_batch_pred = model.predict_on_batch([ZT_batch, XY_batch])
    y_batch_pred = y_batch_pred.ravel()
    y_true.append(y_batch_true)
    y_pred.append(y_batch_pred)
y_true = np.hstack(y_true)
y_pred = np.hstack(y_pred)
print('MSE: ', mean_squared_error(y_true, y_pred))
print('R2 Score: ', r2_score(y_true, y_pred))
# coding: utf-8
from __future__ import print_function
import numpy as np
import os
from network_models import train_neural_network, inference_step
from network_models import CHECKPOINT_FOLDER_PATH
from network_models import TZnet_regression_cosz
from data_loaders import data_generator, metadata_generator, get_n_iterations
from keras import backend as K
from data_files import get_train_validation_test_files, get_multi_data_files
from pickle import dump
from sklearn.metrics import mean_squared_error, r2_score
model = TZnet_regression_cosz()
model.summary()
TRAINING_WEIGHTS_FILEPATH = os.path.join(CHECKPOINT_FOLDER_PATH,
                                         '{}_net_weights_training_regression_cosz.hdf5'.format(model.name))
HISTORY_FILEPATH = os.path.join(CHECKPOINT_FOLDER_PATH,
                                '{}_net_history_regression_cosz.pkl'.format(model.name))
print('TRAINING_WEIGHTS: ', TRAINING_WEIGHTS_FILEPATH)
print('NET HISTORY: ', HISTORY_FILEPATH)
n_files = 100
batch_size = 64
train_test_dir = os.path.abspath("./cosz")
fnames_train, fnames_val, fnames_test, index_filelist = get_train_validation_test_files(train_test_dir,
                                                                                         n_files=n_files)
steps_per_epoch, n_events = get_n_iterations(fnames_train[:n_files], batch_size=batch_size)
print(steps_per_epoch, n_events)
validation_steps, n_evts_val = get_n_iterations(fnames_val[:n_files], batch_size=batch_size)
print(validation_steps, n_evts_val)
prediction_steps, n_evts_test = get_n_iterations(fnames_test[:n_files], batch_size=batch_size)
print(prediction_steps, n_evts_test)
def get_TZ_only(X):
    TZ = np.sum(X, axis=(2, 3))
    if K.image_data_format() == "channels_first":
        TZ = TZ[:, np.newaxis, ...]
    else:
        TZ = TZ[..., np.newaxis]
    return TZ
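# Same projection idea as the two-view nets, but only the time-depth view is
# kept: assuming the (batch, t, x, y, z) layout as before, a channels_last input
# of shape (N, T, Xb, Yb, Zb) comes out as (N, T, Zb, 1), ready for a 2-D
# convolutional stack.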
training_generator = data_generator(fnames_train[:n_files], batch_size=batch_size,
                                    fdata=get_TZ_only, ftarget=lambda y: y)
validation_generator = data_generator(fnames_val[:n_files], batch_size=batch_size,
                                      fdata=get_TZ_only, ftarget=lambda y: y)
training_history = train_neural_network(model, training_generator, steps_per_epoch, validation_generator,
                                        validation_steps,
                                        batch_size=batch_size,
                                        log_suffix="regression_cosz")
# Dump of Training History
print('Saving Training History & Weights...', end='')
history_filepath = HISTORY_FILEPATH
dump(training_history.history, open(history_filepath, 'wb'))
model.save_weights(TRAINING_WEIGHTS_FILEPATH)
print('...Done!')
# Inference
print('INFERENCE STEP')
multi_data_folder = os.path.join('/', 'data', 'km3net', 'Xy_multi_data_files')
xy_filelist = get_multi_data_files(multi_data_folder, n_files=n_files)
metadata_keylist = ["E", "dirx", "diry", "dirz", "posx", "posy", "posz", "dist"]
predict_steps, n_test_events = get_n_iterations(fnames_test[:n_files], batch_size=batch_size)
print(predict_steps, n_test_events)
metadata_gen = metadata_generator(index_filelist, xy_filelist, metadata_keylist)
test_data_generator = data_generator(fnames_test[:n_files], batch_size=batch_size,
                                     fdata=get_TZ_only, ftarget=lambda y: y)
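# categorical=False requests the regression flavour of inference_step: judging
# by the unpacking below it returns (metadata, y_true, y_pred), whereas the
# categorical default (see the classification script further down) also yields
# the predicted class probabilities.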
inference_res = inference_step(model, test_data_generator, predict_steps,
                               metadata_gen, categorical=False)
_, y_true, y_pred = inference_res
print('MSE: ', mean_squared_error(y_true, y_pred))
print('R2 Score: ', r2_score(y_true, y_pred))
# coding: utf-8
from __future__ import print_function
import numpy as np
import os
from network_models import train_neural_network, inference_step
from network_models import CHECKPOINT_FOLDER_PATH
from network_models import TZXY_numu_nue_classification
from data_loaders import data_generator, metadata_generator, get_n_iterations
from keras import backend as K
from data_files import get_train_validation_test_files, get_multi_data_files
from pickle import dump
from sklearn.metrics import accuracy_score, confusion_matrix
IRON_HIDE = os.path.join('/', 'data', 'km3net', 'Xy_multi_data_files')
UNISA = os.path.abspath("./Xy_multi_data_files_logE")
XY_AXIS = (1, 4)
TZ_AXIS = (2, 3)
def get_Time_Coord(X):
    TZ = np.sum(X, axis=TZ_AXIS)
    XY = np.sum(X, axis=XY_AXIS)
    if K.image_data_format() == "channels_first":
        TZ = TZ[:, np.newaxis, ...]
        XY = XY[:, np.newaxis, ...]
    else:
        TZ = TZ[..., np.newaxis]
        XY = XY[..., np.newaxis]
    return [TZ, XY]
model = TZXY_numu_nue_classification(2)
model.summary()
N_FILES = 100
BATCH_SIZE = 64
DATA_FOLDER_NAME = 'numu_nue_stratified_labels'
TASK_NAME = 'numu_nue_classification'
TASK_FOLDER_PATH = os.path.join(CHECKPOINT_FOLDER_PATH, TASK_NAME)
if not os.path.exists(TASK_FOLDER_PATH):
    os.makedirs(TASK_FOLDER_PATH)
TRAINING_WEIGHTS_FILEPATH = os.path.join(TASK_FOLDER_PATH,
                                         '{}_weights_training_{}.hdf5'.format(model.name, TASK_NAME))
HISTORY_FILEPATH = os.path.join(TASK_FOLDER_PATH,
                                '{}_history_{}.pkl'.format(model.name, TASK_NAME))
MODEL_JSON_FILEPATH = os.path.join(TASK_FOLDER_PATH, '{}_{}.json'.format(model.name, TASK_NAME))
print('TRAINING_WEIGHTS: ', TRAINING_WEIGHTS_FILEPATH)
print('NET HISTORY: ', HISTORY_FILEPATH)
multi_data_folder = IRON_HIDE  # Changed to re-run classification 26/01/2018
train_test_dir = os.path.join(multi_data_folder, 'train_test_files', DATA_FOLDER_NAME)
fnames_train, fnames_val, fnames_test, index_filelist = get_train_validation_test_files(train_test_dir,
                                                                                         n_files=N_FILES)
steps_per_epoch, n_events = get_n_iterations(fnames_train[:N_FILES], batch_size=BATCH_SIZE)
print(steps_per_epoch, n_events)
validation_steps, n_evts_val = get_n_iterations(fnames_val[:N_FILES], batch_size=BATCH_SIZE)
print(validation_steps, n_evts_val)
prediction_steps, n_evts_test = get_n_iterations(fnames_test[:N_FILES], batch_size=BATCH_SIZE)
print(prediction_steps, n_evts_test)
training_generator = data_generator(fnames_train[:N_FILES], batch_size=BATCH_SIZE,
                                    fdata=get_Time_Coord)
validation_generator = data_generator(fnames_val[:N_FILES], batch_size=BATCH_SIZE,
                                      fdata=get_Time_Coord)
training_history = train_neural_network(model, training_generator, steps_per_epoch,
                                        validation_generator,
                                        validation_steps, batch_size=BATCH_SIZE,
                                        log_suffix=TASK_NAME)
# Dump of Training History
print('Saving Model (JSON), Training History & Weights...', end='')
model_json_str = model.to_json()
with open(MODEL_JSON_FILEPATH, 'w') as model_json_f:
    model_json_f.write(model_json_str)
history_filepath = HISTORY_FILEPATH
dump(training_history.history, open(history_filepath, 'wb'))
model.save_weights(TRAINING_WEIGHTS_FILEPATH)
print('...Done!')
# Inference
print('INFERENCE STEP')
xy_filelist = get_multi_data_files(multi_data_folder, n_files=N_FILES)
metadata_keylist = ["E", "dirx", "diry", "dirz", "posx", "posy", "posz", "dist"]
metadata_gen = metadata_generator(index_filelist, xy_filelist, metadata_keylist)
test_data_gen = data_generator(fnames_test[:N_FILES], batch_size=BATCH_SIZE,
                               fdata=get_Time_Coord)
metadata, y_true, y_pred, probs = inference_step(model, test_data_gen, prediction_steps, metadata_gen)
print('Accuracy: ', accuracy_score(y_true, y_pred))
print('Confusion Matrix', '\n', confusion_matrix(y_true, y_pred))
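# Reminder on layout: confusion_matrix puts true classes on rows and predicted
# classes on columns, so for two classes the printout reads
# [[TN, FP], [FN, TP]] with class 1 taken as "positive".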
# coding: utf-8
from __future__ import print_function
import numpy as np
import os