#
# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from abc import ABCMeta, abstractmethod
from zoo.automl.search.abstract import *
import numpy as np
from ray import tune
import json
class Recipe(metaclass=ABCMeta):
    """
    Abstract base class of all recipes.

    A recipe carries three runtime parameters (``training_iteration``,
    ``num_samples`` and ``reward_metric``) and must implement
    ``search_space``. The remaining hooks return ``None`` unless a subclass
    overrides them.
    """

    def __init__(self):
        # ----- runtime parameters
        self.training_iteration = 1
        self.num_samples = 1
        self.reward_metric = None

    @abstractmethod
    def search_space(self, all_available_features):
        """
        Build the search space. Has to be implemented by every subclass.

        :param all_available_features: the features a recipe may choose from
        :return: None in this base implementation
        """

    def runtime_params(self):
        """
        Collect the runtime parameters into a dict.

        :return: dict with "training_iteration" and "num_samples"; the key
            "reward_metric" is only present when ``self.reward_metric`` is
            not None
        """
        runtime_config = dict(
            training_iteration=self.training_iteration,
            num_samples=self.num_samples,
        )
        if self.reward_metric is None:
            return runtime_config
        runtime_config["reward_metric"] = self.reward_metric
        return runtime_config

    def fixed_params(self):
        """
        :return: None in this base implementation
        """
        return None

    def search_algorithm_params(self):
        """
        :return: None in this base implementation
        """
        return None

    def search_algorithm(self):
        """
        :return: None in this base implementation
        """
        return None

    def scheduler_algorithm(self):
        """
        :return: None in this base implementation
        """
        return None
class SmokeRecipe(Recipe):
    """
    A very simple Recipe for smoke test that runs one epoch and one iteration
    with only 1 random sample.
    """

    def __init__(self):
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()

    def search_space(self, all_available_features):
        """
        Build the search space of the LSTM smoke test.

        :param all_available_features: features to use; all of them are
            selected and stored as a JSON string
        :return: dict describing the search space
        """
        return {
            "selected_features": json.dumps(all_available_features),
            "model": "LSTM",
            "lstm_1_units": tune.choice([32, 64]),
            "dropout_1": tune.uniform(0.2, 0.5),
            "lstm_2_units": tune.choice([32, 64]),
            "dropout_2": tune.uniform(0.2, 0.5),
            "lr": 0.001,
            "batch_size": 1024,
            "epochs": 1,
            "past_seq_len": 2,
        }
class MTNetSmokeRecipe(Recipe):
    """
    A very simple Recipe for smoke test that runs one epoch and one iteration
    with only 1 random sample.
    """

    def __init__(self):
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()

    def search_space(self, all_available_features):
        """
        Build the search space of the MTNet smoke test.

        :param all_available_features: features to use; all of them are
            selected and stored as a JSON string
        :return: dict describing the search space
        """
        return {
            "selected_features": json.dumps(all_available_features),
            "model": "MTNet",
            "lr": 0.001,
            "batch_size": 16,
            "epochs": 1,
            "cnn_dropout": 0.2,
            "rnn_dropout": 0.2,
            "time_step": tune.choice([3, 4]),
            "cnn_height": 2,
            "long_num": tune.choice([3, 4]),
            "ar_size": tune.choice([2, 3]),
            # derived from the sampled long_num and time_step of the same config
            "past_seq_len": tune.sample_from(
                lambda spec:
                (spec.config.long_num + 1) * spec.config.time_step),
        }
class PastSeqParamHandler(object):
    """
    Utility to handle PastSeq Param
    """

    def __init__(self):
        pass

    @staticmethod
    def get_past_seq_config(look_back):
        """
        Generate the past sequence config based on look_back.

        :param look_back: look_back configuration, either a tuple with 2 int
            values in the format (min_len, max_len), or a single int
        :return: search configuration for past sequence: ``look_back`` itself
            when it is a single int, otherwise
            ``tune.randint(min_len, max_len + 1)`` with min_len raised to 2
            when it is smaller
        :raises ValueError: if max_len or the single int is smaller than 2,
            if min_len is larger than max_len, or if look_back is neither an
            int nor a tuple of 2 ints
        """
        is_int_pair = (
            isinstance(look_back, tuple)
            and len(look_back) == 2
            and isinstance(look_back[0], int)
            and isinstance(look_back[1], int))

        if is_int_pair:
            min_len, max_len = look_back
            if max_len < 2:
                raise ValueError(
                    "The max look back value should be at least 2")
            if min_len < 2:
                print(
                    "The input min look back value is smaller than 2. "
                    "We sample from range (2, {}) instead.".format(max_len))
                # actually apply the lower bound announced above
                min_len = 2
            if min_len > max_len:
                raise ValueError(
                    "The min look back value {} should not be larger than "
                    "the max look back value {}".format(min_len, max_len))
            return tune.randint(min_len, max_len + 1)

        if isinstance(look_back, int):
            if look_back < 2:
                # one formatted message instead of a two-element args tuple
                raise ValueError(
                    "look back value should not be smaller than 2. "
                    "Current value is {}".format(look_back))
            return look_back

        raise ValueError(
            "look back is {}.\n "
            "look_back should be either a tuple with 2 int values:"
            " (min_len, max_len) or a single int".format(look_back))
class GridRandomRecipe(Recipe):
    """
    A recipe involves both grid search and random search.
       tsp = TimeSequencePredictor(...,recipe = GridRandomRecipe(1))
    """

    def __init__(
            self,
            num_rand_samples=1,
            look_back=2,
            epochs=5,
            training_iteration=10):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param look_back: the length to look back, either a tuple with 2 int values,
          which is in format is (min len, max len), or a single int, which is
          a fixed length to look back.
        :param epochs: no. of epochs to train in each iteration
        :param training_iteration: no. of iterations for training (n epochs) in trials
        """
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(
            look_back)
        self.epochs = epochs

    def search_space(self, all_available_features):
        """
        Build the search space mixing grid search and random search.

        :param all_available_features: features to sample from; at least 3
            are expected because at least 3 features are drawn
        :return: dict describing the search space
        """
        def sample_features(spec):
            # randint's upper bound is exclusive: + 1 lets the complete
            # feature set be drawn and keeps the range non-empty when
            # exactly 3 features are available.
            n_selected = np.random.randint(
                low=3, high=len(all_available_features) + 1)
            chosen = np.random.choice(
                all_available_features, size=n_selected, replace=False)
            return json.dumps(list(chosen))

        return {
            # -------- feature related parameters
            "selected_features": tune.sample_from(sample_features),

            # -------- model selection TODO add MTNet
            "model": tune.choice(["LSTM", "Seq2seq"]),

            # --------- Vanilla LSTM model parameters
            "lstm_1_units": tune.grid_search([16, 32]),
            "dropout_1": 0.2,
            "lstm_2_units": tune.grid_search([16, 32]),
            "dropout_2": tune.uniform(0.2, 0.5),

            # ----------- Seq2Seq model parameters
            "latent_dim": tune.grid_search([32, 64]),
            "dropout": tune.uniform(0.2, 0.5),

            # ----------- optimization parameters
            "lr": tune.uniform(0.001, 0.01),
            # a single value is drawn per config, so no `replace` keyword
            "batch_size": tune.choice([32, 64]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }
class LSTMGridRandomRecipe(Recipe):
    """
    A recipe involves both grid search and random search, only for LSTM.
       tsp = TimeSequencePredictor(...,recipe = LSTMGridRandomRecipe(1))
    """

    def __init__(
            self,
            num_rand_samples=1,
            epochs=5,
            training_iteration=10,
            look_back=2,
            lstm_1_units=[16, 32, 64, 128],
            lstm_2_units=[16, 32, 64],
            batch_size=[32, 64]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param epochs: no. of epochs to train in each iteration
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param look_back: the length to look back, either a tuple with 2 int values,
          which is in format is (min len, max len), or a single int, which is
          a fixed length to look back.
        :param lstm_1_units: random search candidates for num of lstm_1_units
        :param lstm_2_units: grid search candidates for num of lstm_2_units
        :param batch_size: grid search candidates for batch size
        """
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- model params
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(
            look_back)
        # Candidates are copied into fresh lists so that the shared default
        # lists (and caller-owned sequences) are never aliased by the
        # search space.
        self.lstm_1_units_config = tune.choice(list(lstm_1_units))
        self.lstm_2_units_config = tune.grid_search(list(lstm_2_units))
        self.dropout_2_config = tune.uniform(0.2, 0.5)

        # -- optimization params
        self.lr = tune.uniform(0.001, 0.01)
        self.batch_size = tune.grid_search(list(batch_size))
        self.epochs = epochs

    def search_space(self, all_available_features):
        """
        Build the LSTM-only search space.

        :param all_available_features: features to sample from; at least 3
            are expected because at least 3 features are drawn
        :return: dict describing the search space
        """
        def sample_features(spec):
            # randint's upper bound is exclusive, hence the + 1
            n_selected = np.random.randint(
                low=3, high=len(all_available_features) + 1)
            chosen = np.random.choice(
                all_available_features, size=n_selected, replace=False)
            return json.dumps(list(chosen))

        return {
            # -------- feature related parameters
            "selected_features": tune.sample_from(sample_features),

            "model": "LSTM",

            # --------- Vanilla LSTM model parameters
            "lstm_1_units": self.lstm_1_units_config,
            "dropout_1": 0.2,
            "lstm_2_units": self.lstm_2_units_config,
            "dropout_2": self.dropout_2_config,

            # ----------- optimization parameters
            "lr": self.lr,
            "batch_size": self.batch_size,
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }
class MTNetGridRandomRecipe(Recipe):
    """
    Grid+Random Recipe for MTNet
    """

    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 time_step=[3, 4],
                 long_num=[3, 4],
                 cnn_height=[2, 3],
                 cnn_hid_size=[32, 50, 100],
                 ar_size=[2, 3],
                 batch_size=[32, 64]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param epochs: no. of epochs to train in each iteration
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param time_step: random search candidates for model param "time_step"
        :param long_num: random search candidates for model param "long_num"
        :param cnn_height: random search candidates for model param "cnn_height"
        :param cnn_hid_size: random search candidates for model param "cnn_hid_size"
        :param ar_size: random search candidates for model param "ar_size"
        :param batch_size: grid search candidates for batch size
        """
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()
        # -- run time params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        # Candidates are copied into fresh lists so that the shared default
        # lists are never aliased by the search space.
        self.lr = tune.uniform(0.001, 0.01)
        self.batch_size = tune.grid_search(list(batch_size))
        self.epochs = epochs

        # ---- model params
        self.cnn_dropout = tune.uniform(0.2, 0.5)
        self.rnn_dropout = tune.uniform(0.2, 0.5)
        self.time_step = tune.choice(list(time_step))
        self.long_num = tune.choice(list(long_num))
        self.cnn_height = tune.choice(list(cnn_height))
        self.cnn_hid_size = tune.choice(list(cnn_hid_size))
        self.ar_size = tune.choice(list(ar_size))
        # derived from the sampled long_num and time_step of the same config
        self.past_seq_len = tune.sample_from(
            lambda spec:
            (spec.config.long_num + 1) * spec.config.time_step)

    def search_space(self, all_available_features):
        """
        Build the MTNet search space.

        :param all_available_features: features to sample from; at least 3
            are expected because at least 3 features are drawn
        :return: dict describing the search space
        """
        def sample_features(spec):
            # randint's upper bound is exclusive: + 1 lets the complete
            # feature set be drawn and keeps the range non-empty when
            # exactly 3 features are available.
            n_selected = np.random.randint(
                low=3, high=len(all_available_features) + 1)
            chosen = np.random.choice(
                all_available_features, size=n_selected, replace=False)
            return json.dumps(list(chosen))

        return {
            "selected_features": tune.sample_from(sample_features),
            "model": "MTNet",
            "lr": self.lr,
            "batch_size": self.batch_size,
            "epochs": self.epochs,
            "cnn_dropout": self.cnn_dropout,
            "rnn_dropout": self.rnn_dropout,
            "time_step": self.time_step,
            "long_num": self.long_num,
            "ar_size": self.ar_size,
            "past_seq_len": self.past_seq_len,
            "cnn_hid_size": self.cnn_hid_size,
            "cnn_height": self.cnn_height,
        }
class RandomRecipe(Recipe):
    """
    Pure random sample Recipe. Often used as baseline.
       tsp = TimeSequencePredictor(...,recipe = RandomRecipe(5))
    """

    def __init__(
            self,
            num_rand_samples=1,
            look_back=2,
            epochs=5,
            reward_metric=-0.05,
            training_iteration=10):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param look_back: the length to look back, either a tuple with 2 int values,
          which is in format is (min len, max len), or a single int, which is
          a fixed length to look back.
        :param epochs: no. of epochs to train in each iteration
        :param reward_metric: the rewarding metric value, when reached, stop trial
        :param training_iteration: no. of iterations for training (n epochs) in trials
        """
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()
        self.num_samples = num_rand_samples
        self.reward_metric = reward_metric
        self.training_iteration = training_iteration
        self.epochs = epochs
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(
            look_back)

    def search_space(self, all_available_features):
        """
        Build the purely random search space.

        :param all_available_features: features to sample from; at least 3
            are expected because at least 3 features are drawn
        :return: dict describing the search space
        """
        def sample_features(spec):
            # randint's upper bound is exclusive: + 1 lets the complete
            # feature set be drawn and keeps the range non-empty when
            # exactly 3 features are available.
            n_selected = np.random.randint(
                low=3, high=len(all_available_features) + 1)
            chosen = np.random.choice(
                all_available_features, size=n_selected, replace=False)
            return json.dumps(list(chosen))

        return {
            # -------- feature related parameters
            "selected_features": tune.sample_from(sample_features),

            "model": tune.choice(["LSTM", "Seq2seq"]),

            # --------- Vanilla LSTM model parameters
            "lstm_1_units": tune.choice([8, 16, 32, 64, 128]),
            "dropout_1": tune.uniform(0.2, 0.5),
            "lstm_2_units": tune.choice([8, 16, 32, 64, 128]),
            "dropout_2": tune.uniform(0.2, 0.5),

            # ----------- Seq2Seq model parameters
            "latent_dim": tune.choice([32, 64, 128, 256]),
            "dropout": tune.uniform(0.2, 0.5),

            # ----------- optimization parameters
            "lr": tune.uniform(0.001, 0.01),
            # a single value is drawn per config, so no `replace` keyword
            "batch_size": tune.choice([32, 64, 1024]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }
class BayesRecipe(Recipe):
    """
    A Bayes search Recipe. (Experimental)
       tsp = TimeSequencePredictor(...,recipe = BayesRecipe(5))
    """

    def __init__(
            self,
            num_samples=1,
            look_back=2,
            epochs=5,
            reward_metric=-0.05,
            training_iteration=5):
        """
        Constructor

        :param num_samples: number of hyper-param configurations sampled
        :param look_back: the length to look back, either a tuple with 2 int values,
          which is in format is (min len, max len), or a single int, which is
          a fixed length to look back.
        :param epochs: no. of epochs to train in each iteration
        :param reward_metric: the rewarding metric value, when reached, stop trial
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :raises ValueError: if the max look back or the single int is smaller
            than 2, if min len is larger than max len, or if look_back is
            neither an int nor a tuple of 2 ints
        """
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()
        self.num_samples = num_samples
        self.reward_metric = reward_metric
        self.training_iteration = training_iteration
        self.epochs = epochs

        is_int_pair = (
            isinstance(look_back, tuple)
            and len(look_back) == 2
            and isinstance(look_back[0], int)
            and isinstance(look_back[1], int))

        if is_int_pair:
            min_len, max_len = look_back
            if max_len < 2:
                raise ValueError(
                    "The max look back value should be at least 2")
            if min_len < 2:
                print(
                    "The input min look back value is smaller than 2. "
                    "We sample from range (2, {}) instead.".format(max_len))
                # actually apply the lower bound announced above
                min_len = 2
            if min_len > max_len:
                raise ValueError(
                    "The min look back value {} should not be larger than "
                    "the max look back value {}".format(min_len, max_len))
            # a range is searched, so it goes into the search space
            self.bayes_past_seq_config = {
                "past_seq_len_float": (min_len, max_len)}
            self.fixed_past_seq_config = {}
        elif isinstance(look_back, int):
            if look_back < 2:
                # one formatted message instead of a two-element args tuple
                raise ValueError(
                    "look back value should not be smaller than 2. "
                    "Current value is {}".format(look_back))
            # a single value is not searched, so it goes into the fixed params
            self.bayes_past_seq_config = {}
            self.fixed_past_seq_config = {"past_seq_len": look_back}
        else:
            raise ValueError(
                "look back is {}.\n "
                "look_back should be either a tuple with 2 int values:"
                " (min_len, max_len) or a single int".format(look_back))

    def search_space(self, all_available_features):
        """
        Build the search space as a dict of (lower, upper) bounds.

        :param all_available_features: feature names; each one contributes a
            "bayes_feature_<name>" entry with bounds (0.3, 1)
        :return: dict mapping parameter name to its bounds, including the
            past sequence range when look_back was given as a tuple
        """
        total_space = {
            # --------- model parameters
            "lstm_1_units_float": (8, 128),
            "dropout_1": (0.2, 0.5),
            "lstm_2_units_float": (8, 128),
            "dropout_2": (0.2, 0.5),

            # ----------- optimization parameters
            "lr": (0.001, 0.01),
            "batch_size_log": (5, 10),
        }
        total_space.update(
            ("bayes_feature_{}".format(feature), (0.3, 1))
            for feature in all_available_features)
        total_space.update(self.bayes_past_seq_config)
        return total_space

    def fixed_params(self):
        """
        :return: dict of parameters that are not searched: "epochs", plus
            "past_seq_len" when look_back was given as a single int
        """
        total_fixed_params = {
            "epochs": self.epochs,
            # "batch_size": 1024,
        }
        total_fixed_params.update(self.fixed_past_seq_config)
        return total_fixed_params

    def search_algorithm_params(self):
        """
        :return: dict with the "utility_kwargs" for the search algorithm
        """
        return {
            "utility_kwargs": {
                "kind": "ucb",
                "kappa": 2.5,
                "xi": 0.0
            }
        }

    def search_algorithm(self):
        """
        :return: the name of the search algorithm, 'BayesOpt'
        """
        return 'BayesOpt'
class XgbRegressorGridRandomRecipe(Recipe):
    """
    Grid+Random Recipe for XGBRegressor.
    """

    def __init__(
            self,
            num_rand_samples=1,
            n_estimators=[8, 15],
            max_depth=[10, 15],
            n_jobs=-1,
            tree_method='hist',
            random_state=2,
            seed=0,
            lr=(1e-4, 1e-1),
            subsample=0.8,
            colsample_bytree=0.8,
            min_child_weight=[1, 2, 3],
            gamma=0,
            reg_alpha=0,
            reg_lambda=1):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param n_estimators: grid search candidates for "n_estimators"
        :param max_depth: grid search candidates for "max_depth"
        :param lr: sequence whose first and last items bound the log-uniform
            sampling of "lr"
        :param min_child_weight: random search candidates for "min_child_weight"
        :param n_jobs: stored on the instance only
        :param tree_method: stored on the instance only
        :param random_state: stored on the instance only
        :param seed: stored on the instance only
        :param subsample: stored on the instance only
        :param colsample_bytree: stored on the instance only
        :param gamma: stored on the instance only
        :param reg_alpha: stored on the instance only
        :param reg_lambda: stored on the instance only
        """
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()
        self.num_samples = num_rand_samples

        # plain values, kept as attributes and not part of search_space
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed
        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda
        self.subsample = subsample

        # Searched values. Candidates are copied into fresh lists so that the
        # shared default lists are never aliased by the search space.
        self.n_estimators = tune.grid_search(list(n_estimators))
        self.max_depth = tune.grid_search(list(max_depth))
        self.lr = tune.loguniform(lr[0], lr[-1])
        self.min_child_weight = tune.choice(list(min_child_weight))

    def search_space(self, all_available_features):
        """
        Build the XGBRegressor search space.

        :param all_available_features: not used by this recipe
        :return: dict describing the search space
        """
        return {
            # -------- feature related parameters
            "model": "XGBRegressor",

            "imputation": tune.choice(["LastFillImpute", "FillZeroImpute"]),
            "n_estimators": self.n_estimators,
            "max_depth": self.max_depth,
            "min_child_weight": self.min_child_weight,
            "lr": self.lr
        }
class XgbRegressorSkOptRecipe(Recipe):
    """
    Recipe for XGBRegressor that names 'SkOpt' as its search algorithm and
    "AsyncHyperBand" as its scheduler algorithm.
    """

    def __init__(
            self,
            num_rand_samples=10,
            n_estimators_range=(50, 1000),
            max_depth_range=(2, 15),
    ):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled
        :param n_estimators_range: (lower, upper) range for "n_estimators"
        :param max_depth_range: (lower, upper) range for "max_depth"
        """
        # Zero-argument super(): super(self.__class__, self) recurses forever
        # as soon as this class is subclassed.
        super().__init__()

        self.num_samples = num_rand_samples

        self.n_estimators_range = n_estimators_range
        self.max_depth_range = max_depth_range

    def search_space(self, all_available_features):
        """
        :param all_available_features: not used by this recipe
        :return: dict mapping "n_estimators" and "max_depth" to their ranges
        """
        return {
            # -------- feature related parameters
            "n_estimators": self.n_estimators_range,
            "max_depth": self.max_depth_range,
        }

    def fixed_params(self):
        """
        :return: dict mapping "n_estimators" and "max_depth" to
            ``tune.randint`` samplers built from the two ends of their ranges
        """
        total_fixed_params = {
            "n_estimators": tune.randint(self.n_estimators_range[0],
                                         self.n_estimators_range[1]),
            "max_depth": tune.randint(self.max_depth_range[0],
                                      self.max_depth_range[1]),
        }
        return total_fixed_params

    def opt_params(self):
        """
        :return: list of ``skopt.space.Integer`` dimensions, first for
            "n_estimators", then for "max_depth"
        """
        # imported lazily so skopt is only needed when this method is called
        from skopt.space import Integer
        params = [
            Integer(self.n_estimators_range[0], self.n_estimators_range[1]),
            Integer(self.max_depth_range[0], self.max_depth_range[1]),
        ]
        return params

    def search_algorithm(self):
        """
        :return: the name of the search algorithm, 'SkOpt'
        """
        return 'SkOpt'

    def scheduler_algorithm(self):
        """
        :return: the name of the scheduler algorithm, "AsyncHyperBand"
        """
        return "AsyncHyperBand"