Source code for zoo.automl.model.tcmf.data_loader

# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# This file is adapted from the DeepGlo Project. https://github.com/rajatsen91/deepglo
#
# Note: This license has also been called the "New BSD License" or "Modified BSD License". See also
# the 2-clause BSD License.
#
# Copyright (c) 2019 The DeepGLO Project.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions
# and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of
# conditions and the following disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to
# endorse or promote products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


import numpy as np
import torch


class TCMFDataLoader(object):
    """
    Data Loader Class for DeepGLO

    Iterates over the time-series matrix ``Ymat`` in blocks of ``vbsize`` rows
    by ``hbsize`` columns, restricted to the first ``end_index`` time-points,
    and serves the ``val_len`` time-points starting at ``end_index`` as a
    validation block.
    """

    def __init__(
        self,
        Ymat,
        covariates=None,
        Ycov=None,
        vbsize=200,
        hbsize=100,
        end_index=20000,
        val_len=30,
        shuffle=False,
    ):
        """
        Arguments:
        Ymat: time-series matrix n*T
        covariates: global covariates common for all time series r*T, where r is the number of
            covariates
        Ycov: per time-series covariates n*l*T, l such covariates per time-series
        All of the above arguments are numpy arrays
        vbsize: vertical batch size
        hbsize: horizontal batch size
        end_index: training and validation set is only from 0:end_index
        val_len: validation length. The last 'val_len' time-points for every time-series is the
            validation set
        shuffle: data is shuffled if True (this is deprecated and set to False)
        """
        n, _ = Ymat.shape
        self.vindex = 0
        self.hindex = 0
        self.epoch = 0
        self.vbsize = vbsize
        self.hbsize = hbsize
        self.Ymat = Ymat
        self.val_len = val_len
        self.end_index = end_index
        # First row of the validation block. The upper bound is clamped to 1 so
        # that matrices with n <= vbsize + 5 rows yield 0 instead of making
        # np.random.randint raise ValueError (low >= high).
        self.val_index = np.random.randint(0, max(1, n - self.vbsize - 5))
        self.shuffle = shuffle
        self.I = np.arange(n)
        self.covariates = covariates
        self.Ycov = Ycov

    def _build_input(self, series, rows, cols):
        """
        Turn a 2-D block of ``Ymat`` into the float input tensor.

        Arguments:
        series: the nd*td block already sliced from Ymat
        rows: slice of series (vertical) indices the block was taken from
        cols: slice of time (horizontal) indices the block was taken from

        Returns:
        tensor of shape nd*cd*td; channel 0 is the series itself, followed by
        the global covariates (if any) and then the per-series covariates (if any)
        """
        nd, td = series.shape
        inp = torch.from_numpy(series).view(1, nd, td).transpose(0, 1).float()
        channels = [inp]
        if self.covariates is not None:
            covs = self.covariates[:, cols]
            # Global covariates are shared, so repeat them once per series in the block.
            rcovs = np.repeat(
                covs.reshape(1, covs.shape[0], covs.shape[1]), repeats=nd, axis=0
            )
            channels.append(torch.from_numpy(rcovs).float())
        if self.Ycov is not None:
            channels.append(torch.from_numpy(self.Ycov[rows, :, cols]).float())
        if len(channels) == 1:
            return inp
        return torch.cat(channels, 1)

    @staticmethod
    def _build_target(shifted, nd, td):
        """
        Turn the one-step-shifted block into a float tensor of shape nd*1*td.

        ``view`` raises if ``shifted`` does not hold exactly nd*td values, e.g.
        when the shifted window runs past the last column of Ymat.
        """
        return torch.from_numpy(shifted).view(1, nd, td).transpose(0, 1).float()

    def next_batch(self):
        """
        Returns:
        data is returned as pytorch tensor of shape nd*cd*td where nd is the number of
        series in the batch (at most vbsize), td is the number of time-points in the
        batch (at most hbsize) and cd is the number of channels (depends on covariates)
        inp: input batch, NaNs replaced by 0
        out: one shifted output batch, NaNs replaced by 0
        vindex: vertical index of the loader after this call, i.e. where the next
            batch starts (not the start of the returned batch)
        hindex: horizontal index of the loader after this call, i.e. where the next
            batch starts (not the start of the returned batch)
        """
        n = self.Ymat.shape[0]
        # Remember where this batch starts before moving the cursor.
        pr_vindex, pr_hindex = self.vindex, self.hindex
        if self.hindex + self.hbsize + 1 >= self.end_index:
            # Last horizontal block of this row band: rewind and move down.
            self.hindex = 0
            if self.vindex + self.vbsize >= n:
                # Last row band as well: one full pass over the data is done.
                self.vindex = 0
                self.epoch = self.epoch + 1
                if self.shuffle:
                    # NOTE: only Ymat is permuted here; Ycov is left in its
                    # original row order.
                    self.I = np.random.choice(n, n, replace=False)
                    self.Ymat = self.Ymat[self.I, :]
            else:
                self.vindex = self.vindex + self.vbsize
        else:
            self.hindex = self.hindex + self.hbsize

        h_stop = min(self.end_index, pr_hindex + self.hbsize)
        rows = slice(int(pr_vindex), int(pr_vindex + self.vbsize))
        cols = slice(int(pr_hindex), int(h_stop))
        data = self.Ymat[rows, cols]
        # Target is the same window moved one time-step forward.
        out_data = self.Ymat[rows, int(pr_hindex + 1): int(h_stop + 1)]
        nd, td = data.shape

        inp = self._build_input(data, rows, cols)
        out = self._build_target(out_data, nd, td)

        # Out-of-place NaN replacement: when Ymat is already float32 the tensors
        # above can share memory with it, and an in-place write would silently
        # overwrite the caller's data.
        inp = inp.masked_fill(torch.isnan(inp), 0)
        out = out.masked_fill(torch.isnan(out), 0)

        return inp, out, self.vindex, self.hindex

    def supply_test(self):
        """
        Supplies validation set in the same format as above

        The block covers rows val_index:val_index + vbsize and time-points
        end_index:end_index + val_len; the target is that window shifted by one
        time-step. Unlike next_batch, NaNs are not replaced here. The returned
        vindex and hindex are the current training cursor of the loader.
        """
        index = self.val_index
        rows = slice(int(index), int(index + self.vbsize))
        cols = slice(int(self.end_index), int(self.end_index + self.val_len))
        in_data = self.Ymat[rows, cols]
        out_data = self.Ymat[
            rows, int(self.end_index + 1): int(self.end_index + self.val_len + 1)
        ]
        nd, td = in_data.shape

        inp = self._build_input(in_data, rows, cols)
        out = self._build_target(out_data, nd, td)
        return inp, out, self.vindex, self.hindex
analytics-zoo 0.9.0.dev0 documentation

Source code for zoo.automl.model.tcmf.data_loader