Source code for zoo.feature.image.imageset


# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from bigdl.transform.vision.image import ImageFrame
from bigdl.util.common import *
from zoo.common.utils import callZooFunc

def is_local(self):
    """
    Whether the wrapped image set is a LocalImageSet.

    NOTE(review): this module-level function takes ``self`` and has the same
    body as ``ImageSet.is_local``; it looks like a stray copy of that method --
    confirm whether anything still calls it.

    :param self: an object exposing ``bigdl_type`` and ``value`` attributes
    :return: the result of the JVM-side "isLocalImageSet" call
    """
    return callZooFunc(self.bigdl_type, "isLocalImageSet", self.value)
class ImageSet(JavaValue):
    """
    ImageSet wraps a set of ImageFeature

    The instance keeps the JVM handle in ``self.value`` and delegates element
    access (images, labels, predictions) to ``self.image_set``, which is a
    LocalImageSet or a DistributedImageSet built around the same handle.
    """

    def __init__(self, jvalue, bigdl_type="float"):
        """
        Wrap an existing JVM-side image set.

        :param jvalue: the JVM object to wrap
        :param bigdl_type: numeric type tag forwarded to every JVM call
        """
        self.value = jvalue
        self.bigdl_type = bigdl_type
        # Forward bigdl_type so the delegate issues its JVM calls with the same
        # type tag as this wrapper instead of silently falling back to "float".
        if self.is_local():
            self.image_set = LocalImageSet(jvalue=self.value,
                                           bigdl_type=self.bigdl_type)
        else:
            self.image_set = DistributedImageSet(jvalue=self.value,
                                                 bigdl_type=self.bigdl_type)

    def is_local(self):
        """
        whether this is a LocalImageSet
        """
        return callZooFunc(self.bigdl_type, "isLocalImageSet", self.value)

    def is_distributed(self):
        """
        whether this is a DistributedImageSet
        """
        return callZooFunc(self.bigdl_type, "isDistributedImageSet", self.value)

    @property
    def label_map(self):
        """
        :return: the labelMap of this ImageSet, None if the ImageSet does not have a labelMap
        """
        return callZooFunc(self.bigdl_type, "imageSetGetLabelMap", self.value)

    @classmethod
    def read(cls, path, sc=None, min_partitions=1, resize_height=-1,
             resize_width=-1, image_codec=-1, with_label=False, one_based_label=True,
             bigdl_type="float"):
        """
        Read images as Image Set

        :param path: path to read images. If sc is defined, path can be local or HDFS.
               Wildcard characters are supported. If with_label is set to true, path
               should be a directory that has two levels. The first level is class
               folders, and the second is images. All images belonging to the same
               class should be put into the same class folder, so each image in the
               path is labeled by the folder it belongs to.
        :param sc: SparkContext
        :param min_partitions: A suggestion value of the minimal splitting number for
               input data.
        :param resize_height: height after resize, by default is -1 which will not
               resize the image
        :param resize_width: width after resize, by default is -1 which will not
               resize the image
        :param image_codec: specifying the color type of a loaded image, same as in
               OpenCV.imread. By default is Imgcodecs.CV_LOAD_IMAGE_UNCHANGED(-1)
        :param with_label: whether to treat folders in the path as image classification
               labels and read the labels into ImageSet.
        :param one_based_label: whether to use one based label
        :param bigdl_type: numeric type tag used for the JVM call and for the
               returned ImageSet
        :return: ImageSet
        """
        jvalue = callZooFunc(bigdl_type, "readImageSet", path, sc, min_partitions,
                             resize_height, resize_width, image_codec, with_label,
                             one_based_label)
        # Keep the wrapper's type tag in sync with the one used to create it.
        return ImageSet(jvalue=jvalue, bigdl_type=bigdl_type)

    @classmethod
    def from_image_frame(cls, image_frame, bigdl_type="float"):
        """
        Create an ImageSet from an ImageFrame.

        :param image_frame: the ImageFrame handed to the JVM-side
               "imageFrameToImageSet" call
        :param bigdl_type: numeric type tag used for the JVM call and for the
               returned ImageSet
        :return: ImageSet
        """
        jvalue = callZooFunc(bigdl_type, "imageFrameToImageSet", image_frame)
        return ImageSet(jvalue=jvalue, bigdl_type=bigdl_type)

    @classmethod
    def from_rdds(cls, image_rdd, label_rdd=None, bigdl_type="float"):
        """
        Create a ImageSet from rdds of ndarray.

        :param image_rdd: a rdd of ndarray, each ndarray should has dimension of 3 or 4 (3D images)
        :param label_rdd: a rdd of ndarray
        :param bigdl_type: numeric type tag used for the JVM call and for the
               returned ImageSet
        :return: a DistributedImageSet
        """
        image_rdd = image_rdd.map(lambda x: JTensor.from_ndarray(x))
        if label_rdd is not None:
            label_rdd = label_rdd.map(lambda x: JTensor.from_ndarray(x))
        return ImageSet(jvalue=callZooFunc(bigdl_type, "createDistributedImageSet",
                                           image_rdd, label_rdd),
                        bigdl_type=bigdl_type)

    def transform(self, transformer):
        """
        transformImageSet

        :param transformer: the transformer passed to the JVM-side
               "transformImageSet" call
        :return: a new ImageSet wrapping the result of that call
        """
        return ImageSet(callZooFunc(self.bigdl_type, "transformImageSet",
                                    transformer, self.value),
                        self.bigdl_type)

    def get_image(self, key="floats", to_chw=True):
        """
        get image from ImageSet

        :param key: forwarded unchanged to the underlying local/distributed image set
        :param to_chw: forwarded unchanged to the underlying local/distributed image set
        """
        return self.image_set.get_image(key, to_chw)

    def get_label(self):
        """
        get label from ImageSet
        """
        return self.image_set.get_label()

    def get_predict(self, key="predict"):
        """
        get prediction from ImageSet

        :param key: forwarded unchanged to the underlying local/distributed image set
        """
        return self.image_set.get_predict(key)

    def to_image_frame(self, bigdl_type="float"):
        """
        Convert this ImageSet to an ImageFrame.

        :param bigdl_type: numeric type tag used for the JVM call and for the
               returned ImageFrame. Note this is the argument, not
               ``self.bigdl_type``.
        :return: ImageFrame
        """
        return ImageFrame(callZooFunc(bigdl_type, "imageSetToImageFrame", self.value),
                          bigdl_type)
class LocalImageSet(ImageSet):
    """
    LocalImageSet wraps a list of ImageFeature
    """

    def __init__(self, image_list=None, label_list=None, jvalue=None, bigdl_type="float"):
        """
        Wrap an existing JVM object, or build one from ndarray lists.

        :param image_list: list of image ndarrays; used only when jvalue is not given
        :param label_list: optional list of label ndarrays
        :param jvalue: an existing JVM object to wrap; takes precedence over the lists
        :param bigdl_type: numeric type tag forwarded to every JVM call
        """
        assert jvalue or image_list, "jvalue and image_list cannot be None in the same time"
        if jvalue:
            self.value = jvalue
        else:
            # init from image ndarray list and label list (optional)
            image_tensor_list = [JTensor.from_ndarray(image) for image in image_list]
            label_tensor_list = [JTensor.from_ndarray(label) for label in label_list] \
                if label_list else None
            self.value = callZooFunc(bigdl_type, JavaValue.jvm_class_constructor(self),
                                     image_tensor_list, label_tensor_list)
        self.bigdl_type = bigdl_type

    def get_image(self, key="floats", to_chw=True):
        """
        get image list from ImageSet

        :param key: passed to the JVM-side "localImageSetToImageTensor" call
        :param to_chw: passed to the JVM-side "localImageSetToImageTensor" call
        :return: list of ndarray
        """
        tensors = callZooFunc(self.bigdl_type, "localImageSetToImageTensor",
                              self.value, key, to_chw)
        return [tensor.to_ndarray() for tensor in tensors]

    def get_label(self):
        """
        get label list from ImageSet

        :return: list of ndarray
        """
        labels = callZooFunc(self.bigdl_type, "localImageSetToLabelTensor", self.value)
        # Build a real list: on Python 3 a bare map() is a one-shot iterator,
        # which contradicts the documented return type and the sibling getters.
        return [tensor.to_ndarray() for tensor in labels]

    def get_predict(self, key="predict"):
        """
        get prediction list from ImageSet

        :param key: passed to the JVM-side "localImageSetToPredict" call
        :return: list of 2-tuples; the second element is a list of ndarray, or
                 None when the record's second element is empty/None
        """
        predicts = callZooFunc(self.bigdl_type, "localImageSetToPredict", self.value, key)
        return [(predict[0], [x.to_ndarray() for x in predict[1]])
                if predict[1] else (predict[0], None)
                for predict in predicts]
class DistributedImageSet(ImageSet):
    """
    DistributedImageSet wraps an RDD of ImageFeature
    """

    def __init__(self, image_rdd=None, label_rdd=None, jvalue=None, bigdl_type="float"):
        """
        Wrap an existing JVM object, or build one from ndarray RDDs.

        :param image_rdd: rdd of image ndarrays; used only when jvalue is not given
        :param label_rdd: optional rdd of label ndarrays
        :param jvalue: an existing JVM object to wrap; takes precedence over the rdds
        :param bigdl_type: numeric type tag forwarded to every JVM call
        """
        assert jvalue or image_rdd, "jvalue and image_rdd cannot be None in the same time"

        def _as_jtensor(array):
            # Deliberately free of any reference to self.
            return JTensor.from_ndarray(array)

        if not jvalue:
            # Build the JVM object from the image rdd and the optional label rdd.
            image_tensor_rdd = image_rdd.map(_as_jtensor)
            if label_rdd:
                label_tensor_rdd = label_rdd.map(_as_jtensor)
            else:
                label_tensor_rdd = None
            constructor_name = JavaValue.jvm_class_constructor(self)
            self.value = callZooFunc(bigdl_type, constructor_name,
                                     image_tensor_rdd, label_tensor_rdd)
        else:
            self.value = jvalue
        self.bigdl_type = bigdl_type

    def get_image(self, key="floats", to_chw=True):
        """
        get image rdd from ImageSet

        :param key: passed to the JVM-side "distributedImageSetToImageTensorRdd" call
        :param to_chw: passed to the JVM-side "distributedImageSetToImageTensorRdd" call
        :return: the returned rdd with ``to_ndarray()`` mapped over every element
        """
        def _to_ndarray(tensor):
            return tensor.to_ndarray()

        tensor_rdd = callZooFunc(self.bigdl_type, "distributedImageSetToImageTensorRdd",
                                 self.value, key, to_chw)
        return tensor_rdd.map(_to_ndarray)

    def get_label(self):
        """
        get label rdd from ImageSet

        :return: the returned rdd with ``to_ndarray()`` mapped over every element
        """
        def _to_ndarray(tensor):
            return tensor.to_ndarray()

        tensor_rdd = callZooFunc(self.bigdl_type, "distributedImageSetToLabelTensorRdd",
                                 self.value)
        return tensor_rdd.map(_to_ndarray)

    def get_predict(self, key="predict"):
        """
        get prediction rdd from ImageSet

        :param key: passed to the JVM-side "distributedImageSetToPredict" call
        :return: rdd of 2-tuples; the second element is a list of ndarray, or
                 None when the record's second element is empty/None
        """
        def _convert(record):
            outputs = record[1]
            if not outputs:
                return (record[0], None)
            return (record[0], [item.to_ndarray() for item in outputs])

        predicts = callZooFunc(self.bigdl_type, "distributedImageSetToPredict",
                               self.value, key)
        return predicts.map(_convert)