# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from bigdl.transform.vision.image import ImageFrame
from bigdl.util.common import *
from zoo.common.utils import callZooFunc
def is_local(self):
    """
    whether this is a LocalImageSet

    Forwards ``self.value`` to the backend function "isLocalImageSet" through
    callZooFunc and returns its answer unchanged.

    :param self: object exposing the ``bigdl_type`` and ``value`` attributes
    :return: result of the "isLocalImageSet" backend call
             (presumably a boolean -- TODO confirm against the JVM side)
    """
    return callZooFunc(self.bigdl_type, "isLocalImageSet", self.value)
class ImageSet(JavaValue):
    """
    ImageSet wraps a set of ImageFeature

    The instance keeps the backend handle in ``value`` and a concrete
    LocalImageSet or DistributedImageSet built on the same handle in
    ``image_set``; the ``get_*`` accessors delegate to that object.
    """

    def __init__(self, jvalue, bigdl_type="float"):
        """
        :param jvalue: backend handle of an existing image set
        :param bigdl_type: numeric type tag passed to every backend call
        """
        self.value = jvalue
        self.bigdl_type = bigdl_type
        # Forward bigdl_type so the delegate issues its backend calls with the
        # same type tag as this wrapper instead of silently using "float".
        if self.is_local():
            self.image_set = LocalImageSet(jvalue=self.value, bigdl_type=bigdl_type)
        else:
            self.image_set = DistributedImageSet(jvalue=self.value, bigdl_type=bigdl_type)

    def is_local(self):
        """
        whether this is a LocalImageSet
        """
        return callZooFunc(self.bigdl_type, "isLocalImageSet", self.value)

    def is_distributed(self):
        """
        whether this is a DistributedImageSet
        """
        return callZooFunc(self.bigdl_type, "isDistributedImageSet", self.value)

    @property
    def label_map(self):
        """
        :return: the labelMap of this ImageSet, None if the ImageSet does not have a labelMap
        """
        return callZooFunc(self.bigdl_type, "imageSetGetLabelMap", self.value)

    @classmethod
    def read(cls, path, sc=None, min_partitions=1, resize_height=-1,
             resize_width=-1, image_codec=-1, with_label=False, one_based_label=True,
             bigdl_type="float"):
        """
        Read images as Image Set

        :param path: path to read images
               if sc is defined, path can be local or HDFS. Wildcard characters are supported.
               if with_label is set to true, path should be a directory that has two levels.
               The first level is class folders, and the second is images. All images that
               belong to the same class should be put into the same class folder, so each
               image in the path is labeled by the folder it belongs to.
        :param sc: SparkContext
        :param min_partitions: A suggestion value of the minimal splitting number for input data.
        :param resize_height: height after resize, by default is -1 which will not resize the image
        :param resize_width: width after resize, by default is -1 which will not resize the image
        :param image_codec: specifying the color type of a loaded image, same as in
               OpenCV.imread. By default is Imgcodecs.CV_LOAD_IMAGE_UNCHANGED(-1)
        :param with_label: whether to treat folders in the path as image classification
               labels and read the labels into ImageSet.
        :param one_based_label: whether to use one based label
        :param bigdl_type: numeric type tag used for the backend call and the returned ImageSet
        :return: ImageSet
        """
        jvalue = callZooFunc(bigdl_type, "readImageSet", path,
                             sc, min_partitions, resize_height,
                             resize_width, image_codec, with_label,
                             one_based_label)
        # Keep the wrapper's type tag in sync with the one used for the read.
        return ImageSet(jvalue=jvalue, bigdl_type=bigdl_type)

    @classmethod
    def from_image_frame(cls, image_frame, bigdl_type="float"):
        """
        Create an ImageSet from an ImageFrame.

        :param image_frame: the ImageFrame to convert
        :param bigdl_type: numeric type tag used for the backend call and the returned ImageSet
        :return: ImageSet
        """
        jvalue = callZooFunc(bigdl_type, "imageFrameToImageSet", image_frame)
        return ImageSet(jvalue=jvalue, bigdl_type=bigdl_type)

    @classmethod
    def from_rdds(cls, image_rdd, label_rdd=None, bigdl_type="float"):
        """
        Create a ImageSet from rdds of ndarray.

        :param image_rdd: a rdd of ndarray, each ndarray should has dimension of 3 or 4 (3D images)
        :param label_rdd: a rdd of ndarray
        :param bigdl_type: numeric type tag used for the backend call and the returned ImageSet
        :return: a DistributedImageSet
        """
        image_rdd = image_rdd.map(lambda x: JTensor.from_ndarray(x))
        if label_rdd is not None:
            label_rdd = label_rdd.map(lambda x: JTensor.from_ndarray(x))
        return ImageSet(jvalue=callZooFunc(bigdl_type, "createDistributedImageSet",
                                           image_rdd, label_rdd), bigdl_type=bigdl_type)

    def get_image(self, key="floats", to_chw=True):
        """
        get image from ImageSet

        :param key: forwarded to the wrapped image set (presumably the ImageFeature
               key under which the image tensor is stored -- TODO confirm)
        :param to_chw: forwarded to the wrapped image set (presumably requests
               channel-first layout -- TODO confirm)
        :return: images as ndarray; a list for a local set, an RDD for a distributed set
        """
        return self.image_set.get_image(key, to_chw)

    def get_label(self):
        """
        get label from ImageSet

        :return: labels converted to ndarray, as produced by the wrapped image set
        """
        return self.image_set.get_label()

    def get_predict(self, key="predict"):
        """
        get prediction from ImageSet

        :param key: forwarded to the wrapped image set
        :return: predictions as produced by the wrapped image set
        """
        return self.image_set.get_predict(key)

    def to_image_frame(self, bigdl_type="float"):
        """
        Convert this ImageSet to an ImageFrame.

        :param bigdl_type: numeric type tag used for the conversion call and the ImageFrame
        :return: ImageFrame
        """
        return ImageFrame(callZooFunc(bigdl_type, "imageSetToImageFrame", self.value), bigdl_type)
class LocalImageSet(ImageSet):
    """
    LocalImageSet wraps a list of ImageFeature
    """

    def __init__(self, image_list=None, label_list=None, jvalue=None, bigdl_type="float"):
        """
        Build from an existing backend handle, or from ndarray lists.

        :param image_list: list of image ndarray, used only when jvalue is not given
        :param label_list: optional list of label ndarray
        :param jvalue: backend handle of an existing local image set
        :param bigdl_type: numeric type tag passed to every backend call
        """
        # ImageSet.__init__ is not called here: it constructs a LocalImageSet /
        # DistributedImageSet delegate itself, so chaining to it would recurse.
        assert jvalue or image_list, "jvalue and image_list cannot be None in the same time"
        if jvalue:
            self.value = jvalue
        else:
            # init from image ndarray list and label ndarray list (optional)
            image_tensor_list = [JTensor.from_ndarray(image) for image in image_list]
            label_tensor_list = [JTensor.from_ndarray(label) for label in label_list] \
                if label_list else None
            self.value = callZooFunc(bigdl_type, JavaValue.jvm_class_constructor(self),
                                     image_tensor_list, label_tensor_list)
        self.bigdl_type = bigdl_type

    def get_image(self, key="floats", to_chw=True):
        """
        get image list from ImageSet

        :param key: forwarded to the backend (presumably the ImageFeature key under
               which the image tensor is stored -- TODO confirm)
        :param to_chw: forwarded to the backend (presumably requests channel-first
               layout -- TODO confirm)
        :return: list of ndarray
        """
        tensors = callZooFunc(self.bigdl_type, "localImageSetToImageTensor",
                              self.value, key, to_chw)
        return [tensor.to_ndarray() for tensor in tensors]

    def get_label(self):
        """
        get label list from ImageSet

        :return: list of ndarray
        """
        labels = callZooFunc(self.bigdl_type, "localImageSetToLabelTensor", self.value)
        # Materialize a real list: a bare map() is a lazy, one-shot iterator on
        # Python 3, which contradicts the "list" contract shared with get_image.
        return [tensor.to_ndarray() for tensor in labels]

    def get_predict(self, key="predict"):
        """
        get prediction list from ImageSet

        :param key: forwarded to the backend
        :return: list of 2-tuples; the first element of each backend entry is passed
                 through, the second is a list of ndarray, or None when the entry
                 carries no (or an empty) prediction
        """
        predicts = callZooFunc(self.bigdl_type, "localImageSetToPredict", self.value, key)
        return [(predict[0], [x.to_ndarray() for x in predict[1]]) if predict[1]
                else (predict[0], None)
                for predict in predicts]
class DistributedImageSet(ImageSet):
    """
    DistributedImageSet wraps an RDD of ImageFeature
    """

    def __init__(self, image_rdd=None, label_rdd=None, jvalue=None, bigdl_type="float"):
        """
        Build from an existing backend handle, or from RDDs of ndarray.

        :param image_rdd: rdd of image ndarray, used only when jvalue is not given
        :param label_rdd: optional rdd of label ndarray
        :param jvalue: backend handle of an existing distributed image set
        :param bigdl_type: numeric type tag passed to every backend call
        """
        assert jvalue or image_rdd, "jvalue and image_rdd cannot be None in the same time"
        if not jvalue:
            # No handle supplied: convert the ndarray rdds to JTensor rdds and
            # let the backend constructor create the image set.
            def as_jtensor(array):
                return JTensor.from_ndarray(array)

            images = image_rdd.map(as_jtensor)
            labels = None
            if label_rdd:
                labels = label_rdd.map(as_jtensor)
            constructor_name = JavaValue.jvm_class_constructor(self)
            jvalue = callZooFunc(bigdl_type, constructor_name, images, labels)
        self.value = jvalue
        self.bigdl_type = bigdl_type

    def get_image(self, key="floats", to_chw=True):
        """
        get image rdd from ImageSet

        :param key: forwarded to the backend
        :param to_chw: forwarded to the backend
        :return: rdd whose elements are the backend tensors converted with to_ndarray()
        """
        def as_ndarray(tensor):
            return tensor.to_ndarray()

        images = callZooFunc(self.bigdl_type, "distributedImageSetToImageTensorRdd",
                             self.value, key, to_chw)
        return images.map(as_ndarray)

    def get_label(self):
        """
        get label rdd from ImageSet

        :return: rdd whose elements are the backend tensors converted with to_ndarray()
        """
        def as_ndarray(tensor):
            return tensor.to_ndarray()

        labels = callZooFunc(self.bigdl_type, "distributedImageSetToLabelTensorRdd",
                             self.value)
        return labels.map(as_ndarray)

    def get_predict(self, key="predict"):
        """
        get prediction rdd from ImageSet

        :param key: forwarded to the backend
        :return: rdd of 2-tuples; the first element of each backend entry is passed
                 through, the second is a list of ndarray, or None when the entry's
                 second element is falsy
        """
        def convert(entry):
            # A falsy second element (None / empty) is reported as None.
            if not entry[1]:
                return (entry[0], None)
            return (entry[0], [tensor.to_ndarray() for tensor in entry[1]])

        entries = callZooFunc(self.bigdl_type, "distributedImageSetToPredict", self.value, key)
        return entries.map(convert)