# Load the cached, pre-resized dataset if it exists; otherwise build it from
# the raw image folders and cache it for next time.
try:
    with open(f'F:/Storage/readyDataset{IMG_SIZE}.pkl', 'rb') as save:
        training_data = pickle.load(save)
    print("Using dataset")
except (FileNotFoundError, pickle.UnpicklingError, EOFError):
    # Cache miss (or corrupt cache): rebuild from the raw images.
    # Narrow except replaces the original bare `except:`, which also swallowed
    # KeyboardInterrupt and hid real bugs.
    training_data = []
    for category in CATEGORIES:
        path = os.path.join(DATADIR, category)
        class_num = CATEGORIES.index(category)  # class label = index in CATEGORIES
        print("Gathering Dataset: {}%".format(class_num*50))
        for img in os.listdir(path):
            img_array = cv2.imread(os.path.join(path, img))
            if img_array is None:
                # cv2.imread returns None for unreadable or non-image files;
                # skip them instead of crashing in cv2.resize.
                continue
            new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            training_data.append([new_array, class_num])
    print("Gathering Dataset: 100%\nSaving dataset")
    # protocol=4 supports objects > 4 GiB.
    with open(f'F:/Storage/readyDataset{IMG_SIZE}.pkl', 'wb') as save:
        pickle.dump(training_data, save, protocol=4)

print("Compiling Data")
random.shuffle(training_data)
# NOTE(review): X and Y are assumed to be empty lists defined earlier in the
# file — confirm against the preceding code.
for features, label in training_data:
    X.append(features)
    Y.append(label)
# Build the feature tensor as float32, not the float64 that a plain
# `np.array(X) / 255` produces: 6800 x 512 x 512 x 3 in float64 is ~40 GiB,
# while float32 is half that. The in-place divide avoids allocating a second
# full-size temporary during normalization.
X = np.array(X, dtype=np.float32).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
X /= 255.0
Y = np.array(Y)
print("Finished Compiling Data")
MemoryError: Unable to allocate 39.8 GiB for an array with shape (6800, 512, 512, 3) and data type float64
I am trying to use a custom dataset of 6800 images in TensorFlow v2. The dataset on disk is less than 3 GB, but after compiling the data it needs about 40 GB of memory. How is this possible? I feel like I'm doing this wrong.
You can build your input pipeline with tf.data.Dataset, which streams batches from disk instead of loading the whole dataset into memory, greatly reducing memory consumption.
For TensorFlow input pipelines with a different type of data, you can follow this official documentation.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.