文章目录
- H5数据集的使用
- 使用h5py的 group 分割GB级数据
H5数据集的使用
# Create the training file and store the inputs and labels as two
# top-level datasets. Mode 'w' creates the file (per the h5py docs it
# truncates an existing file of the same name).
# NOTE(review): shuffled_input / shuffled_label are expected to be defined
# earlier; they are not shown in this snippet.
with h5py.File("data_train.h5", mode='w') as out_file:
    out_file.create_dataset(name='train_input', data=shuffled_input)
    out_file.create_dataset(name='train_label', data=shuffled_label)
# Open the training file read-only and load both datasets fully into memory
# as NumPy arrays (the arrays stay usable after the file is closed).
with h5py.File("data_train.h5", 'r') as hf:
    # Index with [] instead of .get(): .get() returns None for a missing key,
    # and np.array(None) would silently produce a 0-d object array rather
    # than failing. hf[...] raises KeyError right here if the name is wrong.
    train_data = np.array(hf['train_input'])
    train_label = np.array(hf['train_label'])
使用h5py的 group 分割GB级数据
# Create a file that holds two groups, each with its own "train" and
# "label" datasets.
with h5py.File("myh5py.h5", "w") as h5_out:
    # Create both groups up front.
    first_group = h5_out.create_group("bar1")
    second_group = h5_out.create_group("bar2")

    # Fill each group with a "train" and a "label" dataset; assigning an
    # array to group[name] stores it as a dataset under that name.
    # bar1 gets arange(10), bar2 gets arange(20).
    for group, length in ((first_group, 10), (second_group, 20)):
        for dataset_name in ("train", "label"):
            group[dataset_name] = np.arange(length)
# Reopen the grouped file read-only and print the contents of every group.
with h5py.File("myh5py.h5", "r") as h5_in:
    # Iterating the file object yields the names of its top-level members
    # (here: the groups).
    for group_name in h5_in:
        group = h5_in[group_name]

        # Load the dataset named "train" from this group and print it.
        train_values = np.array(group['train'])
        print(train_values)

        # Same for the dataset named "label".
        label_values = np.array(group['label'])
        print(label_values)