@@ -14,7 +14,9 @@
 from dptb.utils import instantiate, get_w_prefix
 from dptb.utils.tools import j_loader
 from dptb.utils.argcheck import normalize_setinfo, normalize_lmdbsetinfo
+import logging
 
+log = logging.getLogger(__name__)
 
 def dataset_from_config(config, prefix: str = "dataset") -> AtomicDataset:
     """initialize database based on a config instance
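The new module-level logger replaces the bare print call removed in the second hunk. A minimal sketch of how a calling script could surface these records (the basicConfig settings here are illustrative, not part of this change):

    import logging

    # Illustrative only: emit INFO-level records to stderr so the info.json
    # notices and the log.error message added below become visible.
    logging.basicConfig(level=logging.INFO,
                        format="%(levelname)s:%(name)s: %(message)s")

With this in place, `log = logging.getLogger(__name__)` gives the module its own named logger, and the `log.info` / `log.error` calls below inherit that configuration.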
@@ -198,31 +200,32 @@ def build_dataset(
     if os.path.exists(f"{root}/info.json"):
         public_info = j_loader(os.path.join(root, "info.json"))
         if dataset_type == "LMDBDataset":
-            public_info = normalize_lmdbsetinfo(public_info)
+            public_info = {}
+            log.info("A public `info.json` file is provided, but will not be used anymore for LMDBDataset.")
         else:
             public_info = normalize_setinfo(public_info)
-        print("A public `info.json` file is provided, and will be used by the subfolders who do not have their own `info.json` file.")
+        log.info("A public `info.json` file is provided, and will be used by the subfolders who do not have their own `info.json` file.")
     else:
         public_info = None
 
     # Load info in each trajectory folders seperately.
     for file in include_folders:
         #if "info.json" in os.listdir(os.path.join(root, file)):
-        if os.path.exists(f"{root}/{file}/info.json"):
+
+        if dataset_type == "LMDBDataset":
+            info_files[file] = {}
+        elif os.path.exists(f"{root}/{file}/info.json"):
             # use info provided in this trajectory.
             info = j_loader(f"{root}/{file}/info.json")
-            if dataset_type == "LMDBDataset":
-                info = normalize_lmdbsetinfo(info)
-            else:
-                info = normalize_setinfo(info)
+            info = normalize_setinfo(info)
             info_files[file] = info
-        elif public_info is not None:
+        elif public_info is not None:  # not LMDB and no info in this subfolder, so the public info must be used.
             # use public info instead
             # yaml will not dump correctly if this is not a deepcopy.
             info_files[file] = deepcopy(public_info)
-        else:
-            # no info for this file
-            raise Exception(f"info.json is not properly provided for `{file}`.")
+        else:  # not LMDB, no info in this subfolder, and no public info, so raise an error.
+            log.error(f"for {dataset_type} type, the info.json is not properly provided for `{file}`")
+            raise ValueError(f"for {dataset_type} type, the info.json is not properly provided for `{file}`")
 
     # We will sort the info_files here.
     # The order itself is not important, but must be consistant for the same list.
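Taken together, the new branch order means: for LMDBDataset every folder gets an empty info dict and any info.json files are ignored; for other dataset types a folder-local info.json takes precedence, the public info.json is the fallback, and a completely missing info now raises ValueError instead of a bare Exception. A small sketch of that resolution order (the helper name resolve_info and its signature are hypothetical, not part of this PR; j_loader and normalize_setinfo are the imports shown above):

    import os
    from copy import deepcopy

    from dptb.utils.tools import j_loader
    from dptb.utils.argcheck import normalize_setinfo

    def resolve_info(root, folder, dataset_type, public_info):
        # Hypothetical helper mirroring the branch order introduced above.
        if dataset_type == "LMDBDataset":
            return {}                                  # LMDB datasets no longer read info.json
        local = os.path.join(root, folder, "info.json")
        if os.path.exists(local):
            return normalize_setinfo(j_loader(local))  # folder-local info wins
        if public_info is not None:
            return deepcopy(public_info)               # fall back to the shared info.json
        raise ValueError(f"for {dataset_type} type, the info.json is not properly provided for `{folder}`")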