Stream file from tar archive based on location in archive.
Reads always consult the cache (hit → local, miss → remote stream);
`_caching_enabled` only governs whether a miss is also persisted.
Source code in datachain/lib/file.py
| @classmethod
def open(cls, file: "File", location: list[dict]):
    """Open a member of a tar archive as a bounded slice of the archive stream.

    The enclosing archive is whatever ``cls.parent(file, location)`` returns;
    the member's byte range is taken from the first entry of ``location``.

    The archive is always opened with ``use_cache=True``, so reads consult
    the cache (hit -> local, miss -> remote stream); ``_caching_enabled``
    only governs whether a miss is also persisted.

    Args:
        file: The virtual file being opened; its ``source``, ``path``,
            ``name`` and ``_catalog`` attributes are used.
        location: Location records; ``location[0]`` must provide the
            ``"offset"`` and ``"size"`` keys.

    Returns:
        A ``FileSlice`` built from the archive stream, the offset, the size
        and ``file.name``.

    Raises:
        VFileError: If ``"offset"`` or ``"size"`` is missing (or ``None``)
            in ``location[0]``.
    """
    archive = cls.parent(file, location)
    entry = location[0]

    # Validate the byte range up front, before any stream is opened.
    offset = entry.get("offset")
    if offset is None:
        raise VFileError("'offset' is not specified", file.source, file.path)

    size = entry.get("size")
    if size is None:
        raise VFileError("'size' is not specified", file.source, file.path)

    # use_cache=True is deliberate: the read path always goes through the cache.
    stream = file._catalog.get_client(archive.source).open_object(
        archive, use_cache=True
    )
    return FileSlice(stream, offset, size, file.name)
|