Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for memory-mapped files #604

Merged
merged 18 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions blosc/blosc2-stdio.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,25 @@ void *blosc2_stdio_mmap_open(const char *urlpath, const char *mode, void* params

blosc2_stdio_mmap *mmap_file = (blosc2_stdio_mmap *) params;
if (mmap_file->addr != NULL) {
if (strcmp(mmap_file->urlpath, urlpath) != 0) {
BLOSC_TRACE_ERROR(
"The memory-mapped file is already opened with the path %s and hence cannot be reopened with the path %s. This "
"happens if you try to open a sframe (sparse frame) but please not that memory-mapped files are not supported "
"for sframes.",
mmap_file->urlpath,
urlpath
);
return NULL;
}

/* A memory-mapped file is only opened once */
return mmap_file;
}

// Keep the original path to ensure that all future file openings are with the same path
mmap_file->urlpath = malloc(strlen(urlpath) + 1);
strcpy(mmap_file->urlpath, urlpath);

/* mmap_file->mode mapping is similar to NumPy's memmap
(https://github.com/numpy/numpy/blob/main/numpy/_core/memmap.py) and CPython
(https://github.com/python/cpython/blob/main/Modules/mmapmodule.c) */
Expand Down Expand Up @@ -452,6 +467,7 @@ int blosc2_stdio_mmap_destroy(void* params) {
err = -1;
}

free(mmap_file->urlpath);
if (mmap_file->needs_free) {
free(mmap_file);
}
Expand Down
11 changes: 8 additions & 3 deletions include/blosc2/blosc2-stdio.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ BLOSC_EXPORT int blosc2_stdio_truncate(void *stream, int64_t size);


/**
* @brief Parameters for memory-mapped I/O.
* @brief Parameters for memory-mapped I/O. Existing schunks can be opened memory-mapped with the *_udio functions and
* new schunks can be created by setting the io member of the #blosc2_storage struct (see test_mmap for examples). Please
* note that only cframes and not sframes can be opened memory-mapped.
*/
typedef struct {
/* Arguments of the mapping */
Expand All @@ -56,14 +58,17 @@ typedef struct {
//!< but keep all modifications in-memory. On Windows, the file size cannot change in the c mode.
int64_t initial_mapping_size;
//!< The initial size of the memory mapping used as a large enough write buffer for the r+, w+ and c modes (for
//!< Windows, only the r+ and w+ modes).
//!< Windows, only the r+ and w+ modes). On Windows, this will also be the size of the file while the file is opened.
//!< It will be truncated to the target size when the file is closed (e.g., when the schunk is destroyed).
bool needs_free;
//!< Indicates whether this object should be freed in the blosc2_destroy_cb callback (set to true if the
//!< blosc2_stdio_mmap struct was created on the heap).

/* Internal attributes of the mapping */
char* addr;
//!< The starting address of the mapping.
char* urlpath;
//!< The path to the file which is associated with this object.
int64_t file_size;
//!< The size of the file.
int64_t mapping_size;
Expand All @@ -86,7 +91,7 @@ typedef struct {
* @brief Default struct for memory-mapped I/O for user initialization.
*/
static const blosc2_stdio_mmap BLOSC2_STDIO_MMAP_DEFAULTS = {
"r", (1 << 30), false, NULL, -1, -1, NULL, -1, -1, -1
"r", (1 << 30), false, NULL, NULL, -1, -1, NULL, -1, -1, -1
#if defined(_WIN32)
, INVALID_HANDLE_VALUE
#endif
Expand Down