Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add file_offset functionality #409

Merged
merged 9 commits into from
Jul 5, 2022
2 changes: 1 addition & 1 deletion blosc/blosc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1530,7 +1530,7 @@ static int blosc_d(
fp = io_cb->open(urlpath, "rb", context->schunk->storage->io->params);
BLOSC_ERROR_NULL(fp, BLOSC2_ERROR_FILE_OPEN);
// The offset of the block is src_offset
io_cb->seek(fp, chunk_offset + src_offset, SEEK_SET);
io_cb->seek(fp, frame->file_offset + chunk_offset + src_offset, SEEK_SET);
}
// We can make use of tmp3 because it will be used after src is not needed anymore
int64_t rbytes = io_cb->read(tmp3, 1, block_csize, fp);
Expand Down
186 changes: 98 additions & 88 deletions blosc/frame.c

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion blosc/frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ typedef struct {
uint32_t trailer_len; //!< The current length of the trailer in (compressed) bytes
bool sframe; //!< Whether the frame is sparse (true) or not
blosc2_schunk *schunk; //!< The schunk associated
int64_t file_offset; //!< The offset where the frame starts inside the file
} blosc2_frame_s;


Expand Down Expand Up @@ -108,7 +109,7 @@ int frame_free(blosc2_frame_s *frame);
*
* @return The frame created from the file.
*/
blosc2_frame_s* frame_from_file(const char *urlpath, const blosc2_io *io_cb);
blosc2_frame_s* frame_from_file_offset(const char *urlpath, const blosc2_io *io_cb, int64_t offset);

/**
* @brief Initialize a frame out of a frame buffer.
Expand Down
77 changes: 76 additions & 1 deletion blosc/schunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ blosc2_schunk* blosc2_schunk_open_udio(const char* urlpath, const blosc2_io *udi
return NULL;
}

blosc2_frame_s* frame = frame_from_file(urlpath, udio);
blosc2_frame_s* frame = frame_from_file_offset(urlpath, udio, 0);
if (frame == NULL) {
return NULL;
}
Expand All @@ -327,6 +327,27 @@ blosc2_schunk* blosc2_schunk_open(const char* urlpath) {
return blosc2_schunk_open_udio(urlpath, &BLOSC2_IO_DEFAULTS);
}

/**
 * Open the frame that starts `offset` bytes into the file `urlpath` and wrap
 * it in a super-chunk, using the default I/O backend (BLOSC2_IO_DEFAULTS).
 *
 * @param urlpath Path of the file holding the frame. Must not be NULL.
 * @param offset  Position inside the file where the frame starts.
 *
 * @return The super-chunk, or NULL when `urlpath` is NULL, the frame cannot
 *         be read, the frame cannot be converted into a super-chunk, or the
 *         copy of `urlpath` cannot be allocated.
 */
BLOSC_EXPORT blosc2_schunk* blosc2_schunk_open_offset(const char* urlpath, int64_t offset) {
  if (urlpath == NULL) {
    BLOSC_TRACE_ERROR("You need to supply a urlpath.");
    return NULL;
  }

  blosc2_frame_s* frame = frame_from_file_offset(urlpath, &BLOSC2_IO_DEFAULTS, offset);
  if (frame == NULL) {
    return NULL;
  }

  blosc2_schunk* schunk = frame_to_schunk(frame, false, &BLOSC2_IO_DEFAULTS);
  if (schunk == NULL) {
    // NOTE(review): ownership of `frame` on this path depends on what
    // frame_to_schunk() does on failure -- confirm whether it must be freed here.
    BLOSC_TRACE_ERROR("Cannot convert frame to schunk.");
    return NULL;
  }

  // Set the storage with proper defaults: the super-chunk keeps its own copy
  // of the path, so the caller's string does not need to outlive this call.
  size_t pathlen = strlen(urlpath);
  char* path_copy = malloc(pathlen + 1);
  if (path_copy == NULL) {
    BLOSC_TRACE_ERROR("Cannot allocate memory for the urlpath.");
    // NOTE(review): presumably this also releases `frame`, which the schunk
    // now references -- verify against blosc2_schunk_free().
    blosc2_schunk_free(schunk);
    return NULL;
  }
  strcpy(path_copy, urlpath);
  schunk->storage->urlpath = path_copy;
  schunk->storage->contiguous = !frame->sframe;

  return schunk;
}

int64_t blosc2_schunk_to_buffer(blosc2_schunk* schunk, uint8_t** dest, bool* needs_free) {
blosc2_frame_s* frame;
int64_t cframe_len;
Expand Down Expand Up @@ -376,6 +397,29 @@ int64_t frame_to_file(blosc2_frame_s* frame, const char* urlpath) {
}


/**
 * Append an in-memory frame to a file.
 *
 * The file is opened in binary append mode ("ab") through the I/O backend
 * registered for the frame's super-chunk.
 *
 * @param frame   The frame whose `cframe` buffer (`len` bytes) is written.
 * @param urlpath The path of the file to append to.
 *
 * @return The position in the file where the frame starts, or a negative
 *         error code if the I/O backend cannot be found or the file cannot
 *         be opened.
 */
int64_t append_frame_to_file(blosc2_frame_s* frame, const char* urlpath) {
  blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id);
  if (io_cb == NULL) {
    BLOSC_TRACE_ERROR("Error getting the input/output API");
    return BLOSC2_ERROR_PLUGIN_IO;
  }

  // The open callback takes the backend parameters, not the blosc2_io struct.
  void* fp = io_cb->open(urlpath, "ab", frame->schunk->storage->io->params);
  if (fp == NULL) {
    BLOSC_TRACE_ERROR("Error opening the file for appending");
    return BLOSC2_ERROR_FILE_OPEN;
  }

  // The initial position of a stream opened in append mode is not the same
  // on every platform, so always move to the end before asking where we are.
  io_cb->seek(fp, 0, SEEK_END);
  int64_t offset = io_cb->tell(fp);

  // NOTE(review): the result of write() is not checked; confirm the callback's
  // return convention and report short writes.
  io_cb->write(frame->cframe, frame->len, 1, fp);
  io_cb->close(fp);
  return offset;
}


/* Write super-chunk out to a file. */
int64_t blosc2_schunk_to_file(blosc2_schunk* schunk, const char* urlpath) {
if (urlpath == NULL) {
Expand Down Expand Up @@ -407,6 +451,37 @@ int64_t blosc2_schunk_to_file(blosc2_schunk* schunk, const char* urlpath) {
}


/**
 * Append a super-chunk to a file.
 *
 * A contiguous super-chunk with no urlpath is appended directly from its
 * frame. Any other super-chunk is first copied into such a contiguous
 * super-chunk, which is appended and then freed.
 *
 * @param schunk  The super-chunk to append.
 * @param urlpath The path of the file to append to. Must not be NULL.
 *
 * @return The offset in `urlpath` where the frame has been written (0 is a
 *         valid offset: the file was empty or did not exist), or a negative
 *         error code.
 */
int64_t blosc2_schunk_append_file(blosc2_schunk* schunk, const char* urlpath) {
  if (urlpath == NULL) {
    BLOSC_TRACE_ERROR("urlpath cannot be NULL");
    return BLOSC2_ERROR_INVALID_PARAM;
  }

  // Accelerated path for in-memory frames
  if (schunk->storage->contiguous && schunk->storage->urlpath == NULL) {
    int64_t offset = append_frame_to_file((blosc2_frame_s*)(schunk->frame), urlpath);
    // Only negative values are errors; offset 0 means the frame is the first one in the file.
    if (offset < 0) {
      BLOSC_TRACE_ERROR("Error writing to file");
    }
    return offset;
  }

  // Copy to a contiguous in-memory frame and append that one
  blosc2_storage frame_storage = {.contiguous=true, .urlpath=NULL};
  blosc2_schunk* schunk_copy = blosc2_schunk_copy(schunk, &frame_storage);
  if (schunk_copy == NULL) {
    BLOSC_TRACE_ERROR("Error during the conversion of schunk to buffer.");
    return BLOSC2_ERROR_SCHUNK_COPY;
  }
  blosc2_frame_s* frame = (blosc2_frame_s*)(schunk_copy->frame);
  int64_t offset = append_frame_to_file(frame, urlpath);
  if (offset < 0) {
    BLOSC_TRACE_ERROR("Error writing to file");
  }
  blosc2_schunk_free(schunk_copy);
  return offset;
}


/* Free all memory from a super-chunk. */
int blosc2_schunk_free(blosc2_schunk *schunk) {
if (schunk->data != NULL) {
Expand Down
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Examples with correspondingly named source files
set(EXAMPLES contexts instrument_codec delta_schunk_ex multithread simple frame_metalayers
noinit find_roots schunk_simple frame_simple urcodecs urfilters frame_vlmetalayers
sframe_simple frame_backed_schunk compress_file)
sframe_simple frame_backed_schunk compress_file frame_offset)

if(NOT DEACTIVATE_ZSTD)
set(EXAMPLES ${EXAMPLES} zstd_dict)
Expand Down
181 changes: 181 additions & 0 deletions examples/frame_offset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/*
Copyright (C) 2021 The Blosc Developers <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)

Example program demonstrating how to append a frame to an existing file and open it back using its offset.

To compile this program:

$ gcc frame_offset.c -o frame_offset -lblosc2

To run:

$ ./frame_offset
Blosc version info: 2.1.2.dev ($Date:: 2022-05-07 #$)
Compression ratio: 76.3 MB -> 1.2 MB (66.0x)
Compression time: 1.17 s, 65.0 MB/s
Variable-length metalayer length: 10
0 1 2 3 4 5 6 7 8 9
Time for schunk -> frame: 0.266 s, 286.7 MB/s
Frame length in memory: 1212483 bytes
Frame length on disk: 1212483 bytes
Time for frame -> fileframe (frame_simple.b2frame): 6.2 s, 0.0 GB/s
Time for fileframe (file:///frame_simple.b2frame) -> frame2 : 0.00177 s, 42.2 GB/s
Time for fileframe (file:///frame_simple.b2frame) + offset 1212483 -> frame3 : 0.00176 s, 42.3 GB/s
Successful roundtrip schunk <-> frame <-> fileframe
schunk1 <-> frame1 <-> fileframe + offset

*/

#include <stdio.h>
#include <assert.h>
#include <blosc2.h>

#define KB 1024.
#define MB (1024*KB)
#define GB (1024*MB)

#define CHUNKSIZE (200 * 1000)
#define NCHUNKS 100
#define NTHREADS 4


/*
 * Build two super-chunks, write the first one to "frame_simple.b2frame",
 * append the second one to the same file, then open both back (the second
 * one through its offset) and check that the frames round-trip byte by byte.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
int main(void) {

  blosc_init();

  static int32_t data[CHUNKSIZE];
  static int32_t data2[CHUNKSIZE];
  int32_t isize = CHUNKSIZE * sizeof(int32_t);
  int i, nchunk;
  int64_t nchunks;
  blosc_timestamp_t last, current;
  double ttotal;

  printf("Blosc version info: %s (%s)\n",
         BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);

  /* Create a super-chunk container */
  blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS;
  cparams.typesize = sizeof(int32_t);
  cparams.compcode = BLOSC_LZ4;
  cparams.clevel = 9;
  cparams.nthreads = NTHREADS;
  blosc2_dparams dparams = BLOSC2_DPARAMS_DEFAULTS;
  dparams.nthreads = NTHREADS;
  blosc2_storage storage = {.cparams=&cparams, .dparams=&dparams};
  blosc2_schunk* schunk0w = blosc2_schunk_new(&storage);
  blosc2_schunk* schunk1a = blosc2_schunk_new(&storage);

  // Add some data
  for (nchunk = 0; nchunk < NCHUNKS; nchunk++) {
    for (i = 0; i < CHUNKSIZE; i++) {
      data[i] = i * nchunk;
      data2[i] = 2 * i * nchunk;
    }
    nchunks = blosc2_schunk_append_buffer(schunk0w, data, isize);
    assert(nchunks == nchunk + 1);
    nchunks = blosc2_schunk_append_buffer(schunk1a, data2, isize);
    if (nchunks < 0) {
      return (int)nchunks;
    }
  }

  // Start different conversions between schunks, frames and fileframes

  // super-chunk -> cframe (contiguous frame, or buffer)
  uint8_t* cframe, *cframe1;
  bool cframe_needs_free, cframe_needs_free1;
  int64_t frame_len = blosc2_schunk_to_buffer(schunk0w, &cframe, &cframe_needs_free);
  if (frame_len < 0) {
    return (int)frame_len;
  }
  int64_t frame_len1 = blosc2_schunk_to_buffer(schunk1a, &cframe1, &cframe_needs_free1);
  if (frame_len1 < 0) {
    return (int)frame_len1;
  }

  // super-chunk -> fileframe (contiguous frame, on-disk)
  remove("frame_simple.b2frame");
  blosc_set_timestamp(&last);
  frame_len = blosc2_schunk_to_file(schunk0w, "frame_simple.b2frame");
  if (frame_len < 0) {
    return (int)frame_len;
  }
  printf("Frame length on disk: %ld bytes\n", (long)frame_len);
  blosc_set_timestamp(&current);
  ttotal = blosc_elapsed_secs(last, current);
  printf("Time for frame -> fileframe (frame_simple.b2frame): %.3g s, %.1f GB/s\n",
         ttotal, (double)schunk0w->nbytes / (ttotal * GB));

  // Second super-chunk -> appended to the same file; keep the offset where it starts
  blosc_set_timestamp(&last);
  int64_t offset = blosc2_schunk_append_file(schunk1a, "frame_simple.b2frame");
  if (offset < 0) {
    return (int)offset;
  }
  blosc_set_timestamp(&current);
  ttotal = blosc_elapsed_secs(last, current);
  printf("Time for frame1 -> fileframe (frame_simple.b2frame) + offset: %.3g s, %.1f GB/s\n",
         ttotal, (double)schunk1a->nbytes / (ttotal * GB));

  // fileframe (file) -> schunk2 (on-disk contiguous, super-chunk)
  blosc_set_timestamp(&last);
  blosc2_schunk* schunk2r = blosc2_schunk_open("file:///frame_simple.b2frame");
  if (schunk2r == NULL) {
    printf("Cannot open the frame at the start of the file\n");
    return -1;
  }
  blosc_set_timestamp(&current);
  ttotal = blosc_elapsed_secs(last, current);
  printf("Time for fileframe (%s) -> frame2 : %.3g s, %.1f GB/s\n",
         schunk2r->storage->urlpath, ttotal, (double)schunk2r->nbytes / (ttotal * GB));

  // fileframe (file) + offset -> schunk3 (on-disk contiguous, super-chunk)
  blosc_set_timestamp(&last);
  blosc2_schunk* schunk3o = blosc2_schunk_open_offset("file:///frame_simple.b2frame", offset);
  if (schunk3o == NULL) {
    printf("Cannot open the frame at offset %lld\n", (long long)offset);
    return -1;
  }
  blosc_set_timestamp(&current);
  ttotal = blosc_elapsed_secs(last, current);
  // int64_t is not necessarily `long long`: cast so the argument matches %lld
  printf("Time for fileframe (%s) + offset %lld -> frame3 : %.3g s, %.1f GB/s\n",
         schunk3o->storage->urlpath, (long long)offset, ttotal,
         (double)schunk3o->nbytes / (ttotal * GB));

  // Compare the frames read back from disk with the in-memory ones
  uint8_t* cframe2, *cframe3;
  bool cframe_needs_free2, cframe_needs_free3;
  int64_t frame_len2 = blosc2_schunk_to_buffer(schunk2r, &cframe2, &cframe_needs_free2);
  if (frame_len2 != frame_len) {
    return (int)frame_len2;
  }
  for (int j = 0; j < frame_len; ++j) {
    if (cframe[j] != cframe2[j]) {
      printf("schunk != schunk2 in index %d: %u, %u\n", j, cframe[j], cframe2[j]);
      return -1;
    }
  }
  int64_t frame_len3 = blosc2_schunk_to_buffer(schunk3o, &cframe3, &cframe_needs_free3);
  if (frame_len3 != frame_len1) {
    return (int)frame_len3;
  }
  for (int j = 0; j < frame_len1; ++j) {
    if (cframe1[j] != cframe3[j]) {
      printf("schunk1 != schunk3 in index %d: %u, %u\n", j, cframe1[j], cframe3[j]);
      return -1;
    }
  }

  printf("Successful roundtrip schunk <-> frame <-> fileframe\n"
         "                     schunk1 <-> frame1 <-> fileframe + offset\n");

  /* Free resources */
  blosc2_schunk_free(schunk0w);
  blosc2_schunk_free(schunk1a);
  blosc2_schunk_free(schunk2r);
  blosc2_schunk_free(schunk3o);
  if (cframe_needs_free) {
    free(cframe);
  }
  if (cframe_needs_free1) {
    free(cframe1);
  }
  if (cframe_needs_free2) {
    free(cframe2);
  }
  if (cframe_needs_free3) {
    free(cframe3);
  }
  blosc_destroy();

  return 0;
}
30 changes: 22 additions & 8 deletions include/blosc2.h
Original file line number Diff line number Diff line change
Expand Up @@ -1528,25 +1528,30 @@ BLOSC_EXPORT blosc2_schunk* blosc2_schunk_from_buffer(uint8_t *cframe, int64_t l
/**
* @brief Open an existing super-chunk that is on-disk (frame). No in-memory copy is made.
*
* @param storage The storage properties of the source.
*
* @remark The storage.urlpath must be not NULL and it should exist on-disk.
* New data or metadata can be appended or updated.
* @param urlpath The file name.
*
* @return The new super-chunk. NULL if not found or not in frame format.
*/
BLOSC_EXPORT blosc2_schunk* blosc2_schunk_open(const char* urlpath);

/**
 * @brief Open an existing super-chunk that is on-disk (frame), starting at a given
 * position inside the file. No in-memory copy is made.
 *
 * @param urlpath The file name. It must not be NULL.
 *
 * @param offset The offset inside the file where the frame starts.
 *
 * @return The new super-chunk. NULL if @p urlpath is NULL, if not found or
 * not in frame format.
 */
BLOSC_EXPORT blosc2_schunk* blosc2_schunk_open_offset(const char* urlpath, int64_t offset);

/**
* @brief Open an existing super-chunk (no copy is made) using a user-defined I/O interface.
*
* @param storage The storage properties of the source.
* @param urlpath The file name.
*
* @param udio The user-defined I/O interface.
*
* @remark The storage.urlpath must be not NULL and it should exist on-disk.
* New data or metadata can be appended or updated.
*
* @return The new super-chunk.
*/
BLOSC_EXPORT blosc2_schunk* blosc2_schunk_open_udio(const char* urlpath, const blosc2_io *udio);
Expand Down Expand Up @@ -1576,6 +1581,15 @@ BLOSC_EXPORT int64_t blosc2_schunk_to_buffer(blosc2_schunk* schunk, uint8_t** cf
*/
BLOSC_EXPORT int64_t blosc2_schunk_to_file(blosc2_schunk* schunk, const char* urlpath);

/* @brief Append a super-chunk into a file.
*
* @param schunk The super-chunk to write.
* @param urlpath The path for persistent storage.
*
* @return If successful, return the offset where @p schunk has been appended in @p urlpath.
* Else, a negative value.
*/
int64_t blosc2_schunk_append_file(blosc2_schunk* schunk, const char* urlpath);

/**
* @brief Release resources from a super-chunk.
Expand Down
Loading