From 3aa106dfb5e1a91424a79ced2325d06558136c36 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 13 Jan 2023 09:03:25 -0600 Subject: [PATCH] Compress link_ids lists This reduces the size of our precompile cache files, using run-length encoding (RLE) to represent the module of external linkages. Most linkages seem to be against the sysimg itself, and RLE allows long stretches of such linkages to be encoded compactly. Closes #48218 --- src/staticdata.c | 50 +++++++++++++++----------------- src/staticdata_utils.c | 66 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 27 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index bdbe73f857f26..2485f088e371c 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2461,14 +2461,18 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_write_value(&s, ext_targets); jl_write_value(&s, edges); } - write_uint32(f, jl_array_len(s.link_ids_gctags)); - ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags)*sizeof(uint64_t)); - write_uint32(f, jl_array_len(s.link_ids_relocs)); - ios_write(f, (char*)jl_array_data(s.link_ids_relocs), jl_array_len(s.link_ids_relocs)*sizeof(uint64_t)); - write_uint32(f, jl_array_len(s.link_ids_gvars)); - ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars)*sizeof(uint64_t)); - write_uint32(f, jl_array_len(s.link_ids_external_fnvars)); - ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars), jl_array_len(s.link_ids_external_fnvars)*sizeof(uint64_t)); + // Write out the link_ids. First we write all the build_ids, and then the RLE-encoded version + // with indexed lists. + if (!s.incremental) { + write_uint32(f, 0); + } else { + write_uint32(f, jl_array_len(jl_build_ids)); + ios_write(f, (char*)jl_array_data(jl_build_ids), jl_array_len(jl_build_ids)*sizeof(uint64_t)); + } + write_linkids_rle(f, s.link_ids_gctags, jl_build_ids); + write_linkids_rle(f, s.link_ids_relocs, jl_build_ids); + write_linkids_rle(f, s.link_ids_gvars, jl_build_ids); + write_linkids_rle(f, s.link_ids_external_fnvars, jl_build_ids); write_uint32(f, external_fns_begin); jl_write_arraylist(s.s, &s.ccallable_list); } @@ -2758,26 +2762,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl offset_ext_targets = jl_read_offset(&s); offset_edges = jl_read_offset(&s); } - size_t nlinks_gctags = read_uint32(f); - if (nlinks_gctags > 0) { - s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gctags); - ios_read(f, (char*)jl_array_data(s.link_ids_gctags), nlinks_gctags * sizeof(uint64_t)); - } - size_t nlinks_relocs = read_uint32(f); - if (nlinks_relocs > 0) { - s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, nlinks_relocs); - ios_read(f, (char*)jl_array_data(s.link_ids_relocs), nlinks_relocs * sizeof(uint64_t)); - } - size_t nlinks_gvars = read_uint32(f); - if (nlinks_gvars > 0) { - s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gvars); - ios_read(f, (char*)jl_array_data(s.link_ids_gvars), nlinks_gvars * sizeof(uint64_t)); - } - size_t nlinks_external_fnvars = read_uint32(f); - if (nlinks_external_fnvars > 0) { - s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_external_fnvars); - ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars), nlinks_external_fnvars * sizeof(uint64_t)); + size_t n_pkg_build_ids = read_uint32(f); + jl_array_t *pkg_build_ids = NULL; + if (n_pkg_build_ids > 0) { + assert(s.incremental); + pkg_build_ids = jl_alloc_array_1d(jl_array_uint64_type, n_pkg_build_ids); + ios_read(f, (char*)jl_array_data(pkg_build_ids), n_pkg_build_ids * sizeof(uint64_t)); } + s.link_ids_gctags = read_linkids_rle(f, pkg_build_ids); + s.link_ids_relocs = read_linkids_rle(f, pkg_build_ids); + s.link_ids_gvars = read_linkids_rle(f, pkg_build_ids); + s.link_ids_external_fnvars = read_linkids_rle(f, pkg_build_ids); + uint32_t external_fns_begin = read_uint32(f); jl_read_arraylist(s.s, ccallable_list ? ccallable_list : &s.ccallable_list); if (s.incremental) { diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 297dbbdf085e3..d85479e672ac6 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -9,6 +9,72 @@ static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT write_uint64(s, *((uint64_t*)&x)); } +static int find_id(uint64_t id, jl_array_t *buildids) +{ + size_t l = jl_array_len(buildids); + uint64_t *ids = (uint64_t*)jl_array_data(buildids); + for (size_t i = 0; i < l; i++) + if (id == ids[i]) + return i; + jl_(buildids); + jl_errorf("build_id %lx not found", id); +} + +static void write_linkids_rle(ios_t *f, jl_array_t *linkids, jl_array_t *buildids) +{ + size_t l = jl_array_len(linkids); + write_uint32(f, l); + if (!buildids || l == 0) { + assert(l == 0); + } else { + uint64_t *ids = (uint64_t*)jl_array_data(linkids); + size_t i0 = 0, i = 1; + uint64_t id = ids[i0]; + while (i <= l) { + while (i < l && ids[i] == id) + i++; + write_uint32(f, find_id(id, buildids)); + write_uint32(f, i - i0); + i0 = i; + if (i0 < l) + id = ids[i0]; + i++; + } + } + // end-of-list sentinel + write_uint32(f, 0); + write_uint32(f, 0); +} + +static jl_array_t *read_linkids_rle(ios_t *f, jl_array_t *buildids) +{ + jl_array_t *linkids = NULL; + size_t l = read_uint32(f); + if (!buildids || l == 0) { + assert(l == 0); + } else { + linkids = jl_alloc_array_1d(jl_array_uint64_type, l); + uint64_t *ids = (uint64_t*)jl_array_data(linkids), *bids = (uint64_t*)jl_array_data(buildids); + size_t i = 0, j; + while (i < l) { + uint32_t k = read_uint32(f); + assert(k < jl_array_len(buildids)); + uint64_t id = bids[k]; + uint32_t nrpt = read_uint32(f); + for (j = 0; j < nrpt; j++, i++) + ids[i] = id; + } + assert(i == l); + } + // end-of-list sentinel + uint32_t dummy = read_uint32(f); + assert(dummy == 0); + dummy = read_uint32(f); + assert(dummy == 0); + (void)dummy; + return linkids; +} + // Decide if `t` must be new, because it points to something new. // If it is new, the object (in particular, the super field) might not be entirely // valid for the cache, so we want to finish transforming it before attempting