Skip to content

Commit f8f2c8c

Browse files
authored
Merge pull request #87814 from bruvzg/memalign
[Core] Improve `CowData` and `Memory` metadata alignment.
2 parents 63d6bda + 7bcb419 commit f8f2c8c

File tree

3 files changed, with 99 additions (+99) and 52 deletions (-52).

core/os/memory.cpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -72,23 +72,23 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
7272
bool prepad = p_pad_align;
7373
#endif
7474

75-
void *mem = malloc(p_bytes + (prepad ? PAD_ALIGN : 0));
75+
void *mem = malloc(p_bytes + (prepad ? DATA_OFFSET : 0));
7676

7777
ERR_FAIL_NULL_V(mem, nullptr);
7878

7979
alloc_count.increment();
8080

8181
if (prepad) {
82-
uint64_t *s = (uint64_t *)mem;
83-
*s = p_bytes;
84-
8582
uint8_t *s8 = (uint8_t *)mem;
8683

84+
uint64_t *s = (uint64_t *)(s8 + SIZE_OFFSET);
85+
*s = p_bytes;
86+
8787
#ifdef DEBUG_ENABLED
8888
uint64_t new_mem_usage = mem_usage.add(p_bytes);
8989
max_usage.exchange_if_greater(new_mem_usage);
9090
#endif
91-
return s8 + PAD_ALIGN;
91+
return s8 + DATA_OFFSET;
9292
} else {
9393
return mem;
9494
}
@@ -108,8 +108,8 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
108108
#endif
109109

110110
if (prepad) {
111-
mem -= PAD_ALIGN;
112-
uint64_t *s = (uint64_t *)mem;
111+
mem -= DATA_OFFSET;
112+
uint64_t *s = (uint64_t *)(mem + SIZE_OFFSET);
113113

114114
#ifdef DEBUG_ENABLED
115115
if (p_bytes > *s) {
@@ -126,14 +126,14 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
126126
} else {
127127
*s = p_bytes;
128128

129-
mem = (uint8_t *)realloc(mem, p_bytes + PAD_ALIGN);
129+
mem = (uint8_t *)realloc(mem, p_bytes + DATA_OFFSET);
130130
ERR_FAIL_NULL_V(mem, nullptr);
131131

132-
s = (uint64_t *)mem;
132+
s = (uint64_t *)(mem + SIZE_OFFSET);
133133

134134
*s = p_bytes;
135135

136-
return mem + PAD_ALIGN;
136+
return mem + DATA_OFFSET;
137137
}
138138
} else {
139139
mem = (uint8_t *)realloc(mem, p_bytes);
@@ -158,10 +158,10 @@ void Memory::free_static(void *p_ptr, bool p_pad_align) {
158158
alloc_count.decrement();
159159

160160
if (prepad) {
161-
mem -= PAD_ALIGN;
161+
mem -= DATA_OFFSET;
162162

163163
#ifdef DEBUG_ENABLED
164-
uint64_t *s = (uint64_t *)mem;
164+
uint64_t *s = (uint64_t *)(mem + SIZE_OFFSET);
165165
mem_usage.sub(*s);
166166
#endif
167167

core/os/memory.h

+25-10
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@
3838
#include <new>
3939
#include <type_traits>
4040

41-
#ifndef PAD_ALIGN
42-
#define PAD_ALIGN 16 //must always be greater than this at much
43-
#endif
44-
4541
class Memory {
4642
#ifdef DEBUG_ENABLED
4743
static SafeNumeric<uint64_t> mem_usage;
@@ -51,6 +47,17 @@ class Memory {
5147
static SafeNumeric<uint64_t> alloc_count;
5248

5349
public:
50+
// Alignment:  ↓ max_align_t        ↓ uint64_t          ↓ max_align_t
51+
//             ┌─────────────────┬──┬────────────────┬──┬───────────...
52+
//             │ uint64_t        │░░│ uint64_t       │░░│ T[]
53+
//             │ alloc size      │░░│ element count  │░░│ data
54+
//             └─────────────────┴──┴────────────────┴──┴───────────...
55+
// Offset:     ↑ SIZE_OFFSET        ↑ ELEMENT_OFFSET    ↑ DATA_OFFSET
56+
57+
static constexpr size_t SIZE_OFFSET = 0;
58+
static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t)));
59+
static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t)));
60+
5461
static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
5562
static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
5663
static void free_static(void *p_ptr, bool p_pad_align = false);
@@ -133,6 +140,10 @@ void memdelete_allocator(T *p_class) {
133140

134141
#define memnew_arr(m_class, m_count) memnew_arr_template<m_class>(m_count)
135142

143+
_FORCE_INLINE_ uint64_t *_get_element_count_ptr(uint8_t *p_ptr) {
144+
return (uint64_t *)(p_ptr - Memory::DATA_OFFSET + Memory::ELEMENT_OFFSET);
145+
}
146+
136147
template <typename T>
137148
T *memnew_arr_template(size_t p_elements) {
138149
if (p_elements == 0) {
@@ -142,10 +153,12 @@ T *memnew_arr_template(size_t p_elements) {
142153
same strategy used by std::vector, and the Vector class, so it should be safe.*/
143154

144155
size_t len = sizeof(T) * p_elements;
145-
uint64_t *mem = (uint64_t *)Memory::alloc_static(len, true);
156+
uint8_t *mem = (uint8_t *)Memory::alloc_static(len, true);
146157
T *failptr = nullptr; //get rid of a warning
147158
ERR_FAIL_NULL_V(mem, failptr);
148-
*(mem - 1) = p_elements;
159+
160+
uint64_t *_elem_count_ptr = _get_element_count_ptr(mem);
161+
*(_elem_count_ptr) = p_elements;
149162

150163
if constexpr (!std::is_trivially_constructible_v<T>) {
151164
T *elems = (T *)mem;
@@ -166,16 +179,18 @@ T *memnew_arr_template(size_t p_elements) {
166179

167180
template <typename T>
168181
size_t memarr_len(const T *p_class) {
169-
uint64_t *ptr = (uint64_t *)p_class;
170-
return *(ptr - 1);
182+
uint8_t *ptr = (uint8_t *)p_class;
183+
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
184+
return *(_elem_count_ptr);
171185
}
172186

173187
template <typename T>
174188
void memdelete_arr(T *p_class) {
175-
uint64_t *ptr = (uint64_t *)p_class;
189+
uint8_t *ptr = (uint8_t *)p_class;
176190

177191
if constexpr (!std::is_trivially_destructible_v<T>) {
178-
uint64_t elem_count = *(ptr - 1);
192+
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
193+
uint64_t elem_count = *(_elem_count_ptr);
179194

180195
for (uint64_t i = 0; i < elem_count; i++) {
181196
p_class[i].~T();

core/templates/cowdata.h

+62-30
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class CharString;
4646
template <class T, class V>
4747
class VMap;
4848

49-
SAFE_NUMERIC_TYPE_PUN_GUARANTEES(uint64_t)
49+
static_assert(std::is_trivially_destructible_v<std::atomic<uint64_t>>);
5050

5151
// Silence a false positive warning (see GH-52119).
5252
#if defined(__GNUC__) && !defined(__clang__)
@@ -89,26 +89,47 @@ class CowData {
8989
return ++x;
9090
}
9191

92-
static constexpr USize ALLOC_PAD = sizeof(USize) * 2; // For size and atomic refcount.
92+
// Alignment:  ↓ max_align_t           ↓ USize          ↓ max_align_t
93+
//             ┌────────────────────┬──┬─────────────┬──┬───────────...
94+
//             │ SafeNumeric<USize> │░░│ USize       │░░│ T[]
95+
//             │ ref. count         │░░│ data size   │░░│ data
96+
//             └────────────────────┴──┴─────────────┴──┴───────────...
97+
// Offset:     ↑ REF_COUNT_OFFSET      ↑ SIZE_OFFSET    ↑ DATA_OFFSET
98+
99+
static constexpr size_t REF_COUNT_OFFSET = 0;
100+
static constexpr size_t SIZE_OFFSET = ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize) == 0) ? (REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) : ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) + alignof(USize) - ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize)));
101+
static constexpr size_t DATA_OFFSET = ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t) == 0) ? (SIZE_OFFSET + sizeof(USize)) : ((SIZE_OFFSET + sizeof(USize)) + alignof(max_align_t) - ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t)));
93102

94103
mutable T *_ptr = nullptr;
95104

96105
// internal helpers
97106

107+
static _FORCE_INLINE_ SafeNumeric<USize> *_get_refcount_ptr(uint8_t *p_ptr) {
108+
return (SafeNumeric<USize> *)(p_ptr + REF_COUNT_OFFSET);
109+
}
110+
111+
static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) {
112+
return (USize *)(p_ptr + SIZE_OFFSET);
113+
}
114+
115+
static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) {
116+
return (T *)(p_ptr + DATA_OFFSET);
117+
}
118+
98119
_FORCE_INLINE_ SafeNumeric<USize> *_get_refcount() const {
99120
if (!_ptr) {
100121
return nullptr;
101122
}
102123

103-
return reinterpret_cast<SafeNumeric<USize> *>(_ptr) - 2;
124+
return (SafeNumeric<USize> *)((uint8_t *)_ptr - DATA_OFFSET + REF_COUNT_OFFSET);
104125
}
105126

106127
_FORCE_INLINE_ USize *_get_size() const {
107128
if (!_ptr) {
108129
return nullptr;
109130
}
110131

111-
return reinterpret_cast<USize *>(_ptr) - 1;
132+
return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET);
112133
}
113134

114135
_FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const {
@@ -244,7 +265,7 @@ void CowData<T>::_unref(void *p_data) {
244265
}
245266

246267
// free mem
247-
Memory::free_static(((uint8_t *)p_data) - ALLOC_PAD, false);
268+
Memory::free_static(((uint8_t *)p_data) - DATA_OFFSET, false);
248269
}
249270

250271
template <class T>
@@ -260,26 +281,27 @@ typename CowData<T>::USize CowData<T>::_copy_on_write() {
260281
/* in use by more than me */
261282
USize current_size = *_get_size();
262283

263-
USize *mem_new = (USize *)Memory::alloc_static(_get_alloc_size(current_size) + ALLOC_PAD, false);
264-
mem_new += 2;
284+
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false);
285+
ERR_FAIL_NULL_V(mem_new, 0);
265286

266-
new (mem_new - 2) SafeNumeric<USize>(1); //refcount
267-
*(mem_new - 1) = current_size; //size
287+
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
288+
USize *_size_ptr = _get_size_ptr(mem_new);
289+
T *_data_ptr = _get_data_ptr(mem_new);
268290

269-
T *_data = (T *)(mem_new);
291+
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
292+
*(_size_ptr) = current_size; //size
270293

271294
// initialize new elements
272295
if constexpr (std::is_trivially_copyable_v<T>) {
273-
memcpy(mem_new, _ptr, current_size * sizeof(T));
274-
296+
memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T));
275297
} else {
276298
for (USize i = 0; i < current_size; i++) {
277-
memnew_placement(&_data[i], T(_ptr[i]));
299+
memnew_placement(&_data_ptr[i], T(_ptr[i]));
278300
}
279301
}
280302

281303
_unref(_ptr);
282-
_ptr = _data;
304+
_ptr = _data_ptr;
283305

284306
rc = 1;
285307
}
@@ -315,21 +337,28 @@ Error CowData<T>::resize(Size p_size) {
315337
if (alloc_size != current_alloc_size) {
316338
if (current_size == 0) {
317339
// alloc from scratch
318-
USize *ptr = (USize *)Memory::alloc_static(alloc_size + ALLOC_PAD, false);
319-
ptr += 2;
320-
ERR_FAIL_NULL_V(ptr, ERR_OUT_OF_MEMORY);
321-
*(ptr - 1) = 0; //size, currently none
322-
new (ptr - 2) SafeNumeric<USize>(1); //refcount
340+
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false);
341+
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
342+
343+
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
344+
USize *_size_ptr = _get_size_ptr(mem_new);
345+
T *_data_ptr = _get_data_ptr(mem_new);
323346

324-
_ptr = (T *)ptr;
347+
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
348+
*(_size_ptr) = 0; //size, currently none
349+
350+
_ptr = _data_ptr;
325351

326352
} else {
327-
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
328-
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
329-
_ptrnew += 2;
330-
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
353+
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
354+
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
355+
356+
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
357+
T *_data_ptr = _get_data_ptr(mem_new);
331358

332-
_ptr = (T *)(_ptrnew);
359+
new (_refc_ptr) SafeNumeric<USize>(rc); //refcount
360+
361+
_ptr = _data_ptr;
333362
}
334363
}
335364

@@ -355,12 +384,15 @@ Error CowData<T>::resize(Size p_size) {
355384
}
356385

357386
if (alloc_size != current_alloc_size) {
358-
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
359-
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
360-
_ptrnew += 2;
361-
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
387+
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
388+
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
389+
390+
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
391+
T *_data_ptr = _get_data_ptr(mem_new);
392+
393+
new (_refc_ptr) SafeNumeric<USize>(rc); //refcount
362394

363-
_ptr = (T *)(_ptrnew);
395+
_ptr = _data_ptr;
364396
}
365397

366398
*_get_size() = p_size;

0 commit comments

Comments
 (0)