Skip to content

Commit b173a4d

Browse files
committed
[Core] Improve CowData and Memory metadata alignment.
1 parent 36847f6 commit b173a4d

File tree

3 files changed

+105
-52
lines changed

3 files changed

+105
-52
lines changed

include/godot_cpp/core/memory.hpp

+34-12
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,6 @@
4040

4141
#include <type_traits>
4242

43-
#ifndef PAD_ALIGN
44-
#define PAD_ALIGN 16 //must always be greater than this at much
45-
#endif
46-
4743
// p_dummy argument is added to avoid conflicts with the engine functions when both engine and GDExtension are built as a static library on iOS.
4844
void *operator new(size_t p_size, const char *p_dummy, const char *p_description); ///< operator new that takes a description and uses MemoryStaticPool
4945
void *operator new(size_t p_size, const char *p_dummy, void *(*p_allocfunc)(size_t p_size)); ///< operator new that takes an allocation function (p_allocfunc) instead of a description
@@ -69,6 +65,18 @@ class Memory {
6965
Memory();
7066

7167
public:
68+
// Alignment: ↓ max_align_t ↓ uint64_t ↓ max_align_t
69+
// ┌─────────────────┬──┬────────────────┬──┬───────────...
70+
// │ uint64_t │░░│ uint64_t │░░│ T[]
71+
// │ alloc size │░░│ element count │░░│ data
72+
// └─────────────────┴──┴────────────────┴──┴───────────...
73+
// Offset: ↑ SIZE_OFFSET ↑ ELEMENT_OFFSET ↑ DATA_OFFSET
74+
// Note: "alloc size" is set and used by the engine only; the extension never reads or modifies it.
75+
76+
static constexpr size_t SIZE_OFFSET = 0;
77+
static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t)));
78+
static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t)));
79+
7280
static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
7381
static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
7482
static void free_static(void *p_ptr, bool p_pad_align = false);
@@ -99,7 +107,7 @@ struct Comparator {
99107

100108
template <class T>
101109
void memdelete(T *p_class, typename std::enable_if<!std::is_base_of_v<godot::Wrapped, T>>::type * = nullptr) {
102-
if (!std::is_trivially_destructible<T>::value) {
110+
if constexpr (!std::is_trivially_destructible_v<T>) {
103111
p_class->~T();
104112
}
105113

@@ -113,7 +121,7 @@ void memdelete(T *p_class) {
113121

114122
template <class T, class A>
115123
void memdelete_allocator(T *p_class) {
116-
if (!std::is_trivially_destructible<T>::value) {
124+
if constexpr (!std::is_trivially_destructible_v<T>) {
117125
p_class->~T();
118126
}
119127

@@ -136,6 +144,10 @@ class DefaultTypedAllocator {
136144

137145
#define memnew_arr(m_class, m_count) memnew_arr_template<m_class>(m_count)
138146

147+
// Returns the address of the uint64_t element-count slot belonging to a padded allocation.
// p_ptr is the data pointer, i.e. the value Memory::alloc_static() hands out when padding is
// enabled (allocation start + Memory::DATA_OFFSET). Subtracting DATA_OFFSET walks back to the
// start of the allocation; adding ELEMENT_OFFSET then lands on the element count.
// NOTE(review): nothing here checks that p_ptr is non-null or really came from a padded
// allocation; callers are presumably responsible for that.
_FORCE_INLINE_ uint64_t *_get_element_count_ptr(uint8_t *p_ptr) {
148+
return (uint64_t *)(p_ptr - Memory::DATA_OFFSET + Memory::ELEMENT_OFFSET);
149+
}
150+
139151
template <typename T>
140152
T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
141153
if (p_elements == 0) {
@@ -145,12 +157,14 @@ T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
145157
same strategy used by std::vector, and the Vector class, so it should be safe.*/
146158

147159
size_t len = sizeof(T) * p_elements;
148-
uint64_t *mem = (uint64_t *)Memory::alloc_static(len, true);
160+
uint8_t *mem = (uint8_t *)Memory::alloc_static(len, true);
149161
T *failptr = nullptr; // Get rid of a warning.
150162
ERR_FAIL_NULL_V(mem, failptr);
151-
*(mem - 1) = p_elements;
152163

153-
if (!std::is_trivially_destructible<T>::value) {
164+
uint64_t *_elem_count_ptr = _get_element_count_ptr(mem);
165+
*(_elem_count_ptr) = p_elements;
166+
167+
if constexpr (!std::is_trivially_destructible_v<T>) {
154168
T *elems = (T *)mem;
155169

156170
/* call operator new */
@@ -162,12 +176,20 @@ T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
162176
return (T *)mem;
163177
}
164178

179+
// Returns the element count stored in the metadata in front of an array, i.e. the value that
// memnew_arr_template() writes through _get_element_count_ptr() when it allocates the array.
// The stored uint64_t is converted to size_t on return.
// NOTE(review): p_class is presumably required to be a pointer obtained from memnew_arr();
// no null or provenance check is performed here.
template <typename T>
180+
size_t memarr_len(const T *p_class) {
181+
uint8_t *ptr = (uint8_t *)p_class; // The C-style cast also drops the const qualifier.
182+
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
183+
return *(_elem_count_ptr);
184+
}
185+
165186
template <typename T>
166187
void memdelete_arr(T *p_class) {
167-
uint64_t *ptr = (uint64_t *)p_class;
188+
uint8_t *ptr = (uint8_t *)p_class;
168189

169-
if (!std::is_trivially_destructible<T>::value) {
170-
uint64_t elem_count = *(ptr - 1);
190+
if constexpr (!std::is_trivially_destructible_v<T>) {
191+
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
192+
uint64_t elem_count = *(_elem_count_ptr);
171193

172194
for (uint64_t i = 0; i < elem_count; i++) {
173195
p_class[i].~T();

include/godot_cpp/templates/cowdata.hpp

+65-34
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class VMap;
5252
template <class T>
5353
class CharStringT;
5454

55-
SAFE_NUMERIC_TYPE_PUN_GUARANTEES(uint64_t)
55+
static_assert(std::is_trivially_destructible_v<std::atomic<uint64_t>>);
5656

5757
// Silence a false positive warning (see GH-52119).
5858
#if defined(__GNUC__) && !defined(__clang__)
@@ -96,26 +96,47 @@ class CowData {
9696
return ++x;
9797
}
9898

99-
static constexpr USize ALLOC_PAD = sizeof(USize) * 2; // For size and atomic refcount.
99+
// Alignment: ↓ max_align_t ↓ USize ↓ max_align_t
100+
// ┌────────────────────┬──┬─────────────┬──┬───────────...
101+
// │ SafeNumeric<USize> │░░│ USize │░░│ T[]
102+
// │ ref. count │░░│ data size │░░│ data
103+
// └────────────────────┴──┴─────────────┴──┴───────────...
104+
// Offset: ↑ REF_COUNT_OFFSET ↑ SIZE_OFFSET ↑ DATA_OFFSET
105+
106+
static constexpr size_t REF_COUNT_OFFSET = 0;
107+
static constexpr size_t SIZE_OFFSET = ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize) == 0) ? (REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) : ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) + alignof(USize) - ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize)));
108+
static constexpr size_t DATA_OFFSET = ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t) == 0) ? (SIZE_OFFSET + sizeof(USize)) : ((SIZE_OFFSET + sizeof(USize)) + alignof(max_align_t) - ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t)));
100109

101110
mutable T *_ptr = nullptr;
102111

103112
// internal helpers
104113

114+
// Returns the location of the reference counter inside a raw allocation.
// p_ptr is the START of the allocation (the pointer returned by Memory::alloc_static /
// realloc_static), not the data pointer stored in _ptr; the counter sits REF_COUNT_OFFSET
// bytes after it.
static _FORCE_INLINE_ SafeNumeric<USize> *_get_refcount_ptr(uint8_t *p_ptr) {
115+
return (SafeNumeric<USize> *)(p_ptr + REF_COUNT_OFFSET);
116+
}
117+
118+
// Returns the location of the stored element count ("data size") inside a raw allocation.
// p_ptr is the START of the allocation, not the data pointer stored in _ptr; the size field
// sits SIZE_OFFSET bytes after it.
static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) {
119+
return (USize *)(p_ptr + SIZE_OFFSET);
120+
}
121+
122+
// Returns the location of the first element inside a raw allocation.
// p_ptr is the START of the allocation; the element storage begins DATA_OFFSET bytes after it.
// The returned pointer is the value callers assign to _ptr.
static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) {
123+
return (T *)(p_ptr + DATA_OFFSET);
124+
}
125+
105126
_FORCE_INLINE_ SafeNumeric<USize> *_get_refcount() const {
106127
if (!_ptr) {
107128
return nullptr;
108129
}
109130

110-
return reinterpret_cast<SafeNumeric<USize> *>(_ptr) - 2;
131+
return (SafeNumeric<USize> *)((uint8_t *)_ptr - DATA_OFFSET + REF_COUNT_OFFSET);
111132
}
112133

113134
_FORCE_INLINE_ USize *_get_size() const {
114135
if (!_ptr) {
115136
return nullptr;
116137
}
117138

118-
return reinterpret_cast<USize *>(_ptr) - 1;
139+
return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET);
119140
}
120141

121142
_FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const {
@@ -240,7 +261,7 @@ void CowData<T>::_unref(void *p_data) {
240261
}
241262
// clean up
242263

243-
if (!std::is_trivially_destructible<T>::value) {
264+
if constexpr (!std::is_trivially_destructible_v<T>) {
244265
USize *count = _get_size();
245266
T *data = (T *)(count + 1);
246267

@@ -251,7 +272,7 @@ void CowData<T>::_unref(void *p_data) {
251272
}
252273

253274
// free mem
254-
Memory::free_static(((uint8_t *)p_data) - ALLOC_PAD, false);
275+
Memory::free_static(((uint8_t *)p_data) - DATA_OFFSET, false);
255276
}
256277

257278
template <class T>
@@ -267,26 +288,27 @@ typename CowData<T>::USize CowData<T>::_copy_on_write() {
267288
/* in use by more than me */
268289
USize current_size = *_get_size();
269290

270-
USize *mem_new = (USize *)Memory::alloc_static(_get_alloc_size(current_size) + ALLOC_PAD, false);
271-
mem_new += 2;
291+
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false);
292+
ERR_FAIL_NULL_V(mem_new, 0);
272293

273-
new (mem_new - 2) SafeNumeric<USize>(1); //refcount
274-
*(mem_new - 1) = current_size; //size
294+
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
295+
USize *_size_ptr = _get_size_ptr(mem_new);
296+
T *_data_ptr = _get_data_ptr(mem_new);
275297

276-
T *_data = (T *)(mem_new);
298+
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
299+
*(_size_ptr) = current_size; //size
277300

278301
// initialize new elements
279-
if (std::is_trivially_copyable<T>::value) {
280-
memcpy(mem_new, _ptr, current_size * sizeof(T));
281-
302+
if constexpr (std::is_trivially_copyable_v<T>) {
303+
memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T));
282304
} else {
283305
for (USize i = 0; i < current_size; i++) {
284-
memnew_placement(&_data[i], T(_ptr[i]));
306+
memnew_placement(&_data_ptr[i], T(_ptr[i]));
285307
}
286308
}
287309

288310
_unref(_ptr);
289-
_ptr = _data;
311+
_ptr = _data_ptr;
290312

291313
rc = 1;
292314
}
@@ -322,27 +344,33 @@ Error CowData<T>::resize(Size p_size) {
322344
if (alloc_size != current_alloc_size) {
323345
if (current_size == 0) {
324346
// alloc from scratch
325-
USize *ptr = (USize *)Memory::alloc_static(alloc_size + ALLOC_PAD, false);
326-
ptr += 2;
327-
ERR_FAIL_NULL_V(ptr, ERR_OUT_OF_MEMORY);
328-
*(ptr - 1) = 0; //size, currently none
329-
new (ptr - 2) SafeNumeric<USize>(1); //refcount
347+
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false);
348+
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
349+
350+
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
351+
USize *_size_ptr = _get_size_ptr(mem_new);
352+
T *_data_ptr = _get_data_ptr(mem_new);
330353

331-
_ptr = (T *)ptr;
354+
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
355+
*(_size_ptr) = 0; //size, currently none
332356

357+
_ptr = _data_ptr;
333358
} else {
334-
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
335-
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
336-
_ptrnew += 2;
337-
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
359+
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
360+
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
361+
362+
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
363+
T *_data_ptr = _get_data_ptr(mem_new);
338364

339-
_ptr = (T *)(_ptrnew);
365+
new (_refc_ptr) SafeNumeric<USize>(rc); //refcount
366+
367+
_ptr = _data_ptr;
340368
}
341369
}
342370

343371
// construct the newly created elements
344372

345-
if (!std::is_trivially_constructible<T>::value) {
373+
if constexpr (!std::is_trivially_constructible_v<T>) {
346374
for (Size i = *_get_size(); i < p_size; i++) {
347375
memnew_placement(&_ptr[i], T);
348376
}
@@ -353,7 +381,7 @@ Error CowData<T>::resize(Size p_size) {
353381
*_get_size() = p_size;
354382

355383
} else if (p_size < current_size) {
356-
if (!std::is_trivially_destructible<T>::value) {
384+
if constexpr (!std::is_trivially_destructible_v<T>) {
357385
// deinitialize no longer needed elements
358386
for (USize i = p_size; i < *_get_size(); i++) {
359387
T *t = &_ptr[i];
@@ -362,12 +390,15 @@ Error CowData<T>::resize(Size p_size) {
362390
}
363391

364392
if (alloc_size != current_alloc_size) {
365-
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
366-
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
367-
_ptrnew += 2;
368-
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
393+
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
394+
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
395+
396+
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
397+
T *_data_ptr = _get_data_ptr(mem_new);
398+
399+
new (_refc_ptr) SafeNumeric<USize>(rc); //refcount
369400

370-
_ptr = (T *)(_ptrnew);
401+
_ptr = _data_ptr;
371402
}
372403

373404
*_get_size() = p_size;

src/core/memory.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
4141
bool prepad = p_pad_align;
4242
#endif
4343

44-
void *mem = internal::gdextension_interface_mem_alloc(p_bytes + (prepad ? PAD_ALIGN : 0));
44+
void *mem = internal::gdextension_interface_mem_alloc(p_bytes + (prepad ? DATA_OFFSET : 0));
4545
ERR_FAIL_NULL_V(mem, nullptr);
4646

4747
if (prepad) {
4848
uint8_t *s8 = (uint8_t *)mem;
49-
return s8 + PAD_ALIGN;
49+
return s8 + DATA_OFFSET;
5050
} else {
5151
return mem;
5252
}
@@ -69,10 +69,10 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
6969
#endif
7070

7171
if (prepad) {
72-
mem -= PAD_ALIGN;
73-
mem = (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes + PAD_ALIGN);
72+
mem -= DATA_OFFSET;
73+
mem = (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes + DATA_OFFSET);
7474
ERR_FAIL_NULL_V(mem, nullptr);
75-
return mem + PAD_ALIGN;
75+
return mem + DATA_OFFSET;
7676
} else {
7777
return (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes);
7878
}
@@ -88,7 +88,7 @@ void Memory::free_static(void *p_ptr, bool p_pad_align) {
8888
#endif
8989

9090
if (prepad) {
91-
mem -= PAD_ALIGN;
91+
mem -= DATA_OFFSET;
9292
}
9393
internal::gdextension_interface_mem_free(mem);
9494
}

0 commit comments

Comments
 (0)