The allocator described here gives you a simple, yet effective
replacement for your compilers new/delete library
functions. In the following sections we describe classes for the
layout of a memory chunk, for the layout of a memory page, for an
allocator of fixed sized chunks and for an integrating segregated
allocator. For results using this allocator see our tests.
List management in the allocator is implemented using intrusive lists. Sometimes there is a need to determine the binary logarithm of a number. Memory for management information is allocated using a special internal allocation interface. The allocation of memory pages from the operating system is task of the system allocator.
The first stage designing our allocator is to layout a chunk
of memory. A chunk contains a back pointer to a page, to which
the chunk belongs, the (net) size of the chunk, a list entry for
free list management, and a field for alignment. The second part
(list entry and alignment field) belongs to the allocation unit,
if the chunk is in use, so the overhead per allocation unit is sizeof
(CPage *)+ sizeof (unsigned), typically 8 bytes. On the
other hand the minimal size of an allocation unit is sizeof
(CChunkListEntry)+ sizeof (unsigned), typically 8 bytes.
From this, the allocation unit should be aligned to 8 bytes,
given the chunk is aligned to 8 bytes.
Dereferencing an application pointer in the GetChunk
method contains one guards. The back pointer to the page must be
writable.
class CChunk;
typedef SListEntry< CChunk > CChunkListEntry;
typedef SListHead< CChunk > CChunkListHead;
class CPage;
class CChunk {
private:
CPage *m_page;
unsigned m_size;
CChunkListEntry m_list_entry;
unsigned m_align;
public:
CChunk (CPage *page):
m_page (page), m_size (0), m_list_entry () {}
~CChunk (void) {}
static CChunk *GetThis (CChunkListEntry *list_entry) {
return (CChunk *)((char *) list_entry - (char *) &((CChunk *) 0)->m_list_entry);
}
CPage *GetPage (void) const {
return m_page;
}
unsigned GetNetSize (void) const {
return m_size;
}
CChunkListEntry *GetListEntry (void) {
return &m_list_entry;
}
static CChunk *GetChunk (void *ptr) {
CChunk *chunk = (CChunk *) ((char *) ptr - (sizeof (CPage *) + sizeof (unsigned)));
if (IsBadWritePtr (chunk, sizeof (CChunk)))
return 0;
return chunk;
}
static unsigned GetChunkSize (unsigned size) {
return max (sizeof (CPage *) + sizeof (unsigned) + (size), sizeof(CChunk));
}
void *Allocate (unsigned size) {
m_size = size;
return (char *) this + sizeof (CPage *) + sizeof (unsigned);
}
void *Reallocate (unsigned size) {
if (! m_size)
return 0;
m_size = size;
return (char *) this + sizeof (CPage *) + sizeof (unsigned);
}
void Deallocate (void) {
if (! m_size)
return;
m_size = 0;
}
};
The second stage designing our allocator is to layout a page of memory. A page collects a number of chunks in a contiguous memory zone. The describing attributes of a page include count and (net) size of the chunks, the (gross) chunk size, a pointer to the memory zone, the maximum number of recently used chunks, a list head for free list management of chunks and a list entry for page list management.
The more complex method GetChunkCount calculates
an appropriate count of chunks for a page. The results tabulated
for the powers of two are here:
Intel |
Alpha |
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
The method Allocate checks, whether the page is
full. If not, it tries to use the free list to recycle the first
free chunk. If there is no such free chunk, it initializes an
unused chunk using operator placement new. The
method Deallocate links a chunk into the free list.
The static method Create reserves a new page
using the system allocator with
operator placement new. The static
method Destroy releases a page calling the
destructor and freeing the associated memory.
#ifdef _M_IX86
#define MIN_CHUNK_PER_OS_CHUNK 15
#define MIN_CHUNK_PER_OS_PAGE 3
#endif
#ifdef _M_ALPHA
#define MIN_CHUNK_PER_OS_CHUNK 7
#define MIN_CHUNK_PER_OS_PAGE 3
#endif
class CPage;
typedef DListEntry< CPage > CPageListEntry;
typedef DListHead< CPage > CPageListHead;
class CPage {
private:
unsigned m_count;
unsigned m_size;
unsigned m_chunk_size;
void *m_ptr;
unsigned m_max_used;
CChunkListHead m_free_list_head;
CPageListEntry m_list_entry;
public:
CPage (unsigned count, unsigned size, void *ptr):
m_count (count), m_size (size),
m_chunk_size (CChunk::GetChunkSize (size)),
m_ptr (ptr), m_max_used (0), m_free_list_head (),
m_list_entry () {}
~CPage (void) {
CChunkListEntry *list_entry = m_free_list_head.GetFirst ();
while (list_entry) {
m_free_list_head.Unlink (list_entry);
list_entry = m_free_list_head.GetFirst ();
}
while (m_max_used > 0) {
-- m_max_used;
CChunk *chunk = operator [] (m_max_used);
chunk->~CChunk ();
}
}
CChunk *operator [] (int i) const {
return (CChunk *) ((char *) m_ptr + i * m_chunk_size);
}
static CPage *GetThis (CPageListEntry *list_entry) {
return (CPage *)((char *) list_entry - (char *) &((CPage *) 0)->m_list_entry);
}
unsigned GetChunkCount (void) const {
return m_count;
}
unsigned GetNetSize (void) const {
return m_size;
}
unsigned GetUsed (void) const {
return m_max_used - m_free_list_head.GetCount ();
}
unsigned GetMaxUsed (void) const {
return m_max_used;
}
CPageListEntry *GetListEntry (void) {
return &m_list_entry;
}
static unsigned GetPageSize (unsigned count, unsigned size) {
return sizeof (CPage) + count * CChunk::GetChunkSize (size);
}
static unsigned GetChunkCount (unsigned size) {
unsigned chunk_size = CChunk::GetChunkSize (size);
unsigned header_size = sizeof (void *) + sizeof (CPage);
if (chunk_size <= OS_CHUNK_SIZE - header_size) {
unsigned estimated_size = header_size + MIN_CHUNK_PER_OS_CHUNK * chunk_size;
unsigned block_size = max (1 << CEIL_ILOGB (estimated_size), OS_CHUNK_SIZE);
unsigned chunk_count = (block_size - header_size) / chunk_size;
return chunk_count;
} else if (chunk_size <= OS_PAGE_SIZE - header_size) {
unsigned estimated_size = header_size + MIN_CHUNK_PER_OS_PAGE * chunk_size;
unsigned block_size = max (1 << CEIL_ILOGB (estimated_size), OS_PAGE_SIZE);
unsigned chunk_count = (block_size - header_size) / chunk_size;
return chunk_count;
} else {
unsigned estimated_size = header_size + chunk_size;
unsigned block_size = 1 << CEIL_ILOGB (estimated_size);
unsigned chunk_count = (block_size - header_size) / chunk_size;
return chunk_count;
}
}
void *Allocate (unsigned size) {
if (GetUsed () < m_count) {
CChunk *chunk = 0;
CChunkListEntry *list_entry = m_free_list_head.GetFirst ();
if (list_entry) {
chunk = CChunk::GetThis (list_entry);
m_free_list_head.Unlink (list_entry);
} else {
chunk = new (operator [] (m_max_used)) CChunk (this);
++ m_max_used;
}
return chunk->Allocate (size);
}
return 0;
}
bool Deallocate (CChunk *chunk) {
if (((char *) chunk - (char *) m_ptr) / m_chunk_size < m_max_used) {
chunk->Deallocate ();
m_free_list_head.Link (chunk->GetListEntry ());
return true;
}
return false;
}
static CPage *Create (unsigned page_size, unsigned count, unsigned size) {
void *ptr = pmalloc (page_size);
if (! ptr)
return 0;
return new (ptr) CPage (count, size, (char *) ptr + sizeof (CPage));
}
static void Destroy (CPage *page) {
page->~CPage ();
pfree (page);
}
};
The third stage implementing our allocator is the description of a fixed size allocator. Its task is to manage a linked list of pages for chunks of a certain size. The describing attributes of a fixed size allocator include count and (net) size of the chunks and list heads for list management of pages.
Pages are flipped forth and back between a used and a free list, corresponding whether they are full or partially full. This ensures constant time lookup for a page containing free chunks.
A tricky part hereby is, that the fixed size allocator uses the maximum number of recently used chunks in a page to decide, whether an empty page is released to the operating system. If a page is empty and had not been completely filled, it is not released.
class CFixedAllocator {
private:
unsigned m_count;
unsigned m_size;
CPageListHead m_free_list_head;
CPageListHead m_used_list_head;
unsigned m_page_size;
public:
CFixedAllocator (void):
m_count (0), m_size (0),
m_free_list_head (), m_used_list_head (),
m_page_size (0) {}
~CFixedAllocator (void) {
CPageListEntry *list_entry;
while ((list_entry = m_free_list_head.GetFirst ()) != 0) {
CPage *page = CPage::GetThis (list_entry);
m_free_list_head.Unlink (list_entry);
CPage::Destroy (page);
}
while ((list_entry = m_used_list_head.GetFirst ()) != 0) {
CPage *page = CPage::GetThis (list_entry);
m_free_list_head.Unlink (list_entry);
CPage::Destroy (page);
}
}
void SetNetSize (unsigned size) {
m_count = CPage::GetChunkCount (size);
m_size = size;
m_page_size = CPage::GetPageSize (m_count, m_size);
}
void *Allocate (unsigned size) {
CPageListEntry *list_entry = m_free_list_head.GetFirst ();
if (! list_entry) {
CPage *page = CPage::Create (m_page_size, m_count, m_size);
if (! page)
return 0;
list_entry = page->GetListEntry ();
m_free_list_head.Link (list_entry);
}
CPage *page = CPage::GetThis (list_entry);
void *ptr = page->Allocate (size);
if (page->GetUsed () == page->GetChunkCount ()) {
m_free_list_head.Unlink (list_entry);
m_used_list_head.Link (list_entry);
}
return ptr;
}
void Deallocate (CChunk *chunk) {
CPage *page = chunk->GetPage ();
if (page->Deallocate (chunk)) {
if (page->GetUsed () + 1 == page->GetChunkCount ()) {
CPageListEntry *list_entry = page->GetListEntry ();
m_used_list_head.Unlink (list_entry);
m_free_list_head.Link (list_entry);
}
}
if (page->GetUsed () == 0 &&
page->GetMaxUsed () == page->GetChunkCount ()) {
m_free_list_head.Unlink (page->GetListEntry ());
CPage::Destroy (page);
}
}
};
The fourth and last stage implementing our allocator is the description of a segregated allocator. It dispatches the allocation and deallocation requests to the appropriate fixed size allocator.
The segregated allocator is thread safe, because it uses a critical section for mutual exclusion.
The method Allocate compares the requested size
to the minimal size of an allocation unit. The method Deallocate
guards against double deallocation checking the (net) size
of the chunk. If its size is 0 due to a recent deallocation, the
actual request is ignored and the internal lists are guarded.
class CSegregatedAllocator {
private:
CCriticalSection m_critical_section;
CFixedAllocator m_fixed_allocator [32];
int m_min_log_chunk_size;
public:
CSegregatedAllocator (void):
m_critical_section () {
for (int i = 0; i < 32; ++ i)
m_fixed_allocator [i].SetNetSize (1 << i);
m_min_log_chunk_size = CEIL_ILOGB (CChunk::GetChunkSize (1));
}
~CSegregatedAllocator (void) {}
void *Allocate (unsigned size) {
CCriticalSectionGuard guard (m_critical_section);
if (size == 0)
size = 1;
int j = CEIL_ILOGB (size);
if (j < m_min_log_chunk_size)
j = m_min_log_chunk_size;
return m_fixed_allocator [j].Allocate (size);
}
void *Reallocate (void *ptr, unsigned size) {
CCriticalSectionGuard guard (m_critical_section);
CChunk *chunk = CChunk::GetChunk (ptr);
if (! chunk)
return 0;
int i = CEIL_ILOGB (chunk->GetNetSize ());
if (i < 0)
return 0;
if (i < m_min_log_chunk_size)
i = m_min_log_chunk_size;
if (size == 0)
size = 1;
int j = CEIL_ILOGB (size);
if (j < m_min_log_chunk_size)
j = m_min_log_chunk_size;
if (i == j)
return chunk->Reallocate (size);
ptr = m_fixed_allocator [j].Allocate (size);
if (! ptr)
return 0;
memmove (ptr, chunk->GetListEntry (), min (size, chunk->GetNetSize ()));
m_fixed_allocator [i].Deallocate (chunk);
return ptr;
}
void Deallocate (void *ptr) {
CCriticalSectionGuard guard (m_critical_section);
CChunk *chunk = CChunk::GetChunk (ptr);
if (! chunk)
return;
int i = CEIL_ILOGB (chunk->GetNetSize ());
if (i < 0)
return;
if (i < m_min_log_chunk_size)
i = m_min_log_chunk_size;
m_fixed_allocator [i].Deallocate (chunk);
}
};
To test distinct allocation mechanisms, we implemented a
procedural interface for external allocation. This also allows
proper initialization of a singleton segregated allocator. The
initialization of the singleton works using internal
allocation with operator placement new.
Global operator new and delete then
use this procedural interface.
#define EXTERNAL_MS_CRTL 0
#define EXTERNAL_MS_COM 1
#define EXTERNAL_MS_HEAP 2
#define EXTERNAL_DL 3
#define EXTERNAL_OWN 4
int etype = EXTERNAL_MS_CRTL;
IMalloc *g_com_allocator;
CSegregatedAllocator *g_segregated_allocator;
extern "C" {
void *emalloc (unsigned size);
void *erealloc (void *ptr, unsigned size);
void efree (void *ptr);
};
void *emalloc (unsigned size) {
switch (etype) {
case EXTERNAL_MS_CRTL:
return malloc (size);
case EXTERNAL_MS_COM: {
if (! g_com_allocator) {
if (CoGetMalloc (1, &g_com_allocator) != S_OK)
return 0;
}
return g_com_allocator->Alloc (size);
}
case EXTERNAL_MS_HEAP:
return ::HeapAlloc (::GetProcessHeap (), 0, size);
case EXTERNAL_DL:
return dlmalloc (size);
case EXTERNAL_OWN: {
if (! g_segregated_allocator) {
void *ptr = imalloc (sizeof (CSegregatedAllocator));
if (! ptr)
return 0;
g_segregated_allocator = new (ptr) CSegregatedAllocator;
if (! g_segregated_allocator)
return 0;
}
void *ptr = g_segregated_allocator->Allocate (size);
return ptr;
}
default:
return 0;
}
}
void *erealloc (void *ptr, unsigned size) {
switch (etype) {
case EXTERNAL_MS_CRTL:
return realloc (ptr, size);
case EXTERNAL_MS_COM: {
if (! g_com_allocator)
return 0;
return g_com_allocator->Realloc (ptr, size);
}
case EXTERNAL_MS_HEAP:
return ::HeapReAlloc (::GetProcessHeap (), 0, ptr, size);
case EXTERNAL_DL:
return dlrealloc (ptr, size);
case EXTERNAL_OWN: {
if (! g_segregated_allocator)
return 0;
ptr = g_segregated_allocator->Reallocate (ptr, size);
return ptr;
}
default:
return 0;
}
}
void efree (void *ptr) {
switch (etype) {
case EXTERNAL_MS_CRTL:
free (ptr);
return;
case EXTERNAL_MS_COM: {
if (! g_com_allocator)
return;
g_com_allocator->Free (ptr);
}
return;
case EXTERNAL_MS_HEAP:
if (! ::HeapFree (::GetProcessHeap (), 0, ptr))
throw COSException (__FILE__, __LINE__);
return;
case EXTERNAL_DL:
dlfree (ptr);
return;
case EXTERNAL_OWN: {
if (! g_segregated_allocator)
return;
g_segregated_allocator->Deallocate (ptr);
}
return;
}
}
void * __cdecl operator new (unsigned size) {
return emalloc (size);
}
void * __cdecl operator new [] (unsigned size) {
return emalloc (size);
}
void __cdecl operator delete (void *ptr) {
efree (ptr);
}
void __cdecl operator delete [] (void *ptr) {
efree (ptr);
}
© 2000-2002 GeNeSys
mbH & Co. KG
Last revised: 12/14/2000