Navigate: Main, Bottom


Allocator

The allocator described here gives you a simple, yet effective replacement for your compilers new/delete library functions. In the following sections we describe classes for the layout of a memory chunk, for the layout of a memory page, for an allocator of fixed sized chunks and for an integrating segregated allocator. For results using this allocator see our tests.

List management in the allocator is implemented using intrusive lists. Sometimes there is a need to determine the binary logarithm of a number. Memory for management information is allocated using a special internal allocation interface. The allocation of memory pages from the operating system is task of the system allocator.

Chunk

The first stage designing our allocator is to layout a chunk of memory. A chunk contains a back pointer to a page, to which the chunk belongs, the (net) size of the chunk, a list entry for free list management, and a field for alignment. The second part (list entry and alignment field) belongs to the allocation unit, if the chunk is in use, so the overhead per allocation unit is sizeof (CPage *)+ sizeof (unsigned), typically 8 bytes. On the other hand the minimal size of an allocation unit is sizeof (CChunkListEntry)+ sizeof (unsigned), typically 8 bytes. From this, the allocation unit should be aligned to 8 bytes, given the chunk is aligned to 8 bytes.

Dereferencing an application pointer in the GetChunk method contains one guards. The back pointer to the page must be writable.

class CChunk;
typedef SListEntry< CChunk > CChunkListEntry;
typedef SListHead< CChunk > CChunkListHead;

class CPage;

class CChunk {
private:
    CPage *m_page;	
    unsigned m_size;
    CChunkListEntry m_list_entry;
    unsigned m_align;

public:
    CChunk (CPage *page): 
        m_page (page), m_size (0), m_list_entry () {}
    ~CChunk (void) {}

    static CChunk *GetThis (CChunkListEntry *list_entry) {
        return (CChunk *)((char *) list_entry - (char *) &((CChunk *) 0)->m_list_entry);
    }
    CPage *GetPage (void) const {
        return m_page;
    }
    unsigned GetNetSize (void) const {
        return m_size;
    }
    CChunkListEntry *GetListEntry (void) {
        return &m_list_entry;
    }
    static CChunk *GetChunk (void *ptr) {
        CChunk *chunk = (CChunk *) ((char *) ptr - (sizeof (CPage *) + sizeof (unsigned)));
        if (IsBadWritePtr (chunk, sizeof (CChunk)))
            return 0;
        return chunk;
    }
    static unsigned GetChunkSize (unsigned size) {
        return max (sizeof (CPage *) + sizeof (unsigned) + (size), sizeof(CChunk));
    }

    void *Allocate (unsigned size) {
        m_size = size;
        return (char *) this + sizeof (CPage *) + sizeof (unsigned);
    }
    void *Reallocate (unsigned size) {
        if (! m_size)
            return 0;
        m_size = size;
        return (char *) this + sizeof (CPage *) + sizeof (unsigned);
    }
    void Deallocate (void) {
        if (! m_size)
            return;
        m_size = 0;
    }
};

Page

The second stage designing our allocator is to layout a page of memory. A page collects a number of chunks in a contiguous memory zone. The describing attributes of a page include count and (net) size of the chunks, the (gross) chunk size, a pointer to the memory zone, the maximum number of recently used chunks, a list head for free list management of chunks and a list entry for page list management.

The more complex method GetChunkCount calculates an appropriate count of chunks for a page. The results tabulated for the powers of two are here:

Intel

Alpha

Size

Block Size

Count

Filled in %

1

4096

253

99.8047

2

4096

253

99.8047

4

4096

253

99.8047

8

4096

253

99.8047

16

4096

169

100

32

4096

101

99.6094

64

4096

56

99.4141

128

4096

29

97.2656

256

4096

15

97.6563

512

8192

15

95.7031

1024

16384

15

94.7266

2048

32768

15

94.2383

4096

65536

15

93.9941

8192

65536

7

87.6465

16384

65536

3

75.0977

32768

131072

3

75.0488

65536

131072

1

50.0366

131072

262144

1

50.0183

262144

524288

1

50.0092

524288

1048576

1

50.0046

1048576

2097152

1

50.0023

2097152

4194304

1

50.0011

4194304

8388608

1

50.0006

8388608

16777216

1

50.0003

16777216

33554432

1

50.0001

33554432

67108864

1

50.0001

67108864

134217728

1

50

134217728

268435456

1

50

268435456

536870912

1

50

536870912

1073741824

1

50

1073741824

2147483648

1

50

Size

Block Size

Count

Filled in %

1

8192

509

99.9023

2

8192

509

99.9023

4

8192

509

99.9023

8

8192

509

99.9023

16

8192

339

99.8047

32

8192

203

99.6094

64

8192

113

99.8047

128

8192

59

98.4375

256

8192

30

97.168

512

8192

15

95.7031

1024

8192

7

88.6719

2048

16384

7

88.0859

4096

32768

7

87.793

8192

65536

7

87.6465

16384

65536

3

75.0977

32768

131072

3

75.0488

65536

131072

1

50.0366

131072

262144

1

50.0183

262144

524288

1

50.0092

524288

1048576

1

50.0046

1048576

2097152

1

50.0023

2097152

4194304

1

50.0011

4194304

8388608

1

50.0006

8388608

16777216

1

50.0003

16777216

33554432

1

50.0001

33554432

67108864

1

50.0001

67108864

134217728

1

50

134217728

268435456

1

50

268435456

536870912

1

50

536870912

1073741824

1

50

1073741824

2147483648

1

50

The method Allocate checks, whether the page is full. If not, it tries to use the free list to recycle the first free chunk. If there is no such free chunk, it initializes an unused chunk using operator placement new. The method Deallocate links a chunk into the free list.

The static method Create reserves a new page using the system allocator with operator placement new. The static method Destroy releases a page calling the destructor and freeing the associated memory.

#ifdef _M_IX86
#define MIN_CHUNK_PER_OS_CHUNK	15
#define MIN_CHUNK_PER_OS_PAGE	3
#endif
#ifdef _M_ALPHA
#define MIN_CHUNK_PER_OS_CHUNK	7
#define MIN_CHUNK_PER_OS_PAGE	3
#endif

class CPage;
typedef DListEntry< CPage > CPageListEntry;
typedef DListHead< CPage > CPageListHead;

class CPage {
private:
    unsigned m_count;
    unsigned m_size;
    unsigned m_chunk_size;
    void *m_ptr;
    unsigned m_max_used;
    CChunkListHead m_free_list_head;
    CPageListEntry m_list_entry;

public:
    CPage (unsigned count, unsigned size, void *ptr): 
        m_count (count), m_size (size), 
        m_chunk_size (CChunk::GetChunkSize (size)), 
        m_ptr (ptr), m_max_used (0), m_free_list_head (), 
        m_list_entry () {}
    ~CPage (void) {
        CChunkListEntry *list_entry = m_free_list_head.GetFirst ();
        while (list_entry) {
            m_free_list_head.Unlink (list_entry);
            list_entry = m_free_list_head.GetFirst ();
        }
        while (m_max_used > 0) {
            -- m_max_used;
            CChunk *chunk = operator [] (m_max_used); 
            chunk->~CChunk (); 
        }
    }

    CChunk *operator [] (int i) const {
        return (CChunk *) ((char *) m_ptr + i * m_chunk_size);
    }

    static CPage *GetThis (CPageListEntry *list_entry) {
        return (CPage *)((char *) list_entry - (char *) &((CPage *) 0)->m_list_entry);
    }
    unsigned GetChunkCount (void) const {
        return m_count;			
    }
    unsigned GetNetSize (void) const {
        return m_size;
    }
    unsigned GetUsed (void) const {
        return m_max_used - m_free_list_head.GetCount ();
    }
    unsigned GetMaxUsed (void) const {
        return m_max_used;
    }
    CPageListEntry *GetListEntry (void) {
        return &m_list_entry;
    }
    static unsigned GetPageSize (unsigned count, unsigned size) {
        return sizeof (CPage) + count * CChunk::GetChunkSize (size);
    }
    static unsigned GetChunkCount (unsigned size) {
        unsigned chunk_size = CChunk::GetChunkSize (size);
        unsigned header_size = sizeof (void *) + sizeof (CPage);
        if (chunk_size <= OS_CHUNK_SIZE - header_size) {
            unsigned estimated_size = header_size + MIN_CHUNK_PER_OS_CHUNK * chunk_size;
            unsigned block_size = max (1 << CEIL_ILOGB (estimated_size), OS_CHUNK_SIZE);
            unsigned chunk_count = (block_size - header_size) / chunk_size;
            return chunk_count;
        } else if (chunk_size <= OS_PAGE_SIZE - header_size) {
            unsigned estimated_size = header_size + MIN_CHUNK_PER_OS_PAGE * chunk_size;
            unsigned block_size = max (1 << CEIL_ILOGB (estimated_size), OS_PAGE_SIZE);
            unsigned chunk_count = (block_size - header_size) / chunk_size;
            return chunk_count;
        } else {
            unsigned estimated_size = header_size + chunk_size;
            unsigned block_size = 1 << CEIL_ILOGB (estimated_size);
            unsigned chunk_count = (block_size - header_size) / chunk_size;
            return chunk_count;
        }
    }

    void *Allocate (unsigned size) {
        if (GetUsed () < m_count) {
            CChunk *chunk = 0;
            CChunkListEntry *list_entry = m_free_list_head.GetFirst ();
            if (list_entry) {
                chunk = CChunk::GetThis (list_entry);
                m_free_list_head.Unlink (list_entry);
            } else {
                chunk = new (operator [] (m_max_used)) CChunk (this); 
                ++ m_max_used;
            }
            return chunk->Allocate (size);
        }
        return 0;
    }
    bool Deallocate (CChunk *chunk) {
        if (((char *) chunk - (char *) m_ptr) / m_chunk_size < m_max_used) {
            chunk->Deallocate ();
            m_free_list_head.Link (chunk->GetListEntry ());
            return true;
        }
        return false;
    }

    static CPage *Create (unsigned page_size, unsigned count, unsigned size) {
        void *ptr = pmalloc (page_size);
        if (! ptr)
            return 0;
        return new (ptr) CPage (count, size, (char *) ptr + sizeof (CPage));
    }
    static void Destroy (CPage *page) {
        page->~CPage ();
        pfree (page);
    }
};

Fixed Size Allocator

The third stage implementing our allocator is the description of a fixed size allocator. Its task is to manage a linked list of pages for chunks of a certain size. The describing attributes of a fixed size allocator include count and (net) size of the chunks and list heads for list management of pages.

Pages are flipped forth and back between a used and a free list, corresponding whether they are full or partially full. This ensures constant time lookup for a page containing free chunks.

A tricky part hereby is, that the fixed size allocator uses the maximum number of recently used chunks in a page to decide, whether an empty page is released to the operating system. If a page is empty and had not been completely filled, it is not released.

class CFixedAllocator {
private:
    unsigned m_count;
    unsigned m_size;
    CPageListHead m_free_list_head;
    CPageListHead m_used_list_head;
    unsigned m_page_size;

public:
    CFixedAllocator (void): 
        m_count (0), m_size (0), 
        m_free_list_head (), m_used_list_head (),
        m_page_size (0) {}
    ~CFixedAllocator (void) {
        CPageListEntry *list_entry;
        while ((list_entry = m_free_list_head.GetFirst ()) != 0) {
            CPage *page = CPage::GetThis (list_entry);
            m_free_list_head.Unlink (list_entry);
            CPage::Destroy (page);
        }
        while ((list_entry = m_used_list_head.GetFirst ()) != 0) {
            CPage *page = CPage::GetThis (list_entry);
            m_free_list_head.Unlink (list_entry);
            CPage::Destroy (page);
        }
    }

    void SetNetSize (unsigned size) { 
        m_count = CPage::GetChunkCount (size);
        m_size = size;
        m_page_size = CPage::GetPageSize (m_count, m_size);
    }

    void *Allocate (unsigned size) {
        CPageListEntry *list_entry = m_free_list_head.GetFirst ();
        if (! list_entry) {
            CPage *page = CPage::Create (m_page_size, m_count, m_size);
            if (! page)
                return 0;
            list_entry = page->GetListEntry ();
            m_free_list_head.Link (list_entry);
        }
        CPage *page = CPage::GetThis (list_entry);
        void *ptr = page->Allocate (size);
        if (page->GetUsed () == page->GetChunkCount ()) {
            m_free_list_head.Unlink (list_entry);
            m_used_list_head.Link (list_entry);
        }
        return ptr;
    }
    void Deallocate (CChunk *chunk) {
        CPage *page = chunk->GetPage ();
        if (page->Deallocate (chunk)) {
            if (page->GetUsed () + 1 == page->GetChunkCount ()) {
                CPageListEntry *list_entry = page->GetListEntry ();
                m_used_list_head.Unlink (list_entry);
                m_free_list_head.Link (list_entry);
            }
        }
        if (page->GetUsed () == 0 &&
            page->GetMaxUsed () == page->GetChunkCount ()) {
            m_free_list_head.Unlink (page->GetListEntry ());
            CPage::Destroy (page);
        }
    }
};

Segregated Allocator

The fourth and last stage implementing our allocator is the description of a segregated allocator. It dispatches the allocation and deallocation requests to the appropriate fixed size allocator.

The segregated allocator is thread safe, because it uses a critical section for mutual exclusion.

The method Allocate compares the requested size to the minimal size of an allocation unit. The method Deallocate guards against double deallocation checking the (net) size of the chunk. If its size is 0 due to a recent deallocation, the actual request is ignored and the internal lists are guarded.

class CSegregatedAllocator {
private:
    CCriticalSection m_critical_section;
    CFixedAllocator m_fixed_allocator [32];
    int m_min_log_chunk_size;

public:
    CSegregatedAllocator (void): 
        m_critical_section () {
        for (int i = 0; i < 32; ++ i) 
            m_fixed_allocator [i].SetNetSize (1 << i);
        m_min_log_chunk_size = CEIL_ILOGB (CChunk::GetChunkSize (1));
    }
    ~CSegregatedAllocator (void) {}

    void *Allocate (unsigned size) {
        CCriticalSectionGuard guard (m_critical_section);
        if (size == 0)
            size = 1;
        int j = CEIL_ILOGB (size);
        if (j < m_min_log_chunk_size)
            j = m_min_log_chunk_size;
        return m_fixed_allocator [j].Allocate (size);
    }
    void *Reallocate (void *ptr, unsigned size) {
        CCriticalSectionGuard guard (m_critical_section);
        CChunk *chunk = CChunk::GetChunk (ptr);
        if (! chunk) 
            return 0;
        int i = CEIL_ILOGB (chunk->GetNetSize ());
        if (i < 0)
            return 0;
        if (i < m_min_log_chunk_size)
            i = m_min_log_chunk_size;
        if (size == 0)
            size = 1;
        int j = CEIL_ILOGB (size);
        if (j < m_min_log_chunk_size)
            j = m_min_log_chunk_size;
        if (i == j)
            return chunk->Reallocate (size);
        ptr = m_fixed_allocator [j].Allocate (size);
        if (! ptr)
            return 0;
        memmove (ptr, chunk->GetListEntry (), min (size, chunk->GetNetSize ()));
        m_fixed_allocator [i].Deallocate (chunk);
        return ptr;
    }
    void Deallocate (void *ptr) {
        CCriticalSectionGuard guard (m_critical_section);
        CChunk *chunk = CChunk::GetChunk (ptr);
        if (! chunk) 
            return;
        int i = CEIL_ILOGB (chunk->GetNetSize ());
        if (i < 0)
            return;
        if (i < m_min_log_chunk_size)
            i = m_min_log_chunk_size;
        m_fixed_allocator [i].Deallocate (chunk);
    }
};

Global Operator New and Delete

To test distinct allocation mechanisms, we implemented a procedural interface for external allocation. This also allows proper initialization of a singleton segregated allocator. The initialization of the singleton works using internal allocation with operator placement new.

Global operator new and delete then use this procedural interface.

#define EXTERNAL_MS_CRTL    0
#define EXTERNAL_MS_COM     1
#define EXTERNAL_MS_HEAP    2
#define EXTERNAL_DL         3
#define EXTERNAL_OWN        4

int etype = EXTERNAL_MS_CRTL;

IMalloc *g_com_allocator;
CSegregatedAllocator *g_segregated_allocator;

extern "C" {
    void *emalloc (unsigned size);
    void *erealloc (void *ptr, unsigned size);
    void efree (void *ptr);
};

void *emalloc (unsigned size) {
    switch (etype) {
    case EXTERNAL_MS_CRTL:
        return malloc (size);
    case EXTERNAL_MS_COM: {
            if (! g_com_allocator) {
                if (CoGetMalloc (1, &g_com_allocator) != S_OK)
                    return 0;
            }
            return g_com_allocator->Alloc (size);
        }
    case EXTERNAL_MS_HEAP:
        return ::HeapAlloc (::GetProcessHeap (), 0, size);
    case EXTERNAL_DL:
        return dlmalloc (size);
    case EXTERNAL_OWN: {
            if (! g_segregated_allocator) {                
                void *ptr = imalloc (sizeof (CSegregatedAllocator));
                if (! ptr)
                    return 0;
                g_segregated_allocator = new (ptr) CSegregatedAllocator;
                if (! g_segregated_allocator)
                    return 0;
            }
            void *ptr = g_segregated_allocator->Allocate (size);
            return ptr;
        }
    default:
        return 0;
    }
}
void *erealloc (void *ptr, unsigned size) {
    switch (etype) {
    case EXTERNAL_MS_CRTL:
        return realloc (ptr, size);
    case EXTERNAL_MS_COM: {
            if (! g_com_allocator) 
               return 0;
            return g_com_allocator->Realloc (ptr, size);
        }
    case EXTERNAL_MS_HEAP:
        return ::HeapReAlloc (::GetProcessHeap (), 0, ptr, size);
    case EXTERNAL_DL:
        return dlrealloc (ptr, size);
    case EXTERNAL_OWN: {
            if (! g_segregated_allocator) 
                return 0;
            ptr = g_segregated_allocator->Reallocate (ptr, size);
            return ptr;
        }
    default:
        return 0;
    }
}
void efree (void *ptr) {
    switch (etype) {
    case EXTERNAL_MS_CRTL:
        free (ptr);
        return;
    case EXTERNAL_MS_COM: {
            if (! g_com_allocator) 
               return;
            g_com_allocator->Free (ptr);
        }
        return;
    case EXTERNAL_MS_HEAP:
        if (! ::HeapFree (::GetProcessHeap (), 0, ptr))
            throw COSException (__FILE__, __LINE__);
        return;
    case EXTERNAL_DL:
        dlfree (ptr);
        return;
    case EXTERNAL_OWN: {
            if (! g_segregated_allocator) 
                return;
            g_segregated_allocator->Deallocate (ptr);
        }
        return;
    }
}

void * __cdecl operator new (unsigned size) {
    return emalloc (size);
}
void * __cdecl operator new [] (unsigned size) {
    return emalloc (size);
}
void __cdecl operator delete (void *ptr) {
    efree (ptr);
}
void __cdecl operator delete [] (void *ptr) {
    efree (ptr);
}

Navigate: Main, Top

© 2000-2002 GeNeSys mbH & Co. KG
Last revised: 12/14/2000