STL — allocator – 绿虫虫

本文原创

Allocators 是 STL 中封装了内存分配和释放操作的类，绝大多数容器的第二个模板参数都是分配器类型，在 SGI STL 里，默认模板参数为 std::alloc，下面以 SGI STL 为例简要分析内部的一些实现细节。

SGI STL 的 allocators 是通过 malloc 和 realloc 来分配内存的，而不是操作符 new，前者的优势在于可预见性强且易于错误检测，后者在要求轻便和线程安全的条件下会复杂些。

malloc_alloc 是一个简单封装了 malloc 库函数的分配器，它是线程安全的，但是速度慢。它是模板__malloc_alloc_template 的 typedef：

typedef __malloc_alloc_template<0> malloc_alloc;

模板 __malloc_alloc_template 的定义如下：

template <int __inst>
class __malloc_alloc_template
{
private:
    static void* _S_oom_malloc(size_t);
    static void* _S_oom_realloc(void*, size_t);
    static void (* __malloc_alloc_oom_handler)();

public:
    static void* allocate(size_t __n)
    {
        void* __result = malloc(__n);
        if (0 == __result) __result = _S_oom_malloc(__n);
        return __result;
    }

    static void deallocate(void* __p, size_t /* __n */)
    {
        free(__p);
    }

    static void* reallocate(void* __p, size_t /* old_sz */, size_t __new_sz)
    {
        void* __result = realloc(__p, __new_sz);
        if (0 == __result) __result = _S_oom_realloc(__p, __new_sz);
        return __result;
    }

    static void (* __set_malloc_handler(void (*__f)()))()
    {
        void (* __old)() = __malloc_alloc_oom_handler;
        __malloc_alloc_oom_handler = __f;
        return(__old);
    }
};

template <int __inst>
void (* __malloc_alloc_template<__inst>::__malloc_alloc_oom_handler)() = 0;

template <int __inst>
void* __malloc_alloc_template<__inst>::_S_oom_malloc(size_t __n)
{
    void (* __my_malloc_handler)();
    void* __result;

    for (;;) {
        __my_malloc_handler = __malloc_alloc_oom_handler;
        if (0 == __my_malloc_handler) { throw std::bad_alloc(); }
        (*__my_malloc_handler)();
        __result = malloc(__n);
        if (__result) return(__result);
    }
}

template <int __inst>
void* __malloc_alloc_template<__inst>::_S_oom_realloc(void* __p, size_t __n)
{
    void (* __my_malloc_handler)();
    void* __result;

    for (;;) {
        __my_malloc_handler = __malloc_alloc_oom_handler;
        if (0 == __my_malloc_handler) { throw std::bad_alloc(); }
        (*__my_malloc_handler)();
        __result = realloc(__p, __n);
        if (__result) return(__result);
    }
}

所有数据成员都是静态的，这意味着容器在分配内存的时候不需要任何分配器的成员，这避免了不需要的空间开销及容器的代码复杂度，这同时也避免了涉及多个容器的内存分配操作时的一些问题。

__set_malloc_handler 是一个形参类型和返回值类型都为函数指针的函数，完成设置 __malloc_alloc_oom_handler 为传入的形参 __f 后，返回原来的 __malloc_alloc_oom_handler。

成员函数 allocate 中 malloc 失败时，调用成员函数 _S_oom_malloc；同样，成员函数 reallocate 中 realloc 失败时，调用成员函数 _S_oom_realloc。这两个失败时被调用的函数中，都先声明了一个函数指针 __my_malloc_handler，在随后的 for 循环中，此函数指针被赋值为 __malloc_alloc_oom_handler，随后立即调用，一旦 malloc 或 realloc 调用成功就 return。默认的 __malloc_alloc_oom_handler 是定义为0的，需要时由 __set_malloc_handler 来进行设置。

上述模板 __malloc_alloc_template 的非类型参数 __inst 为0的时候就是 malloc_alloc 了。宏 __USE_MALLOC 用来选择 alloc 及 single_client_alloc 的类型：

#ifdef __USE_MALLOC
typedef malloc_alloc alloc;
typedef malloc_alloc single_client_alloc;
#else
typedef __default_alloc_template<true, 0> alloc;
typedef __default_alloc_template<false, 0> single_client_alloc;
#endif

模板 __default_alloc_template 的第一个模板参数是 bool 型非类型参数，用来表示是否线程安全，显然，alloc 是线程安全的，single_client_alloc 是线程不安全的，在程序只是单线程的情况下，single_client_alloc 可能会稍快一点点。

下面来看模板 __default_alloc_template：

template <bool threads, int inst>
class __default_alloc_template
{
private:
  // Really we should use static const int x = N
  // instead of enum { x = N }, but few compilers accept the former.
#if ! (defined(__SUNPRO_CC) || defined(__GNUC__))
    enum {_ALIGN = 8};
    enum {_MAX_BYTES = 128};
    enum {_NFREELISTS = 16}; // _MAX_BYTES/_ALIGN
#endif
    /* 在分配内存时，进行_ALIGN对齐 */
    static size_t _S_round_up(size_t __bytes)
    {
        return (((__bytes) + (size_t) _ALIGN-1) & ~((size_t) _ALIGN - 1));
    }
private:
    union _Obj // 空闲块链表
    {
        union _Obj* _M_free_list_link;
        char _M_client_data[1];    /* The client sees this. */
    };
private:
#if defined(__SUNPRO_CC) || defined(__GNUC__) || defined(__HP_aCC)
    static _Obj* volatile _S_free_list[]; // Specifying a size results in duplicate def for 4.1
#else
    static _Obj* volatile _S_free_list[_NFREELISTS]; 
#endif
    /* 根据__bytes选择使用哪个内存块进行分配 */
    static size_t _S_freelist_index(size_t __bytes)
    {
        return (((__bytes) + (size_t)_ALIGN-1)/(size_t)_ALIGN - 1);
    }

    // Returns an object of size __n, and optionally adds to size __n free list.
    static void* _S_refill(size_t __n);
    // Allocates a chunk for nobjs of size size.  nobjs may be reduced if it is inconvenient to allocate the requested number.
    static char* _S_chunk_alloc(size_t __size, int& __nobjs);

    // Chunk allocation state.
    static char* _S_start_free;
    static char* _S_end_free;
    static size_t _S_heap_size;

#ifdef __STL_THREADS
    static _STL_mutex_lock _S_node_allocator_lock;
#endif

    // It would be nice to use _STL_auto_lock here.  But we
    // don't need the NULL check.  And we do need a test whether
    // threads have actually been started.
    class _Lock;
    friend class _Lock;
    class _Lock
    {
    public:
        _Lock() { { if (threads) _S_node_allocator_lock._M_acquire_lock(); }; }
        ~_Lock() { { if (threads) _S_node_allocator_lock._M_release_lock(); }; }
    };

public:
    /* __n must be > 0      */
    static void* allocate(size_t __n)
    {
        void* __ret = 0;
        if (__n > (size_t) _MAX_BYTES) // 大于128时，直接用malloc_alloc
            __ret = malloc_alloc::allocate(__n);
        else
        {
            /* 获得待分配空间在16个小区块中的位置 */
            _Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__n);
            // Acquire the lock here with a constructor call.
            // This ensures that it is released in exit or during stack unwinding.
#ifndef _NOTHREADS
            /* 构造函数上锁 */
            _Lock __lock_instance;
#endif
            _Obj* __result = *__my_free_list;
            if (__result == 0) // 16个区块已用完
                __ret = _S_refill(_S_round_up(__n));
            else
            {
               *__my_free_list = __result->_M_free_list_link; // 重置记录的下个结点的分配位置
                __ret = __result;
            }
        }
        return __ret;
    };

    /* __p may not be 0 */
    static void deallocate(void* __p, size_t __n)
    {
        if (__n > (size_t) _MAX_BYTES) // 大于128时，直接用malloc_alloc
            malloc_alloc::deallocate(__p, __n);
        else
        {
            _Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__n);
            _Obj* __q = (_Obj*)__p;
#ifndef _NOTHREADS
            /* 构造函数上锁 */
            _Lock __lock_instance;
#endif
            /* 把当前要释放的地址作为空闲区块的首地址，而之前空闲区块的首地址成为其后继区块_M_free_list_link，这样当前释放的地址会被最先重新利用 */
            __q->_M_free_list_link = *__my_free_list;
            *__my_free_list = __q;
            /* 析构函数解锁 */
        }   
    }

    static void* reallocate(void* __p, size_t __old_sz, size_t __new_sz);
};

/* We allocate memory in large chunks in order to avoid fragmenting the malloc heap too much. */
/* We assume that size is properly aligned. We hold the allocation lock. */
template <bool __threads, int __inst>
char* __default_alloc_template<__threads, __inst>::_S_chunk_alloc(size_t __size, int& __nobjs)
{
    char* __result;
    size_t __total_bytes = __size * __nobjs; // 请求内存量
    size_t __bytes_left = _S_end_free - _S_start_free; // 内存池中剩余空间量

    if (__bytes_left >= __total_bytes) // 足够
    {
        __result = _S_start_free; // 返回空间内存的起始地址
        _S_start_free += __total_bytes; // 调整指针位置
        return(__result);
    }
    else if (__bytes_left >= __size) // 至少能分配一个
    {
        __nobjs = (int)(__bytes_left/__size); // 计算实际能满足分配多少个
        __total_bytes = __size * __nobjs;     // 重新计算分配量
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return(__result);
    }
    else
    {
        /* 准备申请的内存空间大小 */
        size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
        // Try to make use of the left-over piece.
        if (__bytes_left > 0) // 利用好剩余空间，在其邻近处申请新的空间
        {
            _Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__bytes_left);
            ((_Obj*)_S_start_free)->_M_free_list_link = *__my_free_list; // 把内存池中未分配的空间首地址链入空闲块链表中
            *__my_free_list = (_Obj*)_S_start_free; // 修正空闲块链表的起始结点指针为当前内存池
        }
        _S_start_free = (char*)malloc(__bytes_to_get); // 申请动作
        if (0 == _S_start_free) // 堆空间不足，尝试在已有空闲空间中寻找
        {
            size_t __i;
            _Obj* volatile* __my_free_list;
            _Obj* __p;
            // Try to make do with what we have.  That can't hurt.  We do not try smaller requests, since that tends to result in disaster on multi-process machines.
            for (__i = __size; __i <= (size_t) _MAX_BYTES; __i += (size_t) _ALIGN)
            {
                __my_free_list = _S_free_list + _S_freelist_index(__i); // 指向当前区块
                __p = *__my_free_list; // 记录空闲区块的头指针
                if (0 != __p) // 空闲块存在
                {
                    *__my_free_list = __p->_M_free_list_link; // 把该空闲块从链表中解出
                    _S_start_free = (char*)__p; // 起始空闲地址设为解出的空闲块地址
                    _S_end_free = _S_start_free + __i;
                    return(_S_chunk_alloc(__size, __nobjs)); // 递归调用，按先前的流程尝试分配内存
                    // Any leftover piece will eventually make it to the right free list.
                }
            }
            _S_end_free = 0;    // In case of exception.
            /* 如果第二级分配器解决不了，尝试使用第一级分配器的回调处理函数 */
            _S_start_free = (char*)malloc_alloc::allocate(__bytes_to_get);
            // This should either throw an exception or remedy the situation.  Thus we assume it succeeded.
        }
        /* 如果代码流程走到这里，一定是malloc申请成功 */
        _S_heap_size += __bytes_to_get;
        _S_end_free = _S_start_free + __bytes_to_get;
        return(_S_chunk_alloc(__size, __nobjs)); // 已经得到了一些空间，按先前流程再次尝试分配
    }
}

/* Returns an object of size __n, and optionally adds to size __n free list.*/
/* We assume that __n is properly aligned. We hold the allocation lock. */
template <bool __threads, int __inst>
void* __default_alloc_template<__threads, __inst>::_S_refill(size_t __n)
{
    int __nobjs = 20; // 试图分配20个大小为__n的空间
    char* __chunk = _S_chunk_alloc(__n, __nobjs); // __nobjs是引用传递
    _Obj* volatile* __my_free_list;
    _Obj* __result;
    _Obj* __current_obj;
    _Obj* __next_obj;
    int __i;

    if (1 == __nobjs) return(__chunk); // 如果只分配到了一块，直接返回
    __my_free_list = _S_free_list + _S_freelist_index(__n);

    /* Build free list in chunk */
    __result = (_Obj*)__chunk;
    /* 设置*__my_free_list指针指向空闲区块中下一次可分配的位置 */
    *__my_free_list = __next_obj = (_Obj*)(__chunk + __n);
    for (__i = 1; ; __i++) // 把连续的空间块用链表链起来
    {
        __current_obj = __next_obj;
        __next_obj = (_Obj*)((char*)__next_obj + __n); // 调整下次分配空间的地址
        if (__nobjs - 1 == __i)
        {
            __current_obj->_M_free_list_link = 0; // 表示无空闲空间可用了
            break;
        }
        else
            __current_obj->_M_free_list_link = __next_obj; // 链接动作
    }
    return(__result);
}

template <bool threads, int inst>
void* __default_alloc_template<threads, inst>::reallocate(void* __p, size_t __old_sz, size_t __new_sz)
{
    void* __result;
    size_t __copy_sz;
    if (__old_sz > (size_t) _MAX_BYTES && __new_sz > (size_t) _MAX_BYTES) // 大于128时，直接调用realloc
        return(realloc(__p, __new_sz));
    if (_S_round_up(__old_sz) == _S_round_up(__new_sz)) // 旧空间大小和新空间大小一样，不执行任何动作
        return(__p);
    __result = allocate(__new_sz);    // 申请新内存
    __copy_sz = __new_sz > __old_sz ? __old_sz : __new_sz; // 需要copy的是较小值
    memcpy(__result, __p, __copy_sz); // 内存搬运
    deallocate(__p, __old_sz);        // 释放旧内存
    return(__result);
}

#ifdef __STL_THREADS
    template <bool __threads, int __inst>
    _STL_mutex_lock __default_alloc_template<__threads, __inst>::_S_node_allocator_lock = { 0 };
#endif

template <bool __threads, int __inst>
char* __default_alloc_template<__threads, __inst>::_S_start_free = 0;

template <bool __threads, int __inst>
char* __default_alloc_template<__threads, __inst>::_S_end_free = 0;

template <bool __threads, int __inst>
size_t __default_alloc_template<__threads, __inst>::_S_heap_size = 0;

template <bool __threads, int __inst>
typename __default_alloc_template<__threads, __inst>::_Obj* volatile
__default_alloc_template<__threads, __inst>::_S_free_list[
#if defined(__SUNPRO_CC) || defined(__GNUC__) || defined(__HP_aCC)
    _NFREELISTS
#else
    __default_alloc_template<__threads, __inst>::_NFREELISTS
#endif
] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
// The 16 zeros are necessary to make version 4.1 of the SunPro compiler happy. Otherwise it appears to allocate too little space for the array.

由于上述的 alloc 是按字节来分配内存的，使用的时候无法根据类型来决定需要分配多少内存，因此需要有一个 adapter 告诉 alloc 内存分配量，这个 adapter 是类 simple_alloc：

template<class _Tp, class _Alloc>
class simple_alloc
{
public:
    static _Tp* allocate(size_t __n)
    {
        return 0 == __n ? 0 : (_Tp*) _Alloc::allocate(__n * sizeof (_Tp));
    }
    static _Tp* allocate(void)
    {
        return (_Tp*) _Alloc::allocate(sizeof (_Tp));
    }
    static void deallocate(_Tp* __p, size_t __n)
    {
        if (0 != __n) _Alloc::deallocate(__p, __n * sizeof (_Tp));
    }
    static void deallocate(_Tp* __p)
    {
        _Alloc::deallocate(__p, sizeof (_Tp));
    }
};

上面的模板参数 _Alloc 一般是第二级分配器 alloc，提供的 allocate 和 deallocate 接口中使用 sizeof 运算符决定分配或释放的内存量，SGI STL 中几乎所有的容器都使用 simple_alloc 来进行内存管理。

Leave a comment 取消回复