
[gcc13] STL Source Analysis: std::shared_ptr

No preamble, let's go straight to the source. Everything below is based on the shared_ptr implementation shipped with GCC 13.

The structure of std::shared_ptr

//<bits/shared_ptr.h>
template<typename _Tp>
  class shared_ptr : public __shared_ptr<_Tp>
  {
    //...
    template<typename _Yp, typename = _Constructible<_Yp*>>
      explicit
      shared_ptr(_Yp* __p) : __shared_ptr<_Tp>(__p) { }
  };

//<bits/shared_ptr_base.h>
// Forward declarations.
template<typename _Tp, _Lock_policy _Lp = __default_lock_policy>
  class __shared_ptr;

template<typename _Tp, _Lock_policy _Lp>
  class __shared_ptr
  : public __shared_ptr_access<_Tp, _Lp>
  {
  public:
    using element_type = typename remove_extent<_Tp>::type;

  private:
    //...
    element_type*       _M_ptr;       // Contained pointer.
    __shared_count<_Lp> _M_refcount;  // Reference counter.
  };

// Define operator* and operator-> for shared_ptr<T>.
template<typename _Tp, _Lock_policy _Lp,
         bool = is_array<_Tp>::value, bool = is_void<_Tp>::value>
  class __shared_ptr_access
  {
  };

The template declarations above give a rough picture of the class hierarchy and its members: shared_ptr<T> derives from __shared_ptr<T, __default_lock_policy>, and __shared_ptr holds the actual pointer (element_type*) and the reference count member (__shared_count<__default_lock_policy>). We can set __shared_ptr_access aside for now and look at __shared_count first.
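
As a quick sanity check of this layout, here is a tiny illustrative snippet. The two-word size is a libstdc++ implementation detail rather than a standard guarantee, so treat the numbers as an observation, not a contract:

// A shared_ptr is the contained pointer plus a pointer to the control
// block, so on libstdc++ it is typically two machine words wide.
#include <cstdio>
#include <memory>

int main()
{
    std::shared_ptr<int> sp = std::make_shared<int>(42);
    std::printf("sizeof(shared_ptr<int>) = %zu, 2 * sizeof(void*) = %zu\n",
                sizeof(sp), 2 * sizeof(void*));
    std::printf("use_count = %ld\n", sp.use_count());
    return 0;
}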

__shared_count

Also in <bits/shared_ptr_base.h>:

template<_Lock_policy _Lp = __default_lock_policy>
  class _Sp_counted_base
  : public _Mutex_base<_Lp>
  {
  public:
    _Sp_counted_base() noexcept
    : _M_use_count(1), _M_weak_count(1) { }

    //...

  private:
    _Atomic_word  _M_use_count;   // #shared
    _Atomic_word  _M_weak_count;  // #weak + (#shared != 0)
  };

// Counted ptr with no deleter or allocator support
template<typename _Ptr, _Lock_policy _Lp>
  class _Sp_counted_ptr final : public _Sp_counted_base<_Lp>
  {
  public:
    explicit
    _Sp_counted_ptr(_Ptr __p) noexcept
    : _M_ptr(__p) { }

    virtual void
    _M_dispose() noexcept
    { delete _M_ptr; }

    virtual void
    _M_destroy() noexcept
    { delete this; }

    virtual void*
    _M_get_deleter(const std::type_info&) noexcept
    { return nullptr; }

    _Sp_counted_ptr(const _Sp_counted_ptr&) = delete;
    _Sp_counted_ptr& operator=(const _Sp_counted_ptr&) = delete;

  private:
    _Ptr _M_ptr;
  };

template<_Lock_policy _Lp>
  class __shared_count
  {
    //...
  private:
    _Sp_counted_base<_Lp>* _M_pi;
  };

_Atomic_word comes from <bits/atomic_word.h>; its concrete type depends on the architecture and platform, and on my test machine (x86_64-redhat) it is typedef int _Atomic_word. So the main thing to look at is how __shared_count manipulates the counts through _M_pi, the _Sp_counted_base pointer. Which concrete class _M_pi points to depends on how the shared_ptr was constructed; take make_shared as an example:

__shared_count in make_shared

<bits/shared_ptr.h>

template<typename _Tp, typename... _Args>
  inline shared_ptr<_NonArray<_Tp>>
  make_shared(_Args&&... __args)
  {
    using _Alloc = allocator<void>;
    _Alloc __a;
    return shared_ptr<_Tp>(_Sp_alloc_shared_tag<_Alloc>{__a},
                           std::forward<_Args>(__args)...);
  }

template<typename _Tp>
  class shared_ptr : public __shared_ptr<_Tp>
  {
  private:
    // This constructor is non-standard, it is used by allocate_shared.
    template<typename _Alloc, typename... _Args>
      shared_ptr(_Sp_alloc_shared_tag<_Alloc> __tag, _Args&&... __args)
      : __shared_ptr<_Tp>(__tag, std::forward<_Args>(__args)...)
      { }
  };

<bits/shared_ptr_base.h>

template<typename _Tp, _Lock_policy _Lp>
  class __shared_ptr
  : public __shared_ptr_access<_Tp, _Lp>
  {
  protected:
    // This constructor is non-standard, it is used by allocate_shared.
    template<typename _Alloc, typename... _Args>
      __shared_ptr(_Sp_alloc_shared_tag<_Alloc> __tag, _Args&&... __args)
      : _M_ptr(), _M_refcount(_M_ptr, __tag, std::forward<_Args>(__args)...)
      { _M_enable_shared_from_this_with(_M_ptr); }
  };

template<_Lock_policy _Lp>
  class __shared_count
  {
    template<typename _Tp, typename _Alloc, typename... _Args>
      __shared_count(_Tp*& __p, _Sp_alloc_shared_tag<_Alloc> __a,
                     _Args&&... __args)
      {
        typedef _Sp_counted_ptr_inplace<_Tp, _Alloc, _Lp> _Sp_cp_type;
        typename _Sp_cp_type::__allocator_type __a2(__a._M_a);
        auto __guard = std::__allocate_guarded(__a2);
        _Sp_cp_type* __mem = __guard.get();
        auto __pi = ::new (__mem)
          _Sp_cp_type(__a._M_a, std::forward<_Args>(__args)...);
        __guard = nullptr;
        _M_pi = __pi;
        __p = __pi->_M_ptr();
      }
  };

In other words, make_shared goes through a special tag-dispatched constructor, and in this path _M_pi ends up pointing to _Sp_counted_ptr_inplace (concretely _Sp_counted_ptr_inplace<T, Alloc, Lp>), a subclass of _Sp_counted_base that holds the control block and the T object in a single allocation.
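
A rough way to observe that single allocation is to count calls to the global operator new. This is a coarse, illustrative measurement, not part of any library interface, and the expected counts are what libstdc++ typically produces:

// shared_ptr<T>(new T) performs two allocations (object + control block),
// while make_shared<T>() folds them into one.
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <new>

static int g_allocs = 0;

void* operator new(std::size_t n)
{
    ++g_allocs;
    if (void* p = std::malloc(n))
        return p;
    throw std::bad_alloc{};
}

void operator delete(void* p) noexcept { std::free(p); }
void operator delete(void* p, std::size_t) noexcept { std::free(p); }

int main()
{
    g_allocs = 0;
    { std::shared_ptr<int> sp(new int(1)); }
    std::printf("shared_ptr<int>(new int): %d allocations\n", g_allocs); // typically 2

    g_allocs = 0;
    { auto sp = std::make_shared<int>(1); }
    std::printf("make_shared<int>:         %d allocations\n", g_allocs); // typically 1
    return 0;
}

Now let's look at _Sp_counted_ptr_inplace itself: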

template<typename _Tp, typename _Alloc, _Lock_policy _Lp>
  class _Sp_counted_ptr_inplace final : public _Sp_counted_base<_Lp>
  {
    class _Impl : _Sp_ebo_helper<0, _Alloc>
    {
      typedef _Sp_ebo_helper<0, _Alloc> _A_base;

    public:
      explicit _Impl(_Alloc __a) noexcept : _A_base(__a) { }

      _Alloc& _M_alloc() noexcept { return _A_base::_S_get(*this); }

      __gnu_cxx::__aligned_buffer<_Tp> _M_storage;
    };

    // Alloc parameter is not a reference so doesn't alias anything in __args
    template<typename... _Args>
      _Sp_counted_ptr_inplace(_Alloc __a, _Args&&... __args)
      : _M_impl(__a)
      {
        // _GLIBCXX_RESOLVE_LIB_DEFECTS
        // 2070. allocate_shared should use allocator_traits<A>::construct
        allocator_traits<_Alloc>::construct(__a, _M_ptr(),
            std::forward<_Args>(__args)...); // might throw
      }

    virtual void
    _M_dispose() noexcept
    {
      allocator_traits<_Alloc>::destroy(_M_impl._M_alloc(), _M_ptr());
    }

    // Override because the allocator needs to know the dynamic type
    virtual void
    _M_destroy() noexcept
    {
      __allocator_type __a(_M_impl._M_alloc());
      __allocated_ptr<__allocator_type> __guard_ptr{ __a, this };
      this->~_Sp_counted_ptr_inplace();
    }

  private:
    _Impl _M_impl;
  };

Anyone who reads STL code regularly will recognize EBO (empty base optimization) and __aligned_buffer, so I won't dwell on them here. It may look a little odd at first, but _Sp_counted_ptr_inplace really does store the T object created by make_shared inside its own _M_impl member, and it overrides the _Sp_counted_base virtual functions _M_dispose and _M_destroy to destroy that object and the control block. A stripped-down sketch of the storage idea follows; after that we return to how the reference count is incremented and decremented.
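
The following is only a sketch of that placement-new-into-aligned-storage idea, using a made-up type name (InplaceBlockSketch is not a libstdc++ class) and skipping the allocator and EBO machinery:

// The managed T lives in aligned raw storage inside the control block and
// is created with placement new; dispose() destroys only T, while freeing
// the block itself is a separate step (what _M_destroy does via the allocator).
#include <new>
#include <utility>

template<typename T>
struct InplaceBlockSketch
{
    alignas(T) unsigned char _M_storage[sizeof(T)];

    template<typename... Args>
    explicit InplaceBlockSketch(Args&&... args)
    { ::new (static_cast<void*>(_M_storage)) T(std::forward<Args>(args)...); }

    T* ptr() noexcept
    { return std::launder(reinterpret_cast<T*>(_M_storage)); }

    void dispose() noexcept { ptr()->~T(); }   // analogue of _M_dispose
};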

Reference count operations

Incrementing the count: the copy constructors of shared_ptr and __shared_ptr are compiler-generated defaults, so the real work is done by the copy constructor of __shared_count:

  __shared_count(const __shared_count& __r) noexcept
  : _M_pi(__r._M_pi)
  {
    if (_M_pi != nullptr)
      _M_pi->_M_add_ref_copy();
  }

It copies the _Sp_counted_base* directly and calls the base class's non-virtual member _M_add_ref_copy:

  // Increment the use count (used when the count is greater than zero).
  void
  _M_add_ref_copy()
  { __gnu_cxx::__atomic_add_dispatch(&_M_use_count, 1); }

<ext/atomicity.h>

inline void
__attribute__ ((__always_inline__))
__atomic_add_dispatch(_Atomic_word* __mem, int __val)
{
  if (__is_single_threaded())
    __atomic_add_single(__mem, __val);
  else
    __atomic_add(__mem, __val);
}
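
For reference, the two helpers that __atomic_add_dispatch chooses between boil down to roughly the following. This is a paraphrase of <ext/atomicity.h>, not a verbatim copy, and the exact memory order used by __atomic_add may differ between GCC versions:

// Sketch: single-threaded processes get a plain non-atomic add, everything
// else goes through an atomic fetch-and-add built on the GCC __atomic builtins.
typedef int _Atomic_word_sketch;   // matches _Atomic_word on this x86_64 setup

inline void atomic_add_single_sketch(_Atomic_word_sketch* mem, int val)
{ *mem += val; }                   // cf. __atomic_add_single

inline void atomic_add_sketch(_Atomic_word_sketch* mem, int val)
{ __atomic_fetch_add(mem, val, __ATOMIC_ACQ_REL); }   // cf. __atomic_add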

So the increment of _M_use_count in _Sp_counted_base is guaranteed to be atomic. While we are at it, the weak count is incremented the same way, also atomically:

  // Increment the weak count.
  void
  _M_weak_add_ref() noexcept
  { __gnu_cxx::__atomic_add_dispatch(&_M_weak_count, 1); }

Decrementing the count: shared_ptr and __shared_ptr use the default destructors, so the work ultimately falls on the destructor of __shared_count:

  ~__shared_count() noexcept
  {
    if (_M_pi != nullptr)
      _M_pi->_M_release();
  }

Again it is the base class's non-virtual _M_release that does the work, and it is specialized into three versions according to _Lock_policy, so this is where the lock policy finally shows up. The available policies:
<ext/concurrence.h>

namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
  // Available locking policies:
  // _S_single    single-threaded code that doesn't need to be locked.
  // _S_mutex     multi-threaded code that requires additional support
  //              from gthr.h or abstraction layers in concurrence.h.
  // _S_atomic    multi-threaded code using atomic operations.
  enum _Lock_policy { _S_single, _S_mutex, _S_atomic };

  // Compile time constant that indicates prefered locking policy in
  // the current configuration.
  _GLIBCXX17_INLINE const _Lock_policy __default_lock_policy =
#ifndef __GTHREADS
  _S_single;
#elif defined _GLIBCXX_HAVE_ATOMIC_LOCK_POLICY
  _S_atomic;
#else
  _S_mutex;
#endif
}
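
If you want to see which policy your own toolchain picked, the enum is reachable through the same extension header, so a quick check looks like this (non-portable by design, since it pokes at libstdc++ internals):

// Prints the _Lock_policy selected for this build of libstdc++.
#include <cstdio>
#include <ext/concurrence.h>

int main()
{
    switch (__gnu_cxx::__default_lock_policy)
    {
    case __gnu_cxx::_S_single: std::puts("_S_single"); break;
    case __gnu_cxx::_S_mutex:  std::puts("_S_mutex");  break;
    case __gnu_cxx::_S_atomic: std::puts("_S_atomic"); break;
    }
    return 0;
}

Now, the three _M_release specializations:
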
template<>
  inline void
  _Sp_counted_base<_S_single>::_M_release() noexcept
  {
    if (--_M_use_count == 0)
      {
        _M_dispose();
        if (--_M_weak_count == 0)
          _M_destroy();
      }
  }

template<>
  inline void
  _Sp_counted_base<_S_mutex>::_M_release() noexcept
  {
    // Be race-detector-friendly. For more info see bits/c++config.
    _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_use_count);
    if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
      {
        _M_release_last_use();
      }
  }

template<>
  inline void
  _Sp_counted_base<_S_atomic>::_M_release() noexcept
  {
    _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_use_count);
#if ! _GLIBCXX_TSAN
    constexpr bool __lock_free
      = __atomic_always_lock_free(sizeof(long long), 0)
      && __atomic_always_lock_free(sizeof(_Atomic_word), 0);
    constexpr bool __double_word
      = sizeof(long long) == 2 * sizeof(_Atomic_word);
    // The ref-count members follow the vptr, so are aligned to
    // alignof(void*).
    constexpr bool __aligned = __alignof(long long) <= alignof(void*);
    if _GLIBCXX17_CONSTEXPR (__lock_free && __double_word && __aligned)
      {
        constexpr int __wordbits = __CHAR_BIT__ * sizeof(_Atomic_word);
        constexpr int __shiftbits = __double_word ? __wordbits : 0;
        constexpr long long __unique_ref = 1LL + (1LL << __shiftbits);
        auto __both_counts = reinterpret_cast<long long*>(&_M_use_count);

        _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_weak_count);
        if (__atomic_load_n(__both_counts, __ATOMIC_ACQUIRE) == __unique_ref)
          {
            // Both counts are 1, so there are no weak references and
            // we are releasing the last strong reference. No other
            // threads can observe the effects of this _M_release()
            // call (e.g. calling use_count()) without a data race.
            _M_weak_count = _M_use_count = 0;
            _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count);
            _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_weak_count);
            _M_dispose();
            _M_destroy();
            return;
          }
        if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
          [[__unlikely__]]
          {
            _M_release_last_use_cold();
            return;
          }
      }
    else
#endif
    if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
      {
        _M_release_last_use();
      }
  }

The _S_single version has the simplest logic: decrement the strong count non-atomically; if it reaches zero, call the virtual _M_dispose to destroy the managed object, then decrement the weak count non-atomically, and if that also reaches zero, call the virtual _M_destroy to destroy the control block itself.
Nowadays __default_lock_policy is usually _S_atomic, which is the longest version above. The _GLIBCXX_SYNCHRONIZATION_HAPPENS_* macros are explained in c++config:

// Macros for race detectors.
// _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(A) and
// _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(A) should be used to explain
// atomic (lock-free) synchronization to race detectors:
// the race detector will infer a happens-before arc from the former to the
// latter when they share the same argument pointer.
//
// The most frequent use case for these macros (and the only case in the
// current implementation of the library) is atomic reference counting:
// void _M_remove_reference()
// {
// _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&this->_M_refcount);
// if (__gnu_cxx::__exchange_and_add_dispatch(&this->_M_refcount, -1) <= 0)
// {
// _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&this->_M_refcount);
// _M_destroy(__a);
// }
// }
// The annotations in this example tell the race detector that all memory
// accesses occurred when the refcount was positive do not race with
// memory accesses which occurred after the refcount became zero.

In other words, this pair of macros exists to describe lock-free synchronization to a race detector: the detector infers a happens-before edge from the BEFORE annotation to the AFTER annotation when they are given the same address, so accesses made while the refcount was still positive are not reported as racing with accesses made after it dropped to zero.
Back in the _S_atomic _M_release: after checking at compile time that the environment allows it, the function reinterprets the two adjacent counters (starting at the strong count) as a single long long, so one atomic load can test whether both counts are 1. If they are, there are no weak references and this is the last strong reference, so it calls _M_dispose and _M_destroy right away. If only the strong count drops to zero, it calls the non-inlined _M_release_last_use_cold, which simply forwards to _M_release_last_use (shown after the short sketch below).
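
To make the __unique_ref arithmetic concrete, here is a standalone check. It assumes a 32-bit _Atomic_word, as on x86_64:

// Packing use_count == 1 and weak_count == 1 into one 64-bit word gives
// 1 + (1 << 32) = 0x100000001, whichever half holds which counter.
#include <cstdio>

int main()
{
    constexpr int word_bits = 8 * sizeof(int);   // bits in _Atomic_word (int here)
    constexpr long long unique_ref = 1LL + (1LL << word_bits);
    std::printf("__unique_ref = 0x%llx\n",
                static_cast<unsigned long long>(unique_ref)); // 0x100000001
    return 0;
}

Here is _M_release_last_use, together with its cold wrapper: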

  // Called by _M_release() when the use count reaches zero.
  void
  _M_release_last_use() noexcept
  {
    _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count);
    _M_dispose();
    // There must be a memory barrier between dispose() and destroy()
    // to ensure that the effects of dispose() are observed in the
    // thread that runs destroy().
    // See http://gcc.gnu.org/ml/libstdc++/2005-11/msg00136.html
    if (_Mutex_base<_Lp>::_S_need_barriers)
      {
        __atomic_thread_fence (__ATOMIC_ACQ_REL);
      }

    // Be race-detector-friendly. For more info see bits/c++config.
    _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_weak_count);
    if (__gnu_cxx::__exchange_and_add_dispatch(&_M_weak_count,
                                               -1) == 1)
      {
        _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_weak_count);
        _M_destroy();
      }
  }

  // As above, but 'noinline' to reduce code size on the cold path.
  __attribute__((__noinline__))
  void
  _M_release_last_use_cold() noexcept
  { _M_release_last_use(); }

_Mutex_base<_Lp>::_S_need_barriers is true only when the lock policy is _S_mutex. Once the weak count drops to zero, _M_destroy is called to destroy the control block itself.

UML diagram

(Diagram: shared_ptr class structure)

Closing remarks

To sum up: in a multithreaded program where several shared_ptr instances point at the same object, their destruction is guaranteed to release the object correctly without any lock, with no double destruction and no double free.
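
To back that claim with something runnable, here is an illustrative stress sketch: several threads repeatedly copy and drop owners of the same object, and the destructor is expected to run exactly once. A single clean run proves little by itself; running it under ThreadSanitizer or AddressSanitizer is the stronger check:

// Build with: g++ -std=c++17 -O2 -pthread stress.cc (add -fsanitize=thread to verify)
#include <atomic>
#include <cstdio>
#include <memory>
#include <thread>
#include <vector>

static std::atomic<int> g_dtor_calls{0};

struct Widget
{
    ~Widget() { g_dtor_calls.fetch_add(1, std::memory_order_relaxed); }
};

int main()
{
    {
        auto sp = std::make_shared<Widget>();
        std::vector<std::thread> threads;
        for (int i = 0; i < 8; ++i)
            threads.emplace_back([sp]            // each thread owns its own copy
            {
                for (int j = 0; j < 100000; ++j)
                {
                    std::shared_ptr<Widget> local = sp; // atomic increment
                }                                       // atomic decrement
            });
        for (auto& t : threads)
            t.join();
    }                                            // last owner destroys Widget here

    std::printf("destructor calls: %d\n", g_dtor_calls.load());   // expect 1
    return 0;
}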

Writing this, I suddenly remembered that I had actually studied and tested the thread safety of shared_ptr's release before; it had just faded with time. As the saying goes, skills get rusty without practice: many things only stick when you keep using them.