0%

[gcc13] STL源码解析 std::string_view

std::string_view 是自17标准引入的轻量级只读字符串视图,用于进一步减少字符串参数传递时的内存拷贝。
string_viewstring类似,同样是一个别名,实际类型为basic_string_view<char>,<string_view>中声明:

basic_string_view 成员

类型别名及数据成员

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
template<typename _CharT, typename _Traits = std::char_traits<_CharT>>
class basic_string_view
{
// types
using traits_type = _Traits;
using value_type = _CharT;
using pointer = value_type*;
using const_pointer = const value_type*;
using reference = value_type&;
using const_reference = const value_type&;
using const_iterator = const value_type*;
using iterator = const_iterator;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
using reverse_iterator = const_reverse_iterator;
using size_type = size_t;
using difference_type = ptrdiff_t;
static constexpr size_type npos = size_type(-1);

private:
//...
size_t _M_len;
const _CharT* _M_str;
};

可以看到两个成员字符串长度_M_len和实际指向字符串的指针_M_str,所以一个string_view对象的内存大小为16字节(64位机器)。

构造函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
      constexpr
basic_string_view() noexcept
: _M_len{0}, _M_str{nullptr}
{ }

constexpr basic_string_view(const basic_string_view&) noexcept = default;

[[__gnu__::__nonnull__]]
constexpr
basic_string_view(const _CharT* __str) noexcept
: _M_len{traits_type::length(__str)},
_M_str{__str}
{ }

constexpr
basic_string_view(const _CharT* __str, size_type __len) noexcept
: _M_len{__len}, _M_str{__str}
{ }

#if __cplusplus >= 202002L && __cpp_lib_concepts
template<contiguous_iterator _It, sized_sentinel_for<_It> _End>
requires same_as<iter_value_t<_It>, _CharT>
&& (!convertible_to<_End, size_type>)
constexpr
basic_string_view(_It __first, _End __last)
noexcept(noexcept(__last - __first))
: _M_len(__last - __first), _M_str(std::to_address(__first))
{ }

#if __cplusplus > 202002L
template<typename _Range, typename _DRange = remove_cvref_t<_Range>>
requires (!is_same_v<_DRange, basic_string_view>)
&& ranges::contiguous_range<_Range>
&& ranges::sized_range<_Range>
&& is_same_v<ranges::range_value_t<_Range>, _CharT>
&& (!is_convertible_v<_Range, const _CharT*>)
&& (!requires (_DRange& __d) {
__d.operator ::std::basic_string_view<_CharT, _Traits>();
})
&& (!requires { typename _DRange::traits_type; }
|| is_same_v<typename _DRange::traits_type, _Traits>)
constexpr explicit
basic_string_view(_Range&& __r)
noexcept(noexcept(ranges::size(__r)) && noexcept(ranges::data(__r)))
: _M_len(ranges::size(__r)), _M_str(ranges::data(__r))
{ }

basic_string_view(nullptr_t) = delete;
#endif // C++23
#endif // C++20

默认构造置长度为0、指针为空,使用编译器默认复制构造,从_CharT*构造则计算长度并复制指针,20版本后加入迭代器[first, last)范围的构造,23加入从范围Range的构造,逻辑都是求字符串长度并赋值。
需要注意的是basic_string_view将复制构造与复制赋值声明了=default,且没有声明移动构造、移动赋值,这将阻止编译器隐式生成移动构造、移动赋值,所以该类型在“移动”时将执行复制。

迭代器方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// [string.view.iterators], iterator support

[[nodiscard]]
constexpr const_iterator
begin() const noexcept
{ return this->_M_str; }

[[nodiscard]]
constexpr const_iterator
end() const noexcept
{ return this->_M_str + this->_M_len; }

[[nodiscard]]
constexpr const_iterator
cbegin() const noexcept
{ return this->_M_str; }

[[nodiscard]]
constexpr const_iterator
cend() const noexcept
{ return this->_M_str + this->_M_len; }

[[nodiscard]]
constexpr const_reverse_iterator
rbegin() const noexcept
{ return const_reverse_iterator(this->end()); }

[[nodiscard]]
constexpr const_reverse_iterator
rend() const noexcept
{ return const_reverse_iterator(this->begin()); }

[[nodiscard]]
constexpr const_reverse_iterator
crbegin() const noexcept
{ return const_reverse_iterator(this->end()); }

[[nodiscard]]
constexpr const_reverse_iterator
crend() const noexcept
{ return const_reverse_iterator(this->begin()); }

从开始的类型别名得知iteratorconst_iterator皆为const _CharT*,所以begin、end只需返回对应地址即可。

元素访问方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
  // [string.view.access], element access

[[nodiscard]]
constexpr const_reference
operator[](size_type __pos) const noexcept
{
__glibcxx_assert(__pos < this->_M_len);
return *(this->_M_str + __pos);
}

[[nodiscard]]
constexpr const_reference
at(size_type __pos) const
{
if (__pos >= _M_len)
__throw_out_of_range_fmt(__N("basic_string_view::at: __pos "
"(which is %zu) >= this->size() "
"(which is %zu)"), __pos, this->size());
return *(this->_M_str + __pos);
}

[[nodiscard]]
constexpr const_reference
front() const noexcept
{
__glibcxx_assert(this->_M_len > 0);
return *this->_M_str;
}

[[nodiscard]]
constexpr const_reference
back() const noexcept
{
__glibcxx_assert(this->_M_len > 0);
return *(this->_M_str + this->_M_len - 1);
}

[[nodiscard]]
constexpr const_pointer
data() const noexcept
{ return this->_M_str; }

const_referenceconst _ChatT&const_pointerconst _ChatT*,访问代码也好理解简单的指针操作。

容量方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
  // [string.view.capacity], capacity

[[nodiscard]]
constexpr size_type
size() const noexcept
{ return this->_M_len; }

[[nodiscard]]
constexpr size_type
length() const noexcept
{ return _M_len; }

[[nodiscard]]
constexpr size_type
max_size() const noexcept
{
return (npos - sizeof(size_type) - sizeof(void*))
/ sizeof(value_type) / 4;
}

[[nodiscard]]
constexpr bool
empty() const noexcept
{ return this->_M_len == 0; }

直接返回当前成员长度。

修改器方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
  // [string.view.modifiers], modifiers:

constexpr void
remove_prefix(size_type __n) noexcept
{
__glibcxx_assert(this->_M_len >= __n);
this->_M_str += __n;
this->_M_len -= __n;
}

constexpr void
remove_suffix(size_type __n) noexcept
{
__glibcxx_assert(this->_M_len >= __n);
this->_M_len -= __n;
}

constexpr void
swap(basic_string_view& __sv) noexcept
{
auto __tmp = *this;
*this = __sv;
__sv = __tmp;
}

remove_prefix向前移动指针位置、减少长度,remove_suffix减少长度,两者为string_view特殊方法用于移除头部、尾部字符。swap利用operator=和临时对象交换两个对象内的指针和长度。

操作方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
  // [string.view.ops], string operations:

_GLIBCXX20_CONSTEXPR
size_type
copy(_CharT* __str, size_type __n, size_type __pos = 0) const
{
__glibcxx_requires_string_len(__str, __n);
__pos = std::__sv_check(size(), __pos, "basic_string_view::copy");
const size_type __rlen = std::min<size_t>(__n, _M_len - __pos);
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 2777. basic_string_view::copy should use char_traits::copy
traits_type::copy(__str, data() + __pos, __rlen);
return __rlen;
}

[[nodiscard]]
constexpr basic_string_view
substr(size_type __pos = 0, size_type __n = npos) const noexcept(false)
{
__pos = std::__sv_check(size(), __pos, "basic_string_view::substr");
const size_type __rlen = std::min<size_t>(__n, _M_len - __pos);
return basic_string_view{_M_str + __pos, __rlen};
}

[[nodiscard]]
constexpr int
compare(basic_string_view __str) const noexcept
{
const size_type __rlen = std::min(this->_M_len, __str._M_len);
int __ret = traits_type::compare(this->_M_str, __str._M_str, __rlen);
if (__ret == 0)
__ret = _S_compare(this->_M_len, __str._M_len);
return __ret;
}

operation方法较多,重载版本也多,这里挑了几个有代表的版本列出来,copy方法调用traits_type::copy复制子串到目标地址,substr移动指针修改长度后构造新对象返回,compare利用traits_type::compare比较两个指针指定长度同时比较两个长度大小。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
  [[nodiscard]]
constexpr bool
starts_with(basic_string_view __x) const noexcept
{ return this->substr(0, __x.size()) == __x; }

[[nodiscard]]
constexpr bool
ends_with(basic_string_view __x) const noexcept
{
const auto __len = this->size();
const auto __xlen = __x.size();
return __len >= __xlen
&& traits_type::compare(end() - __xlen, __x.data(), __xlen) == 0;
}

starts_with利用substr截取前段字符串进行比较,ends_witch利用compare重载版本比较尾段字符串。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
template<typename _CharT, typename _Traits>
constexpr typename basic_string_view<_CharT, _Traits>::size_type
basic_string_view<_CharT, _Traits>::
find(const _CharT* __str, size_type __pos, size_type __n) const noexcept
{
__glibcxx_requires_string_len(__str, __n);

if (__n == 0)
return __pos <= _M_len ? __pos : npos;
if (__pos >= _M_len)
return npos;

const _CharT __elem0 = __str[0];
const _CharT* __first = _M_str + __pos;
const _CharT* const __last = _M_str + _M_len;
size_type __len = _M_len - __pos;

while (__len >= __n)
{
// Find the first occurrence of __elem0:
__first = traits_type::find(__first, __len - __n + 1, __elem0);
if (!__first)
return npos;
// Compare the full strings from the first occurrence of __elem0.
// We already know that __first[0] == __s[0] but compare them again
// anyway because __s is probably aligned, which helps memcmp.
if (traits_type::compare(__first, __str, __n) == 0)
return __first - _M_str;
__len = __last - ++__first;
}
return npos;
}

template<typename _CharT, typename _Traits>
constexpr typename basic_string_view<_CharT, _Traits>::size_type
basic_string_view<_CharT, _Traits>::
rfind(const _CharT* __str, size_type __pos, size_type __n) const noexcept
{
__glibcxx_requires_string_len(__str, __n);


if (__n <= this->_M_len)
{
__pos = std::min(size_type(this->_M_len - __n), __pos);
do
{
if (traits_type::compare(this->_M_str + __pos, __str, __n) == 0)
return __pos;
}
while (__pos-- > 0);
}
return npos;
}

以上为findrfind两个函数的实现,正向、反向查找const _Chart*字符串,正向查先找子串第一个字符的出现位置,再按子串长度比较子串。rfind则是从后往前逐一比较子串。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
template<typename _CharT, typename _Traits>
constexpr typename basic_string_view<_CharT, _Traits>::size_type
basic_string_view<_CharT, _Traits>::
find_first_of(const _CharT* __str, size_type __pos,
size_type __n) const noexcept
{
__glibcxx_requires_string_len(__str, __n);
for (; __n && __pos < this->_M_len; ++__pos)
{
const _CharT* __p = traits_type::find(__str, __n,
this->_M_str[__pos]);
if (__p)
return __pos;
}
return npos;
}


template<typename _CharT, typename _Traits>
constexpr typename basic_string_view<_CharT, _Traits>::size_type
basic_string_view<_CharT, _Traits>::
find_last_of(const _CharT* __str, size_type __pos,
size_type __n) const noexcept
{
__glibcxx_requires_string_len(__str, __n);
size_type __size = this->size();
if (__size && __n)
{
if (--__size > __pos)
__size = __pos;
do
{
if (traits_type::find(__str, __n, this->_M_str[__size]))
return __size;
}
while (__size-- != 0);
}
return npos;
}

find_first_of在目标字符串上逐一查找_M_str的从前往后每个字符,find_last_of则是逐一查找_M_str的从后往前的每个字符。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
template<typename _CharT, typename _Traits>
constexpr typename basic_string_view<_CharT, _Traits>::size_type
basic_string_view<_CharT, _Traits>::
find_first_not_of(const _CharT* __str, size_type __pos,
size_type __n) const noexcept
{
__glibcxx_requires_string_len(__str, __n);
for (; __pos < this->_M_len; ++__pos)
if (!traits_type::find(__str, __n, this->_M_str[__pos]))
return __pos;
return npos;
}

template<typename _CharT, typename _Traits>
constexpr typename basic_string_view<_CharT, _Traits>::size_type
basic_string_view<_CharT, _Traits>::
find_last_not_of(const _CharT* __str, size_type __pos,
size_type __n) const noexcept
{
__glibcxx_requires_string_len(__str, __n);
size_type __size = this->_M_len;
if (__size)
{
if (--__size > __pos)
__size = __pos;
do
{
if (!traits_type::find(__str, __n, this->_M_str[__size]))
return __size;
}
while (__size--);
}
return npos;
}

find_first_not_of从前往后循环内部字符串上每个字符,如果目标子串上没有查到该字符则返回该位置;find_last_not_of从后往前循环,如果目标子串上没有查到该字符则返回该位置。

basic_string_view 非成员相关函数

basic_string_view 对比方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
template<typename _CharT, typename _Traits>
[[nodiscard]]
constexpr bool
operator==(basic_string_view<_CharT, _Traits> __x,
basic_string_view<_CharT, _Traits> __y) noexcept
{ return __x.size() == __y.size() && __x.compare(__y) == 0; }

template<typename _CharT, typename _Traits>
[[nodiscard]]
constexpr bool
operator==(basic_string_view<_CharT, _Traits> __x,
__type_identity_t<basic_string_view<_CharT, _Traits>> __y)
noexcept
{ return __x.size() == __y.size() && __x.compare(__y) == 0; }

两个string_view的判等,由于basic_string_view::compare是先比较子串,所以这里先比较两个的长度以提高性能。第二个版本在这列出来,因为实际上两个std::string_view的比较会进到这个版本里,有关这个__type_indentity_tcppreference

basic_string_view和basic_string的转换

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#if __cplusplus >= 201703L
// A helper type for avoiding boiler-plate.
typedef basic_string_view<_CharT, _Traits> __sv_type;
#endif

#if __cplusplus >= 201703L
private:
// Wraps a string_view by explicit conversion and thus
// allows to add an internal constructor that does not
// participate in overload resolution when a string_view
// is provided.
struct __sv_wrapper
{
_GLIBCXX20_CONSTEXPR explicit
__sv_wrapper(__sv_type __sv) noexcept : _M_sv(__sv) { }

__sv_type _M_sv;
};

/**
* @brief Only internally used: Construct string from a string view
* wrapper.
* @param __svw string view wrapper.
* @param __a Allocator to use.
*/
_GLIBCXX20_CONSTEXPR
explicit
basic_string(__sv_wrapper __svw, const _Alloc& __a)
: basic_string(__svw._M_sv.data(), __svw._M_sv.size(), __a) { }

public:
/**
* @brief Construct string from a string_view.
* @param __t Source object convertible to string view.
* @param __a Allocator to use (default is default allocator).
*/
template<typename _Tp, typename = _If_sv<_Tp, void>>
_GLIBCXX20_CONSTEXPR
explicit
basic_string(const _Tp& __t, const _Alloc& __a = _Alloc())
: basic_string(__sv_wrapper(_S_to_string_view(__t)), __a) { }
#endif // C++17

c++17版本后basic_string增加了从string_view构造,相关代码见上面这段摘抄(省略很多代码),实际逻辑为拷贝len长度的字符串,注意这里为explicit构造,也就是说string_view无法隐式转换到string

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#if __cplusplus >= 201703L
/**
* @brief Set value to string constructed from a string_view.
* @param __svt An object convertible to string_view.
*/
template<typename _Tp>
_GLIBCXX20_CONSTEXPR
_If_sv<_Tp, basic_string&>
operator=(const _Tp& __svt)
{ return this->assign(__svt); }

/**
* @brief Convert to a string_view.
* @return A string_view.
*/
_GLIBCXX20_CONSTEXPR
operator __sv_type() const noexcept
{ return __sv_type(data(), size()); }
#endif // C++17

上面两个函数因为在一块就一起贴了,basic_stringoperator=operator __sv_type(),前者为从string_viewstring的赋值(自然需要拷贝),后者是从stringstring_view的隐式转换,比如

1
2
3
std::string ori{"asd"};
std::string_view ori_view{ori};
ori_view = ori;

第二行的构造和第三行的赋值都是经由string operator __sv_type()隐式构造的string_view