9 #ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED 10 #define BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED 13 #include <boost/nowide/utf/utf.hpp> 20 static_assert(
// Compile-time guard: std::mbstate_t must be at least 2 bytes, because
// read_state()/write_state() pass it to copy_uint16_t() as storage for a
// std::uint16_t value.
// NOTE(review): the diagnostic text has a typo -- "to small" should read
// "too small". Left untouched here because it is a string literal.
sizeof(std::mbstate_t) >= 2,
"mbstate_t is to small to store an UTF-16 codepoint");
// Copies a 16-bit value from `src` to `dst`, both given as untyped pointers.
// The buffers are accessed through unsigned char pointers; presumably two
// bytes are copied one by one, but the copy statements themselves are not
// visible in this excerpt -- TODO confirm against the full file.
23 inline void copy_uint16_t(
void* dst,
const void* src)
// Reinterpret destination and source as raw byte sequences.
25 unsigned char* cdst = static_cast<unsigned char*>(dst);
26 const unsigned char* csrc = static_cast<const unsigned char*>(src);
// Extracts the std::uint16_t stored inside a std::mbstate_t.
// `src` is the conversion state to read from; the value is obtained by
// handing both addresses to copy_uint16_t(). The declaration of `dst` and
// the return statement are on lines not shown in this excerpt.
30 inline std::uint16_t read_state(
const std::mbstate_t& src)
33 copy_uint16_t(&dst, &src);
// Stores the 16-bit value `src` into the conversion state `dst` by handing
// both addresses to copy_uint16_t(). Counterpart of read_state().
36 inline void write_state(std::mbstate_t& dst,
const std::uint16_t src)
38 copy_uint16_t(&dst, &src);
// Template header of the primary utf8_codecvt template (the class
// declaration line itself is not visible in this excerpt).
// CharSize defaults to sizeof(CharType), so the partial specializations
// utf8_codecvt<CharType, 2> and utf8_codecvt<CharType, 4> are selected
// by the size of the character type.
48 template<
typename CharType,
int CharSize = sizeof(CharType)>
// Partial specialization for 2-byte character types.
// Derives from std::codecvt<CharType, char, std::mbstate_t> and works on
// UTF-16 code units through utf16_traits; the external side is `char`
// (presumably UTF-8, going by the class name). A 16-bit value is carried
// across calls inside the std::mbstate_t via detail::read_state() and
// detail::write_state().
// NOTE: this excerpt omits many lines (braces, declarations, some branches);
// comments below describe only what the visible statements do.
52 template<
typename CharType>
53 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 2> :
public std::codecvt<CharType, char, std::mbstate_t>
// CharType must be wide enough to hold one UTF-16 code unit.
56 static_assert(
sizeof(CharType) >= 2,
"CharType must be able to store UTF16 code point");
// Constructor: `refs` is forwarded unchanged to the std::codecvt base.
58 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
62 using uchar = CharType;
// do_unshift: returns error if the state `s` still holds a non-zero value,
// otherwise ok. How `next` is set is not visible in this excerpt.
64 std::codecvt_base::result do_unshift(std::mbstate_t& s,
char* from,
char* ,
char*& next)
const override 66 if(detail::read_state(s) != 0)
67 return std::codecvt_base::error;
69 return std::codecvt_base::ok;
// do_encoding / do_max_length / do_always_noconv: only the signatures are
// visible here; their return values are on lines not shown.
71 int do_encoding()
const noexcept
override 75 int do_max_length()
const noexcept
override 79 bool do_always_noconv()
const noexcept
// do_length: walks the input [from, from_end) while `max` output units remain
// and returns the distance from the initial `from` to the final `from`.
// The state is loaded at entry and written back before returning.
override 84 int do_length(std::mbstate_t& std_state,
const char* from,
const char* from_end,
size_t max)
const override 87 std::uint16_t state = detail::read_state(std_state);
// Remember the start so the consumed length can be computed at the end.
88 const char* save_from = from;
94 while(max > 0 && from < from_end)
96 const char* prev_from = from;
// If all UTF-16 units of `ch` fit into the remaining budget, charge them.
// (`ch` is declared/decoded on lines not shown in this excerpt.)
107 if(BOOST_LIKELY(static_cast<size_t>(utf16_traits::width(ch)) <= max))
109 max -= utf16_traits::width(ch);
// Otherwise `ch` is encoded into a 2-element scratch buffer; the static_assert
// guarantees two units are enough. What is done with the buffer afterwards
// is not visible here -- presumably part of it is kept in `state`.
112 static_assert(utf16_traits::max_width == 2,
"Required for below");
113 std::uint16_t tmpOut[2]{};
114 utf16_traits::encode(ch, tmpOut);
// Persist the (possibly updated) state and report the consumed input length.
119 detail::write_state(std_state, state);
120 return static_cast<int>(from - save_from);
// do_in: converts external chars [from, from_end) into CharType units written
// to [to, to_end). Some parameter lines are missing from this excerpt.
123 std::codecvt_base::result do_in(std::mbstate_t& std_state,
125 const char* from_end,
126 const char*& from_next,
129 uchar*& to_next)
const override 131 std::codecvt_base::result r = std::codecvt_base::ok;
137 std::uint16_t state = detail::read_state(std_state);
// A non-zero state is emitted first, provided there is room in the output.
139 if(state && to < to_end)
141 *to++ = static_cast<CharType>(state);
144 while(to < to_end && from < from_end)
146 const char* from_saved = from;
// The condition guarding this assignment is not visible in the excerpt.
156 r = std::codecvt_base::partial;
// Enough output room for every unit of `ch`: encode straight into `to`.
160 if(BOOST_LIKELY(utf16_traits::width(ch) <= to_end - to))
162 to = utf16_traits::encode(ch, to);
// Not enough room: encode into a scratch buffer and emit only the first unit.
// Presumably the second unit is stored in `state` -- TODO confirm.
165 static_assert(utf16_traits::max_width == 2,
"Required for below");
166 std::uint16_t tmpOut[2]{};
167 utf16_traits::encode(ch, tmpOut);
168 *to++ = static_cast<CharType>(tmpOut[0]);
// An otherwise successful run is downgraded to partial when input is left
// over or the state is still non-zero.
175 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
176 r = std::codecvt_base::partial;
177 detail::write_state(std_state, state);
// do_out: converts CharType units [from, from_end) into external chars
// written to [to, to_end). Some parameter lines are missing from this excerpt.
181 std::codecvt_base::result do_out(std::mbstate_t& std_state,
183 const uchar* from_end,
184 const uchar*& from_next,
187 char*& to_next)
const override 189 std::codecvt_base::result r = std::codecvt_base::ok;
194 std::uint16_t state = detail::read_state(std_state);
195 for(; to < to_end && from < from_end; ++from)
197 std::uint32_t ch = 0;
// Branch using the stored state as the first unit `w1` and the current input
// unit as `w2`; if `w2` is a trail unit the two are combined into `ch`.
// (The enclosing condition is not visible in this excerpt.)
201 std::uint16_t w1 = state;
202 std::uint16_t w2 = *from;
203 if(BOOST_LIKELY(utf16_traits::is_trail(w2)))
205 ch = utf16_traits::combine_surrogate(w1, w2);
// Branch reading the current input unit as `w1` and classifying it as a
// single code point or a first surrogate; the visible fallback sets error.
212 std::uint16_t w1 = *from;
213 if(BOOST_LIKELY(utf16_traits::is_single_codepoint(w1)))
216 }
else if(BOOST_LIKELY(utf16_traits::is_first_surrogate(w1)))
230 r = std::codecvt_base::error;
// `len` is declared on a line not shown; when the remaining output space is
// smaller than `len` the conversion reports partial.
234 if(to_end - to < len)
236 r = std::codecvt_base::partial;
// Same final downgrade as in do_in: leftover input or non-zero state => partial.
244 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
245 r = std::codecvt_base::partial;
246 detail::write_state(std_state, state);
// Partial specialization for 4-byte character types.
// Derives from std::codecvt<CharType, char, std::mbstate_t>. Every visible
// method leaves its std::mbstate_t parameter unnamed, i.e. the state is not
// used by the visible code.
// NOTE: this excerpt omits many lines (braces, declarations, loop bodies);
// comments below describe only what the visible statements do.
252 template<
typename CharType>
253 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 4> :
public std::codecvt<CharType, char, std::mbstate_t>
// Constructor: `refs` is forwarded unchanged to the std::codecvt base.
256 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
260 using uchar = CharType;
// do_unshift: the visible code returns ok. How `next` is set is not visible
// in this excerpt.
262 std::codecvt_base::result
263 do_unshift(std::mbstate_t& ,
char* from,
char* ,
char*& next)
const override 266 return std::codecvt_base::ok;
// do_encoding / do_max_length / do_always_noconv: only the signatures are
// visible here; their return values are on lines not shown.
268 int do_encoding()
const noexcept
override 272 int do_max_length()
const noexcept
override 276 bool do_always_noconv()
const noexcept
// do_length: walks the input [from, from_end) while `max` is positive and
// returns the distance from the initial `from` to the final `from`.
override 281 int do_length(std::mbstate_t& ,
const char* from,
const char* from_end,
size_t max)
const override 283 const char* start_from = from;
285 while(max > 0 && from < from_end)
287 const char* save_from = from;
// NOTE(review): pointer difference is implicitly narrowed to int here; the
// 2-byte specialization uses static_cast<int> for the same expression.
299 return from - start_from;
// do_in: converts external chars [from, from_end) into CharType values
// written to [to, to_end). Some parameter lines are missing from this excerpt.
302 std::codecvt_base::result do_in(std::mbstate_t& ,
304 const char* from_end,
305 const char*& from_next,
308 uchar*& to_next)
const override 310 std::codecvt_base::result r = std::codecvt_base::ok;
312 while(to < to_end && from < from_end)
314 const char* from_saved = from;
// The condition guarding this assignment is not visible in the excerpt.
323 r = std::codecvt_base::partial;
// An otherwise successful run is downgraded to partial when input is left over.
331 if(r == std::codecvt_base::ok && from != from_end)
332 r = std::codecvt_base::partial;
// do_out: converts CharType values [from, from_end) into external chars
// written to [to, to_end). Some parameter lines are missing from this excerpt.
336 std::codecvt_base::result do_out(std::mbstate_t& ,
338 const uchar* from_end,
339 const uchar*& from_next,
342 char*& to_next)
const override 344 std::codecvt_base::result r = std::codecvt_base::ok;
345 while(to < to_end && from < from_end)
347 std::uint32_t ch = 0;
// `len` is declared on a line not shown; when the remaining output space is
// smaller than `len` the conversion reports partial.
354 if(to_end - to < len)
356 r = std::codecvt_base::partial;
// Leftover input after an otherwise successful run => partial.
364 if(r == std::codecvt_base::ok && from != from_end)
365 r = std::codecvt_base::partial;
UTF Traits class - functions to convert UTF sequences to and from Unicode code points.
Definition: utf.hpp:57
static Iterator encode(code_point value, Iterator out)
Definition: utf8_codecvt.hpp:49
static const code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:33
#define BOOST_NOWIDE_REPLACEMENT_CHARACTER
Definition: replacement.hpp:16
static const code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:38
static int width(code_point value)
bool is_valid_codepoint(code_point v)
Checks whether v is a valid code point.
Definition: utf.hpp:43
static code_point decode(Iterator &p, Iterator e)