mirror of
https://github.com/fmang/opustags.git
synced 2025-01-28 19:05:03 +01:00
Use //IGNORE instead of //TRANSLIT when transcoding
//TRANSLIT is not well supported, and in most cases there’s not much that transliteration can help with when the encoding is limiting. Besides, it sounds reasonable to assume most people use UTF-8 nowadays.
This commit is contained in:
parent
f98208c1a1
commit
c43704a0a7
@ -171,7 +171,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
|
||||
*/
|
||||
void ot::print_comments(const std::list<std::string>& comments, FILE* output)
|
||||
{
|
||||
static ot::encoding_converter from_utf8("UTF-8", "//TRANSLIT");
|
||||
static ot::encoding_converter from_utf8("UTF-8", "//IGNORE");
|
||||
std::string local;
|
||||
bool info_lost = false;
|
||||
bool bad_comments = false;
|
||||
@ -195,7 +195,7 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output)
|
||||
putc('\n', output);
|
||||
}
|
||||
if (info_lost)
|
||||
fputs("warning: Some tags have been transliterated to your system encoding.\n", stderr);
|
||||
fputs("warning: Some characters are not supported by your system encoding and have been discarded.\n", stderr);
|
||||
if (bad_comments)
|
||||
fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr);
|
||||
if (has_newline)
|
||||
|
@ -118,13 +118,24 @@ ot::status ot::encoding_converter::operator()(const char* in, size_t n, std::str
|
||||
char *out_cursor = chunk;
|
||||
size_t out_left = chunk_size;
|
||||
size_t rc = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left);
|
||||
if (rc == (size_t) -1 && errno != E2BIG)
|
||||
out.append(chunk, out_cursor - chunk);
|
||||
|
||||
// With //IGNORE, iconv yields EILSEQ on bad conversion but still returns reasonable
|
||||
// data. Note that EILSEQ is returned at the very end so it’s basically like a fatal
|
||||
// error on the last chunk. When the output buffer is too small, it yields E2BIG and
|
||||
// we need to loop. Any other error is fatal. A return code other than 0 or -1
|
||||
// indicates a lossy transliteration.
|
||||
if (rc == (size_t) -1 && errno == EILSEQ) {
|
||||
lost_information = true;
|
||||
break;
|
||||
} else if (rc == (size_t) -1 && errno != E2BIG) {
|
||||
return {ot::st::badly_encoded,
|
||||
"Could not convert string '" + std::string(in, n) + "': " +
|
||||
strerror(errno)};
|
||||
if (rc != 0)
|
||||
} else if (rc != 0 && rc != (size_t) -1) {
|
||||
lost_information = true;
|
||||
out.append(chunk, out_cursor - chunk);
|
||||
}
|
||||
|
||||
if (in_cursor == nullptr)
|
||||
break;
|
||||
else if (in_left == 0)
|
||||
@ -132,8 +143,7 @@ ot::status ot::encoding_converter::operator()(const char* in, size_t n, std::str
|
||||
}
|
||||
if (lost_information)
|
||||
return {ot::st::information_lost,
|
||||
"Some characters could not be converted into the target encoding "
|
||||
"in string '" + std::string(in, n) + "'."};
|
||||
"Some characters could not be converted into the target encoding."};
|
||||
return ot::st::ok;
|
||||
}
|
||||
|
||||
|
@ -275,11 +275,11 @@ is_deeply(opustags('-i', 'out.opus', "--add=I=\xf9\xce", {mode => ':raw'}), ['',
|
||||
|
||||
is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<'END_ERR', 0], 'read tags in ISO-8859-1');
|
||||
encoder=Lavc58.18.100 libopus
|
||||
TITLE=???
|
||||
TITLE=
|
||||
ARTIST=\xe9\xe0\xe7
|
||||
I=\xf9\xce
|
||||
END_OUT
|
||||
warning: Some tags have been transliterated to your system encoding.
|
||||
warning: Some characters are not supported by your system encoding and have been discarded.
|
||||
END_ERR
|
||||
|
||||
$ENV{LC_ALL} = '';
|
||||
|
@ -34,7 +34,7 @@ void check_converter()
|
||||
{
|
||||
const char* ephemere_iso = "\xc9\x70\x68\xe9\x6d\xe8\x72\x65";
|
||||
ot::encoding_converter to_utf8("ISO_8859-1", "UTF-8");
|
||||
ot::encoding_converter from_utf8("UTF-8", "ISO_8859-1//TRANSLIT");
|
||||
ot::encoding_converter from_utf8("UTF-8", "ISO_8859-1//IGNORE");
|
||||
std::string out;
|
||||
|
||||
ot::status rc = to_utf8(ephemere_iso, out);
|
||||
@ -46,7 +46,7 @@ void check_converter()
|
||||
is(out, ephemere_iso, "conversion from UTF-8 is correct");
|
||||
|
||||
rc = from_utf8("\xFF\xFF", out);
|
||||
is(rc, ot::st::badly_encoded, "conversion from bad UTF-8 fails");
|
||||
is(rc, ot::st::information_lost, "conversion from bad UTF-8 is lossy");
|
||||
}
|
||||
|
||||
void check_shell_esape()
|
||||
|
Loading…
x
Reference in New Issue
Block a user