diff --git a/src/cli.cc b/src/cli.cc index 68c9c00..db8d8cc 100644 --- a/src/cli.cc +++ b/src/cli.cc @@ -171,7 +171,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm */ void ot::print_comments(const std::list& comments, FILE* output) { - static ot::encoding_converter from_utf8("UTF-8", "//TRANSLIT"); + static ot::encoding_converter from_utf8("UTF-8", "//IGNORE"); std::string local; bool info_lost = false; bool bad_comments = false; @@ -195,7 +195,7 @@ void ot::print_comments(const std::list& comments, FILE* output) putc('\n', output); } if (info_lost) - fputs("warning: Some tags have been transliterated to your system encoding.\n", stderr); + fputs("warning: Some characters are not supported by your system encoding and have been discarded.\n", stderr); if (bad_comments) fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr); if (has_newline) diff --git a/src/system.cc b/src/system.cc index 5420e74..abcdb44 100644 --- a/src/system.cc +++ b/src/system.cc @@ -118,13 +118,24 @@ ot::status ot::encoding_converter::operator()(const char* in, size_t n, std::str char *out_cursor = chunk; size_t out_left = chunk_size; size_t rc = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left); - if (rc == (size_t) -1 && errno != E2BIG) + out.append(chunk, out_cursor - chunk); + + // With //IGNORE, iconv yields EILSEQ on bad conversion but still returns reasonable + // data. Note that EILSEQ is returned at the very end so it’s basically like a fatal + // error on the last chunk. When the output buffer is too small, it yields E2BIG and + // we need to loop. Any other error is fatal. A return code other than 0 or -1 + // indicates a lossy transliteration. 
+ if (rc == (size_t) -1 && errno == EILSEQ) { + lost_information = true; + break; + } else if (rc == (size_t) -1 && errno != E2BIG) { return {ot::st::badly_encoded, "Could not convert string '" + std::string(in, n) + "': " + strerror(errno)}; - if (rc != 0) + } else if (rc != 0 && rc != (size_t) -1) { lost_information = true; - out.append(chunk, out_cursor - chunk); + } + if (in_cursor == nullptr) break; else if (in_left == 0) @@ -132,8 +143,7 @@ ot::status ot::encoding_converter::operator()(const char* in, size_t n, std::str } if (lost_information) return {ot::st::information_lost, - "Some characters could not be converted into the target encoding " - "in string '" + std::string(in, n) + "'."}; + "Some characters could not be converted into the target encoding."}; return ot::st::ok; } diff --git a/t/opustags.t b/t/opustags.t index df31bf9..2d10112 100755 --- a/t/opustags.t +++ b/t/opustags.t @@ -275,11 +275,11 @@ is_deeply(opustags('-i', 'out.opus', "--add=I=\xf9\xce", {mode => ':raw'}), ['', is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<'END_ERR', 0], 'read tags in ISO-8859-1'); encoder=Lavc58.18.100 libopus -TITLE=??? +TITLE= ARTIST=\xe9\xe0\xe7 I=\xf9\xce END_OUT -warning: Some tags have been transliterated to your system encoding. +warning: Some characters are not supported by your system encoding and have been discarded. 
END_ERR $ENV{LC_ALL} = ''; diff --git a/t/system.cc b/t/system.cc index 1ef558e..003d7e5 100644 --- a/t/system.cc +++ b/t/system.cc @@ -34,7 +34,7 @@ void check_converter() { const char* ephemere_iso = "\xc9\x70\x68\xe9\x6d\xe8\x72\x65"; ot::encoding_converter to_utf8("ISO_8859-1", "UTF-8"); - ot::encoding_converter from_utf8("UTF-8", "ISO_8859-1//TRANSLIT"); + ot::encoding_converter from_utf8("UTF-8", "ISO_8859-1//IGNORE"); std::string out; ot::status rc = to_utf8(ephemere_iso, out); @@ -46,7 +46,7 @@ void check_converter() is(out, ephemere_iso, "conversion from UTF-8 is correct"); rc = from_utf8("\xFF\xFF", out); - is(rc, ot::st::badly_encoded, "conversion from bad UTF-8 fails"); + is(rc, ot::st::information_lost, "conversion from bad UTF-8 is lossy"); } void check_shell_esape()