From 3e7b42062a356288df2ef06300882e2de99167e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Mangano?= Date: Sun, 27 Dec 2020 10:17:22 +0100 Subject: [PATCH] Discard incompatible comments entirely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit //IGNORE is not portable either. Now that we have --raw it’s less an issue though. --- src/cli.cc | 11 +++-------- src/opustags.h | 4 +--- src/system.cc | 25 +++++++------------------ t/cli.cc | 6 +++--- t/opustags.t | 7 ++----- t/system.cc | 2 +- 6 files changed, 17 insertions(+), 38 deletions(-) diff --git a/src/cli.cc b/src/cli.cc index bcbb344..62dcb13 100644 --- a/src/cli.cc +++ b/src/cli.cc @@ -183,9 +183,8 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm */ void ot::print_comments(const std::list& comments, FILE* output, bool raw) { - static ot::encoding_converter from_utf8("UTF-8", "//IGNORE"); + static ot::encoding_converter from_utf8("UTF-8", ""); std::string local; - bool info_lost = false; bool bad_comments = false; bool has_newline = false; bool has_control = false; @@ -197,9 +196,7 @@ void ot::print_comments(const std::list& comments, FILE* output, bo } else { ot::status rc = from_utf8(utf8_comment, local); comment = &local; - if (rc == ot::st::information_lost) { - info_lost = true; - } else if (rc != ot::st::ok) { + if (rc != ot::st::ok) { bad_comments = true; continue; } @@ -214,10 +211,8 @@ void ot::print_comments(const std::list& comments, FILE* output, bo fwrite(comment->data(), 1, comment->size(), output); putc('\n', output); } - if (info_lost) - fputs("warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.\n", stderr); if (bad_comments) - fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr); + fputs("warning: Some tags could not be displayed because of incompatible encoding. See --raw.\n", stderr); if (has_newline) fputs("warning: Some tags contain newline characters. " "These are not supported by --set-all.\n", stderr); diff --git a/src/opustags.h b/src/opustags.h index 441657e..30601e5 100644 --- a/src/opustags.h +++ b/src/opustags.h @@ -65,7 +65,6 @@ enum class st { cancel, /* System */ badly_encoded, - information_lost, child_process_failed, /* Ogg */ bad_stream, @@ -158,8 +157,7 @@ public: ~encoding_converter(); /** * Convert text using iconv. If the input sequence is invalid, return #st::badly_encoded and - * abort the processing. If some character could not be converted perfectly, keep converting - * the string and finally return #st::information_lost. + * abort the processing, leaving out in an undefined state. */ status operator()(std::string_view in, std::string& out); private: diff --git a/src/system.cc b/src/system.cc index 6a7ba32..23d1a2b 100644 --- a/src/system.cc +++ b/src/system.cc @@ -113,37 +113,26 @@ ot::status ot::encoding_converter::operator()(std::string_view in, std::string& size_t in_left = in.size(); constexpr size_t chunk_size = 1024; char chunk[chunk_size]; - bool lost_information = false; for (;;) { char *out_cursor = chunk; size_t out_left = chunk_size; size_t rc = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left); - out.append(chunk, out_cursor - chunk); - // With //IGNORE, iconv yields EILSEQ on bad conversion but still returns reasonable - // data. Note than EILSEQ is returned at the very end so it’s basically like a fatal - // error on the last chunk. When the output buffer is too small, it yields E2BIG and - // we need to loop. Any other error is fatal. A return code other than 0 or -1 - // indicates a lossy transliteration. - if (rc == (size_t) -1 && errno == EILSEQ) { - lost_information = true; - break; - } else if (rc == (size_t) -1 && errno != E2BIG) { + if (rc == (size_t) -1 && errno == E2BIG) { + // Loop normally. + } else if (rc == (size_t) -1) { + return {ot::st::badly_encoded, strerror(errno)}; + } else if (rc != 0) { return {ot::st::badly_encoded, - "Could not convert string '" + std::string(in) + "': " + - strerror(errno)}; - } else if (rc != 0 && rc != (size_t) -1) { - lost_information = true; + "Some characters could not be converted into the target encoding."}; } + out.append(chunk, out_cursor - chunk); if (in_cursor == nullptr) break; else if (in_left == 0) in_cursor = nullptr; } - if (lost_information) - return {ot::st::information_lost, - "Some characters could not be converted into the target encoding."}; return ot::st::ok; } diff --git a/t/cli.cc b/t/cli.cc index 19153da..a946fd9 100644 --- a/t/cli.cc +++ b/t/cli.cc @@ -153,13 +153,13 @@ void check_bad_arguments() error_case({"opustags", "--edit", "x", "-i", "-D"}, "Cannot mix --edit with -adDsS.", "mixing -e and -D"); error_case({"opustags", "--edit", "x", "-i", "-S"}, "Cannot mix --edit with -adDsS.", "mixing -e and -S"); error_case({"opustags", "-d", "\xFF", "x"}, - "Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.", + "Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character", "-d with binary data"); error_case({"opustags", "-a", "X=\xFF", "x"}, - "Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.", + "Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character", "-a with binary data"); error_case({"opustags", "-s", "X=\xFF", "x"}, - "Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.", + "Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character", "-s with binary data"); } diff --git a/t/opustags.t b/t/opustags.t index fdc5654..a311de9 100755 --- a/t/opustags.t +++ b/t/opustags.t @@ -276,11 +276,10 @@ is_deeply(opustags('-i', 'out.opus', "--add=I=\xf9\xce", {mode => ':raw'}), ['', is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<'END_ERR', 0], 'read tags in ISO-8859-1'); encoder=Lavc58.18.100 libopus -TITLE= ARTIST=\xe9\xe0\xe7 I=\xf9\xce END_OUT -warning: Some characters could not be converted to your system encoding and have been discarded. See --raw. +warning: Some tags could not be displayed because of incompatible encoding. See --raw. END_ERR $ENV{LC_ALL} = ''; @@ -302,10 +301,8 @@ T=\xFF END_IN is_deeply(opustags(qw(out.opus)), [<<"END_OUT", <<'END_ERR', 0], 'default read with binary data'); -T= -U= END_OUT -warning: Some characters could not be converted to your system encoding and have been discarded. See --raw. +warning: Some tags could not be displayed because of incompatible encoding. See --raw. END_ERR is_deeply(opustags(qw(out.opus --raw), { mode => ':raw' }), [<<"END_OUT", '', 0], 'raw read'); diff --git a/t/system.cc b/t/system.cc index 003d7e5..49c5a64 100644 --- a/t/system.cc +++ b/t/system.cc @@ -46,7 +46,7 @@ void check_converter() is(out, ephemere_iso, "conversion from UTF-8 is correct"); rc = from_utf8("\xFF\xFF", out); - is(rc, ot::st::information_lost, "conversion from bad UTF-8 is lossy"); + is(rc, ot::st::badly_encoded, "conversion from bad UTF-8 fails"); } void check_shell_esape()