Discard incompatible comments entirely

//IGNORE is not portable either. Now that we have --raw it’s less an
issue though.
This commit is contained in:
Frédéric Mangano 2020-12-27 10:17:22 +01:00
parent 4cae6c44ee
commit 3e7b42062a
6 changed files with 17 additions and 38 deletions

View File

@ -183,9 +183,8 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
*/
void ot::print_comments(const std::list<std::string>& comments, FILE* output, bool raw)
{
static ot::encoding_converter from_utf8("UTF-8", "//IGNORE");
static ot::encoding_converter from_utf8("UTF-8", "");
std::string local;
bool info_lost = false;
bool bad_comments = false;
bool has_newline = false;
bool has_control = false;
@ -197,9 +196,7 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output, bo
} else {
ot::status rc = from_utf8(utf8_comment, local);
comment = &local;
if (rc == ot::st::information_lost) {
info_lost = true;
} else if (rc != ot::st::ok) {
if (rc != ot::st::ok) {
bad_comments = true;
continue;
}
@ -214,10 +211,8 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output, bo
fwrite(comment->data(), 1, comment->size(), output);
putc('\n', output);
}
if (info_lost)
fputs("warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.\n", stderr);
if (bad_comments)
fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr);
fputs("warning: Some tags could not be displayed because of incompatible encoding. See --raw.\n", stderr);
if (has_newline)
fputs("warning: Some tags contain newline characters. "
"These are not supported by --set-all.\n", stderr);

View File

@ -65,7 +65,6 @@ enum class st {
cancel,
/* System */
badly_encoded,
information_lost,
child_process_failed,
/* Ogg */
bad_stream,
@ -158,8 +157,7 @@ public:
~encoding_converter();
/**
* Convert text using iconv. If the input sequence is invalid, return #st::badly_encoded and
* abort the processing. If some character could not be converted perfectly, keep converting
* the string and finally return #st::information_lost.
* abort the processing, leaving out in an undefined state.
*/
status operator()(std::string_view in, std::string& out);
private:

View File

@ -113,37 +113,26 @@ ot::status ot::encoding_converter::operator()(std::string_view in, std::string&
size_t in_left = in.size();
constexpr size_t chunk_size = 1024;
char chunk[chunk_size];
bool lost_information = false;
for (;;) {
char *out_cursor = chunk;
size_t out_left = chunk_size;
size_t rc = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left);
out.append(chunk, out_cursor - chunk);
// With //IGNORE, iconv yields EILSEQ on bad conversion but still returns reasonable
// data. Note than EILSEQ is returned at the very end so its basically like a fatal
// error on the last chunk. When the output buffer is too small, it yields E2BIG and
// we need to loop. Any other error is fatal. A return code other than 0 or -1
// indicates a lossy transliteration.
if (rc == (size_t) -1 && errno == EILSEQ) {
lost_information = true;
break;
} else if (rc == (size_t) -1 && errno != E2BIG) {
if (rc == (size_t) -1 && errno == E2BIG) {
// Loop normally.
} else if (rc == (size_t) -1) {
return {ot::st::badly_encoded, strerror(errno)};
} else if (rc != 0) {
return {ot::st::badly_encoded,
"Could not convert string '" + std::string(in) + "': " +
strerror(errno)};
} else if (rc != 0 && rc != (size_t) -1) {
lost_information = true;
"Some characters could not be converted into the target encoding."};
}
out.append(chunk, out_cursor - chunk);
if (in_cursor == nullptr)
break;
else if (in_left == 0)
in_cursor = nullptr;
}
if (lost_information)
return {ot::st::information_lost,
"Some characters could not be converted into the target encoding."};
return ot::st::ok;
}

View File

@ -153,13 +153,13 @@ void check_bad_arguments()
error_case({"opustags", "--edit", "x", "-i", "-D"}, "Cannot mix --edit with -adDsS.", "mixing -e and -D");
error_case({"opustags", "--edit", "x", "-i", "-S"}, "Cannot mix --edit with -adDsS.", "mixing -e and -S");
error_case({"opustags", "-d", "\xFF", "x"},
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character",
"-d with binary data");
error_case({"opustags", "-a", "X=\xFF", "x"},
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character",
"-a with binary data");
error_case({"opustags", "-s", "X=\xFF", "x"},
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character",
"-s with binary data");
}

View File

@ -276,11 +276,10 @@ is_deeply(opustags('-i', 'out.opus', "--add=I=\xf9\xce", {mode => ':raw'}), ['',
is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<'END_ERR', 0], 'read tags in ISO-8859-1');
encoder=Lavc58.18.100 libopus
TITLE=
ARTIST=\xe9\xe0\xe7
I=\xf9\xce
END_OUT
warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.
warning: Some tags could not be displayed because of incompatible encoding. See --raw.
END_ERR
$ENV{LC_ALL} = '';
@ -302,10 +301,8 @@ T=\xFF
END_IN
is_deeply(opustags(qw(out.opus)), [<<"END_OUT", <<'END_ERR', 0], 'default read with binary data');
T=
U=
END_OUT
warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.
warning: Some tags could not be displayed because of incompatible encoding. See --raw.
END_ERR
is_deeply(opustags(qw(out.opus --raw), { mode => ':raw' }), [<<"END_OUT", '', 0], 'raw read');

View File

@ -46,7 +46,7 @@ void check_converter()
is(out, ephemere_iso, "conversion from UTF-8 is correct");
rc = from_utf8("\xFF\xFF", out);
is(rc, ot::st::information_lost, "conversion from bad UTF-8 is lossy");
is(rc, ot::st::badly_encoded, "conversion from bad UTF-8 fails");
}
void check_shell_esape()