mirror of
https://github.com/fmang/opustags.git
synced 2025-01-15 12:43:17 +01:00
Discard incompatible comments entirely
//IGNORE is not portable either. Now that we have --raw it’s less an issue though.
This commit is contained in:
parent
4cae6c44ee
commit
3e7b42062a
11
src/cli.cc
11
src/cli.cc
@ -183,9 +183,8 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
|
||||
*/
|
||||
void ot::print_comments(const std::list<std::string>& comments, FILE* output, bool raw)
|
||||
{
|
||||
static ot::encoding_converter from_utf8("UTF-8", "//IGNORE");
|
||||
static ot::encoding_converter from_utf8("UTF-8", "");
|
||||
std::string local;
|
||||
bool info_lost = false;
|
||||
bool bad_comments = false;
|
||||
bool has_newline = false;
|
||||
bool has_control = false;
|
||||
@ -197,9 +196,7 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output, bo
|
||||
} else {
|
||||
ot::status rc = from_utf8(utf8_comment, local);
|
||||
comment = &local;
|
||||
if (rc == ot::st::information_lost) {
|
||||
info_lost = true;
|
||||
} else if (rc != ot::st::ok) {
|
||||
if (rc != ot::st::ok) {
|
||||
bad_comments = true;
|
||||
continue;
|
||||
}
|
||||
@ -214,10 +211,8 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output, bo
|
||||
fwrite(comment->data(), 1, comment->size(), output);
|
||||
putc('\n', output);
|
||||
}
|
||||
if (info_lost)
|
||||
fputs("warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.\n", stderr);
|
||||
if (bad_comments)
|
||||
fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr);
|
||||
fputs("warning: Some tags could not be displayed because of incompatible encoding. See --raw.\n", stderr);
|
||||
if (has_newline)
|
||||
fputs("warning: Some tags contain newline characters. "
|
||||
"These are not supported by --set-all.\n", stderr);
|
||||
|
@ -65,7 +65,6 @@ enum class st {
|
||||
cancel,
|
||||
/* System */
|
||||
badly_encoded,
|
||||
information_lost,
|
||||
child_process_failed,
|
||||
/* Ogg */
|
||||
bad_stream,
|
||||
@ -158,8 +157,7 @@ public:
|
||||
~encoding_converter();
|
||||
/**
|
||||
* Convert text using iconv. If the input sequence is invalid, return #st::badly_encoded and
|
||||
* abort the processing. If some character could not be converted perfectly, keep converting
|
||||
* the string and finally return #st::information_lost.
|
||||
* abort the processing, leaving out in an undefined state.
|
||||
*/
|
||||
status operator()(std::string_view in, std::string& out);
|
||||
private:
|
||||
|
@ -113,37 +113,26 @@ ot::status ot::encoding_converter::operator()(std::string_view in, std::string&
|
||||
size_t in_left = in.size();
|
||||
constexpr size_t chunk_size = 1024;
|
||||
char chunk[chunk_size];
|
||||
bool lost_information = false;
|
||||
for (;;) {
|
||||
char *out_cursor = chunk;
|
||||
size_t out_left = chunk_size;
|
||||
size_t rc = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left);
|
||||
out.append(chunk, out_cursor - chunk);
|
||||
|
||||
// With //IGNORE, iconv yields EILSEQ on bad conversion but still returns reasonable
|
||||
// data. Note than EILSEQ is returned at the very end so it’s basically like a fatal
|
||||
// error on the last chunk. When the output buffer is too small, it yields E2BIG and
|
||||
// we need to loop. Any other error is fatal. A return code other than 0 or -1
|
||||
// indicates a lossy transliteration.
|
||||
if (rc == (size_t) -1 && errno == EILSEQ) {
|
||||
lost_information = true;
|
||||
break;
|
||||
} else if (rc == (size_t) -1 && errno != E2BIG) {
|
||||
if (rc == (size_t) -1 && errno == E2BIG) {
|
||||
// Loop normally.
|
||||
} else if (rc == (size_t) -1) {
|
||||
return {ot::st::badly_encoded, strerror(errno)};
|
||||
} else if (rc != 0) {
|
||||
return {ot::st::badly_encoded,
|
||||
"Could not convert string '" + std::string(in) + "': " +
|
||||
strerror(errno)};
|
||||
} else if (rc != 0 && rc != (size_t) -1) {
|
||||
lost_information = true;
|
||||
"Some characters could not be converted into the target encoding."};
|
||||
}
|
||||
|
||||
out.append(chunk, out_cursor - chunk);
|
||||
if (in_cursor == nullptr)
|
||||
break;
|
||||
else if (in_left == 0)
|
||||
in_cursor = nullptr;
|
||||
}
|
||||
if (lost_information)
|
||||
return {ot::st::information_lost,
|
||||
"Some characters could not be converted into the target encoding."};
|
||||
return ot::st::ok;
|
||||
}
|
||||
|
||||
|
6
t/cli.cc
6
t/cli.cc
@ -153,13 +153,13 @@ void check_bad_arguments()
|
||||
error_case({"opustags", "--edit", "x", "-i", "-D"}, "Cannot mix --edit with -adDsS.", "mixing -e and -D");
|
||||
error_case({"opustags", "--edit", "x", "-i", "-S"}, "Cannot mix --edit with -adDsS.", "mixing -e and -S");
|
||||
error_case({"opustags", "-d", "\xFF", "x"},
|
||||
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
|
||||
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character",
|
||||
"-d with binary data");
|
||||
error_case({"opustags", "-a", "X=\xFF", "x"},
|
||||
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
|
||||
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character",
|
||||
"-a with binary data");
|
||||
error_case({"opustags", "-s", "X=\xFF", "x"},
|
||||
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
|
||||
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character",
|
||||
"-s with binary data");
|
||||
}
|
||||
|
||||
|
@ -276,11 +276,10 @@ is_deeply(opustags('-i', 'out.opus', "--add=I=\xf9\xce", {mode => ':raw'}), ['',
|
||||
|
||||
is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<'END_ERR', 0], 'read tags in ISO-8859-1');
|
||||
encoder=Lavc58.18.100 libopus
|
||||
TITLE=
|
||||
ARTIST=\xe9\xe0\xe7
|
||||
I=\xf9\xce
|
||||
END_OUT
|
||||
warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.
|
||||
warning: Some tags could not be displayed because of incompatible encoding. See --raw.
|
||||
END_ERR
|
||||
|
||||
$ENV{LC_ALL} = '';
|
||||
@ -302,10 +301,8 @@ T=\xFF
|
||||
END_IN
|
||||
|
||||
is_deeply(opustags(qw(out.opus)), [<<"END_OUT", <<'END_ERR', 0], 'default read with binary data');
|
||||
T=
|
||||
U=
|
||||
END_OUT
|
||||
warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.
|
||||
warning: Some tags could not be displayed because of incompatible encoding. See --raw.
|
||||
END_ERR
|
||||
|
||||
is_deeply(opustags(qw(out.opus --raw), { mode => ':raw' }), [<<"END_OUT", '', 0], 'raw read');
|
||||
|
@ -46,7 +46,7 @@ void check_converter()
|
||||
is(out, ephemere_iso, "conversion from UTF-8 is correct");
|
||||
|
||||
rc = from_utf8("\xFF\xFF", out);
|
||||
is(rc, ot::st::information_lost, "conversion from bad UTF-8 is lossy");
|
||||
is(rc, ot::st::badly_encoded, "conversion from bad UTF-8 fails");
|
||||
}
|
||||
|
||||
void check_shell_esape()
|
||||
|
Loading…
x
Reference in New Issue
Block a user