mirror of
https://github.com/fmang/opustags.git
synced 2025-01-15 12:43:17 +01:00
Introduce --raw for disabling transcoding
This commit is contained in:
parent
6db7f07bd5
commit
4cae6c44ee
@ -60,7 +60,6 @@ More generally, here are a few features that could be added in the future:
|
||||
- Logicial stream listing and selection for multiplexed files.
|
||||
- Escaping control characters with --escape.
|
||||
- Dump binary packets with --binary.
|
||||
- Skip encoding conversion with --raw.
|
||||
- Edition of the vendor string.
|
||||
- Edition of the arbitrary binary block past the comments.
|
||||
- Support for OpusTags packets spanning multiple pages (> 64 kB).
|
||||
|
@ -62,5 +62,6 @@ Documentation
|
||||
-s, --set FIELD=VALUE replace a comment
|
||||
-S, --set-all import comments from standard input
|
||||
-e, --edit edit tags interactively in VISUAL/EDITOR
|
||||
--raw disable encoding conversion
|
||||
|
||||
See the man page, `opustags.1`, for extensive documentation.
|
||||
|
@ -103,6 +103,13 @@ Blank lines and lines starting with \fI#\fP are ignored.
|
||||
Edit tags interactively by spawning the program specified by the EDITOR
|
||||
environment variable. The allowed format is the same as \fB--set-all\fP.
|
||||
If TERM and VISUAL are set, VISUAL takes precedence over EDITOR.
|
||||
.TP
|
||||
.B \-\-raw
|
||||
OpusTags metadata should always be encoded in UTF-8, as per RFC 7845. However, some files may be
|
||||
corrupted or possibly even contain intentional binary data. In that case, --raw lets you edit that
|
||||
kind of binary data without ensuring the validity of the tags encoding. This option may also be
|
||||
useful when your system encoding is different from UTF-8 and you wish to preserve the full UTF-8
|
||||
character set even though your system cannot display it.
|
||||
.SH EXAMPLES
|
||||
.PP
|
||||
List all the tags in file foo.opus:
|
||||
|
79
src/cli.cc
79
src/cli.cc
@ -38,6 +38,7 @@ Options:
|
||||
-s, --set FIELD=VALUE replace a comment
|
||||
-S, --set-all import comments from standard input
|
||||
-e, --edit edit tags interactively in VISUAL/EDITOR
|
||||
--raw disable encoding conversion
|
||||
|
||||
See the man page for extensive documentation.
|
||||
)raw";
|
||||
@ -53,6 +54,7 @@ static struct option getopt_options[] = {
|
||||
{"delete-all", no_argument, 0, 'D'},
|
||||
{"set-all", no_argument, 0, 'S'},
|
||||
{"edit", no_argument, 0, 'e'},
|
||||
{"raw", no_argument, 0, 'r'},
|
||||
{NULL, 0, 0, 0}
|
||||
};
|
||||
|
||||
@ -107,6 +109,9 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
|
||||
case 'e':
|
||||
opt.edit_interactively = true;
|
||||
break;
|
||||
case 'r':
|
||||
opt.raw = true;
|
||||
break;
|
||||
case ':':
|
||||
return {st::bad_arguments,
|
||||
"Missing value for option '"s + argv[optind - 1] + "'."};
|
||||
@ -126,12 +131,14 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
|
||||
}
|
||||
|
||||
// Convert arguments to UTF-8.
|
||||
for (std::list<std::string>* args : { &opt.to_add, &opt.to_delete }) {
|
||||
for (std::string& arg : *args) {
|
||||
rc = to_utf8(arg, utf8);
|
||||
if (rc != ot::st::ok)
|
||||
return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
|
||||
arg = std::move(utf8);
|
||||
if (!opt.raw) {
|
||||
for (std::list<std::string>* args : { &opt.to_add, &opt.to_delete }) {
|
||||
for (std::string& arg : *args) {
|
||||
rc = to_utf8(arg, utf8);
|
||||
if (rc != ot::st::ok)
|
||||
return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
|
||||
arg = std::move(utf8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -159,7 +166,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
|
||||
if (set_all) {
|
||||
// Read comments from stdin and prepend them to opt.to_add.
|
||||
std::list<std::string> comments;
|
||||
auto rc = read_comments(comments_input, comments);
|
||||
auto rc = read_comments(comments_input, comments, opt.raw);
|
||||
if (rc != st::ok)
|
||||
return rc;
|
||||
opt.to_add.splice(opt.to_add.begin(), std::move(comments));
|
||||
@ -174,7 +181,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
|
||||
* callers that don’t escape backslashes. Maybe add options to select a mode between simple,
|
||||
* raw, and escaped.
|
||||
*/
|
||||
void ot::print_comments(const std::list<std::string>& comments, FILE* output)
|
||||
void ot::print_comments(const std::list<std::string>& comments, FILE* output, bool raw)
|
||||
{
|
||||
static ot::encoding_converter from_utf8("UTF-8", "//IGNORE");
|
||||
std::string local;
|
||||
@ -182,25 +189,33 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output)
|
||||
bool bad_comments = false;
|
||||
bool has_newline = false;
|
||||
bool has_control = false;
|
||||
for (const std::string& comment : comments) {
|
||||
ot::status rc = from_utf8(comment, local);
|
||||
if (rc == ot::st::information_lost) {
|
||||
info_lost = true;
|
||||
} else if (rc != ot::st::ok) {
|
||||
bad_comments = true;
|
||||
continue;
|
||||
for (const std::string& utf8_comment : comments) {
|
||||
const std::string* comment;
|
||||
// Convert the comment from UTF-8 to the system encoding if relevant.
|
||||
if (raw) {
|
||||
comment = &utf8_comment;
|
||||
} else {
|
||||
ot::status rc = from_utf8(utf8_comment, local);
|
||||
comment = &local;
|
||||
if (rc == ot::st::information_lost) {
|
||||
info_lost = true;
|
||||
} else if (rc != ot::st::ok) {
|
||||
bad_comments = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
for (unsigned char c : comment) {
|
||||
|
||||
for (unsigned char c : *comment) {
|
||||
if (c == '\n')
|
||||
has_newline = true;
|
||||
else if (c < 0x20)
|
||||
has_control = true;
|
||||
}
|
||||
fwrite(local.data(), 1, local.size(), output);
|
||||
fwrite(comment->data(), 1, comment->size(), output);
|
||||
putc('\n', output);
|
||||
}
|
||||
if (info_lost)
|
||||
fputs("warning: Some characters are not supported by your system encoding and have been discarded.\n", stderr);
|
||||
fputs("warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.\n", stderr);
|
||||
if (bad_comments)
|
||||
fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr);
|
||||
if (has_newline)
|
||||
@ -210,7 +225,7 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output)
|
||||
fputs("warning: Some tags contain control characters.\n", stderr);
|
||||
}
|
||||
|
||||
ot::status ot::read_comments(FILE* input, std::list<std::string>& comments)
|
||||
ot::status ot::read_comments(FILE* input, std::list<std::string>& comments, bool raw)
|
||||
{
|
||||
static ot::encoding_converter to_utf8("", "UTF-8");
|
||||
comments.clear();
|
||||
@ -229,13 +244,17 @@ ot::status ot::read_comments(FILE* input, std::list<std::string>& comments)
|
||||
free(line);
|
||||
return rc;
|
||||
}
|
||||
std::string utf8;
|
||||
ot::status rc = to_utf8(std::string_view(line, nread), utf8);
|
||||
if (rc == ot::st::ok) {
|
||||
comments.emplace_back(std::move(utf8));
|
||||
if (raw) {
|
||||
comments.emplace_back(line, nread);
|
||||
} else {
|
||||
free(line);
|
||||
return {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message};
|
||||
std::string utf8;
|
||||
ot::status rc = to_utf8(std::string_view(line, nread), utf8);
|
||||
if (rc == ot::st::ok) {
|
||||
comments.emplace_back(std::move(utf8));
|
||||
} else {
|
||||
free(line);
|
||||
return {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message};
|
||||
}
|
||||
}
|
||||
}
|
||||
free(line);
|
||||
@ -281,7 +300,7 @@ static ot::status edit_tags(ot::opus_tags& tags, const ot::options& opt)
|
||||
}
|
||||
|
||||
/** Spawn VISUAL or EDITOR to edit the given tags. */
|
||||
static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path)
|
||||
static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path, bool raw)
|
||||
{
|
||||
const char* editor = nullptr;
|
||||
if (getenv("TERM") != nullptr)
|
||||
@ -299,7 +318,7 @@ static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::option
|
||||
if (fd == -1 || (tags_file = fdopen(fd, "w")) == nullptr)
|
||||
return {ot::st::standard_error,
|
||||
"Could not open '" + tags_path + "': " + strerror(errno)};
|
||||
ot::print_comments(tags.comments, tags_file);
|
||||
ot::print_comments(tags.comments, tags_file, raw);
|
||||
if (fclose(tags_file) != 0)
|
||||
return {ot::st::standard_error, tags_path + ": fclose error: "s + strerror(errno)};
|
||||
|
||||
@ -328,7 +347,7 @@ static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::option
|
||||
tags_file = fopen(tags_path.c_str(), "re");
|
||||
if (tags_file == nullptr)
|
||||
return {ot::st::standard_error, "Error opening " + tags_path + ": " + strerror(errno)};
|
||||
if ((rc = ot::read_comments(tags_file, tags.comments)) != ot::st::ok) {
|
||||
if ((rc = ot::read_comments(tags_file, tags.comments, raw)) != ot::st::ok) {
|
||||
fprintf(stderr, "warning: Leaving %s on the disk.\n", tags_path.c_str());
|
||||
return rc;
|
||||
}
|
||||
@ -390,7 +409,7 @@ static ot::status process(ot::ogg_reader& reader, ot::ogg_writer* writer, const
|
||||
if (writer) {
|
||||
if (opt.edit_interactively) {
|
||||
fflush(writer->file); // flush before calling the subprocess
|
||||
if ((rc = edit_tags_interactively(tags, writer->path)) != ot::st::ok)
|
||||
if ((rc = edit_tags_interactively(tags, writer->path, opt.raw)) != ot::st::ok)
|
||||
return rc;
|
||||
}
|
||||
auto packet = ot::render_tags(tags);
|
||||
@ -398,7 +417,7 @@ static ot::status process(ot::ogg_reader& reader, ot::ogg_writer* writer, const
|
||||
if (rc != ot::st::ok)
|
||||
return rc;
|
||||
} else {
|
||||
ot::print_comments(tags.comments, stdout);
|
||||
ot::print_comments(tags.comments, stdout, opt.raw);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -466,6 +466,12 @@ struct options {
|
||||
* Options: --add, --set, --set-all
|
||||
*/
|
||||
std::list<std::string> to_add;
|
||||
/**
|
||||
* Disable encoding conversions. OpusTags are specified to always be encoded as UTF-8, but
|
||||
* if for some reason a specific file contains binary tags that someone would like to
|
||||
* extract and set as-is, encoding conversion would get in the way.
|
||||
*/
|
||||
bool raw = false;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -484,14 +490,14 @@ status parse_options(int argc, char** argv, options& opt, FILE* comments);
|
||||
*
|
||||
* The output generated is meant to be parseable by #ot::read_comments.
|
||||
*/
|
||||
void print_comments(const std::list<std::string>& comments, FILE* output);
|
||||
void print_comments(const std::list<std::string>& comments, FILE* output, bool raw);
|
||||
|
||||
/**
|
||||
* Parse the comments outputted by #ot::print_comments.
|
||||
*
|
||||
* The comments are converted from the system encoding to UTF-8, and returned as UTF-8.
|
||||
*/
|
||||
status read_comments(FILE* input, std::list<std::string>& comments);
|
||||
status read_comments(FILE* input, std::list<std::string>& comments, bool raw);
|
||||
|
||||
/**
|
||||
* Remove all comments matching the specified selector, which may either be a field name or a
|
||||
|
28
t/cli.cc
28
t/cli.cc
@ -12,7 +12,7 @@ void check_read_comments()
|
||||
{
|
||||
std::string txt = "TITLE=a b c\n\nARTIST=X\nArtist=Y\n"s;
|
||||
ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
|
||||
rc = ot::read_comments(input.get(), comments);
|
||||
rc = ot::read_comments(input.get(), comments, false);
|
||||
if (rc != ot::st::ok)
|
||||
throw failure("could not read comments");
|
||||
auto&& expected = {"TITLE=a b c", "ARTIST=X", "Artist=Y"};
|
||||
@ -22,14 +22,23 @@ void check_read_comments()
|
||||
{
|
||||
std::string txt = "CORRUPTED=\xFF\xFF\n"s;
|
||||
ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
|
||||
rc = ot::read_comments(input.get(), comments);
|
||||
rc = ot::read_comments(input.get(), comments, false);
|
||||
if (rc != ot::st::badly_encoded)
|
||||
throw failure("did not get the expected error reading corrupted data");
|
||||
}
|
||||
{
|
||||
std::string txt = "RAW=\xFF\xFF\n"s;
|
||||
ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
|
||||
rc = ot::read_comments(input.get(), comments, true);
|
||||
if (rc != ot::st::ok)
|
||||
throw failure("could not read comments");
|
||||
if (comments.front() != "RAW=\xFF\xFF")
|
||||
throw failure("parsed user comments did not match expectations");
|
||||
}
|
||||
{
|
||||
std::string txt = "MALFORMED\n"s;
|
||||
ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
|
||||
rc = ot::read_comments(input.get(), comments);
|
||||
rc = ot::read_comments(input.get(), comments, false);
|
||||
if (rc != ot::st::error)
|
||||
throw failure("did not get the expected error reading malformed comments");
|
||||
}
|
||||
@ -90,6 +99,10 @@ void check_good_arguments()
|
||||
if (opt.paths_in.size() != 1 || opt.paths_in[0] != "x" ||
|
||||
!opt.edit_interactively || !opt.overwrite || !opt.in_place)
|
||||
throw failure("unexpected option parsing result for case #4");
|
||||
|
||||
opt = parse({"opustags", "-a", "X=\xFF", "--raw", "x"});
|
||||
if (!opt.raw || opt.to_add.front() != "X=\xFF")
|
||||
throw failure("--raw did not disable transcoding");
|
||||
}
|
||||
|
||||
void check_bad_arguments()
|
||||
@ -139,6 +152,15 @@ void check_bad_arguments()
|
||||
error_case({"opustags", "--edit", "x", "-i", "-d", "X"}, "Cannot mix --edit with -adDsS.", "mixing -e and -d");
|
||||
error_case({"opustags", "--edit", "x", "-i", "-D"}, "Cannot mix --edit with -adDsS.", "mixing -e and -D");
|
||||
error_case({"opustags", "--edit", "x", "-i", "-S"}, "Cannot mix --edit with -adDsS.", "mixing -e and -S");
|
||||
error_case({"opustags", "-d", "\xFF", "x"},
|
||||
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
|
||||
"-d with binary data");
|
||||
error_case({"opustags", "-a", "X=\xFF", "x"},
|
||||
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
|
||||
"-a with binary data");
|
||||
error_case({"opustags", "-s", "X=\xFF", "x"},
|
||||
"Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.",
|
||||
"-s with binary data");
|
||||
}
|
||||
|
||||
static void check_delete_comments()
|
||||
|
25
t/opustags.t
25
t/opustags.t
@ -4,7 +4,7 @@ use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More tests => 47;
|
||||
use Test::More tests => 50;
|
||||
|
||||
use Digest::MD5;
|
||||
use File::Basename;
|
||||
@ -72,6 +72,7 @@ Options:
|
||||
-s, --set FIELD=VALUE replace a comment
|
||||
-S, --set-all import comments from standard input
|
||||
-e, --edit edit tags interactively in VISUAL/EDITOR
|
||||
--raw disable encoding conversion
|
||||
|
||||
See the man page for extensive documentation.
|
||||
EOF
|
||||
@ -279,7 +280,7 @@ TITLE=
|
||||
ARTIST=\xe9\xe0\xe7
|
||||
I=\xf9\xce
|
||||
END_OUT
|
||||
warning: Some characters are not supported by your system encoding and have been discarded.
|
||||
warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.
|
||||
END_ERR
|
||||
|
||||
$ENV{LC_ALL} = '';
|
||||
@ -291,3 +292,23 @@ ARTIST=éàç
|
||||
I=ùÎ
|
||||
END_OUT
|
||||
}
|
||||
|
||||
|
||||
####################################################################################################
|
||||
# Raw edition
|
||||
|
||||
is_deeply(opustags(qw(-S out.opus -i --raw -a), "U=\xFE", {in => <<"END_IN", mode => ':raw'}), ['', '', 0], 'raw set-all with binary data');
|
||||
T=\xFF
|
||||
END_IN
|
||||
|
||||
is_deeply(opustags(qw(out.opus)), [<<"END_OUT", <<'END_ERR', 0], 'default read with binary data');
|
||||
T=
|
||||
U=
|
||||
END_OUT
|
||||
warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.
|
||||
END_ERR
|
||||
|
||||
is_deeply(opustags(qw(out.opus --raw), { mode => ':raw' }), [<<"END_OUT", '', 0], 'raw read');
|
||||
T=\xFF
|
||||
U=\xFE
|
||||
END_OUT
|
||||
|
Loading…
x
Reference in New Issue
Block a user