mirror of
https://github.com/fmang/opustags.git
synced 2024-11-05 21:17:22 +01:00
Support NUL delimiters with -z
This commit is contained in:
parent
7ec3551f62
commit
0067162ffb
@ -69,5 +69,6 @@ Documentation
|
||||
--vendor print the vendor string
|
||||
--set-vendor VALUE set the vendor string
|
||||
--raw disable encoding conversion
|
||||
-z delimit tags with NUL
|
||||
|
||||
See the man page, `opustags.1`, for extensive documentation.
|
||||
|
22
opustags.1
22
opustags.1
@ -1,4 +1,4 @@
|
||||
.TH opustags 1 "March 2023" "@PROJECT_NAME@ @PROJECT_VERSION@"
|
||||
.TH opustags 1 "April 2024" "@PROJECT_NAME@ @PROJECT_VERSION@"
|
||||
.SH NAME
|
||||
opustags \- Ogg Opus tag editor
|
||||
.SH SYNOPSIS
|
||||
@ -135,6 +135,14 @@ corrupted or possibly even contain intentional binary data. In that case, --raw
|
||||
kind of binary data without ensuring the validity of the tags encoding. This option may also be
|
||||
useful when your system encoding is different from UTF-8 and you wish to preserve the full UTF-8
|
||||
character set even though your system cannot display it.
|
||||
.TP
|
||||
.B \-z
|
||||
When editing tags programmatically with line-based tools like grep or sed, tags containing newlines
|
||||
are likely to corrupt the result because these tools won’t interpret multi-line tags as a whole. To
|
||||
make automatic processing easier, \fB-z\fP delimits tags by a null byte (ASCII NUL) instead of line
|
||||
feeds. The same \fB-z\fP flag is also supported by GNU grep and GNU sed; combined with opustags
|
||||
-z, it makes them process the input tag-by-tag instead of line-by-line, thus supporting multi-line
|
||||
tags as well. This option also disables the TAB prefix for continuation lines after a line feed.
|
||||
.SH EXAMPLES
|
||||
.PP
|
||||
List all the tags in file foo.opus:
|
||||
@ -145,10 +153,6 @@ Copy in.opus to out.opus, with the TITLE tag added:
|
||||
.PP
|
||||
opustags in.opus --output out.opus --add "TITLE=Hello world!"
|
||||
.PP
|
||||
Replace all the tags in dest.opus with the ones from src.opus:
|
||||
.PP
|
||||
opustags src.opus | opustags --in-place dest.opus --set-all
|
||||
.PP
|
||||
Remove the previously existing ARTIST tags and add the two X and Y ARTIST tags, then display the new
|
||||
tags without writing them to the Opus file:
|
||||
.PP
|
||||
@ -157,6 +161,14 @@ tags without writing them to the Opus file:
|
||||
Edit tags interactively in Vim:
|
||||
.PP
|
||||
EDITOR=vim opustags --in-place --edit file.opus
|
||||
.PP
|
||||
Replace all the tags in dest.opus with the ones from src.opus:
|
||||
.PP
|
||||
opustags src.opus | opustags --in-place dest.opus --set-all
|
||||
.PP
|
||||
Use GNU grep to remove all the CHAPTER* tags, with -z to support multi-line tags:
|
||||
.PP
|
||||
opustags -z file.opus | grep -z -v ^CHAPTER | opustags -z --in-place file.opus --set-all
|
||||
.SH CAVEATS
|
||||
.PP
|
||||
\fBopustags\fP currently has the following limitations:
|
||||
|
50
src/cli.cc
50
src/cli.cc
@ -41,6 +41,7 @@ Options:
|
||||
--vendor print the vendor string
|
||||
--set-vendor VALUE set the vendor string
|
||||
--raw disable encoding conversion
|
||||
-z delimit tags with NUL
|
||||
|
||||
See the man page for extensive documentation.
|
||||
)raw";
|
||||
@ -79,7 +80,7 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
|
||||
throw status {st::bad_arguments, "No arguments specified. Use -h for help."};
|
||||
int c;
|
||||
optind = 0;
|
||||
while ((c = getopt_long(argc, argv, ":ho:iyd:a:s:DSe", getopt_options, NULL)) != -1) {
|
||||
while ((c = getopt_long(argc, argv, ":ho:iyd:a:s:DSez", getopt_options, NULL)) != -1) {
|
||||
switch (c) {
|
||||
case 'h':
|
||||
opt.print_help = true;
|
||||
@ -139,6 +140,9 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
|
||||
case 'r':
|
||||
opt.raw = true;
|
||||
break;
|
||||
case 'z':
|
||||
opt.tag_delimiter = '\0';
|
||||
break;
|
||||
case ':':
|
||||
throw status {st::bad_arguments, "Missing value for option '"s + argv[optind - 1] + "'."};
|
||||
default:
|
||||
@ -226,17 +230,17 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
|
||||
|
||||
if (set_all) {
|
||||
// Read comments from stdin and prepend them to opt.to_add.
|
||||
std::list<std::u8string> comments = read_comments(comments_input, opt.raw);
|
||||
std::list<std::u8string> comments = read_comments(comments_input, opt);
|
||||
opt.to_add.splice(opt.to_add.begin(), std::move(comments));
|
||||
}
|
||||
return opt;
|
||||
}
|
||||
|
||||
/** Format a UTF-8 string by adding tabulations (\t) after line feeds (\n) to mark continuation for
|
||||
* multiline values. */
|
||||
static std::u8string format_value(const std::u8string& source)
|
||||
* multiline values. With -z, this behavior applies to embedded NUL characters instead of LF. */
|
||||
static std::u8string format_value(const std::u8string& source, const ot::options& opt)
|
||||
{
|
||||
auto newline_count = std::count(source.begin(), source.end(), u8'\n');
|
||||
auto newline_count = std::count(source.begin(), source.end(), opt.tag_delimiter);
|
||||
|
||||
// General case: the value fits on a single line. Use std::string’s copy constructor for the
|
||||
// most efficient copy we could hope for.
|
||||
@ -247,7 +251,7 @@ static std::u8string format_value(const std::u8string& source)
|
||||
formatted.reserve(source.size() + newline_count);
|
||||
for (auto c : source) {
|
||||
formatted.push_back(c);
|
||||
if (c == '\n')
|
||||
if (c == opt.tag_delimiter)
|
||||
formatted.push_back(u8'\t');
|
||||
}
|
||||
return formatted;
|
||||
@ -257,9 +261,9 @@ static std::u8string format_value(const std::u8string& source)
|
||||
* Convert the comment from UTF-8 to the system encoding if relevant, and print it with a trailing
|
||||
* tag delimiter (a line feed by default, NUL with -z).
|
||||
*/
|
||||
static void puts_utf8(std::u8string_view str, FILE* output, bool raw)
|
||||
static void puts_utf8(std::u8string_view str, FILE* output, const ot::options& opt)
|
||||
{
|
||||
if (raw) {
|
||||
if (opt.raw) {
|
||||
fwrite(str.data(), 1, str.size(), output);
|
||||
} else {
|
||||
try {
|
||||
@ -270,7 +274,7 @@ static void puts_utf8(std::u8string_view str, FILE* output, bool raw)
|
||||
throw;
|
||||
}
|
||||
}
|
||||
putc('\n', output);
|
||||
putc(opt.tag_delimiter, output);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -279,7 +283,7 @@ static void puts_utf8(std::u8string_view str, FILE* output, bool raw)
|
||||
* To disambiguate between a newline embedded in a comment and a newline representing the start of
|
||||
* the next tag, continuation lines have a single TAB (^I) character added to the beginning. With -z,
* tags are delimited by NUL instead and embedded line feeds are left as-is.
|
||||
*/
|
||||
void ot::print_comments(const std::list<std::u8string>& comments, FILE* output, bool raw)
|
||||
void ot::print_comments(const std::list<std::u8string>& comments, FILE* output, const ot::options& opt)
|
||||
{
|
||||
bool has_control = false;
|
||||
for (const std::u8string& source_comment : comments) {
|
||||
@ -291,14 +295,14 @@ void ot::print_comments(const std::list<std::u8string>& comments, FILE* output,
|
||||
}
|
||||
}
|
||||
}
|
||||
std::u8string utf8_comment = format_value(source_comment);
|
||||
puts_utf8(utf8_comment, output, raw);
|
||||
std::u8string utf8_comment = format_value(source_comment, opt);
|
||||
puts_utf8(utf8_comment, output, opt);
|
||||
}
|
||||
if (has_control)
|
||||
fputs("warning: Some tags contain control characters.\n", stderr);
|
||||
}
|
||||
|
||||
std::list<std::u8string> ot::read_comments(FILE* input, bool raw)
|
||||
std::list<std::u8string> ot::read_comments(FILE* input, const ot::options& opt)
|
||||
{
|
||||
std::list<std::u8string> comments;
|
||||
comments.clear();
|
||||
@ -306,12 +310,12 @@ std::list<std::u8string> ot::read_comments(FILE* input, bool raw)
|
||||
size_t buflen = 0;
|
||||
ssize_t nread;
|
||||
std::u8string* previous_comment = nullptr;
|
||||
while ((nread = getline(&source_line, &buflen, input)) != -1) {
|
||||
if (nread > 0 && source_line[nread - 1] == '\n')
|
||||
while ((nread = getdelim(&source_line, &buflen, opt.tag_delimiter, input)) != -1) {
|
||||
if (nread > 0 && source_line[nread - 1] == opt.tag_delimiter)
|
||||
--nread; // Chomp.
|
||||
|
||||
std::u8string line;
|
||||
if (raw) {
|
||||
if (opt.raw) {
|
||||
line = std::u8string(reinterpret_cast<char8_t*>(source_line), nread);
|
||||
} else {
|
||||
try {
|
||||
@ -335,7 +339,7 @@ std::list<std::u8string> ot::read_comments(FILE* input, bool raw)
|
||||
free(source_line);
|
||||
throw rc;
|
||||
} else {
|
||||
line[0] = '\n';
|
||||
line[0] = opt.tag_delimiter;
|
||||
previous_comment->append(line);
|
||||
}
|
||||
} else if (line.find(u8'=') == decltype(line)::npos) {
|
||||
@ -391,7 +395,7 @@ static void edit_tags(ot::opus_tags& tags, const ot::options& opt)
|
||||
}
|
||||
|
||||
/** Spawn VISUAL or EDITOR to edit the given tags. */
|
||||
static void edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path, bool raw)
|
||||
static void edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path, const ot::options& opt)
|
||||
{
|
||||
const char* editor = nullptr;
|
||||
if (getenv("TERM") != nullptr)
|
||||
@ -410,7 +414,7 @@ static void edit_tags_interactively(ot::opus_tags& tags, const std::optional<std
|
||||
if (fd == -1 || (tags_file = fdopen(fd, "w")) == nullptr)
|
||||
throw ot::status {ot::st::standard_error,
|
||||
"Could not open '" + tags_path + "': " + strerror(errno)};
|
||||
ot::print_comments(tags.comments, tags_file.get(), raw);
|
||||
ot::print_comments(tags.comments, tags_file.get(), opt);
|
||||
tags_file.reset();
|
||||
|
||||
// Spawn the editor, and watch the modification timestamps.
|
||||
@ -441,7 +445,7 @@ static void edit_tags_interactively(ot::opus_tags& tags, const std::optional<std
|
||||
if (tags_file == nullptr)
|
||||
throw ot::status {ot::st::standard_error, "Error opening " + tags_path + ": " + strerror(errno)};
|
||||
try {
|
||||
tags.comments = ot::read_comments(tags_file.get(), raw);
|
||||
tags.comments = ot::read_comments(tags_file.get(), opt);
|
||||
} catch (const ot::status& rc) {
|
||||
fprintf(stderr, "warning: Leaving %s on the disk.\n", tags_path.c_str());
|
||||
throw;
|
||||
@ -524,7 +528,7 @@ static void process(ot::ogg_reader& reader, ot::ogg_writer* writer, const ot::op
|
||||
if (writer) {
|
||||
if (opt.edit_interactively) {
|
||||
fflush(writer->file); // flush before calling the subprocess
|
||||
edit_tags_interactively(tags, writer->path, opt.raw);
|
||||
edit_tags_interactively(tags, writer->path, opt);
|
||||
}
|
||||
auto packet = ot::render_tags(tags);
|
||||
writer->write_header_packet(serialno, pageno, packet);
|
||||
@ -532,9 +536,9 @@ static void process(ot::ogg_reader& reader, ot::ogg_writer* writer, const ot::op
|
||||
} else {
|
||||
if (opt.cover_out != "-") {
|
||||
if (opt.print_vendor)
|
||||
puts_utf8(tags.vendor, stdout, opt.raw);
|
||||
puts_utf8(tags.vendor, stdout, opt);
|
||||
else
|
||||
ot::print_comments(tags.comments, stdout, opt.raw);
|
||||
ot::print_comments(tags.comments, stdout, opt);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -534,6 +534,13 @@ struct options {
|
||||
* extract and set as-is, encoding conversion would get in the way.
|
||||
*/
|
||||
bool raw = false;
|
||||
/**
|
||||
* In text mode (default), tags are separated by a line feed. However, when combining
|
||||
* opustags with grep or other line-based tools, this proves to be a bad separator because
|
||||
* tag values may contain newlines. Changing the delimiter to '\0' with -z eases the
|
||||
* processing of multi-line tags with other tools that support null-terminated lines.
|
||||
*/
|
||||
char tag_delimiter = '\n';
|
||||
};
|
||||
|
||||
/**
|
||||
@ -551,13 +558,13 @@ options parse_options(int argc, char** argv, FILE* comments);
|
||||
*
|
||||
* The output generated is meant to be parseable by #ot::read_comments.
|
||||
*/
|
||||
void print_comments(const std::list<std::u8string>& comments, FILE* output, bool raw);
|
||||
void print_comments(const std::list<std::u8string>& comments, FILE* output, const options& opt);
|
||||
|
||||
/**
|
||||
* Parse the comments outputted by #ot::print_comments. Unless opt.raw is true, the comments are
|
||||
* converted from the system encoding to UTF-8, and returned as UTF-8.
|
||||
*/
|
||||
std::list<std::u8string> read_comments(FILE* input, bool raw);
|
||||
std::list<std::u8string> read_comments(FILE* input, const options& opt);
|
||||
|
||||
/**
|
||||
* Remove all comments matching the specified selector, which may either be a field name or a
|
||||
|
4
t/cli.cc
4
t/cli.cc
@ -5,8 +5,10 @@
|
||||
|
||||
static ot::status read_comments(FILE* input, std::list<std::u8string>& comments, bool raw)
|
||||
{
|
||||
ot::options opt;
|
||||
opt.raw = raw;
|
||||
try {
|
||||
comments = ot::read_comments(input, raw);
|
||||
comments = ot::read_comments(input, opt);
|
||||
} catch (const ot::status& rc) {
|
||||
return rc;
|
||||
}
|
||||
|
17
t/opustags.t
17
t/opustags.t
@ -4,7 +4,7 @@ use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More tests => 62;
|
||||
use Test::More tests => 66;
|
||||
use Test::Deep qw(cmp_deeply re);
|
||||
|
||||
use Digest::MD5;
|
||||
@ -327,3 +327,18 @@ is_deeply(opustags(qw(--vendor gobble.opus)), ["Lavf58.12.100\n", '', 0], 'print
|
||||
is_deeply(opustags(qw(--set-vendor opustags gobble.opus -o out.opus)), ['', '', 0], 'set the vendor string');
|
||||
is_deeply(opustags(qw(--vendor out.opus)), ["opustags\n", '', 0], 'the vendor string was updated');
|
||||
unlink('out.opus');
|
||||
|
||||
####################################################################################################
|
||||
# Multi-line tags
|
||||
|
||||
is_deeply(opustags(qw(--set-all gobble.opus -o out.opus), { in => "MULTILINE=one\n\ttwo\nSIMPLE=three\n" }), ['', '', 0], 'parses continuation lines');
|
||||
is_deeply(opustags(qw(out.opus -z)), ["MULTILINE=one\ntwo\0SIMPLE=three\0", '', 0], 'delimits output with NUL on -z');
|
||||
unlink('out.opus');
|
||||
|
||||
is_deeply(opustags(qw(--set-all gobble.opus -o out.opus -z), { in => "MULTILINE=one\ntwo\0SIMPLE=three\0" }), ['', '', 0], 'delimits input with NUL on -z');
|
||||
is_deeply(opustags(qw(out.opus)), [<<'END', '', 0], 'indents continuation lines');
|
||||
MULTILINE=one
|
||||
two
|
||||
SIMPLE=three
|
||||
END
|
||||
unlink('out.opus');
|
||||
|
Loading…
Reference in New Issue
Block a user