Support NUL delimiters with -z

This commit is contained in:
Frédéric Mangano 2024-04-30 12:17:00 +09:00
parent 7ec3551f62
commit 0067162ffb
6 changed files with 73 additions and 32 deletions

View File

@ -69,5 +69,6 @@ Documentation
--vendor print the vendor string
--set-vendor VALUE set the vendor string
--raw disable encoding conversion
-z delimit tags with NUL
See the man page, `opustags.1`, for extensive documentation.

View File

@ -1,4 +1,4 @@
.TH opustags 1 "March 2023" "@PROJECT_NAME@ @PROJECT_VERSION@"
.TH opustags 1 "April 2024" "@PROJECT_NAME@ @PROJECT_VERSION@"
.SH NAME
opustags \- Ogg Opus tag editor
.SH SYNOPSIS
@ -135,6 +135,14 @@ corrupted or possibly even contain intentional binary data. In that case, --raw
kind of binary data without ensuring the validity of the tags encoding. This option may also be
useful when your system encoding is different from UTF-8 and you wish to preserve the full UTF-8
character set even though your system cannot display it.
.TP
.B \-z
When editing tags programmatically with line-based tools like grep or sed, tags containing newlines
are likely to corrupt the result because these tools won't interpret multi-line tags as a whole. To
make automatic processing easier, \fB-z\fP delimits tags by a null byte (ASCII NUL) instead of line
feeds. That same \fB-z\fP flag is also supported by GNU grep or GNU sed and, combined with opustags
-z, would make them process the input tag-by-tag instead of line-by-line, thus supporting multi-line
tags as well. This option also disables the TAB prefix for continuation lines after a line feed.
.SH EXAMPLES
.PP
List all the tags in file foo.opus:
@ -145,10 +153,6 @@ Copy in.opus to out.opus, with the TITLE tag added:
.PP
opustags in.opus --output out.opus --add "TITLE=Hello world!"
.PP
Replace all the tags in dest.opus with the ones from src.opus:
.PP
opustags src.opus | opustags --in-place dest.opus --set-all
.PP
Remove the previously existing ARTIST tags and add the two X and Y ARTIST tags, then display the new
tags without writing them to the Opus file:
.PP
@ -157,6 +161,14 @@ tags without writing them to the Opus file:
Edit tags interactively in Vim:
.PP
EDITOR=vim opustags --in-place --edit file.opus
.PP
Replace all the tags in dest.opus with the ones from src.opus:
.PP
opustags src.opus | opustags --in-place dest.opus --set-all
.PP
Use GNU grep to remove all the CHAPTER* tags, with -z to support multi-line tags:
.PP
opustags -z file.opus | grep -z -v ^CHAPTER | opustags -z --in-place file.opus --set-all
.SH CAVEATS
.PP
\fBopustags\fP currently has the following limitations:

View File

@ -41,6 +41,7 @@ Options:
--vendor print the vendor string
--set-vendor VALUE set the vendor string
--raw disable encoding conversion
-z delimit tags with NUL
See the man page for extensive documentation.
)raw";
@ -79,7 +80,7 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
throw status {st::bad_arguments, "No arguments specified. Use -h for help."};
int c;
optind = 0;
while ((c = getopt_long(argc, argv, ":ho:iyd:a:s:DSe", getopt_options, NULL)) != -1) {
while ((c = getopt_long(argc, argv, ":ho:iyd:a:s:DSez", getopt_options, NULL)) != -1) {
switch (c) {
case 'h':
opt.print_help = true;
@ -139,6 +140,9 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
case 'r':
opt.raw = true;
break;
case 'z':
opt.tag_delimiter = '\0';
break;
case ':':
throw status {st::bad_arguments, "Missing value for option '"s + argv[optind - 1] + "'."};
default:
@ -226,17 +230,17 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
if (set_all) {
// Read comments from stdin and prepend them to opt.to_add.
std::list<std::u8string> comments = read_comments(comments_input, opt.raw);
std::list<std::u8string> comments = read_comments(comments_input, opt);
opt.to_add.splice(opt.to_add.begin(), std::move(comments));
}
return opt;
}
/** Format a UTF-8 string by adding tabulations (\t) after line feeds (\n) to mark continuation for
* multiline values. */
static std::u8string format_value(const std::u8string& source)
* multiline values. With -z, this behavior applies for embedded NUL characters instead of LF. */
static std::u8string format_value(const std::u8string& source, const ot::options& opt)
{
auto newline_count = std::count(source.begin(), source.end(), u8'\n');
auto newline_count = std::count(source.begin(), source.end(), opt.tag_delimiter);
// General case: the value fits on a single line. Use std::string's copy constructor for the
// most efficient copy we could hope for.
@ -247,7 +251,7 @@ static std::u8string format_value(const std::u8string& source)
formatted.reserve(source.size() + newline_count);
for (auto c : source) {
formatted.push_back(c);
if (c == '\n')
if (c == opt.tag_delimiter)
formatted.push_back(u8'\t');
}
return formatted;
@ -257,9 +261,9 @@ static std::u8string format_value(const std::u8string& source)
* Convert the comment from UTF-8 to the system encoding if relevant, and print it with a trailing
* delimiter (opt.tag_delimiter: a line feed by default, NUL with -z).
*/
static void puts_utf8(std::u8string_view str, FILE* output, bool raw)
static void puts_utf8(std::u8string_view str, FILE* output, const ot::options& opt)
{
if (raw) {
if (opt.raw) {
fwrite(str.data(), 1, str.size(), output);
} else {
try {
@ -270,7 +274,7 @@ static void puts_utf8(std::u8string_view str, FILE* output, bool raw)
throw;
}
}
putc('\n', output);
putc(opt.tag_delimiter, output);
}
/**
@ -279,7 +283,7 @@ static void puts_utf8(std::u8string_view str, FILE* output, bool raw)
* To disambiguate between a newline embedded in a comment and a newline representing the start of
* the next tag, continuation lines always have a single TAB (^I) character added to the beginning.
*/
void ot::print_comments(const std::list<std::u8string>& comments, FILE* output, bool raw)
void ot::print_comments(const std::list<std::u8string>& comments, FILE* output, const ot::options& opt)
{
bool has_control = false;
for (const std::u8string& source_comment : comments) {
@ -291,14 +295,14 @@ void ot::print_comments(const std::list<std::u8string>& comments, FILE* output,
}
}
}
std::u8string utf8_comment = format_value(source_comment);
puts_utf8(utf8_comment, output, raw);
std::u8string utf8_comment = format_value(source_comment, opt);
puts_utf8(utf8_comment, output, opt);
}
if (has_control)
fputs("warning: Some tags contain control characters.\n", stderr);
}
std::list<std::u8string> ot::read_comments(FILE* input, bool raw)
std::list<std::u8string> ot::read_comments(FILE* input, const ot::options& opt)
{
std::list<std::u8string> comments;
comments.clear();
@ -306,12 +310,12 @@ std::list<std::u8string> ot::read_comments(FILE* input, bool raw)
size_t buflen = 0;
ssize_t nread;
std::u8string* previous_comment = nullptr;
while ((nread = getline(&source_line, &buflen, input)) != -1) {
if (nread > 0 && source_line[nread - 1] == '\n')
while ((nread = getdelim(&source_line, &buflen, opt.tag_delimiter, input)) != -1) {
if (nread > 0 && source_line[nread - 1] == opt.tag_delimiter)
--nread; // Chomp.
std::u8string line;
if (raw) {
if (opt.raw) {
line = std::u8string(reinterpret_cast<char8_t*>(source_line), nread);
} else {
try {
@ -335,7 +339,7 @@ std::list<std::u8string> ot::read_comments(FILE* input, bool raw)
free(source_line);
throw rc;
} else {
line[0] = '\n';
line[0] = opt.tag_delimiter;
previous_comment->append(line);
}
} else if (line.find(u8'=') == decltype(line)::npos) {
@ -391,7 +395,7 @@ static void edit_tags(ot::opus_tags& tags, const ot::options& opt)
}
/** Spawn VISUAL or EDITOR to edit the given tags. */
static void edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path, bool raw)
static void edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path, const ot::options& opt)
{
const char* editor = nullptr;
if (getenv("TERM") != nullptr)
@ -410,7 +414,7 @@ static void edit_tags_interactively(ot::opus_tags& tags, const std::optional<std
if (fd == -1 || (tags_file = fdopen(fd, "w")) == nullptr)
throw ot::status {ot::st::standard_error,
"Could not open '" + tags_path + "': " + strerror(errno)};
ot::print_comments(tags.comments, tags_file.get(), raw);
ot::print_comments(tags.comments, tags_file.get(), opt);
tags_file.reset();
// Spawn the editor, and watch the modification timestamps.
@ -441,7 +445,7 @@ static void edit_tags_interactively(ot::opus_tags& tags, const std::optional<std
if (tags_file == nullptr)
throw ot::status {ot::st::standard_error, "Error opening " + tags_path + ": " + strerror(errno)};
try {
tags.comments = ot::read_comments(tags_file.get(), raw);
tags.comments = ot::read_comments(tags_file.get(), opt);
} catch (const ot::status& rc) {
fprintf(stderr, "warning: Leaving %s on the disk.\n", tags_path.c_str());
throw;
@ -524,7 +528,7 @@ static void process(ot::ogg_reader& reader, ot::ogg_writer* writer, const ot::op
if (writer) {
if (opt.edit_interactively) {
fflush(writer->file); // flush before calling the subprocess
edit_tags_interactively(tags, writer->path, opt.raw);
edit_tags_interactively(tags, writer->path, opt);
}
auto packet = ot::render_tags(tags);
writer->write_header_packet(serialno, pageno, packet);
@ -532,9 +536,9 @@ static void process(ot::ogg_reader& reader, ot::ogg_writer* writer, const ot::op
} else {
if (opt.cover_out != "-") {
if (opt.print_vendor)
puts_utf8(tags.vendor, stdout, opt.raw);
puts_utf8(tags.vendor, stdout, opt);
else
ot::print_comments(tags.comments, stdout, opt.raw);
ot::print_comments(tags.comments, stdout, opt);
}
break;
}

View File

@ -534,6 +534,13 @@ struct options {
* extract and set as-is, encoding conversion would get in the way.
*/
bool raw = false;
/**
* In text mode (default), tags are separated by a line feed. However, when combining
* opustags with grep or other line-based tools, this proves to be a bad separator because
* tag values may contain newlines. Changing the delimiter to '\0' with -z eases the
* processing of multi-line tags with other tools that support null-terminated lines.
*/
char tag_delimiter = '\n';
};
/**
@ -551,13 +558,13 @@ options parse_options(int argc, char** argv, FILE* comments);
*
* The output generated is meant to be parseable by #ot::read_comments.
*/
void print_comments(const std::list<std::u8string>& comments, FILE* output, bool raw);
void print_comments(const std::list<std::u8string>& comments, FILE* output, const options& opt);
/**
* Parse the comments output by #ot::print_comments. Unless opt.raw is true, the comments are
* converted from the system encoding to UTF-8, and returned as UTF-8.
*/
std::list<std::u8string> read_comments(FILE* input, bool raw);
std::list<std::u8string> read_comments(FILE* input, const options& opt);
/**
* Remove all comments matching the specified selector, which may either be a field name or a

View File

@ -5,8 +5,10 @@
static ot::status read_comments(FILE* input, std::list<std::u8string>& comments, bool raw)
{
ot::options opt;
opt.raw = raw;
try {
comments = ot::read_comments(input, raw);
comments = ot::read_comments(input, opt);
} catch (const ot::status& rc) {
return rc;
}

View File

@ -4,7 +4,7 @@ use strict;
use warnings;
use utf8;
use Test::More tests => 62;
use Test::More tests => 66;
use Test::Deep qw(cmp_deeply re);
use Digest::MD5;
@ -327,3 +327,18 @@ is_deeply(opustags(qw(--vendor gobble.opus)), ["Lavf58.12.100\n", '', 0], 'print
is_deeply(opustags(qw(--set-vendor opustags gobble.opus -o out.opus)), ['', '', 0], 'set the vendor string');
is_deeply(opustags(qw(--vendor out.opus)), ["opustags\n", '', 0], 'the vendor string was updated');
unlink('out.opus');
####################################################################################################
# Multi-line tags
is_deeply(opustags(qw(--set-all gobble.opus -o out.opus), { in => "MULTILINE=one\n\ttwo\nSIMPLE=three\n" }), ['', '', 0], 'parses continuation lines');
is_deeply(opustags(qw(out.opus -z)), ["MULTILINE=one\ntwo\0SIMPLE=three\0", '', 0], 'delimits output with NUL on -z');
unlink('out.opus');
is_deeply(opustags(qw(--set-all gobble.opus -o out.opus -z), { in => "MULTILINE=one\ntwo\0SIMPLE=three\0" }), ['', '', 0], 'delimits input with NUL on -z');
is_deeply(opustags(qw(out.opus)), [<<'END', '', 0], 'indents continuation lines');
MULTILINE=one
two
SIMPLE=three
END
unlink('out.opus');