30 Commits
1.4.0 ... 1.6.0

Author SHA1 Message Date
2d5db09bda Release 1.6.0 2021-01-01 11:41:03 +01:00
3e0b3fa56e Make encoding errors fatal
With --raw there is a workaround.

The tolerant approach was cool and nice until you want to edit something
non-interactively and get the warning telling you you might have lost
data after the file was written. Failing fast is most likely the better
option here.
2020-12-27 10:55:25 +01:00
3e7b42062a Discard incompatible comments entirely
//IGNORE is not portable either. Now that we have --raw it’s less an
issue though.
2020-12-27 10:55:20 +01:00
4cae6c44ee Introduce --raw for disabling transcoding 2020-12-26 16:51:36 +01:00
6db7f07bd5 Factor CLI argument transcoding 2020-12-26 13:00:20 +01:00
fd5fa3cd5f Make ot::encoding_converter use string views 2020-12-26 12:42:37 +01:00
c43704a0a7 Use //IGNORE instead of //TRANSLIT when transcoding
//TRANSLIT is not well supported, and in most cases there’s not much
transliteration can help with when the encoding is limiting. Besides,
it sounds reasonable to assume most people use UTF-8 nowadays.
2020-12-26 12:30:44 +01:00
f98208c1a1 Support the various stat structures across systems 2020-11-25 20:07:23 +01:00
64fc6f8f6d Include config.h globally 2020-11-25 20:05:46 +01:00
1d03da324c Release 1.5.1 2020-11-21 11:05:56 +01:00
30b7f44ead Include endian.h or sys/endian.h depending on the platform 2020-11-14 20:27:08 +01:00
b8c8be453f Include headers for mkstemps
Linux requires <stdlib.h>, but FreeBSD requires <unistd.h>.
2020-11-14 18:18:42 +01:00
4a1b8705cc Release 1.5.0 2020-11-08 10:32:46 +01:00
7c8396ca45 run_editor: Pass the editor command through the shell
wordexp doesn’t work on OpenBSD, and escaping the path ourselves then
calling system() is actually easier than using wordexp.
2020-11-01 11:57:48 +01:00
639d46ed0f Introduce ot::shell_escape 2020-11-01 10:41:24 +01:00
d54bada7e6 Open handles with O_CLOEXEC
opustags’s only use of a sub-process is for spawning the EDITOR, and we
don’t want it to access our file handles.
2020-10-31 18:44:46 +01:00
57a4c0d5a0 Flush the writer before exec’ing
In the unlikely event the child process fails without exec’ing, we don’t
want both the child process and parent process to flush the OpusHead
header.

Thanks @omar-polo for reporting this!
2020-10-31 18:44:46 +01:00
d071b6cabd Fix error reporting when EDITOR fails 2020-10-31 18:10:33 +01:00
d8c36a3d3f Forbid mixing --edit with non-interactive edition options 2020-10-31 12:15:01 +01:00
ba2236facb Cancel --edit when the editor closes without saving 2020-10-31 12:11:26 +01:00
b3b092d241 Expand EDITOR/VISUAL with wordexp 2020-10-25 11:09:18 +01:00
8f0f29c056 Support VISUAL with --edit 2020-10-24 12:00:43 +02:00
e4ca6ca6ef Introduce the --edit option 2020-10-12 07:55:27 +02:00
df03cdf951 Introduce ot::execute_process 2020-10-11 18:06:40 +02:00
8252f94084 --set-all: Ignore comments starting with # 2020-10-11 18:06:39 +02:00
a1dcc8c47e Fix print_comments when output is not stdout 2020-10-11 17:43:04 +02:00
7206604f85 Make read_comments work on std::list
For consistency with ot::opus_tags.
2020-10-11 17:43:04 +02:00
6da5545b30 Flatten option compatibility checking
The more options we have the more nested it gets. It was getting
complicated.
2020-10-11 17:40:52 +02:00
537094fd53 use CMake’s FindIconv to detect iconv portably 2020-10-10 15:20:19 +02:00
be9740fe05 Explicitly include <optional>
It should have been included since we use std::optional, and not
including it breaks the build on OpenBSD.
2020-10-10 15:10:59 +02:00
13 changed files with 459 additions and 115 deletions

View File

@ -1,6 +1,25 @@
opustags changelog
==================
1.6.0 - 2021-01-01
------------------
- UTF-8 conversion errors are now fatal.
- Introduce --raw for disabling encoding conversions.
- Improve platform compatibility.
This also happens to be opustags’s 8-year anniversary!
1.5.1 - 2020-11-21
------------------
- Improve BSD support.
1.5.0 - 2020-11-08
------------------
- Introduce --edit for interactive edition.
1.4.0 - 2020-10-04
------------------

View File

@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.9)
project(
opustags
VERSION 1.4.0
VERSION 1.6.0
LANGUAGES CXX
)
@ -19,8 +19,19 @@ pkg_check_modules(OGG REQUIRED ogg)
add_compile_options(${OGG_CFLAGS})
link_directories(${OGG_LIBRARY_DIRS})
include(FindIconv)
# We need endian.h on Linux, and sys/endian.h on BSD.
include(CheckIncludeFileCXX)
check_include_file_cxx(endian.h HAVE_ENDIAN_H)
check_include_file_cxx(sys/endian.h HAVE_SYS_ENDIAN_H)
include(CheckStructHasMember)
check_struct_has_member("struct stat" st_mtim sys/stat.h HAVE_STAT_ST_MTIM LANGUAGE CXX)
check_struct_has_member("struct stat" st_mtimespec sys/stat.h HAVE_STAT_ST_MTIMESPEC LANGUAGE CXX)
configure_file(src/config.h.in config.h @ONLY)
include_directories(BEFORE src "${CMAKE_BINARY_DIR}" ${OGG_INCLUDE_DIRS})
include_directories(BEFORE src "${CMAKE_BINARY_DIR}" ${OGG_INCLUDE_DIRS} ${Iconv_INCLUDE_DIRS})
add_library(
ot
@ -30,11 +41,7 @@ add_library(
src/opus.cc
src/system.cc
)
target_link_libraries(ot PUBLIC ${OGG_LIBRARIES})
if (APPLE)
target_link_libraries(ot PUBLIC iconv)
endif()
target_link_libraries(ot PUBLIC ${OGG_LIBRARIES} ${Iconv_LIBRARIES})
add_executable(opustags src/opustags.cc)
target_link_libraries(opustags ot)

View File

@ -60,7 +60,6 @@ More generally, here are a few features that could be added in the future:
- Logical stream listing and selection for multiplexed files.
- Escaping control characters with --escape.
- Dump binary packets with --binary.
- Skip encoding conversion with --raw.
- Edition of the vendor string.
- Edition of the arbitrary binary block past the comments.
- Support for OpusTags packets spanning multiple pages (> 64 kB).

View File

@ -61,5 +61,7 @@ Documentation
-D, --delete-all delete all the previously existing comments
-s, --set FIELD=VALUE replace a comment
-S, --set-all import comments from standard input
-e, --edit edit tags interactively in VISUAL/EDITOR
--raw disable encoding conversion
See the man page, `opustags.1`, for extensive documentation.

View File

@ -97,7 +97,19 @@ Delete all the previously existing tags.
Sets the tags from scratch.
All the original tags are deleted and new ones are read from standard input.
Each line must specify a \fIFIELD=VALUE\fP pair and be separated with line feeds.
Blank lines are ignored.
Blank lines and lines starting with \fI#\fP are ignored.
.TP
.B \-e, \-\-edit
Edit tags interactively by spawning the program specified by the EDITOR
environment variable. The allowed format is the same as \fB--set-all\fP.
If TERM and VISUAL are set, VISUAL takes precedence over EDITOR.
.TP
.B \-\-raw
OpusTags metadata should always be encoded in UTF-8, as per RFC 7845. However, some files may be
corrupted or possibly even contain intentional binary data. In that case, --raw lets you edit that
kind of binary data without ensuring the validity of the tags’ encoding. This option may also be
useful when your system encoding is different from UTF-8 and you wish to preserve the full UTF-8
character set even though your system cannot display it.
.SH EXAMPLES
.PP
List all the tags in file foo.opus:
@ -116,6 +128,10 @@ Remove the previously existing ARTIST tags and add the two X and Y ARTIST tags,
tags without writing them to the Opus file:
.PP
opustags in.opus --add ARTIST=X --add ARTIST=Y --delete ARTIST
.PP
Edit tags interactively in Vim:
.PP
EDITOR=vim opustags --in-place --edit file.opus
.SH CAVEATS
.PP
\fBopustags\fP currently has the following limitations:

View File

@ -6,14 +6,15 @@
* this module from the main one is to allow easy testing.
*/
#include <config.h>
#include <opustags.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
using namespace std::literals::string_literals;
@ -36,6 +37,8 @@ Options:
-D, --delete-all delete all the previously existing comments
-s, --set FIELD=VALUE replace a comment
-S, --set-all import comments from standard input
-e, --edit edit tags interactively in VISUAL/EDITOR
--raw disable encoding conversion
See the man page for extensive documentation.
)raw";
@ -50,6 +53,8 @@ static struct option getopt_options[] = {
{"set", required_argument, 0, 's'},
{"delete-all", no_argument, 0, 'D'},
{"set-all", no_argument, 0, 'S'},
{"edit", no_argument, 0, 'e'},
{"raw", no_argument, 0, 'r'},
{NULL, 0, 0, 0}
};
@ -57,7 +62,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
{
static ot::encoding_converter to_utf8("", "UTF-8");
std::string utf8;
std::string::size_type equal;
const char* equal;
ot::status rc;
bool set_all = false;
opt = {};
@ -65,7 +70,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
return {st::bad_arguments, "No arguments specified. Use -h for help."};
int c;
optind = 0;
while ((c = getopt_long(argc, argv, ":ho:iyd:a:s:DS", getopt_options, NULL)) != -1) {
while ((c = getopt_long(argc, argv, ":ho:iyd:a:s:DSe", getopt_options, NULL)) != -1) {
switch (c) {
case 'h':
opt.print_help = true;
@ -77,26 +82,22 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
break;
case 'i':
opt.in_place = true;
opt.overwrite = true;
break;
case 'y':
opt.overwrite = true;
break;
case 'd':
rc = to_utf8(optarg, strlen(optarg), utf8);
if (rc != ot::st::ok)
return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
opt.to_delete.emplace_back(std::move(utf8));
opt.to_delete.emplace_back(optarg);
break;
case 'a':
case 's':
rc = to_utf8(optarg, strlen(optarg), utf8);
if (rc != ot::st::ok)
return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
if ((equal = utf8.find('=')) == std::string::npos)
equal = strchr(optarg, '=');
if (equal == nullptr)
return {st::bad_arguments, "Comment does not contain an equal sign: "s + optarg + "."};
if (c == 's')
opt.to_delete.emplace_back(utf8.substr(0, equal));
opt.to_add.emplace_back(std::move(utf8));
opt.to_delete.emplace_back(optarg, equal - optarg);
opt.to_add.emplace_back(optarg);
break;
case 'S':
opt.delete_all = true;
@ -105,6 +106,12 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
case 'D':
opt.delete_all = true;
break;
case 'e':
opt.edit_interactively = true;
break;
case 'r':
opt.raw = true;
break;
case ':':
return {st::bad_arguments,
"Missing value for option '"s + argv[optind - 1] + "'."};
@ -115,32 +122,54 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
}
if (opt.print_help)
return st::ok;
if (opt.in_place) {
if (opt.path_out)
return {st::bad_arguments, "Cannot combine --in-place and --output."};
opt.overwrite = true;
for (int i = optind; i < argc; i++) {
if (strcmp(argv[i], "-") == 0)
return {st::bad_arguments, "Cannot modify standard input in place."};
opt.paths_in.emplace_back(argv[i]);
}
} else {
if (optind != argc - 1)
return {st::bad_arguments, "Exactly one input file must be specified."};
if (set_all && strcmp(argv[optind], "-") == 0)
return {st::bad_arguments,
"Cannot use standard input as input file when --set-all is specified."};
opt.paths_in.emplace_back(argv[optind]);
// All non-option arguments are input files.
bool stdin_as_input = false;
for (int i = optind; i < argc; i++) {
stdin_as_input = stdin_as_input || strcmp(argv[i], "-") == 0;
opt.paths_in.emplace_back(argv[i]);
}
// Convert arguments to UTF-8.
if (!opt.raw) {
for (std::list<std::string>* args : { &opt.to_add, &opt.to_delete }) {
for (std::string& arg : *args) {
rc = to_utf8(arg, utf8);
if (rc != ot::st::ok)
return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
arg = std::move(utf8);
}
}
}
if (opt.in_place && opt.path_out)
return {st::bad_arguments, "Cannot combine --in-place and --output."};
if (opt.in_place && stdin_as_input)
return {st::bad_arguments, "Cannot modify standard input in place."};
if ((!opt.in_place || opt.edit_interactively) && opt.paths_in.size() != 1)
return {st::bad_arguments, "Exactly one input file must be specified."};
if (set_all && stdin_as_input)
return {st::bad_arguments, "Cannot use standard input as input file when --set-all is specified."};
if (opt.edit_interactively && (stdin_as_input || opt.path_out == "-"))
return {st::bad_arguments, "Cannot edit interactively when standard input or standard output are already used."};
if (opt.edit_interactively && !opt.path_out.has_value() && !opt.in_place)
return {st::bad_arguments, "Cannot edit interactively when no output is specified."};
if (opt.edit_interactively && (opt.delete_all || !opt.to_add.empty() || !opt.to_delete.empty()))
return {st::bad_arguments, "Cannot mix --edit with -adDsS."};
if (set_all) {
// Read comments from stdin and prepend them to opt.to_add.
std::vector<std::string> comments;
auto rc = read_comments(comments_input, comments);
std::list<std::string> comments;
auto rc = read_comments(comments_input, comments, opt.raw);
if (rc != st::ok)
return rc;
comments.reserve(comments.size() + opt.to_add.size());
std::move(opt.to_add.begin(), opt.to_add.end(), std::back_inserter(comments));
opt.to_add = std::move(comments);
opt.to_add.splice(opt.to_add.begin(), std::move(comments));
}
return st::ok;
}
@ -152,43 +181,43 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
* callers that don’t escape backslashes. Maybe add options to select a mode between simple,
* raw, and escaped.
*/
void ot::print_comments(const std::list<std::string>& comments, FILE* output)
ot::status ot::print_comments(const std::list<std::string>& comments, FILE* output, bool raw)
{
static ot::encoding_converter from_utf8("UTF-8", "//TRANSLIT");
static ot::encoding_converter from_utf8("UTF-8", "");
std::string local;
bool info_lost = false;
bool bad_comments = false;
bool has_newline = false;
bool has_control = false;
for (const std::string& comment : comments) {
ot::status rc = from_utf8(comment, local);
if (rc == ot::st::information_lost) {
info_lost = true;
} else if (rc != ot::st::ok) {
bad_comments = true;
continue;
for (const std::string& utf8_comment : comments) {
const std::string* comment;
// Convert the comment from UTF-8 to the system encoding if relevant.
if (raw) {
comment = &utf8_comment;
} else {
ot::status rc = from_utf8(utf8_comment, local);
comment = &local;
if (rc != ot::st::ok) {
rc.message += " See --raw.";
return rc;
}
}
for (unsigned char c : comment) {
for (unsigned char c : *comment) {
if (c == '\n')
has_newline = true;
else if (c < 0x20)
has_control = true;
}
fwrite(local.data(), 1, local.size(), output);
putchar('\n');
fwrite(comment->data(), 1, comment->size(), output);
putc('\n', output);
}
if (info_lost)
fputs("warning: Some tags have been transliterated to your system encoding.\n", stderr);
if (bad_comments)
fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr);
if (has_newline)
fputs("warning: Some tags contain newline characters. "
"These are not supported by --set-all.\n", stderr);
fputs("warning: Some tags contain unsupported newline characters.\n", stderr);
if (has_control)
fputs("warning: Some tags contain control characters.\n", stderr);
return st::ok;
}
ot::status ot::read_comments(FILE* input, std::vector<std::string>& comments)
ot::status ot::read_comments(FILE* input, std::list<std::string>& comments, bool raw)
{
static ot::encoding_converter to_utf8("", "UTF-8");
comments.clear();
@ -200,18 +229,24 @@ ot::status ot::read_comments(FILE* input, std::vector<std::string>& comments)
--nread;
if (nread == 0)
continue;
if (line[0] == '#') // comment
continue;
if (memchr(line, '=', nread) == nullptr) {
ot::status rc = {ot::st::error, "Malformed tag: " + std::string(line, nread)};
free(line);
return rc;
}
std::string utf8;
ot::status rc = to_utf8(line, nread, utf8);
if (rc == ot::st::ok) {
comments.emplace_back(std::move(utf8));
if (raw) {
comments.emplace_back(line, nread);
} else {
free(line);
return {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message};
std::string utf8;
ot::status rc = to_utf8(std::string_view(line, nread), utf8);
if (rc == ot::st::ok) {
comments.emplace_back(std::move(utf8));
} else {
free(line);
return {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message};
}
}
}
free(line);
@ -256,6 +291,68 @@ static ot::status edit_tags(ot::opus_tags& tags, const ot::options& opt)
return ot::st::ok;
}
/**
 * Spawn VISUAL or EDITOR to edit the given tags.
 *
 * The comments are printed into a temporary file, the editor is run on that file, and the
 * file is then parsed back into tags.comments.
 *
 * \param tags      Tags to edit. tags.comments is printed to the temporary file, then
 *                  passed to read_comments to receive the edited content.
 * \param base_path Prefix of the temporary file name; "tags" is used when it is empty.
 * \param raw       Forwarded as-is to print_comments and read_comments.
 * \return st::ok once the edited comments have been read back, st::cancel when the editor
 *         exited successfully without changing the file's modification time, or the status
 *         of the step that failed.
 */
static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path, bool raw)
{
	// VISUAL is only considered when TERM is set; EDITOR is the fallback in every case.
	const char* editor = nullptr;
	if (getenv("TERM") != nullptr)
		editor = getenv("VISUAL");
	if (editor == nullptr) // without a terminal, or if VISUAL is unset
		editor = getenv("EDITOR");
	if (editor == nullptr)
		return {ot::st::error,
		        "No editor specified in environment variable VISUAL or EDITOR."};

	// Building the temporary tags file.
	// mkstemps rewrites the XXXXXX placeholder in place; 9 is the length of ".opustags".
	ot::status rc;
	std::string tags_path = base_path.value_or("tags") + ".XXXXXX.opustags";
	int fd = mkstemps(tags_path.data(), 9);
	if (fd == -1)
		return {ot::st::standard_error,
		        "Could not open '" + tags_path + "': " + strerror(errno)};
	ot::file tags_file;
	tags_file = fdopen(fd, "w");
	if (tags_file == nullptr) {
		// The descriptor is still ours when fdopen fails: release it and the empty
		// file, taking care to report fdopen's errno rather than close's.
		int fdopen_errno = errno;
		close(fd);
		remove(tags_path.c_str());
		return {ot::st::standard_error,
		        "Could not open '" + tags_path + "': " + strerror(fdopen_errno)};
	}
	if ((rc = ot::print_comments(tags.comments, tags_file.get(), raw)) != ot::st::ok) {
		// Nothing the user typed is in there yet, so don't leave the file behind.
		tags_file.reset();
		remove(tags_path.c_str());
		return rc;
	}
	tags_file.reset();

	// Spawn the editor, and watch the modification timestamps.
	timespec before, after;
	if ((rc = ot::get_file_timestamp(tags_path.c_str(), before)) != ot::st::ok) {
		remove(tags_path.c_str()); // still holds no user data at this point
		return rc;
	}
	ot::status editor_rc = ot::run_editor(editor, tags_path);
	if ((rc = ot::get_file_timestamp(tags_path.c_str(), after)) != ot::st::ok)
		return rc; // probably because the file was deleted
	bool modified = (before.tv_sec != after.tv_sec || before.tv_nsec != after.tv_nsec);
	if (editor_rc != ot::st::ok) {
		// Keep the file only if the failing editor wrote something into it.
		if (modified)
			fprintf(stderr, "warning: Leaving %s on the disk.\n", tags_path.c_str());
		else
			remove(tags_path.c_str());
		return editor_rc;
	} else if (!modified) {
		remove(tags_path.c_str());
		fputs("Cancelling edition because the tags file was not modified.\n", stderr);
		return ot::st::cancel;
	}

	// Applying the new tags.
	tags_file = fopen(tags_path.c_str(), "re");
	if (tags_file == nullptr)
		return {ot::st::standard_error, "Error opening " + tags_path + ": " + strerror(errno)};
	if ((rc = ot::read_comments(tags_file.get(), tags.comments, raw)) != ot::st::ok) {
		fprintf(stderr, "warning: Leaving %s on the disk.\n", tags_path.c_str());
		return rc;
	}
	tags_file.reset();

	// Remove the temporary tags file only on success, because unlike the
	// partial Ogg file that is irrecoverable, the edited tags file
	// contains user data, so let's leave users a chance to recover it.
	remove(tags_path.c_str());

	return ot::st::ok;
}
/**
* Main loop of opustags. Read the packets from the reader, and forwards them to the writer.
* Transform the OpusTags packet on the fly.
@ -302,12 +399,18 @@ static ot::status process(ot::ogg_reader& reader, ot::ogg_writer* writer, const
if ((rc = edit_tags(tags, opt)) != ot::st::ok)
return rc;
if (writer) {
if (opt.edit_interactively) {
fflush(writer->file); // flush before calling the subprocess
if ((rc = edit_tags_interactively(tags, writer->path, opt.raw)) != ot::st::ok)
return rc;
}
auto packet = ot::render_tags(tags);
rc = writer->write_header_packet(serialno, pageno, packet);
if (rc != ot::st::ok)
return rc;
} else {
ot::print_comments(tags.comments, stdout);
if ((rc = ot::print_comments(tags.comments, stdout, opt.raw)) != ot::st::ok)
return rc;
break;
}
} else {
@ -325,7 +428,7 @@ static ot::status run_single(const ot::options& opt, const std::string& path_in,
ot::file input;
if (path_in == "-")
input = stdin;
else if ((input = fopen(path_in.c_str(), "r")) == nullptr)
else if ((input = fopen(path_in.c_str(), "re")) == nullptr)
return {ot::st::standard_error,
"Could not open '" + path_in + "' for reading: " + strerror(errno)};
ot::ogg_reader reader(input.get());
@ -365,7 +468,7 @@ static ot::status run_single(const ot::options& opt, const std::string& path_in,
/* The output file exists. */
if (!S_ISREG(output_info.st_mode)) {
/* Special files are opened for writing directly. */
if ((final_output = fopen(path_out->c_str(), "w")) == nullptr)
if ((final_output = fopen(path_out->c_str(), "we")) == nullptr)
rc = {ot::st::standard_error,
"Could not open '" + path_out.value() + "' for writing: " +
strerror(errno)};
@ -388,6 +491,7 @@ static ot::status run_single(const ot::options& opt, const std::string& path_in,
return rc;
ot::ogg_writer writer(output);
writer.path = path_out;
rc = process(reader, &writer, opt);
if (rc == ot::st::ok)
rc = temporary_output.commit();

View File

@ -1,2 +1,7 @@
#cmakedefine PROJECT_NAME "@PROJECT_NAME@"
#cmakedefine PROJECT_VERSION "@PROJECT_VERSION@"
#cmakedefine HAVE_ENDIAN_H @HAVE_ENDIAN_H@
#cmakedefine HAVE_SYS_ENDIAN_H @HAVE_SYS_ENDIAN_H@
#cmakedefine HAVE_STAT_ST_MTIM @HAVE_STAT_ST_MTIM@
#cmakedefine HAVE_STAT_ST_MTIMESPEC @HAVE_STAT_ST_MTIMESPEC@

View File

@ -25,6 +25,14 @@
#include <string.h>
#ifdef HAVE_ENDIAN_H
# include <endian.h>
#endif
#ifdef HAVE_SYS_ENDIAN_H
# include <sys/endian.h>
#endif
#ifdef __APPLE__
#include <libkern/OSByteOrder.h>
#define htole32(x) OSSwapHostToLittleInt32(x)

View File

@ -24,14 +24,19 @@
#pragma once
#include <config.h>
#include <iconv.h>
#include <ogg/ogg.h>
#include <stdio.h>
#include <time.h>
#include <functional>
#include <list>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
namespace ot {
@ -57,9 +62,10 @@ enum class st {
error,
standard_error, /**< Error raised by the C standard library. */
int_overflow,
cancel,
/* System */
badly_encoded,
information_lost,
child_process_failed,
/* Ogg */
bad_stream,
end_of_stream,
@ -151,16 +157,31 @@ public:
~encoding_converter();
/**
* Convert text using iconv. If the input sequence is invalid, return #st::badly_encoded and
* abort the processing. If some character could not be converted perfectly, keep converting
* the string and finally return #st::information_lost.
* abort the processing, leaving out in an undefined state.
*/
status operator()(const std::string& in, std::string& out)
{ return (*this)(in.data(), in.size(), out); }
status operator()(const char* in, size_t n, std::string& out);
status operator()(std::string_view in, std::string& out);
private:
iconv_t cd; /**< conversion descriptor */
};
/** Escape a string so that a POSIX shell interprets it as a single argument. */
std::string shell_escape(std::string_view word);
/**
* Execute the editor process specified in editor. Wait for the process to exit and
* return st::ok on success, or st::child_process_failed if it did not exit with 0.
*
* editor is passed unescaped to the shell, and may contain CLI options.
* path is the name of the file to edit, which will be passed as the last argument to editor.
*/
ot::status run_editor(std::string_view editor, std::string_view path);
/**
* Return the specified path’s mtime, i.e. the last data modification
* timestamp.
*/
ot::status get_file_timestamp(const char* path, timespec& mtime);
/** \} */
/***********************************************************************************************//**
@ -282,6 +303,10 @@ struct ogg_writer {
* represented as a block of data and a length.
*/
FILE* file;
/**
* Path to the output file.
*/
std::optional<std::string> path;
};
/**
@ -401,6 +426,15 @@ struct options {
* Options: --in-place
*/
bool in_place = false;
/**
* Spawn EDITOR to edit tags interactively.
*
* stdin and stdout must be left free for the editor, so paths_in and
* path_out can’t take `-`, and --set-all is not supported.
*
* Option: --edit
*/
bool edit_interactively = false;
/**
* List of comments to delete. Each string is a selector according to the definition of
* #delete_comments.
@ -414,7 +448,7 @@ struct options {
*
* Option: --delete, --set
*/
std::vector<std::string> to_delete;
std::list<std::string> to_delete;
/**
* Delete all the existing comments.
*
@ -429,7 +463,13 @@ struct options {
*
* Options: --add, --set, --set-all
*/
std::vector<std::string> to_add;
std::list<std::string> to_add;
/**
* Disable encoding conversions. OpusTags are specified to always be encoded as UTF-8, but
* if for some reason a specific file contains binary tags that someone would like to
* extract and set as-is, encoding conversion would get in the way.
*/
bool raw = false;
};
/**
@ -448,14 +488,14 @@ status parse_options(int argc, char** argv, options& opt, FILE* comments);
*
* The output generated is meant to be parseable by #ot::read_comments.
*/
void print_comments(const std::list<std::string>& comments, FILE* output);
status print_comments(const std::list<std::string>& comments, FILE* output, bool raw);
/**
* Parse the comments outputted by #ot::print_comments.
*
* The comments are converted from the system encoding to UTF-8, and returned as UTF-8.
*/
status read_comments(FILE* input, std::vector<std::string>& comments);
status read_comments(FILE* input, std::list<std::string>& comments, bool raw);
/**
* Remove all comments matching the specified selector, which may either be a field name or a

View File

@ -12,10 +12,14 @@
#include <opustags.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
using namespace std::string_literals;
ot::status ot::partial_file::open(const char* destination)
{
abort();
@ -100,35 +104,87 @@ ot::encoding_converter::~encoding_converter()
iconv_close(cd);
}
ot::status ot::encoding_converter::operator()(const char* in, size_t n, std::string& out)
ot::status ot::encoding_converter::operator()(std::string_view in, std::string& out)
{
iconv(cd, nullptr, nullptr, nullptr, nullptr);
out.clear();
out.reserve(n);
char* in_cursor = const_cast<char*>(in);
size_t in_left = n;
out.reserve(in.size());
char* in_cursor = const_cast<char*>(in.data());
size_t in_left = in.size();
constexpr size_t chunk_size = 1024;
char chunk[chunk_size];
bool lost_information = false;
for (;;) {
char *out_cursor = chunk;
size_t out_left = chunk_size;
size_t rc = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left);
if (rc == (size_t) -1 && errno != E2BIG)
if (rc == (size_t) -1 && errno == E2BIG) {
// Loop normally.
} else if (rc == (size_t) -1) {
return {ot::st::badly_encoded, strerror(errno) + "."s};
} else if (rc != 0) {
return {ot::st::badly_encoded,
"Could not convert string '" + std::string(in, n) + "': " +
strerror(errno)};
if (rc != 0)
lost_information = true;
"Some characters could not be converted into the target encoding."};
}
out.append(chunk, out_cursor - chunk);
if (in_cursor == nullptr)
break;
else if (in_left == 0)
in_cursor = nullptr;
}
if (lost_information)
return {ot::st::information_lost,
"Some characters could not be converted into the target encoding "
"in string '" + std::string(in, n) + "'."};
return ot::st::ok;
}
/**
 * Wrap word in single quotes.
 *
 * A single quote or an exclamation mark inside word is written by closing the current
 * quoted run, emitting the character preceded by a backslash, and opening a new quoted run.
 * Every other character is copied unchanged.
 */
std::string ot::shell_escape(std::string_view word)
{
	std::string quoted;
	// Room for the two enclosing quotes, which is all that most words need.
	quoted.reserve(word.size() + 2);
	quoted.push_back('\'');
	for (const char ch : word) {
		switch (ch) {
		case '\'':
			quoted.append("'\\''");
			break;
		case '!':
			quoted.append("'\\!'");
			break;
		default:
			quoted.push_back(ch);
			break;
		}
	}
	quoted.push_back('\'');
	return quoted;
}
/**
 * Run editor on path through system() and wait for it to finish.
 *
 * \param editor Command inserted verbatim at the start of the command line, so it may
 *               carry its own arguments.
 * \param path   File to edit, shell-escaped and appended as the last argument.
 * \return st::ok when the command exits with status 0, st::standard_error when system()
 *         itself fails, st::child_process_failed otherwise.
 */
ot::status ot::run_editor(std::string_view editor, std::string_view path)
{
	std::string command = std::string(editor) + " " + shell_escape(path);
	int status = system(command.c_str());
	if (status == -1)
		return {st::standard_error, "waitpid error: "s + strerror(errno)};
	// errno says nothing about how the child ended, so describe the wait status itself.
	if (WIFSIGNALED(status))
		return {st::child_process_failed,
		        "Child process did not terminate normally: killed by signal " +
		        std::to_string(WTERMSIG(status))};
	if (!WIFEXITED(status))
		return {st::child_process_failed,
		        "Child process did not terminate normally."};
	if (WEXITSTATUS(status) != 0)
		return {st::child_process_failed,
		        "Child process exited with " + std::to_string(WEXITSTATUS(status))};
	return st::ok;
}
/**
 * Query path with stat() and copy its last modification time into mtime.
 *
 * The struct stat member read depends on the HAVE_STAT_ST_MTIM and HAVE_STAT_ST_MTIMESPEC
 * macros. Returns st::standard_error when stat() fails, st::ok otherwise.
 */
ot::status ot::get_file_timestamp(const char* path, timespec& mtime)
{
	struct stat file_info;
	const bool stat_failed = (stat(path, &file_info) == -1);
	if (stat_failed)
		return {st::standard_error, path + ": stat error: "s + strerror(errno)};
#if defined(HAVE_STAT_ST_MTIM)
	mtime = file_info.st_mtim;
#elif defined(HAVE_STAT_ST_MTIMESPEC)
	mtime = file_info.st_mtimespec;
#else
	// Neither timespec member is available: assemble the value from separate fields.
	mtime.tv_sec = file_info.st_mtime;
	mtime.tv_nsec = file_info.st_mtimensec;
#endif
	return st::ok;
}

View File

@ -7,12 +7,12 @@ using namespace std::literals::string_literals;
void check_read_comments()
{
std::vector<std::string> comments;
std::list<std::string> comments;
ot::status rc;
{
std::string txt = "TITLE=a b c\n\nARTIST=X\nArtist=Y\n"s;
ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
rc = ot::read_comments(input.get(), comments);
rc = ot::read_comments(input.get(), comments, false);
if (rc != ot::st::ok)
throw failure("could not read comments");
auto&& expected = {"TITLE=a b c", "ARTIST=X", "Artist=Y"};
@ -22,14 +22,23 @@ void check_read_comments()
{
std::string txt = "CORRUPTED=\xFF\xFF\n"s;
ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
rc = ot::read_comments(input.get(), comments);
rc = ot::read_comments(input.get(), comments, false);
if (rc != ot::st::badly_encoded)
throw failure("did not get the expected error reading corrupted data");
}
{
std::string txt = "RAW=\xFF\xFF\n"s;
ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
rc = ot::read_comments(input.get(), comments, true);
if (rc != ot::st::ok)
throw failure("could not read comments");
if (comments.front() != "RAW=\xFF\xFF")
throw failure("parsed user comments did not match expectations");
}
{
std::string txt = "MALFORMED\n"s;
ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
rc = ot::read_comments(input.get(), comments);
rc = ot::read_comments(input.get(), comments, false);
if (rc != ot::st::error)
throw failure("did not get the expected error reading malformed comments");
}
@ -71,20 +80,29 @@ void check_good_arguments()
opt = parse({"opustags", "x", "--output", "y", "-D", "-s", "X=Y Z", "-d", "a=b"});
if (opt.paths_in.size() != 1 || opt.paths_in.front() != "x" || !opt.path_out ||
opt.path_out != "y" || !opt.delete_all || opt.overwrite || opt.to_delete.size() != 2 ||
opt.to_delete[0] != "X" || opt.to_delete[1] != "a=b" ||
opt.to_add.size() != 1 || opt.to_add[0] != "X=Y Z")
opt.to_delete.front() != "X" || *std::next(opt.to_delete.begin()) != "a=b" ||
opt.to_add != std::list<std::string>{"X=Y Z"})
throw failure("unexpected option parsing result for case #1");
opt = parse({"opustags", "-S", "x", "-S", "-a", "x=y z", "-i"});
if (opt.paths_in.size() != 1 || opt.paths_in.front() != "x" || opt.path_out ||
!opt.overwrite || opt.to_delete.size() != 0 ||
opt.to_add.size() != 2 || opt.to_add[0] != "N=1" || opt.to_add[1] != "x=y z")
opt.to_add != std::list<std::string>{"N=1", "x=y z"})
throw failure("unexpected option parsing result for case #2");
opt = parse({"opustags", "-i", "x", "y", "z"});
if (opt.paths_in.size() != 3 || opt.paths_in[0] != "x" || opt.paths_in[1] != "y" ||
opt.paths_in[2] != "z" || !opt.overwrite || !opt.in_place)
throw failure("unexpected option parsing result for case #3");
opt = parse({"opustags", "-ie", "x"});
if (opt.paths_in.size() != 1 || opt.paths_in[0] != "x" ||
!opt.edit_interactively || !opt.overwrite || !opt.in_place)
throw failure("unexpected option parsing result for case #4");
opt = parse({"opustags", "-a", "X=\xFF", "--raw", "x"});
if (!opt.raw || opt.to_add.front() != "X=\xFF")
throw failure("--raw did not disable transcoding");
}
void check_bad_arguments()
@ -121,6 +139,28 @@ void check_bad_arguments()
error_code_case({"opustags", "-S", "x"}, "Malformed tag: INVALID", ot::st::error, "attempt to read invalid argument with -S");
error_case({"opustags", "-o", "", "--output", "y", "z"},
"Cannot specify --output more than once.", "double output with first filename empty");
error_case({"opustags", "-e", "-i", "x", "y"},
"Exactly one input file must be specified.", "editing interactively two files at once");
error_case({"opustags", "--edit", "-", "-o", "x"},
"Cannot edit interactively when standard input or standard output are already used.",
"editing interactively from stdandard intput");
error_case({"opustags", "--edit", "x", "-o", "-"},
"Cannot edit interactively when standard input or standard output are already used.",
"editing interactively to stdandard output");
error_case({"opustags", "--edit", "x"}, "Cannot edit interactively when no output is specified.", "editing without output");
error_case({"opustags", "--edit", "x", "-i", "-a", "X=Y"}, "Cannot mix --edit with -adDsS.", "mixing -e and -a");
error_case({"opustags", "--edit", "x", "-i", "-d", "X"}, "Cannot mix --edit with -adDsS.", "mixing -e and -d");
error_case({"opustags", "--edit", "x", "-i", "-D"}, "Cannot mix --edit with -adDsS.", "mixing -e and -D");
error_case({"opustags", "--edit", "x", "-i", "-S"}, "Cannot mix --edit with -adDsS.", "mixing -e and -S");
error_case({"opustags", "-d", "\xFF", "x"},
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character.",
"-d with binary data");
error_case({"opustags", "-a", "X=\xFF", "x"},
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character.",
"-a with binary data");
error_case({"opustags", "-s", "X=\xFF", "x"},
"Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character.",
"-s with binary data");
}
static void check_delete_comments()

View File

@ -4,7 +4,7 @@ use strict;
use warnings;
use utf8;
use Test::More tests => 41;
use Test::More tests => 50;
use Digest::MD5;
use File::Basename;
@ -71,6 +71,8 @@ Options:
-D, --delete-all delete all the previously existing comments
-s, --set FIELD=VALUE replace a comment
-S, --set-all import comments from standard input
-e, --edit edit tags interactively in VISUAL/EDITOR
--raw disable encoding conversion
See the man page for extensive documentation.
EOF
@ -160,7 +162,7 @@ is_deeply(opustags('out.opus', '-D', '-a', "X=foo\nbar\tquux"), [<<'END_OUT', <<
X=foo
bar quux
END_OUT
warning: Some tags contain newline characters. These are not supported by --set-all.
warning: Some tags contain unsupported newline characters.
warning: Some tags contain control characters.
END_ERR
@ -179,6 +181,7 @@ ARTIST=七面鳥
A=A
X=Y
#IGNORE=COMMENTS
END_IN
OK=yes again
ARTIST=七面鳥
@ -221,6 +224,23 @@ is(md5('out2.opus'), '0a4d20c287b2e46b26cb0eee353c2069', 'the tags were added co
unlink('out.opus');
unlink('out2.opus');
####################################################################################################
# Interactive editing
$ENV{EDITOR} = 'sed -i -e y/aeiou/AEIOU/ `sleep 0.1`';
is_deeply(opustags('gobble.opus', '-eo', "'screaming !'.opus"), ['', '', 0], 'edit a file with EDITOR');
is(md5("'screaming !'.opus"), '56e85ccaa83a13c15576d75bbd6d835f', 'the tags were modified');
$ENV{EDITOR} = 'true';
is_deeply(opustags('-ie', "'screaming !'.opus"), ['', "Cancelling edition because the tags file was not modified.\n", 256], 'close -e without saving');
is(md5("'screaming !'.opus"), '56e85ccaa83a13c15576d75bbd6d835f', 'the tags were not modified');
$ENV{EDITOR} = 'false';
is_deeply(opustags('-ie', "'screaming !'.opus"), ['', "'screaming !'.opus: error: Child process exited with 1\n", 256], 'editor exiting with an error');
is(md5("'screaming !'.opus"), '56e85ccaa83a13c15576d75bbd6d835f', 'the tags were not modified');
unlink("'screaming !'.opus");
####################################################################################################
# Test muxed streams
@ -236,15 +256,16 @@ unlink('muxed.ogg');
####################################################################################################
# Locale
my $locale = 'fr_FR.iso88591';
my $locale = 'en_US.iso88591';
my @all_locales = split(' ', `locale -a`);
SKIP: {
skip "locale $locale is not present", 4 unless (any { $_ eq $locale } @all_locales);
skip "locale $locale is not present", 5 unless (any { $_ eq $locale } @all_locales);
opustags(qw(gobble.opus -a TITLE=七面鳥 -a ARTIST=éàç -o out.opus -y));
local $ENV{LC_ALL} = $locale;
local $ENV{LANGUAGE} = '';
is_deeply(opustags(qw(-S out.opus), {in => <<"END_IN", mode => ':raw'}), [<<"END_OUT", '', 0], 'set all in ISO-8859-1');
T=\xef\xef\xf6
@ -254,14 +275,16 @@ END_OUT
is_deeply(opustags('-i', 'out.opus', "--add=I=\xf9\xce", {mode => ':raw'}), ['', '', 0], 'write tags in ISO-8859-1');
is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<'END_ERR', 0], 'read tags in ISO-8859-1');
is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<"END_ERR", 256], 'read tags in ISO-8859-1 with incompatible characters');
encoder=Lavc58.18.100 libopus
END_OUT
out.opus: error: Invalid or incomplete multibyte or wide character. See --raw.
END_ERR
is_deeply(opustags(qw(out.opus -d TITLE -d ARTIST), {mode => ':raw'}), [<<"END_OUT", '', 0], 'read tags in ISO-8859-1');
encoder=Lavc58.18.100 libopus
TITLE=???
ARTIST=\xe9\xe0\xe7
I=\xf9\xce
END_OUT
warning: Some tags have been transliterated to your system encoding.
END_ERR
$ENV{LC_ALL} = '';
@ -271,4 +294,20 @@ TITLE=七面鳥
ARTIST=éàç
I=ùÎ
END_OUT
unlink('out.opus');
}
####################################################################################################
# Raw editing
is_deeply(opustags(qw(-S gobble.opus -o out.opus --raw -a), "U=\xFE", {in => <<"END_IN", mode => ':raw'}), ['', '', 0], 'raw set-all with binary data');
T=\xFF
END_IN
is_deeply(opustags(qw(out.opus --raw), { mode => ':raw' }), [<<"END_OUT", '', 0], 'raw read');
T=\xFF
U=\xFE
END_OUT
unlink('out.opus');

View File

@ -34,7 +34,7 @@ void check_converter()
{
const char* ephemere_iso = "\xc9\x70\x68\xe9\x6d\xe8\x72\x65";
ot::encoding_converter to_utf8("ISO_8859-1", "UTF-8");
ot::encoding_converter from_utf8("UTF-8", "ISO_8859-1//TRANSLIT");
ot::encoding_converter from_utf8("UTF-8", "ISO_8859-1//IGNORE");
std::string out;
ot::status rc = to_utf8(ephemere_iso, out);
@ -49,10 +49,19 @@ void check_converter()
is(rc, ot::st::badly_encoded, "conversion from bad UTF-8 fails");
}
// Checks ot::shell_escape against literal expected strings. In every expected
// result the input is enclosed in single quotes, an embedded single quote
// appears as '\'' and an embedded bang appears as '\!'.
// NOTE(review): "esape" looks like a typo for "escape"; the run() call in main
// uses the same spelling, so both would have to be renamed together.
void check_shell_esape()
{
is(ot::shell_escape("foo"), "'foo'", "simple string");
is(ot::shell_escape("a'b"), "'a'\\''b'", "string with a simple quote");
is(ot::shell_escape("a!b"), "'a'\\!'b'", "string with a bang");
// Input mixing bangs and single quotes; the description repeats the previous
// case's text, so the two cases share the same label.
is(ot::shell_escape("a!b'c!d'e"), "'a'\\!'b'\\''c'\\!'d'\\''e'", "string with a bang");
}
// Test program entry point: calls plan() (presumably announcing the expected
// number of test groups), runs each group through run() with a label, and
// returns 0.
int main(int argc, char **argv)
{
// NOTE(review): two plan() calls appear back to back; they look like the
// removed and added lines of a diff hunk rather than two live statements.
// Three run() calls follow, which agrees with plan(3) — confirm against the
// actual file.
plan(2);
plan(3);
run(check_partial_files, "test partial files");
run(check_converter, "test encoding converter");
run(check_shell_esape, "test shell escaping");
return 0;
}