Release 1.6.0

Make encoding errors fatal
With --raw there is a workaround. The tolerant approach was cool and nice until you want to edit something non-interactively and get the warning telling you you might have lost data after the file was written. Failing fast is most likely the better option here.
2025-07-06 17:47:51 +02:00 · 2021-01-01 11:41:03 +01:00 · 2020-12-27 10:55:25 +01:00 · 2020-12-27 10:55:20 +01:00 · 2020-12-26 16:51:36 +01:00 · 2020-12-26 13:00:20 +01:00
12 changed files with 179 additions and 89 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,6 +1,15 @@
 opustags changelog
 ==================

+1.6.0 - 2021-01-01
+------------------
+
+- UTF-8 conversion errors are now fatal.
+- Introduce --raw for disabling encoding conversions.
+- Improve platform compatibility.
+
+This also happens to be opustags’s 8-year anniversary!
+
 1.5.1 - 2020-11-21
 ------------------

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.9)

 project(
 	opustags
-	VERSION 1.5.1
+	VERSION 1.6.0
 	LANGUAGES CXX
 )

@ -26,6 +26,10 @@ include(CheckIncludeFileCXX)
 check_include_file_cxx(endian.h HAVE_ENDIAN_H)
 check_include_file_cxx(sys/endian.h HAVE_SYS_ENDIAN_H)

+include(CheckStructHasMember)
+check_struct_has_member("struct stat" st_mtim sys/stat.h HAVE_STAT_ST_MTIM LANGUAGE CXX)
+check_struct_has_member("struct stat" st_mtimespec sys/stat.h HAVE_STAT_ST_MTIMESPEC LANGUAGE CXX)
+
 configure_file(src/config.h.in config.h @ONLY)
 include_directories(BEFORE src "${CMAKE_BINARY_DIR}" ${OGG_INCLUDE_DIRS} ${Iconv_INCLUDE_DIRS})

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -60,7 +60,6 @@ More generally, here are a few features that could be added in the future:
 - Logical stream listing and selection for multiplexed files.
 - Escaping control characters with --escape.
 - Dump binary packets with --binary.
- Skip encoding conversion with --raw.
 - Edition of the vendor string.
 - Edition of the arbitrary binary block past the comments.
 - Support for OpusTags packets spanning multiple pages (> 64 kB).
--- a/README.md
+++ b/README.md
@ -62,5 +62,6 @@ Documentation
      -s, --set FIELD=VALUE         replace a comment
      -S, --set-all                 import comments from standard input
      -e, --edit                    edit tags interactively in VISUAL/EDITOR
+      --raw                         disable encoding conversion

 See the man page, `opustags.1`, for extensive documentation.
--- a/opustags.1
+++ b/opustags.1
@ -103,6 +103,13 @@ Blank lines and lines starting with \fI#\fP are ignored.
 Edit tags interactively by spawning the program specified by the EDITOR
 environment variable. The allowed format is the same as \fB--set-all\fP.
 If TERM and VISUAL are set, VISUAL takes precedence over EDITOR.
+.TP
+.B \-\-raw
+OpusTags metadata should always be encoded in UTF-8, as per RFC 7845. However, some files may be
+corrupted or possibly even contain intentional binary data. In that case, --raw lets you edit that
+kind of binary data without ensuring the validity of the tags encoding. This option may also be
+useful when your system encoding is different from UTF-8 and you wish to preserve the full UTF-8
+character set even though your system cannot display it.
 .SH EXAMPLES
 .PP
 List all the tags in file foo.opus:
--- a/src/cli.cc
+++ b/src/cli.cc
@ -6,7 +6,6 @@
 * this module from the main one is to allow easy testing.
 */

-#include <config.h>
 #include <opustags.h>

 #include <errno.h>
@ -39,6 +38,7 @@ Options:
  -s, --set FIELD=VALUE         replace a comment
  -S, --set-all                 import comments from standard input
  -e, --edit                    edit tags interactively in VISUAL/EDITOR
+  --raw                         disable encoding conversion

 See the man page for extensive documentation.
 )raw";
@ -54,6 +54,7 @@ static struct option getopt_options[] = {
 	{"delete-all", no_argument, 0, 'D'},
 	{"set-all", no_argument, 0, 'S'},
 	{"edit", no_argument, 0, 'e'},
+	{"raw", no_argument, 0, 'r'},
 	{NULL, 0, 0, 0}
 };

@ -61,7 +62,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
 {
 	static ot::encoding_converter to_utf8("", "UTF-8");
 	std::string utf8;
-	std::string::size_type equal;
+	const char* equal;
 	ot::status rc;
 	bool set_all = false;
 	opt = {};
@ -87,21 +88,16 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
 			opt.overwrite = true;
 			break;
 		case 'd':
-			rc = to_utf8(optarg, strlen(optarg), utf8);
-			if (rc != ot::st::ok)
-				return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
-			opt.to_delete.emplace_back(std::move(utf8));
+			opt.to_delete.emplace_back(optarg);
 			break;
 		case 'a':
 		case 's':
-			rc = to_utf8(optarg, strlen(optarg), utf8);
-			if (rc != ot::st::ok)
-				return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
-			if ((equal = utf8.find('=')) == std::string::npos)
+			equal = strchr(optarg, '=');
+			if (equal == nullptr)
 				return {st::bad_arguments, "Comment does not contain an equal sign: "s + optarg + "."};
 			if (c == 's')
-				opt.to_delete.emplace_back(utf8.substr(0, equal));
-			opt.to_add.emplace_back(std::move(utf8));
+				opt.to_delete.emplace_back(optarg, equal - optarg);
+			opt.to_add.emplace_back(optarg);
 			break;
 		case 'S':
 			opt.delete_all = true;
@ -113,6 +109,9 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
 		case 'e':
 			opt.edit_interactively = true;
 			break;
+		case 'r':
+			opt.raw = true;
+			break;
 		case ':':
 			return {st::bad_arguments,
 			        "Missing value for option '"s + argv[optind - 1] + "'."};
@ -131,6 +130,18 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
 		opt.paths_in.emplace_back(argv[i]);
 	}

+	// Convert arguments to UTF-8.
+	if (!opt.raw) {
+		for (std::list<std::string>* args : { &opt.to_add, &opt.to_delete }) {
+			for (std::string& arg : *args) {
+				rc = to_utf8(arg, utf8);
+				if (rc != ot::st::ok)
+					return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
+				arg = std::move(utf8);
+			}
+		}
+	}
+
 	if (opt.in_place && opt.path_out)
 		return {st::bad_arguments, "Cannot combine --in-place and --output."};

@ -155,7 +166,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
 	if (set_all) {
 		// Read comments from stdin and prepend them to opt.to_add.
 		std::list<std::string> comments;
-		auto rc = read_comments(comments_input, comments);
+		auto rc = read_comments(comments_input, comments, opt.raw);
 		if (rc != st::ok)
 			return rc;
 		opt.to_add.splice(opt.to_add.begin(), std::move(comments));
@ -170,43 +181,43 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm
 *       callers that don’t escape backslashes. Maybe add options to select a mode between simple,
 *       raw, and escaped.
 */
-void ot::print_comments(const std::list<std::string>& comments, FILE* output)
+ot::status ot::print_comments(const std::list<std::string>& comments, FILE* output, bool raw)
 {
-	static ot::encoding_converter from_utf8("UTF-8", "//TRANSLIT");
+	static ot::encoding_converter from_utf8("UTF-8", "");
 	std::string local;
-	bool info_lost = false;
-	bool bad_comments = false;
 	bool has_newline = false;
 	bool has_control = false;
-	for (const std::string& comment : comments) {
-		ot::status rc = from_utf8(comment, local);
-		if (rc == ot::st::information_lost) {
-			info_lost = true;
-		} else if (rc != ot::st::ok) {
-			bad_comments = true;
-			continue;
+	for (const std::string& utf8_comment : comments) {
+		const std::string* comment;
+		// Convert the comment from UTF-8 to the system encoding if relevant.
+		if (raw) {
+			comment = &utf8_comment;
+		} else {
+			ot::status rc = from_utf8(utf8_comment, local);
+			comment = &local;
+			if (rc != ot::st::ok) {
+				rc.message += " See --raw.";
+				return rc;
+			}
 		}
-		for (unsigned char c : comment) {
+
+		for (unsigned char c : *comment) {
 			if (c == '\n')
 				has_newline = true;
 			else if (c < 0x20)
 				has_control = true;
 		}
-		fwrite(local.data(), 1, local.size(), output);
+		fwrite(comment->data(), 1, comment->size(), output);
 		putc('\n', output);
 	}
-	if (info_lost)
-		fputs("warning: Some tags have been transliterated to your system encoding.\n", stderr);
-	if (bad_comments)
-		fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr);
 	if (has_newline)
-		fputs("warning: Some tags contain newline characters. "
-		      "These are not supported by --set-all.\n", stderr);
+		fputs("warning: Some tags contain unsupported newline characters.\n", stderr);
 	if (has_control)
 		fputs("warning: Some tags contain control characters.\n", stderr);
+	return st::ok;
 }

-ot::status ot::read_comments(FILE* input, std::list<std::string>& comments)
+ot::status ot::read_comments(FILE* input, std::list<std::string>& comments, bool raw)
 {
 	static ot::encoding_converter to_utf8("", "UTF-8");
 	comments.clear();
@ -225,13 +236,17 @@ ot::status ot::read_comments(FILE* input, std::list<std::string>& comments)
 			free(line);
 			return rc;
 		}
-		std::string utf8;
-		ot::status rc = to_utf8(line, nread, utf8);
-		if (rc == ot::st::ok) {
-			comments.emplace_back(std::move(utf8));
+		if (raw) {
+			comments.emplace_back(line, nread);
 		} else {
-			free(line);
-			return {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message};
+			std::string utf8;
+			ot::status rc = to_utf8(std::string_view(line, nread), utf8);
+			if (rc == ot::st::ok) {
+				comments.emplace_back(std::move(utf8));
+			} else {
+				free(line);
+				return {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message};
+			}
 		}
 	}
 	free(line);
@ -277,7 +292,7 @@ static ot::status edit_tags(ot::opus_tags& tags, const ot::options& opt)
 }

 /** Spawn VISUAL or EDITOR to edit the given tags. */
-static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path)
+static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path, bool raw)
 {
 	const char* editor = nullptr;
 	if (getenv("TERM") != nullptr)
@ -289,18 +304,18 @@ static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::option
 		        "No editor specified in environment variable VISUAL or EDITOR."};

 	// Building the temporary tags file.
+	ot::status rc;
 	std::string tags_path = base_path.value_or("tags") + ".XXXXXX.opustags";
 	int fd = mkstemps(const_cast<char*>(tags_path.data()), 9);
-	FILE* tags_file;
+	ot::file tags_file;
 	if (fd == -1 || (tags_file = fdopen(fd, "w")) == nullptr)
 		return {ot::st::standard_error,
 		        "Could not open '" + tags_path + "': " + strerror(errno)};
-	ot::print_comments(tags.comments, tags_file);
-	if (fclose(tags_file) != 0)
-		return {ot::st::standard_error, tags_path + ": fclose error: "s + strerror(errno)};
+	if ((rc = ot::print_comments(tags.comments, tags_file.get(), raw)) != ot::st::ok)
+		return rc;
+	tags_file.reset();

 	// Spawn the editor, and watch the modification timestamps.
-	ot::status rc;
 	timespec before, after;
 	if ((rc = ot::get_file_timestamp(tags_path.c_str(), before)) != ot::st::ok)
 		return rc;
@ -324,11 +339,11 @@ static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::option
 	tags_file = fopen(tags_path.c_str(), "re");
 	if (tags_file == nullptr)
 		return {ot::st::standard_error, "Error opening " + tags_path + ": " + strerror(errno)};
-	if ((rc = ot::read_comments(tags_file, tags.comments)) != ot::st::ok) {
+	if ((rc = ot::read_comments(tags_file.get(), tags.comments, raw)) != ot::st::ok) {
 		fprintf(stderr, "warning: Leaving %s on the disk.\n", tags_path.c_str());
 		return rc;
 	}
-	fclose(tags_file);
+	tags_file.reset();

 	// Remove the temporary tags file only on success, because unlike the
 	// partial Ogg file that is irrecoverable, the edited tags file
@ -386,7 +401,7 @@ static ot::status process(ot::ogg_reader& reader, ot::ogg_writer* writer, const
 			if (writer) {
 				if (opt.edit_interactively) {
 					fflush(writer->file); // flush before calling the subprocess
-					if ((rc = edit_tags_interactively(tags, writer->path)) != ot::st::ok)
+					if ((rc = edit_tags_interactively(tags, writer->path, opt.raw)) != ot::st::ok)
 						return rc;
 				}
 				auto packet = ot::render_tags(tags);
@ -394,7 +409,8 @@ static ot::status process(ot::ogg_reader& reader, ot::ogg_writer* writer, const
 				if (rc != ot::st::ok)
 					return rc;
 			} else {
-				ot::print_comments(tags.comments, stdout);
+				if ((rc = ot::print_comments(tags.comments, stdout, opt.raw)) != ot::st::ok)
+					return rc;
 				break;
 			}
 		} else {
--- a/src/config.h.in
+++ b/src/config.h.in
@ -3,3 +3,5 @@

 #cmakedefine HAVE_ENDIAN_H @HAVE_ENDIAN_H@
 #cmakedefine HAVE_SYS_ENDIAN_H @HAVE_SYS_ENDIAN_H@
+#cmakedefine HAVE_STAT_ST_MTIM @HAVE_STAT_ST_MTIM@
+#cmakedefine HAVE_STAT_ST_MTIMESPEC @HAVE_STAT_ST_MTIMESPEC@
--- a/src/opustags.h
+++ b/src/opustags.h
@ -24,6 +24,8 @@

 #pragma once

+#include <config.h>
+
 #include <iconv.h>
 #include <ogg/ogg.h>
 #include <stdio.h>
@ -63,7 +65,6 @@ enum class st {
 	cancel,
 	/* System */
 	badly_encoded,
-	information_lost,
 	child_process_failed,
 	/* Ogg */
 	bad_stream,
@ -156,12 +157,9 @@ public:
 	~encoding_converter();
 	/**
 	 * Convert text using iconv. If the input sequence is invalid, return #st::badly_encoded and
-	 * abort the processing. If some character could not be converted perfectly, keep converting
-	 * the string and finally return #st::information_lost.
+	 * abort the processing, leaving the output string in an undefined state.
 	 */
-	status operator()(const std::string& in, std::string& out)
-		{ return (*this)(in.data(), in.size(), out); }
-	status operator()(const char* in, size_t n, std::string& out);
+	status operator()(std::string_view in, std::string& out);
 private:
 	iconv_t cd; /**< conversion descriptor */
 };
@ -450,7 +448,7 @@ struct options {
 	 *
 	 * Option: --delete, --set
 	 */
-	std::vector<std::string> to_delete;
+	std::list<std::string> to_delete;
 	/**
 	 * Delete all the existing comments.
 	 *
@ -466,6 +464,12 @@ struct options {
 	 * Options: --add, --set, --set-all
 	 */
 	std::list<std::string> to_add;
+	/**
+	 * Disable encoding conversions. OpusTags are specified to always be encoded as UTF-8, but
+	 * if for some reason a specific file contains binary tags that someone would like to
+	 * extract and set as-is, encoding conversion would get in the way.
+	 */
+	bool raw = false;
 };

 /**
@ -484,14 +488,14 @@ status parse_options(int argc, char** argv, options& opt, FILE* comments);
 *
 * The output generated is meant to be parseable by #ot::read_comments.
 */
-void print_comments(const std::list<std::string>& comments, FILE* output);
+status print_comments(const std::list<std::string>& comments, FILE* output, bool raw);

 /**
 * Parse the comments outputted by #ot::print_comments.
 *
 * The comments are converted from the system encoding to UTF-8, and returned as UTF-8.
 */
-status read_comments(FILE* input, std::list<std::string>& comments);
+status read_comments(FILE* input, std::list<std::string>& comments, bool raw);

 /**
 * Remove all comments matching the specified selector, which may either be a field name or a
--- a/src/system.cc
+++ b/src/system.cc
@ -104,36 +104,35 @@ ot::encoding_converter::~encoding_converter()
 	iconv_close(cd);
 }

-ot::status ot::encoding_converter::operator()(const char* in, size_t n, std::string& out)
+ot::status ot::encoding_converter::operator()(std::string_view in, std::string& out)
 {
 	iconv(cd, nullptr, nullptr, nullptr, nullptr);
 	out.clear();
-	out.reserve(n);
-	char* in_cursor = const_cast<char*>(in);
-	size_t in_left = n;
+	out.reserve(in.size());
+	char* in_cursor = const_cast<char*>(in.data());
+	size_t in_left = in.size();
 	constexpr size_t chunk_size = 1024;
 	char chunk[chunk_size];
-	bool lost_information = false;
 	for (;;) {
 		char *out_cursor = chunk;
 		size_t out_left = chunk_size;
 		size_t rc = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left);
-		if (rc == (size_t) -1 && errno != E2BIG)
+
+		if (rc == (size_t) -1 && errno == E2BIG) {
+			// Loop normally.
+		} else if (rc == (size_t) -1) {
+			return {ot::st::badly_encoded, strerror(errno) + "."s};
+		} else if (rc != 0) {
 			return {ot::st::badly_encoded,
-			        "Could not convert string '" + std::string(in, n) + "': " +
-			        strerror(errno)};
-		if (rc != 0)
-			lost_information = true;
+				"Some characters could not be converted into the target encoding."};
+		}
+
 		out.append(chunk, out_cursor - chunk);
 		if (in_cursor == nullptr)
 			break;
 		else if (in_left == 0)
 			in_cursor = nullptr;
 	}
-	if (lost_information)
-		return {ot::st::information_lost,
-		        "Some characters could not be converted into the target encoding "
-		        "in string '" + std::string(in, n) + "'."};
 	return ot::st::ok;
 }

@ -179,6 +178,13 @@ ot::status ot::get_file_timestamp(const char* path, timespec& mtime)
 	struct stat st;
 	if (stat(path, &st) == -1)
 		return {st::standard_error, path + ": stat error: "s + strerror(errno)};
-	mtime = st.st_mtim; // more precise than st_mtime
+#if defined(HAVE_STAT_ST_MTIM)
+	mtime = st.st_mtim;
+#elif defined(HAVE_STAT_ST_MTIMESPEC)
+	mtime = st.st_mtimespec;
+#else
+	mtime.tv_sec = st.st_mtime;
+	mtime.tv_nsec = st.st_mtimensec;
+#endif
 	return st::ok;
 }
--- a/t/cli.cc
+++ b/t/cli.cc
@ -12,7 +12,7 @@ void check_read_comments()
 	{
 		std::string txt = "TITLE=a b c\n\nARTIST=X\nArtist=Y\n"s;
 		ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
-		rc = ot::read_comments(input.get(), comments);
+		rc = ot::read_comments(input.get(), comments, false);
 		if (rc != ot::st::ok)
 			throw failure("could not read comments");
 		auto&& expected = {"TITLE=a b c", "ARTIST=X", "Artist=Y"};
@ -22,14 +22,23 @@ void check_read_comments()
 	{
 		std::string txt = "CORRUPTED=\xFF\xFF\n"s;
 		ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
-		rc = ot::read_comments(input.get(), comments);
+		rc = ot::read_comments(input.get(), comments, false);
 		if (rc != ot::st::badly_encoded)
 			throw failure("did not get the expected error reading corrupted data");
 	}
+	{
+		std::string txt = "RAW=\xFF\xFF\n"s;
+		ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
+		rc = ot::read_comments(input.get(), comments, true);
+		if (rc != ot::st::ok)
+			throw failure("could not read comments");
+		if (comments.front() != "RAW=\xFF\xFF")
+			throw failure("parsed user comments did not match expectations");
+	}
 	{
 		std::string txt = "MALFORMED\n"s;
 		ot::file input = fmemopen((char*) txt.data(), txt.size(), "r");
-		rc = ot::read_comments(input.get(), comments);
+		rc = ot::read_comments(input.get(), comments, false);
 		if (rc != ot::st::error)
 			throw failure("did not get the expected error reading malformed comments");
 	}
@ -71,7 +80,7 @@ void check_good_arguments()
 	opt = parse({"opustags", "x", "--output", "y", "-D", "-s", "X=Y Z", "-d", "a=b"});
 	if (opt.paths_in.size() != 1 || opt.paths_in.front() != "x" || !opt.path_out ||
 	    opt.path_out != "y" || !opt.delete_all || opt.overwrite || opt.to_delete.size() != 2 ||
-	    opt.to_delete[0] != "X" || opt.to_delete[1] != "a=b" ||
+	    opt.to_delete.front() != "X" || *std::next(opt.to_delete.begin()) != "a=b" ||
 	    opt.to_add != std::list<std::string>{"X=Y Z"})
 		throw failure("unexpected option parsing result for case #1");

@ -90,6 +99,10 @@ void check_good_arguments()
 	if (opt.paths_in.size() != 1 || opt.paths_in[0] != "x" ||
 	    !opt.edit_interactively || !opt.overwrite || !opt.in_place)
 		throw failure("unexpected option parsing result for case #4");
+
+	opt = parse({"opustags", "-a", "X=\xFF", "--raw", "x"});
+	if (!opt.raw || opt.to_add.front() != "X=\xFF")
+		throw failure("--raw did not disable transcoding");
 }

 void check_bad_arguments()
@ -139,6 +152,15 @@ void check_bad_arguments()
 	error_case({"opustags", "--edit", "x", "-i", "-d", "X"}, "Cannot mix --edit with -adDsS.", "mixing -e and -d");
 	error_case({"opustags", "--edit", "x", "-i", "-D"}, "Cannot mix --edit with -adDsS.", "mixing -e and -D");
 	error_case({"opustags", "--edit", "x", "-i", "-S"}, "Cannot mix --edit with -adDsS.", "mixing -e and -S");
+	error_case({"opustags", "-d", "\xFF", "x"},
+	           "Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character.",
+	           "-d with binary data");
+	error_case({"opustags", "-a", "X=\xFF", "x"},
+	           "Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character.",
+	           "-a with binary data");
+	error_case({"opustags", "-s", "X=\xFF", "x"},
+	           "Could not encode argument into UTF-8: Invalid or incomplete multibyte or wide character.",
+	           "-s with binary data");
 }

 static void check_delete_comments()
--- a/t/opustags.t
+++ b/t/opustags.t
@ -4,7 +4,7 @@ use strict;
 use warnings;
 use utf8;

-use Test::More tests => 47;
+use Test::More tests => 50;

 use Digest::MD5;
 use File::Basename;
@ -72,6 +72,7 @@ Options:
  -s, --set FIELD=VALUE         replace a comment
  -S, --set-all                 import comments from standard input
  -e, --edit                    edit tags interactively in VISUAL/EDITOR
+  --raw                         disable encoding conversion

 See the man page for extensive documentation.
 EOF
@ -161,7 +162,7 @@ is_deeply(opustags('out.opus', '-D', '-a', "X=foo\nbar\tquux"), [<<'END_OUT', <<
 X=foo
 bar	quux
 END_OUT
-warning: Some tags contain newline characters. These are not supported by --set-all.
+warning: Some tags contain unsupported newline characters.
 warning: Some tags contain control characters.
 END_ERR

@ -255,15 +256,16 @@ unlink('muxed.ogg');
 ####################################################################################################
 # Locale

-my $locale = 'fr_FR.iso88591';
+my $locale = 'en_US.iso88591';
 my @all_locales = split(' ', `locale -a`);

 SKIP: {
-skip "locale $locale is not present", 4 unless (any { $_ eq $locale } @all_locales);
+skip "locale $locale is not present", 5 unless (any { $_ eq $locale } @all_locales);

 opustags(qw(gobble.opus -a TITLE=七面鳥 -a ARTIST=éàç -o out.opus -y));

 local $ENV{LC_ALL} = $locale;
+local $ENV{LANGUAGE} = '';

 is_deeply(opustags(qw(-S out.opus), {in => <<"END_IN", mode => ':raw'}), [<<"END_OUT", '', 0], 'set all in ISO-8859-1');
 T=\xef\xef\xf6
@ -273,14 +275,16 @@ END_OUT

 is_deeply(opustags('-i', 'out.opus', "--add=I=\xf9\xce", {mode => ':raw'}), ['', '', 0], 'write tags in ISO-8859-1');

-is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<'END_ERR', 0], 'read tags in ISO-8859-1');
+is_deeply(opustags('out.opus', {mode => ':raw'}), [<<"END_OUT", <<"END_ERR", 256], 'read tags in ISO-8859-1 with incompatible characters');
+encoder=Lavc58.18.100 libopus
+END_OUT
+out.opus: error: Invalid or incomplete multibyte or wide character. See --raw.
+END_ERR
+
+is_deeply(opustags(qw(out.opus -d TITLE -d ARTIST), {mode => ':raw'}), [<<"END_OUT", '', 0], 'read tags in ISO-8859-1');
 encoder=Lavc58.18.100 libopus
-TITLE=???
-ARTIST=\xe9\xe0\xe7
 I=\xf9\xce
 END_OUT
-warning: Some tags have been transliterated to your system encoding.
-END_ERR

 $ENV{LC_ALL} = '';

@ -290,4 +294,20 @@ TITLE=七面鳥
 ARTIST=éàç
 I=ùÎ
 END_OUT
+
+unlink('out.opus');
 }
+
+####################################################################################################
+# Raw edition
+
+is_deeply(opustags(qw(-S gobble.opus -o out.opus --raw -a), "U=\xFE", {in => <<"END_IN", mode => ':raw'}), ['', '', 0], 'raw set-all with binary data');
+T=\xFF
+END_IN
+
+is_deeply(opustags(qw(out.opus --raw), { mode => ':raw' }), [<<"END_OUT", '', 0], 'raw read');
+T=\xFF
+U=\xFE
+END_OUT
+
+unlink('out.opus');
--- a/t/system.cc
+++ b/t/system.cc
@ -34,7 +34,7 @@ void check_converter()
 {
 	const char* ephemere_iso = "\xc9\x70\x68\xe9\x6d\xe8\x72\x65";
 	ot::encoding_converter to_utf8("ISO_8859-1", "UTF-8");
-	ot::encoding_converter from_utf8("UTF-8", "ISO_8859-1//TRANSLIT");
+	ot::encoding_converter from_utf8("UTF-8", "ISO_8859-1//IGNORE");
 	std::string out;

 	ot::status rc = to_utf8(ephemere_iso, out);
Author	SHA1	Message	Date
Frédéric Mangano	2d5db09bda	Release 1.6.0	2021-01-01 11:41:03 +01:00
Frédéric Mangano	3e0b3fa56e	Make encoding errors fatal With --raw there is a workaround. The tolerant approach was cool and nice until you want to edit something non-interactively and get the warning telling you you might have lost data after the file was written. Failing fast is most likely the better option here.	2020-12-27 10:55:25 +01:00
Frédéric Mangano	3e7b42062a	Discard incompatible comments entirely //IGNORE is not portable either. Now that we have --raw, it’s less of an issue though.	2020-12-27 10:55:20 +01:00
Frédéric Mangano	4cae6c44ee	Introduce --raw for disabling transcoding	2020-12-26 16:51:36 +01:00
Frédéric Mangano	6db7f07bd5	Factor CLI argument transcoding	2020-12-26 13:00:20 +01:00
Frédéric Mangano	fd5fa3cd5f	Make ot::encoding_converter use string views	2020-12-26 12:42:37 +01:00
Frédéric Mangano	c43704a0a7	Use //IGNORE instead of //TRANSLIT when transcoding //TRANSLIT is not well supported, and in most cases there’s not much transliteration can help with when the encoding is limiting. Besides, it sounds reasonable to assume most people use UTF-8 nowadays.	2020-12-26 12:30:44 +01:00
Frédéric Mangano	f98208c1a1	Support the various stat structures across systems	2020-11-25 20:07:23 +01:00
Frédéric Mangano	64fc6f8f6d	Include config.h globally	2020-11-25 20:05:46 +01:00