mirror of
				https://github.com/fmang/opustags.git
				synced 2025-10-31 17:08:12 +01:00 
			
		
		
		
	Introduce --raw for disabling transcoding
This commit is contained in:
		| @@ -60,7 +60,6 @@ More generally, here are a few features that could be added in the future: | ||||
| - Logical stream listing and selection for multiplexed files. | ||||
| - Escaping control characters with --escape. | ||||
| - Dump binary packets with --binary. | ||||
| - Skip encoding conversion with --raw. | ||||
| - Edition of the vendor string. | ||||
| - Edition of the arbitrary binary block past the comments. | ||||
| - Support for OpusTags packets spanning multiple pages (> 64 kB). | ||||
|   | ||||
| @@ -62,5 +62,6 @@ Documentation | ||||
|       -s, --set FIELD=VALUE         replace a comment | ||||
|       -S, --set-all                 import comments from standard input | ||||
|       -e, --edit                    edit tags interactively in VISUAL/EDITOR | ||||
|       --raw                         disable encoding conversion | ||||
|  | ||||
| See the man page, `opustags.1`, for extensive documentation. | ||||
|   | ||||
| @@ -103,6 +103,13 @@ Blank lines and lines starting with \fI#\fP are ignored. | ||||
| Edit tags interactively by spawning the program specified by the EDITOR | ||||
| environment variable. The allowed format is the same as \fB--set-all\fP. | ||||
| If TERM and VISUAL are set, VISUAL takes precedence over EDITOR. | ||||
| .TP | ||||
| .B \-\-raw | ||||
| OpusTags metadata should always be encoded in UTF-8, as per RFC 7845. However, some files may be | ||||
| corrupted or possibly even contain intentional binary data. In that case, --raw lets you edit that | ||||
| kind of binary data without ensuring the validity of the tags encoding. This option may also be | ||||
| useful when your system encoding is different from UTF-8 and you wish to preserve the full UTF-8 | ||||
| character set even though your system cannot display it. | ||||
| .SH EXAMPLES | ||||
| .PP | ||||
| List all the tags in file foo.opus: | ||||
|   | ||||
							
								
								
									
										79
									
								
								src/cli.cc
									
									
									
									
									
								
							
							
						
						
									
										79
									
								
								src/cli.cc
									
									
									
									
									
								
							| @@ -38,6 +38,7 @@ Options: | ||||
|   -s, --set FIELD=VALUE         replace a comment | ||||
|   -S, --set-all                 import comments from standard input | ||||
|   -e, --edit                    edit tags interactively in VISUAL/EDITOR | ||||
|   --raw                         disable encoding conversion | ||||
|  | ||||
| See the man page for extensive documentation. | ||||
| )raw"; | ||||
| @@ -53,6 +54,7 @@ static struct option getopt_options[] = { | ||||
| 	{"delete-all", no_argument, 0, 'D'}, | ||||
| 	{"set-all", no_argument, 0, 'S'}, | ||||
| 	{"edit", no_argument, 0, 'e'}, | ||||
| 	{"raw", no_argument, 0, 'r'}, | ||||
| 	{NULL, 0, 0, 0} | ||||
| }; | ||||
|  | ||||
| @@ -107,6 +109,9 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm | ||||
| 		case 'e': | ||||
| 			opt.edit_interactively = true; | ||||
| 			break; | ||||
| 		case 'r': | ||||
| 			opt.raw = true; | ||||
| 			break; | ||||
| 		case ':': | ||||
| 			return {st::bad_arguments, | ||||
| 			        "Missing value for option '"s + argv[optind - 1] + "'."}; | ||||
| @@ -126,12 +131,14 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm | ||||
| 	} | ||||
|  | ||||
| 	// Convert arguments to UTF-8. | ||||
| 	for (std::list<std::string>* args : { &opt.to_add, &opt.to_delete }) { | ||||
| 		for (std::string& arg : *args) { | ||||
| 			rc = to_utf8(arg, utf8); | ||||
| 			if (rc != ot::st::ok) | ||||
| 				return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message}; | ||||
| 			arg = std::move(utf8); | ||||
| 	if (!opt.raw) { | ||||
| 		for (std::list<std::string>* args : { &opt.to_add, &opt.to_delete }) { | ||||
| 			for (std::string& arg : *args) { | ||||
| 				rc = to_utf8(arg, utf8); | ||||
| 				if (rc != ot::st::ok) | ||||
| 					return {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message}; | ||||
| 				arg = std::move(utf8); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| @@ -159,7 +166,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm | ||||
| 	if (set_all) { | ||||
| 		// Read comments from stdin and prepend them to opt.to_add. | ||||
| 		std::list<std::string> comments; | ||||
| 		auto rc = read_comments(comments_input, comments); | ||||
| 		auto rc = read_comments(comments_input, comments, opt.raw); | ||||
| 		if (rc != st::ok) | ||||
| 			return rc; | ||||
| 		opt.to_add.splice(opt.to_add.begin(), std::move(comments)); | ||||
| @@ -174,7 +181,7 @@ ot::status ot::parse_options(int argc, char** argv, ot::options& opt, FILE* comm | ||||
|  *       callers that don’t escape backslashes. Maybe add options to select a mode between simple, | ||||
|  *       raw, and escaped. | ||||
|  */ | ||||
| void ot::print_comments(const std::list<std::string>& comments, FILE* output) | ||||
| void ot::print_comments(const std::list<std::string>& comments, FILE* output, bool raw) | ||||
| { | ||||
| 	static ot::encoding_converter from_utf8("UTF-8", "//IGNORE"); | ||||
| 	std::string local; | ||||
| @@ -182,25 +189,33 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output) | ||||
| 	bool bad_comments = false; | ||||
| 	bool has_newline = false; | ||||
| 	bool has_control = false; | ||||
| 	for (const std::string& comment : comments) { | ||||
| 		ot::status rc = from_utf8(comment, local); | ||||
| 		if (rc == ot::st::information_lost) { | ||||
| 			info_lost = true; | ||||
| 		} else if (rc != ot::st::ok) { | ||||
| 			bad_comments = true; | ||||
| 			continue; | ||||
| 	for (const std::string& utf8_comment : comments) { | ||||
| 		const std::string* comment; | ||||
| 		// Convert the comment from UTF-8 to the system encoding if relevant. | ||||
| 		if (raw) { | ||||
| 			comment = &utf8_comment; | ||||
| 		} else { | ||||
| 			ot::status rc = from_utf8(utf8_comment, local); | ||||
| 			comment = &local; | ||||
| 			if (rc == ot::st::information_lost) { | ||||
| 				info_lost = true; | ||||
| 			} else if (rc != ot::st::ok) { | ||||
| 				bad_comments = true; | ||||
| 				continue; | ||||
| 			} | ||||
| 		} | ||||
| 		for (unsigned char c : comment) { | ||||
|  | ||||
| 		for (unsigned char c : *comment) { | ||||
| 			if (c == '\n') | ||||
| 				has_newline = true; | ||||
| 			else if (c < 0x20) | ||||
| 				has_control = true; | ||||
| 		} | ||||
| 		fwrite(local.data(), 1, local.size(), output); | ||||
| 		fwrite(comment->data(), 1, comment->size(), output); | ||||
| 		putc('\n', output); | ||||
| 	} | ||||
| 	if (info_lost) | ||||
| 		fputs("warning: Some characters are not supported by your system encoding and have been discarded.\n", stderr); | ||||
| 		fputs("warning: Some characters could not be converted to your system encoding and have been discarded. See --raw.\n", stderr); | ||||
| 	if (bad_comments) | ||||
| 		fputs("warning: Some tags are not properly encoded and have not been displayed.\n", stderr); | ||||
| 	if (has_newline) | ||||
| @@ -210,7 +225,7 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output) | ||||
| 		fputs("warning: Some tags contain control characters.\n", stderr); | ||||
| } | ||||
|  | ||||
| ot::status ot::read_comments(FILE* input, std::list<std::string>& comments) | ||||
| ot::status ot::read_comments(FILE* input, std::list<std::string>& comments, bool raw) | ||||
| { | ||||
| 	static ot::encoding_converter to_utf8("", "UTF-8"); | ||||
| 	comments.clear(); | ||||
| @@ -229,13 +244,17 @@ ot::status ot::read_comments(FILE* input, std::list<std::string>& comments) | ||||
| 			free(line); | ||||
| 			return rc; | ||||
| 		} | ||||
| 		std::string utf8; | ||||
| 		ot::status rc = to_utf8(std::string_view(line, nread), utf8); | ||||
| 		if (rc == ot::st::ok) { | ||||
| 			comments.emplace_back(std::move(utf8)); | ||||
| 		if (raw) { | ||||
| 			comments.emplace_back(line, nread); | ||||
| 		} else { | ||||
| 			free(line); | ||||
| 			return {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message}; | ||||
| 			std::string utf8; | ||||
| 			ot::status rc = to_utf8(std::string_view(line, nread), utf8); | ||||
| 			if (rc == ot::st::ok) { | ||||
| 				comments.emplace_back(std::move(utf8)); | ||||
| 			} else { | ||||
| 				free(line); | ||||
| 				return {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message}; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	free(line); | ||||
| @@ -281,7 +300,7 @@ static ot::status edit_tags(ot::opus_tags& tags, const ot::options& opt) | ||||
| } | ||||
|  | ||||
| /** Spawn VISUAL or EDITOR to edit the given tags. */ | ||||
| static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path) | ||||
| static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::optional<std::string>& base_path, bool raw) | ||||
| { | ||||
| 	const char* editor = nullptr; | ||||
| 	if (getenv("TERM") != nullptr) | ||||
| @@ -299,7 +318,7 @@ static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::option | ||||
| 	if (fd == -1 || (tags_file = fdopen(fd, "w")) == nullptr) | ||||
| 		return {ot::st::standard_error, | ||||
| 		        "Could not open '" + tags_path + "': " + strerror(errno)}; | ||||
| 	ot::print_comments(tags.comments, tags_file); | ||||
| 	ot::print_comments(tags.comments, tags_file, raw); | ||||
| 	if (fclose(tags_file) != 0) | ||||
| 		return {ot::st::standard_error, tags_path + ": fclose error: "s + strerror(errno)}; | ||||
|  | ||||
| @@ -328,7 +347,7 @@ static ot::status edit_tags_interactively(ot::opus_tags& tags, const std::option | ||||
| 	tags_file = fopen(tags_path.c_str(), "re"); | ||||
| 	if (tags_file == nullptr) | ||||
| 		return {ot::st::standard_error, "Error opening " + tags_path + ": " + strerror(errno)}; | ||||
| 	if ((rc = ot::read_comments(tags_file, tags.comments)) != ot::st::ok) { | ||||
| 	if ((rc = ot::read_comments(tags_file, tags.comments, raw)) != ot::st::ok) { | ||||
| 		fprintf(stderr, "warning: Leaving %s on the disk.\n", tags_path.c_str()); | ||||
| 		return rc; | ||||
| 	} | ||||
| @@ -390,7 +409,7 @@ static ot::status process(ot::ogg_reader& reader, ot::ogg_writer* writer, const | ||||
| 			if (writer) { | ||||
| 				if (opt.edit_interactively) { | ||||
| 					fflush(writer->file); // flush before calling the subprocess | ||||
| 					if ((rc = edit_tags_interactively(tags, writer->path)) != ot::st::ok) | ||||
| 					if ((rc = edit_tags_interactively(tags, writer->path, opt.raw)) != ot::st::ok) | ||||
| 						return rc; | ||||
| 				} | ||||
| 				auto packet = ot::render_tags(tags); | ||||
| @@ -398,7 +417,7 @@ static ot::status process(ot::ogg_reader& reader, ot::ogg_writer* writer, const | ||||
| 				if (rc != ot::st::ok) | ||||
| 					return rc; | ||||
| 			} else { | ||||
| 				ot::print_comments(tags.comments, stdout); | ||||
| 				ot::print_comments(tags.comments, stdout, opt.raw); | ||||
| 				break; | ||||
| 			} | ||||
| 		} else { | ||||
|   | ||||
| @@ -466,6 +466,12 @@ struct options { | ||||
| 	 * Options: --add, --set, --set-all | ||||
| 	 */ | ||||
| 	std::list<std::string> to_add; | ||||
| 	/** | ||||
| 	 * Disable encoding conversions. OpusTags are specified to always be encoded as UTF-8, but | ||||
| 	 * if for some reason a specific file contains binary tags that someone would like to | ||||
| 	 * extract and set as-is, encoding conversion would get in the way. | ||||
| 	 */ | ||||
| 	bool raw = false; | ||||
| }; | ||||
|  | ||||
| /** | ||||
| @@ -484,14 +490,14 @@ status parse_options(int argc, char** argv, options& opt, FILE* comments); | ||||
|  * | ||||
|  * The output generated is meant to be parseable by #ot::read_comments. | ||||
|  */ | ||||
| void print_comments(const std::list<std::string>& comments, FILE* output); | ||||
| void print_comments(const std::list<std::string>& comments, FILE* output, bool raw); | ||||
|  | ||||
| /** | ||||
|  * Parse the comments outputted by #ot::print_comments. | ||||
|  * | ||||
|  * Unless raw is set, the comments are converted from the system encoding to UTF-8 and returned as UTF-8; with raw, no encoding conversion is performed. | ||||
|  */ | ||||
| status read_comments(FILE* input, std::list<std::string>& comments); | ||||
| status read_comments(FILE* input, std::list<std::string>& comments, bool raw); | ||||
|  | ||||
| /** | ||||
|  * Remove all comments matching the specified selector, which may either be a field name or a | ||||
|   | ||||
							
								
								
									
										28
									
								
								t/cli.cc
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								t/cli.cc
									
									
									
									
									
								
							| @@ -12,7 +12,7 @@ void check_read_comments() | ||||
| 	{ | ||||
| 		std::string txt = "TITLE=a b c\n\nARTIST=X\nArtist=Y\n"s; | ||||
| 		ot::file input = fmemopen((char*) txt.data(), txt.size(), "r"); | ||||
| 		rc = ot::read_comments(input.get(), comments); | ||||
| 		rc = ot::read_comments(input.get(), comments, false); | ||||
| 		if (rc != ot::st::ok) | ||||
| 			throw failure("could not read comments"); | ||||
| 		auto&& expected = {"TITLE=a b c", "ARTIST=X", "Artist=Y"}; | ||||
| @@ -22,14 +22,23 @@ void check_read_comments() | ||||
| 	{ | ||||
| 		std::string txt = "CORRUPTED=\xFF\xFF\n"s; | ||||
| 		ot::file input = fmemopen((char*) txt.data(), txt.size(), "r"); | ||||
| 		rc = ot::read_comments(input.get(), comments); | ||||
| 		rc = ot::read_comments(input.get(), comments, false); | ||||
| 		if (rc != ot::st::badly_encoded) | ||||
| 			throw failure("did not get the expected error reading corrupted data"); | ||||
| 	} | ||||
| 	{ | ||||
| 		std::string txt = "RAW=\xFF\xFF\n"s; | ||||
| 		ot::file input = fmemopen((char*) txt.data(), txt.size(), "r"); | ||||
| 		rc = ot::read_comments(input.get(), comments, true); | ||||
| 		if (rc != ot::st::ok) | ||||
| 			throw failure("could not read comments"); | ||||
| 		if (comments.front() != "RAW=\xFF\xFF") | ||||
| 			throw failure("parsed user comments did not match expectations"); | ||||
| 	} | ||||
| 	{ | ||||
| 		std::string txt = "MALFORMED\n"s; | ||||
| 		ot::file input = fmemopen((char*) txt.data(), txt.size(), "r"); | ||||
| 		rc = ot::read_comments(input.get(), comments); | ||||
| 		rc = ot::read_comments(input.get(), comments, false); | ||||
| 		if (rc != ot::st::error) | ||||
| 			throw failure("did not get the expected error reading malformed comments"); | ||||
| 	} | ||||
| @@ -90,6 +99,10 @@ void check_good_arguments() | ||||
| 	if (opt.paths_in.size() != 1 || opt.paths_in[0] != "x" || | ||||
| 	    !opt.edit_interactively || !opt.overwrite || !opt.in_place) | ||||
| 		throw failure("unexpected option parsing result for case #4"); | ||||
|  | ||||
| 	opt = parse({"opustags", "-a", "X=\xFF", "--raw", "x"}); | ||||
| 	if (!opt.raw || opt.to_add.front() != "X=\xFF") | ||||
| 		throw failure("--raw did not disable transcoding"); | ||||
| } | ||||
|  | ||||
| void check_bad_arguments() | ||||
| @@ -139,6 +152,15 @@ void check_bad_arguments() | ||||
| 	error_case({"opustags", "--edit", "x", "-i", "-d", "X"}, "Cannot mix --edit with -adDsS.", "mixing -e and -d"); | ||||
| 	error_case({"opustags", "--edit", "x", "-i", "-D"}, "Cannot mix --edit with -adDsS.", "mixing -e and -D"); | ||||
| 	error_case({"opustags", "--edit", "x", "-i", "-S"}, "Cannot mix --edit with -adDsS.", "mixing -e and -S"); | ||||
| 	error_case({"opustags", "-d", "\xFF", "x"}, | ||||
| 	           "Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.", | ||||
| 	           "-d with binary data"); | ||||
| 	error_case({"opustags", "-a", "X=\xFF", "x"}, | ||||
| 	           "Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.", | ||||
| 	           "-a with binary data"); | ||||
| 	error_case({"opustags", "-s", "X=\xFF", "x"}, | ||||
| 	           "Could not encode argument into UTF-8: Some characters could not be converted into the target encoding.", | ||||
| 	           "-s with binary data"); | ||||
| } | ||||
|  | ||||
| static void check_delete_comments() | ||||
|   | ||||
							
								
								
									
										25
									
								
								t/opustags.t
									
									
									
									
									
								
							
							
						
						
									
										25
									
								
								t/opustags.t
									
									
									
									
									
								
							| @@ -4,7 +4,7 @@ use strict; | ||||
| use warnings; | ||||
| use utf8; | ||||
|  | ||||
| use Test::More tests => 47; | ||||
| use Test::More tests => 50; | ||||
|  | ||||
| use Digest::MD5; | ||||
| use File::Basename; | ||||
| @@ -72,6 +72,7 @@ Options: | ||||
|   -s, --set FIELD=VALUE         replace a comment | ||||
|   -S, --set-all                 import comments from standard input | ||||
|   -e, --edit                    edit tags interactively in VISUAL/EDITOR | ||||
|   --raw                         disable encoding conversion | ||||
|  | ||||
| See the man page for extensive documentation. | ||||
| EOF | ||||
| @@ -279,7 +280,7 @@ TITLE= | ||||
| ARTIST=\xe9\xe0\xe7 | ||||
| I=\xf9\xce | ||||
| END_OUT | ||||
| warning: Some characters are not supported by your system encoding and have been discarded. | ||||
| warning: Some characters could not be converted to your system encoding and have been discarded. See --raw. | ||||
| END_ERR | ||||
|  | ||||
| $ENV{LC_ALL} = ''; | ||||
| @@ -291,3 +292,23 @@ ARTIST=éàç | ||||
| I=ùÎ | ||||
| END_OUT | ||||
| } | ||||
|  | ||||
|  | ||||
| #################################################################################################### | ||||
| # Raw edition | ||||
|  | ||||
| is_deeply(opustags(qw(-S out.opus -i --raw -a), "U=\xFE", {in => <<"END_IN", mode => ':raw'}), ['', '', 0], 'raw set-all with binary data'); | ||||
| T=\xFF | ||||
| END_IN | ||||
|  | ||||
| is_deeply(opustags(qw(out.opus)), [<<"END_OUT", <<'END_ERR', 0], 'default read with binary data'); | ||||
| T= | ||||
| U= | ||||
| END_OUT | ||||
| warning: Some characters could not be converted to your system encoding and have been discarded. See --raw. | ||||
| END_ERR | ||||
|  | ||||
| is_deeply(opustags(qw(out.opus --raw), { mode => ':raw' }), [<<"END_OUT", '', 0], 'raw read'); | ||||
| T=\xFF | ||||
| U=\xFE | ||||
| END_OUT | ||||
|   | ||||
		Reference in New Issue
	
	Block a user