From 70e9b576cff0ed865172a27c5f4ee8845974988d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Mangano-Tarumi?= Date: Sun, 9 Dec 2018 14:03:15 -0500 Subject: [PATCH] review the doc, for utf-8 in particular --- opustags.1 | 15 +++++++++------ src/opustags.h | 32 ++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/opustags.1 b/opustags.1 index 2a4bf15..142d207 100644 --- a/opustags.1 +++ b/opustags.1 @@ -1,6 +1,6 @@ -.TH opustags 1 "November 2018" "@PROJECT_NAME@ @PROJECT_VERSION@" +.TH opustags 1 "December 2018" "@PROJECT_NAME@ @PROJECT_VERSION@" .SH NAME -opustags \- Opus comment editor +opustags \- Ogg Opus tag editor .SH SYNOPSIS .B opustags --help .br @@ -14,7 +14,7 @@ opustags \- Opus comment editor .I OUTPUT INPUT .SH DESCRIPTION .PP -\fBopustags\fP can read and edit the comment header of an Opus file. +\fBopustags\fP can read and edit the comment header of an Ogg Opus file. It basically has two modes: read-only, and read-write for tag editing. .PP In read-only mode, only the beginning of \fIINPUT\fP is read, and the tags are @@ -40,9 +40,12 @@ If you want to replace all the tags, you can use the \fB--set-all\fP option whic The format is the same as the one used for output: newline-separated \fIFIELD=Value\fP assignment. All the previously existing tags as deleted. .PP -\fBWarning:\fP the Opus format specifications requires tags to be encoded in -\fBUTF-8\fP. This tool ignores the system locale, assuming the encoding is -set to UTF-8, and assume that tags are already encoded in UTF-8. +The Opus format specifications requires that tags are encoded in UTF-8, so that's the only encoding +opustags supports. If your system encoding is different, the tags are automatically converted to and +from your system locale. When the conversion is lossy, the incompatible characters are +transliterated and a warning is displayed. 
Even if you edit an Opus file whose tags contain +characters unsupported by your system encoding, the original UTF-8 values will be preserved for the +tags you don't explicitly modify. .SH OPTIONS .TP .B \-h, \-\-help diff --git a/src/opustags.h b/src/opustags.h index 94772f7..800f2b7 100644 --- a/src/opustags.h +++ b/src/opustags.h @@ -5,6 +5,7 @@ * * Let's have a quick tour around. The project is split into the following modules: * + * - The system module provides a few generic tools for interacting with the system. * - The ogg module reads and writes Ogg files, letting you manipulate Ogg pages and packets. * - The opus module parses the contents of Ogg packets according to the Opus specifications. * - The cli module implements the main logic of the program. @@ -167,7 +168,11 @@ private: * \{ */ -/** RAII-aware wrapper around libogg's ogg_stream_state. */ +/** + * RAII-aware wrapper around libogg's ogg_stream_state. Though it handles automatic destruction, it + * does not prevent copying or implement move semantics correctly, so it's your responsibility to + * ensure these operations don't happen. + */ struct ogg_logical_stream : ogg_stream_state { ogg_logical_stream(int serialno) { if (ogg_stream_init(this, serialno) != 0) @@ -313,16 +318,16 @@ private: struct opus_tags { /** * OpusTags packets begin with a vendor string, meant to identify the implementation of the - * encoder. It should be an arbitrary UTF-8 string. + * encoder. It is expected to be an arbitrary UTF-8 string. */ std::string vendor; /** - * Comments. These are a list of string following the NAME=Value format. A comment may also - * be called a field, or a tag. + * Comments are strings in the NAME=Value format. A comment may also be called a field, or a + * tag. * - * The field name in vorbis comment is case-insensitive and ASCII, while the value can be - * any valid UTF-8 string. The specification is not too clear for Opus, but let's assume - * it's the same. 
+ * The field name in vorbis comments is usually case-insensitive and ASCII, while the value + * can be any valid UTF-8 string. The specification is not too clear for Opus, but let's + * assume it's the same. */ std::list comments; /** @@ -353,6 +358,10 @@ dynamic_ogg_packet render_tags(const opus_tags& tags); /** * Remove all the comments whose field name is equal to the special one, case-sensitive. + * + * \todo Become case-insensitive. + * \todo Move to module cli. + * \todo Accept fields like X=Y to remove only comments X=Y, instead of all X. */ void delete_comments(opus_tags& tags, const char* field_name); @@ -443,9 +452,10 @@ struct options { status parse_options(int argc, char** argv, options& opt); /** - * Print all the comments, separated by line breaks. Since a comment may - * contain line breaks, this output is not completely reliable, but it fits - * most cases. + * Print all the comments, separated by line breaks. Since a comment may contain line breaks, this + * output is not completely reliable, but it fits most cases. + * + * The comments must be encoded in UTF-8, and are converted to the system locale when printed. * * The output generated is meant to be parseable by #ot::read_tags. */ @@ -453,6 +463,8 @@ void print_comments(const std::list& comments, FILE* output); /** * Parse the comments outputted by #ot::print_comments. + * + * The comments are converted from the system encoding to UTF-8, and returned as UTF-8. */ std::list read_comments(FILE* input);