mirror of
https://github.com/mihonapp/mihon.git
synced 2025-10-22 19:18:54 +02:00
Improve handling of downloads for chapters with same metadata and optionally for OSes that don't support Unicode in filename (#2305)
Co-authored-by: jkim <jhskim@hotmail.com> Co-authored-by: fatotak <111342761+fatotak@users.noreply.github.com> Co-authored-by: MajorTanya <39014446+MajorTanya@users.noreply.github.com> Co-authored-by: AntsyLich <59261191+AntsyLich@users.noreply.github.com>
This commit is contained in:
@@ -9,6 +9,9 @@ import androidx.core.content.ContextCompat
|
||||
import com.hippo.unifile.UniFile
|
||||
import eu.kanade.tachiyomi.util.lang.Hash
|
||||
import java.io.File
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.CharBuffer
|
||||
import java.nio.charset.CodingErrorAction
|
||||
|
||||
object DiskUtil {
|
||||
|
||||
@@ -102,26 +105,84 @@ object DiskUtil {
|
||||
}
|
||||
|
||||
/**
 * Transform a filename fragment to make it safe to use on almost
 * all commonly used filesystems. You can pass an entire filename,
 * or just part of one, in case you want a specific part of a long
 * filename to be truncated, rather than the end of it.
 *
 * Characters that are potentially unsafe for some filesystems are
 * replaced with underscores. This includes the standard ones from
 * https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
 * but does allow any other valid Unicode code point.
 *
 * Excessively long filenames are truncated, by default to 240
 * bytes. Note that the truncation is based on bytes rather than
 * characters (code points), because this is what is relevant to
 * filesystem restrictions in most cases.
 *
 * Leading and trailing periods and spaces are stripped, which also avoids
 * the creation of hidden files by default. If a hidden file is desired,
 * a period can be prepended to the return value from this function.
 *
 * If the optional argument [disallowNonAscii] is set to true,
 * then ANYTHING outside the ASCII range is replaced not with underscores,
 * but with the lowercase hexadecimal encoding of its UTF-8 bytes. This is
 * to make it so that distinct non-English titles of things remain distinct,
 * since not all places where this function is used also take care of
 * disambiguation. A surrogate pair is encoded as one code point, so
 * characters outside the Basic Multilingual Plane stay distinct as well.
 *
 * We could instead replace only non-ASCII characters known to
 * be problematic, but so far nobody with a non-Unicode-compliant
 * device has been able to provide either directions to reproduce
 * their issue or any documentation or tests that would allow us
 * to determine which characters are problems and which are not.
 *
 * @param origName the filename, or filename fragment, to sanitize.
 * @param maxBytes the maximum size of the result in UTF-8 bytes.
 * @param disallowNonAscii whether to hex-encode everything outside the ASCII range.
 * @return the sanitized name, or `"(invalid)"` if nothing is left after stripping
 * periods and spaces from both ends.
 */
fun buildValidFilename(
    origName: String,
    maxBytes: Int = MAX_FILE_NAME_BYTES,
    disallowNonAscii: Boolean = false,
): String {
    val name = origName.trim('.', ' ')
    if (name.isEmpty()) {
        return "(invalid)"
    }
    val sb = StringBuilder(name.length)
    var index = 0
    while (index < name.length) {
        val c = name[index]
        if (disallowNonAscii && c.code >= 0x80) {
            // Encode a valid surrogate pair as a single unit. Encoding each half on its own
            // turns both halves into '?' ("3f3f"), which makes every such character collide.
            val isPair = c.isHighSurrogate() &&
                index + 1 < name.length &&
                name[index + 1].isLowSurrogate()
            val end = if (isPair) index + 2 else index + 1
            sb.append(
                name.substring(index, end).toByteArray(Charsets.UTF_8).toHexString(
                    HexFormat {
                        upperCase = false
                    },
                ),
            )
            index = end
        } else {
            sb.append(if (isValidFatFilenameChar(c)) c else '_')
            index++
        }
    }
    // Truncate by encoded bytes, not chars: filesystem limits are expressed in bytes.
    return truncateToLength(sb.toString(), maxBytes)
}
|
||||
|
||||
/**
 * Shorten [s] so that its UTF-8 encoding occupies at most [maxBytes] bytes.
 *
 * The cut is made on the encoded bytes; a character whose encoding would be
 * split by the cut is dropped entirely, so the result is always validly encoded.
 * A string that already fits is returned unchanged.
 */
fun truncateToLength(s: String, maxBytes: Int): String {
    val encoded = s.toByteArray(Charsets.UTF_8)
    if (encoded.size <= maxBytes) return s

    // Decode only the first maxBytes bytes; a trailing, partially encoded character
    // is malformed input and is silently skipped.
    val lenientDecoder = Charsets.UTF_8.newDecoder()
        .onMalformedInput(CodingErrorAction.IGNORE)
    val window = ByteBuffer.wrap(encoded, 0, maxBytes)
    return lenientDecoder.decode(window).toString()
}
|
||||
|
||||
/**
|
||||
@@ -139,6 +200,8 @@ object DiskUtil {
|
||||
|
||||
const val NOMEDIA_FILE = ".nomedia"
|
||||
|
||||
// Safe theoretical max filename size is 255 bytes, and one character takes 1-4 bytes
// in UTF-8, so the limit is expressed in bytes rather than characters.
// To allow for writing to ext4 through a FUSE layer in the future, also subtract 15
// reserved characters.
const val MAX_FILE_NAME_BYTES = 240
|
||||
}
|
||||
|
Reference in New Issue
Block a user