Bug 653 - Metadata: UTF-16 ID3 tags read incorrectly.

The real problem is that whatever created the file mentioned in
the bug report did not include the Unicode BOM, so libid3tag
defaulted to big endian. This commit uses some code from ReactOS
that TRIES to autodetect the endianness.
This commit is contained in:
Leland Lucius 2021-02-16 13:33:42 -06:00
parent 37b36f5826
commit f52860355d
1 changed files with 34 additions and 0 deletions

View File

@@ -267,6 +267,40 @@ id3_ucs4_t *id3_utf16_deserialize(id3_byte_t const **ptr, id3_length_t length,
byteorder = ID3_UTF16_BYTEORDER_LE;
*ptr += 2;
break;
default:
// The text is missing the BOM, so attempt to detect the byte order.
// This is using code from ReactOS and, while it's not foolproof, it
// does provide some hope of getting the endianness correct.
{
unsigned char last_lo_byte = 0;
unsigned char last_hi_byte = 0;
unsigned int hi_byte_diff = 0;
unsigned int lo_byte_diff = 0;
int i;
for (i = 0; i < length; i += 2)
{
unsigned char lo_byte = (*ptr)[i + 1];
unsigned char hi_byte = (*ptr)[i];
lo_byte_diff += max(lo_byte, last_lo_byte) - min(lo_byte, last_lo_byte);
hi_byte_diff += max(hi_byte, last_hi_byte) - min(hi_byte, last_hi_byte);
last_lo_byte = lo_byte;
last_hi_byte = hi_byte;
}
if (lo_byte_diff < 127 && !hi_byte_diff)
{
byteorder = ID3_UTF16_BYTEORDER_BE;
}
if (hi_byte_diff && !lo_byte_diff)
{
byteorder = ID3_UTF16_BYTEORDER_LE;
}
}
break;
}
}