Add support for cp1252 (Western European) codepage.

In Western Europe, Windows defaults to its own codepage cp1252 (also known as
"WinLatin" or "Windows-1252"). cp1252 is ISO-8859-1 with additional printable
characters assigned to the byte range 0x80-0x9F.

Some mp3 tagging software on Windows uses cp1252 instead of ISO-8859-1. This
violates the ID3 specification, which requires tags to be ISO-8859-1 or
Unicode. However, similar violations are made for other codepages and supported
by Rockbox using the "Default Codepage" setting. Add support for cp1252 to
enable people using such broken tools to override the correct decoding to get
their tags displayed properly.

Change-Id: I9f2ec478afe2503e99ee8e6609416c92b0f453e0
Reviewed-on: http://gerrit.rockbox.org/209
Reviewed-by: Jens Arnold <amiconn@rockbox.org>
Tested-by: Jens Arnold <amiconn@rockbox.org>
This commit is contained in:
Dominik Riebeling 2012-04-06 20:21:29 +02:00 committed by Jens Arnold
parent 4a6b875eda
commit 2d9c0bab54
7 changed files with 52 additions and 12 deletions

View File

@ -13038,3 +13038,17 @@
*: "Start Sleep Timer"
</voice>
</phrase>
<phrase>
id: LANG_CODEPAGE_WESTERN_EUROPEAN
desc: in codepage setting menu
user: core
<source>
*: "Western European (CP1252)"
</source>
<dest>
*: "Western European (CP1252)"
</dest>
<voice>
*: "Western European"
</voice>
</phrase>

View File

@ -1531,25 +1531,28 @@ const struct settings_list settings[] = {
#ifdef HAVE_LCD_BITMAP
/* The order must match with that in unicode.c */
"iso8859-1,iso8859-7,iso8859-8,cp1251,iso8859-11,cp1256,"
"iso8859-9,iso8859-2,cp1250,sjis,gb2312,ksx1001,big5,utf-8",
"iso8859-9,iso8859-2,cp1250,cp1252,sjis,gb2312,ksx1001,big5,utf-8",
set_codepage, 15, /* 15 = entries in the name list above and ID2P() labels below */
ID2P(LANG_CODEPAGE_LATIN1), ID2P(LANG_CODEPAGE_GREEK),
ID2P(LANG_CODEPAGE_LATIN1),
ID2P(LANG_CODEPAGE_GREEK),
ID2P(LANG_CODEPAGE_HEBREW), ID2P(LANG_CODEPAGE_CYRILLIC),
ID2P(LANG_CODEPAGE_THAI), ID2P(LANG_CODEPAGE_ARABIC),
ID2P(LANG_CODEPAGE_TURKISH),
ID2P(LANG_CODEPAGE_LATIN_EXTENDED),
ID2P(LANG_CODEPAGE_CENTRAL_EUROPEAN),
ID2P(LANG_CODEPAGE_WESTERN_EUROPEAN),
ID2P(LANG_CODEPAGE_JAPANESE),
ID2P(LANG_CODEPAGE_SIMPLIFIED), ID2P(LANG_CODEPAGE_KOREAN),
ID2P(LANG_CODEPAGE_TRADITIONAL), ID2P(LANG_CODEPAGE_UTF8)),
#else /* !HAVE_LCD_BITMAP */
/* The order must match with that in unicode.c */
"iso8859-1,iso8859-7,cp1251,iso8859-9,iso8859-2,cp1250,utf-8",
"iso8859-1,iso8859-7,cp1251,iso8859-9,iso8859-2,cp1250,cp1252,utf-8",
set_codepage, 8, /* 8 = entries in the name list above and ID2P() labels below */
ID2P(LANG_CODEPAGE_LATIN1), ID2P(LANG_CODEPAGE_GREEK),
ID2P(LANG_CODEPAGE_CYRILLIC), ID2P(LANG_CODEPAGE_TURKISH),
ID2P(LANG_CODEPAGE_LATIN_EXTENDED),
ID2P(LANG_CODEPAGE_CENTRAL_EUROPEAN),
ID2P(LANG_CODEPAGE_WESTERN_EUROPEAN),
ID2P(LANG_CODEPAGE_UTF8)),
#endif
OFFON_SETTING(0, warnon_erase_dynplaylist, LANG_WARN_ERASEDYNPLAYLIST_MENU,

View File

@ -56,7 +56,7 @@ static const char * const filename[NUM_TABLES] =
static const char cp_2_table[NUM_CODEPAGES] =
{
0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0
};
static const char * const name_codepages[NUM_CODEPAGES+1] =
@ -70,6 +70,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] =
"ISO-8859-9",
"ISO-8859-2",
"CP1250",
"CP1252",
"SJIS",
"GB-2312",
"KSX-1001",
@ -80,7 +81,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] =
#else /* !HAVE_LCD_BITMAP, reduced support */
#define MAX_CP_TABLE_SIZE 640
#define MAX_CP_TABLE_SIZE 768
#define NUM_TABLES 1
static const char * const filename[NUM_TABLES] = {
@ -89,7 +90,7 @@ static const char * const filename[NUM_TABLES] = {
static const char cp_2_table[NUM_CODEPAGES] =
{
0, 1, 1, 1, 1, 1, 0
0, 1, 1, 1, 1, 1, 1, 0
};
static const char * const name_codepages[NUM_CODEPAGES+1] =
@ -100,6 +101,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] =
"ISO-8859-9",
"ISO-8859-2",
"CP1250",
"CP1252",
"UTF-8",
"unknown"
};
@ -190,6 +192,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
/* cp tells us which codepage to convert from */
switch (cp) {
case ISO_8859_7: /* Greek */
case WIN_1252: /* Western European */
case WIN_1251: /* Cyrillic */
case ISO_8859_9: /* Turkish */
case ISO_8859_2: /* Latin Extended */

View File

@ -45,6 +45,7 @@ enum codepages {
ISO_8859_9, /* Turkish */
ISO_8859_2, /* Latin Extended */
WIN_1250, /* Central European */
WIN_1252, /* Western European */
SJIS, /* Japanese */
GB_2312, /* Simp. Chinese */
KSX_1001, /* Korean */
@ -62,6 +63,7 @@ enum codepages {
ISO_8859_9, /* Turkish */
ISO_8859_2, /* Latin Extended */
WIN_1250, /* Central European */
WIN_1252, /* Western European */
UTF_8, /* Unicode */
NUM_CODEPAGES
};

View File

@ -57,6 +57,13 @@ const unsigned short cp1251_to_uni[] = {
0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457 /* B8-BF */
};
/*
 * CP1252 (Windows-1252) to Unicode mapping for the bytes 0x80-0x9F,
 * indexed by (byte - 0x80).  Bytes outside this range are identical to
 * ISO-8859-1 and are not stored here.
 *
 * The five positions that CP1252 leaves unassigned (0x81, 0x8D, 0x8F,
 * 0x90 and 0x9D) are mapped to U+00A0.
 */
const unsigned short cp1252_to_uni[] = {
    0x20AC, 0x00A0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, /* 80-87 */
    /* 0x88 is U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT */
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x00A0, 0x017D, 0x00A0, /* 88-8F */
    0x00A0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 90-97 */
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x00A0, 0x017E, 0x0178  /* 98-9F */
};
const unsigned short cp1256_to_uni[] = {
0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,

View File

@ -22,6 +22,7 @@
extern const unsigned short iso8859_7_to_uni[];
extern const unsigned short cp1250_to_uni[];
extern const unsigned short cp1251_to_uni[];
extern const unsigned short cp1252_to_uni[];
extern const unsigned short cp1256_to_uni[];
extern const unsigned short iso8859_2_to_uni[];
extern const unsigned short cp932_table[];

View File

@ -25,8 +25,8 @@
#define MAX_TABLE_SIZE 32768
static const int mini_index[6] = {
0, 1, 3, 6, 7, 8
static const int mini_index[7] = {
0, 1, 3, 6, 7, 8, 9
};
static unsigned short iso_table[MAX_TABLE_SIZE];
@ -140,7 +140,7 @@ unsigned short iso_decode(unsigned char *latin1, int cp, int count)
ucs = iso8859_2_to_uni[*latin1++ - 0xA1];
}
break;
case 0x08: /* Central European (CP1250) */
while (count--) {
/* first convert to unicode */
@ -150,7 +150,17 @@ unsigned short iso_decode(unsigned char *latin1, int cp, int count)
ucs = cp1250_to_uni[*latin1++ - 0x80];
}
break;
case 0x09: /* Western European (CP1252) */
while (count--) {
/* first convert to unicode */
if (*latin1 < 0x80 || *latin1 >= 0xa0)
ucs = *latin1++;
else
ucs = cp1252_to_uni[*latin1++ - 0x80];
}
break;
default:
break;
}
@ -209,7 +219,7 @@ int main(int argc, char **argv)
of = fopen("isomini.cp", "wb");
if (!of) return 1;
for (i=1; i<6; i++) {
for (i=1; i<7; i++) {
for (j=0; j<128; j++) {
k = (unsigned char)j + 128;
@ -223,7 +233,7 @@ int main(int argc, char **argv)
of = fopen("iso.cp", "wb");
if (!of) return 1;
for (i=1; i<9; i++) {
for (i=1; i<10; i++) {
for (j=0; j<128; j++) {
k = (unsigned char)j + 128;