diff --git a/src/OpenFOAM/primitives/chars/wchar/wchar.H b/src/OpenFOAM/primitives/chars/wchar/wchar.H index c731a3dc8149154b127fe51d60c700b41f3f70ca..61519a51d345032b8e96e262035b7009d8eba642 100644 --- a/src/OpenFOAM/primitives/chars/wchar/wchar.H +++ b/src/OpenFOAM/primitives/chars/wchar/wchar.H @@ -2,7 +2,7 @@ ========= | \\ / F ield | OpenFOAM: The Open Source CFD Toolbox \\ / O peration | - \\ / A nd | Copyright (C) 2009-2009 OpenCFD Ltd. + \\ / A nd | Copyright (C) 2009-2010 OpenCFD Ltd. \\/ M anipulation | ------------------------------------------------------------------------------- License @@ -31,6 +31,10 @@ Description SourceFiles wcharIO.C +SeeAlso + http://en.wikipedia.org/wiki/UTF-8 + http://en.wikibooks.org/wiki/Unicode/Character_reference + \*---------------------------------------------------------------------------*/ #ifndef wchar_H @@ -48,10 +52,10 @@ class Ostream; // * * * * * * * * * * * * * * * IOstream Operators * * * * * * * * * * * * // -//- Output via a normal char +//- Output wide character (Unicode) as UTF-8 Ostream& operator<<(Ostream&, const wchar_t); -//- Output string via normal char +//- Output wide character (Unicode) string as UTF-8 Ostream& operator<<(Ostream&, const wchar_t*); diff --git a/src/OpenFOAM/primitives/chars/wchar/wcharIO.C b/src/OpenFOAM/primitives/chars/wchar/wcharIO.C index 47a5ed056024b78a174ed6e35281fad1057106ec..9796d4b6eb9658eb4f8f68c8ff6fe8e3683a9f38 100644 --- a/src/OpenFOAM/primitives/chars/wchar/wcharIO.C +++ b/src/OpenFOAM/primitives/chars/wchar/wcharIO.C @@ -2,7 +2,7 @@ ========= | \\ / F ield | OpenFOAM: The Open Source CFD Toolbox \\ / O peration | - \\ / A nd | Copyright (C) 2009-2009 OpenCFD Ltd. + \\ / A nd | Copyright (C) 2009-2010 OpenCFD Ltd. \\/ M anipulation | ------------------------------------------------------------------------------- License @@ -22,11 +22,6 @@ License along with OpenFOAM; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -Description - Reads a char from an input stream, for a given version - number and File format. If an ascii File is being read, then the line - numbers are counted and an erroneous read is reported. - \*---------------------------------------------------------------------------*/ #include "error.H" @@ -38,7 +33,68 @@ Description Foam::Ostream& Foam::operator<<(Ostream& os, const wchar_t wc) { - os.write(char(wc)); + if (!(wc & ~0x0000007F)) + { + // 0x00000000 - 0x0000007F: (1-byte output) + // 0xxxxxxx + os.write(char(wc)); + } + else if (!(wc & ~0x000007FF)) + { + // 0x00000080 - 0x000007FF: (2-byte output) + // 110bbbaa 10aaaaaa + os.write(char(0xC0 | ((wc >> 6) & 0x1F))); + os.write(char(0x80 | ((wc) & 0x3F))); + } + else if (!(wc & ~0x0000FFFF)) + { + // 0x00000800 - 0x0000FFFF: (3-byte output) + // 1110bbbb 10bbbbaa 10aaaaaa + os.write(char(0xE0 | ((wc >> 12) & 0x0F))); + os.write(char(0x80 | ((wc >> 6) & 0x3F))); + os.write(char(0x80 | ((wc) & 0x3F))); + } + else if (!(wc & ~0x001FFFFF)) + { + // 0x00010000 - 0x001FFFFF: (4-byte output) + // 11110ccc 10ccbbbb 10bbbbaa 10aaaaaa + os.write(char(0xF0 | ((wc >> 18) & 0x07))); + os.write(char(0x80 | ((wc >> 12) & 0x3F))); + os.write(char(0x80 | ((wc >> 6) & 0x3F))); + os.write(char(0x80 | ((wc) & 0x3F))); + } + else if (!(wc & ~0x03FFFFFF)) + { + // 0x00200000 - 0x03FFFFFF: (5-byte output) + // 111110dd 10cccccc 10ccbbbb 10bbbbaa 10aaaaaa + os.write(char(0xF8 | ((wc >> 24) & 0x03))); + os.write(char(0x80 | ((wc >> 18) & 0x3F))); + os.write(char(0x80 | ((wc >> 12) & 0x3F))); + os.write(char(0x80 | ((wc >> 6) & 0x3F))); + os.write(char(0x80 | ((wc) & 0x3F))); + } + else if (!(wc & ~0x7FFFFFFF)) + { + // 0x04000000 - 0x7FFFFFFF: (6-byte output) + // 1111110d 10dddddd 10cccccc 10ccbbbb 10bbbbaa 10aaaaaa + os.write(char(0xFC | ((wc >> 30) & 0x01))); + os.write(char(0x80 | ((wc >> 24) & 0x3F))); + os.write(char(0x80 | ((wc >> 18) & 0x3F))); + os.write(char(0x80 | ((wc >> 12) & 0x3F))); + os.write(char(0x80 | ((wc >> 6) & 0x3F))); + os.write(char(0x80 | ((wc) & 0x3F))); + } + else + { + // according to man page utf8(7) + // the Unicode standard specifies no characters above 0x0010FFFF, + // so Unicode characters can only be up to four bytes long in UTF-8. + + // report anything unknown/invalid as replacement character U+FFFD + os.write(char(0xEF)); + os.write(char(0xBF)); + os.write(char(0xBD)); + } os.check("Ostream& operator<<(Ostream&, const wchar_t)"); return os; @@ -51,10 +107,10 @@ Foam::Ostream& Foam::operator<<(Ostream& os, const wchar_t* ws) { for (const wchar_t* p = ws; *p; ++p) { - os.write(char(*p)); + os << *p; } } - os.check("Ostream& operator<<(Ostream&, const wchar_t*)"); + return os; }