Skip to content
Snippets Groups Projects
Commit a132aade authored by Mark Olesen's avatar Mark Olesen
Browse files

Output wchar_t as UTF-8 instead of simply truncating to char.

parent 9c79927b
Branches
Tags
No related merge requests found
......@@ -2,7 +2,7 @@
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright (C) 2009-2009 OpenCFD Ltd.
\\ / A nd | Copyright (C) 2009-2010 OpenCFD Ltd.
\\/ M anipulation |
-------------------------------------------------------------------------------
License
......@@ -31,6 +31,10 @@ Description
SourceFiles
wcharIO.C
SeeAlso
http://en.wikipedia.org/wiki/UTF-8
http://en.wikibooks.org/wiki/Unicode/Character_reference
\*---------------------------------------------------------------------------*/
#ifndef wchar_H
......@@ -48,10 +52,10 @@ class Ostream;
// * * * * * * * * * * * * * * * IOstream Operators * * * * * * * * * * * * //
//- Output via a normal char
//- Output wide character (Unicode) as UTF-8
Ostream& operator<<(Ostream&, const wchar_t);
//- Output string via normal char
//- Output wide character (Unicode) string as UTF-8
Ostream& operator<<(Ostream&, const wchar_t*);
......
......@@ -2,7 +2,7 @@
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright (C) 2009-2009 OpenCFD Ltd.
\\ / A nd | Copyright (C) 2009-2010 OpenCFD Ltd.
\\/ M anipulation |
-------------------------------------------------------------------------------
License
......@@ -22,11 +22,6 @@ License
along with OpenFOAM; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Description
Reads a char from an input stream, for a given version
number and File format. If an ascii File is being read, then the line
numbers are counted and an erroneous read is reported.
\*---------------------------------------------------------------------------*/
#include "error.H"
......@@ -38,7 +33,68 @@ Description
Foam::Ostream& Foam::operator<<(Ostream& os, const wchar_t wc)
{
os.write(char(wc));
if (!(wc & ~0x0000007F))
{
// 0x00000000 - 0x0000007F: (1-byte output)
// 0xxxxxxx
os.write(char(wc));
}
else if (!(wc & ~0x000007FF))
{
// 0x00000080 - 0x000007FF: (2-byte output)
// 110bbbaa 10aaaaaa
os.write(char(0xC0 | ((wc >> 6) & 0x1F)));
os.write(char(0x80 | ((wc) & 0x3F)));
}
else if (!(wc & ~0x0000FFFF))
{
// 0x00000800 - 0x0000FFFF: (3-byte output)
// 1110bbbb 10bbbbaa 10aaaaaa
os.write(char(0xE0 | ((wc >> 12) & 0x0F)));
os.write(char(0x80 | ((wc >> 6) & 0x3F)));
os.write(char(0x80 | ((wc) & 0x3F)));
}
else if (!(wc & ~0x001FFFFF))
{
// 0x00010000 - 0x001FFFFF: (4-byte output)
// 11110ccc 10ccbbbb 10bbbbaa 10aaaaaa
os.write(char(0xF0 | ((wc >> 18) & 0x07)));
os.write(char(0x80 | ((wc >> 12) & 0x3F)));
os.write(char(0x80 | ((wc >> 6) & 0x3F)));
os.write(char(0x80 | ((wc) & 0x3F)));
}
else if (!(wc & ~0x03FFFFFF))
{
// 0x00200000 - 0x03FFFFFF: (5-byte output)
// 111110dd 10cccccc 10ccbbbb 10bbbbaa 10aaaaaa
os.write(char(0xF8 | ((wc >> 24) & 0x03)));
os.write(char(0x80 | ((wc >> 18) & 0x3F)));
os.write(char(0x80 | ((wc >> 12) & 0x3F)));
os.write(char(0x80 | ((wc >> 6) & 0x3F)));
os.write(char(0x80 | ((wc) & 0x3F)));
}
else if (!(wc & ~0x7FFFFFFF))
{
// 0x04000000 - 0x7FFFFFFF: (6-byte output)
// 1111110d 10dddddd 10cccccc 10ccbbbb 10bbbbaa 10aaaaaa
os.write(char(0xFC | ((wc >> 30) & 0x01)));
os.write(char(0x80 | ((wc >> 24) & 0x3F)));
os.write(char(0x80 | ((wc >> 18) & 0x3F)));
os.write(char(0x80 | ((wc >> 12) & 0x3F)));
os.write(char(0x80 | ((wc >> 6) & 0x3F)));
os.write(char(0x80 | ((wc) & 0x3F)));
}
else
{
// according to man page utf8(7)
// the Unicode standard specifies no characters above 0x0010FFFF,
// so Unicode characters can only be up to four bytes long in UTF-8.
// report anything unknown/invalid as replacement character U+FFFD
os.write(char(0xEF));
os.write(char(0xBF));
os.write(char(0xBD));
}
os.check("Ostream& operator<<(Ostream&, const wchar_t)");
return os;
......@@ -51,10 +107,10 @@ Foam::Ostream& Foam::operator<<(Ostream& os, const wchar_t* ws)
{
for (const wchar_t* p = ws; *p; ++p)
{
os.write(char(*p));
os << *p;
}
}
os.check("Ostream& operator<<(Ostream&, const wchar_t*)");
return os;
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment