misc/libphysfs/physfs_unicode.c
author nemo
Mon, 11 May 2015 13:53:08 -0400
changeset 10942 5d7dd938dedc
parent 8524 a65e9bcf0a03
child 12213 bb5522e88ab2
permissions -rw-r--r--
This probably fixes bug #839 - mine time was hardcoded to 3000 in Attack, instead of using the "0 as undefined" input that other places were using. When re653e96b0ec3 started paying attention to the input parameter, this previously ignored value became a problem.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7768
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     1
#define __PHYSICSFS_INTERNAL__
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     2
#include "physfs_internal.h"
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     3
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     4
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     5
/*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     6
 * From rfc3629, the UTF-8 spec:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     7
 *  http://www.ietf.org/rfc/rfc3629.txt
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     8
 *
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     9
 *   Char. number range  |        UTF-8 octet sequence
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    10
 *      (hexadecimal)    |              (binary)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    11
 *   --------------------+---------------------------------------------
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    12
 *   0000 0000-0000 007F | 0xxxxxxx
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    13
 *   0000 0080-0000 07FF | 110xxxxx 10xxxxxx
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    14
 *   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    15
 *   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    16
 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    17
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    18
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    19
/*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    20
 * This may not be the best value, but it's one that isn't represented
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    21
 *  in Unicode (0x10FFFF is the largest codepoint value). We return this
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    22
 *  value from utf8codepoint() if there's bogus bits in the
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    23
 *  stream. Callers such as PHYSFS_utf8ToUcs4() turn this value into something
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    24
 *  reasonable (like a question mark), for text that wants to try to recover,
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    25
 *  whereas utf8valid() will use the value to determine if a string has bad
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    26
 *  bits.
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    27
 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    28
#define UNICODE_BOGUS_CHAR_VALUE 0xFFFFFFFF
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    29
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    30
/*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    31
 * This is the codepoint we currently return when there were bogus bits in a
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    32
 *  UTF-8 string. May not fly in Asian locales?
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    33
 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    34
#define UNICODE_BOGUS_CHAR_CODEPOINT '?'
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    35
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    36
static PHYSFS_uint32 utf8codepoint(const char **_str)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    37
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    38
    const char *str = *_str;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    39
    PHYSFS_uint32 retval = 0;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    40
    PHYSFS_uint32 octet = (PHYSFS_uint32) ((PHYSFS_uint8) *str);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    41
    PHYSFS_uint32 octet2, octet3, octet4;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    42
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    43
    if (octet == 0)  /* null terminator, end of string. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    44
        return 0;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    45
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    46
    else if (octet < 128)  /* one octet char: 0 to 127 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    47
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    48
        (*_str)++;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    49
        return octet;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    50
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    51
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    52
    else if ((octet > 127) && (octet < 192))  /* bad (starts with 10xxxxxx). */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    53
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    54
        /*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    55
         * Apparently each of these is supposed to be flagged as a bogus
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    56
         *  char, instead of just resyncing to the next valid codepoint.
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    57
         */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    58
        (*_str)++;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    59
        return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    60
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    61
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    62
    else if (octet < 224)  /* two octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    63
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    64
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    65
        octet -= (128+64);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    66
        octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    67
        if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    68
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    69
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    70
        *_str += 1;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    71
        retval = ((octet << 6) | (octet2 - 128));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    72
        if ((retval >= 0x80) && (retval <= 0x7FF))
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    73
            return retval;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    74
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    75
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    76
    else if (octet < 240)  /* three octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    77
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    78
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    79
        octet -= (128+64+32);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    80
        octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    81
        if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    82
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    83
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    84
        octet3 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    85
        if ((octet3 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    86
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    87
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    88
        *_str += 2;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    89
        retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    90
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    91
        /* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    92
        switch (retval)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    93
        {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    94
            case 0xD800:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    95
            case 0xDB7F:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    96
            case 0xDB80:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    97
            case 0xDBFF:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    98
            case 0xDC00:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    99
            case 0xDF80:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   100
            case 0xDFFF:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   101
                return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   102
        } /* switch */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   103
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   104
        /* 0xFFFE and 0xFFFF are illegal, too, so we check them at the edge. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   105
        if ((retval >= 0x800) && (retval <= 0xFFFD))
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   106
            return retval;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   107
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   108
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   109
    else if (octet < 248)  /* four octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   110
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   111
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   112
        octet -= (128+64+32+16);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   113
        octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   114
        if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   115
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   116
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   117
        octet3 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   118
        if ((octet3 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   119
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   120
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   121
        octet4 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   122
        if ((octet4 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   123
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   124
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   125
        *_str += 3;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   126
        retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   127
                   ((octet3 - 128) << 6) | ((octet4 - 128)) );
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   128
        if ((retval >= 0x10000) && (retval <= 0x10FFFF))
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   129
            return retval;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   130
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   131
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   132
    /*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   133
     * Five and six octet sequences became illegal in rfc3629.
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   134
     *  We throw the codepoint away, but parse them to make sure we move
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   135
     *  ahead the right number of bytes and don't overflow the buffer.
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   136
     */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   137
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   138
    else if (octet < 252)  /* five octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   139
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   140
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   141
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   142
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   143
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   144
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   145
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   146
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   147
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   148
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   149
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   150
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   151
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   152
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   153
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   154
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   155
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   156
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   157
        *_str += 4;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   158
        return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   159
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   160
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   161
    else  /* six octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   162
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   163
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   164
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   165
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   166
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   167
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   168
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   169
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   170
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   171
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   172
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   173
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   174
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   175
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   176
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   177
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   178
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   179
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   180
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   181
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   182
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   183
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   184
        *_str += 6;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   185
        return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   186
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   187
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   188
    return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   189
} /* utf8codepoint */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   190
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   191
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   192
/*
 * Convert the null-terminated UTF-8 string (src) to UCS-4, writing the
 *  result to (dst).
 *
 *  src: UTF-8 input; decoding stops at its null terminator.
 *  dst: output buffer.
 *  len: size of (dst) in BYTES, not elements. One element is always held
 *       back for the null terminator, so the output is truncated if it
 *       does not fit.
 *
 * Malformed input sequences are stored as UNICODE_BOGUS_CHAR_CODEPOINT.
 *  If (len) is too small to hold even the terminator, nothing is written.
 */
void PHYSFS_utf8ToUcs4(const char *src, PHYSFS_uint32 *dst, PHYSFS_uint64 len)
{
    /*
     * (len) is unsigned: subtracting from a too-small value would wrap
     *  around to a huge number and let the loop run off the buffer.
     */
    if (len < sizeof (PHYSFS_uint32))
        return;

    len -= sizeof (PHYSFS_uint32);   /* save room for null char. */
    while (len >= sizeof (PHYSFS_uint32))
    {
        PHYSFS_uint32 cp = utf8codepoint(&src);
        if (cp == 0)
            break;
        else if (cp == UNICODE_BOGUS_CHAR_VALUE)
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;
        *(dst++) = cp;
        len -= sizeof (PHYSFS_uint32);
    } /* while */

    *dst = 0;
} /* PHYSFS_utf8ToUcs4 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   208
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   209
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   210
/*
 * Convert the null-terminated UTF-8 string (src) to UCS-2, writing the
 *  result to (dst).
 *
 *  src: UTF-8 input; decoding stops at its null terminator.
 *  dst: output buffer.
 *  len: size of (dst) in BYTES, not elements. One element is always held
 *       back for the null terminator, so the output is truncated if it
 *       does not fit.
 *
 * Malformed input sequences, and codepoints above 0xFFFF (which UCS-2
 *  cannot represent), are stored as UNICODE_BOGUS_CHAR_CODEPOINT.
 *  If (len) is too small to hold even the terminator, nothing is written.
 */
void PHYSFS_utf8ToUcs2(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
{
    /*
     * (len) is unsigned: subtracting from a too-small value would wrap
     *  around to a huge number and let the loop run off the buffer.
     */
    if (len < sizeof (PHYSFS_uint16))
        return;

    len -= sizeof (PHYSFS_uint16);   /* save room for null char. */
    while (len >= sizeof (PHYSFS_uint16))
    {
        PHYSFS_uint32 cp = utf8codepoint(&src);
        if (cp == 0)
            break;
        else if (cp == UNICODE_BOGUS_CHAR_VALUE)
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;

        if (cp > 0xFFFF)  /* would need UTF-16 surrogates (bogus in UCS-2) */
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;

        *(dst++) = (PHYSFS_uint16) cp;  /* cp <= 0xFFFF here; fits. */
        len -= sizeof (PHYSFS_uint16);
    } /* while */

    *dst = 0;
} /* PHYSFS_utf8ToUcs2 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   230
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   231
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   232
/*
 * Convert the null-terminated UTF-8 string (src) to UTF-16, writing the
 *  result to (dst).
 *
 *  src: UTF-8 input; decoding stops at its null terminator.
 *  dst: output buffer.
 *  len: size of (dst) in BYTES, not elements. One element is always held
 *       back for the null terminator, so the output is truncated if it
 *       does not fit.
 *
 * Malformed input sequences are stored as UNICODE_BOGUS_CHAR_CODEPOINT.
 *  Codepoints above 0xFFFF are written as a surrogate pair; if only one
 *  element of room is left for such a codepoint, conversion stops there
 *  rather than emitting half a pair.
 *  If (len) is too small to hold even the terminator, nothing is written.
 */
void PHYSFS_utf8ToUtf16(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
{
    /*
     * (len) is unsigned: subtracting from a too-small value would wrap
     *  around to a huge number and let the loop run off the buffer.
     */
    if (len < sizeof (PHYSFS_uint16))
        return;

    len -= sizeof (PHYSFS_uint16);   /* save room for null char. */
    while (len >= sizeof (PHYSFS_uint16))
    {
        PHYSFS_uint32 cp = utf8codepoint(&src);
        if (cp == 0)
            break;
        else if (cp == UNICODE_BOGUS_CHAR_VALUE)
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;

        if (cp > 0xFFFF)  /* encode as surrogate pair */
        {
            if (len < (sizeof (PHYSFS_uint16) * 2))
                break;  /* not enough room for the pair, stop now. */

            cp -= 0x10000;  /* Make this a 20-bit value */

            /* high surrogate carries the top ten bits. */
            *(dst++) = (PHYSFS_uint16) (0xD800 + ((cp >> 10) & 0x3FF));
            len -= sizeof (PHYSFS_uint16);

            /* low surrogate carries the bottom ten; stored just below. */
            cp = 0xDC00 + (cp & 0x3FF);
        } /* if */

        *(dst++) = (PHYSFS_uint16) cp;
        len -= sizeof (PHYSFS_uint16);
    } /* while */

    *dst = 0;
} /* PHYSFS_utf8ToUtf16 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   262
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   263
/*
 * Encode a single Unicode codepoint as UTF-8 into a caller-supplied buffer.
 *
 *   cp    The codepoint to encode. Values that must not appear in UTF-8
 *         (anything above 0x10FFFF, 0xFFFE, 0xFFFF, and the whole UTF-16
 *         surrogate range 0xD800..0xDFFF) are replaced with
 *         UNICODE_BOGUS_CHAR_CODEPOINT before encoding.
 *   _dst  In/out: address of the write cursor. Advanced past the bytes
 *         written.
 *   _len  In/out: number of bytes still available at *_dst. Reduced by the
 *         number of bytes written.
 *
 * If *_len is zero on entry, nothing is touched. If the encoded sequence
 * does not fit in the remaining space, nothing is written and *_len is
 * forced to zero, so a caller looping "while (len)" stops instead of
 * emitting a truncated multi-byte sequence. No terminator is written here.
 */
static void utf8fromcodepoint(PHYSFS_uint32 cp, char **_dst, PHYSFS_uint64 *_len)
{
    char *dst = *_dst;
    PHYSFS_uint64 len = *_len;

    if (len == 0)
        return;

    if (cp > 0x10FFFF)
        cp = UNICODE_BOGUS_CHAR_CODEPOINT;
    else if ((cp == 0xFFFE) || (cp == 0xFFFF))  /* illegal values. */
        cp = UNICODE_BOGUS_CHAR_CODEPOINT;
    else if ((cp >= 0xD800) && (cp <= 0xDFFF))
    {
        /*
         * Every UTF-16 surrogate is illegal in UTF-8 (rfc3629, section 3),
         *  not just a handful of sample values from the range.
         */
        cp = UNICODE_BOGUS_CHAR_CODEPOINT;
    } /* else if */

    /* Do the encoding... */
    if (cp < 0x80)
    {
        *(dst++) = (char) cp;
        len--;
    } /* if */

    else if (cp < 0x800)
    {
        if (len < 2)
            len = 0;
        else
        {
            *(dst++) = (char) ((cp >> 6) | 128 | 64);
            *(dst++) = (char) ((cp & 0x3F) | 128);
            len -= 2;
        } /* else */
    } /* else if */

    else if (cp < 0x10000)
    {
        if (len < 3)
            len = 0;
        else
        {
            *(dst++) = (char) ((cp >> 12) | 128 | 64 | 32);
            *(dst++) = (char) (((cp >> 6) & 0x3F) | 128);
            *(dst++) = (char) ((cp & 0x3F) | 128);
            len -= 3;
        } /* else */
    } /* else if */

    else
    {
        if (len < 4)
            len = 0;
        else
        {
            *(dst++) = (char) ((cp >> 18) | 128 | 64 | 32 | 16);
            *(dst++) = (char) (((cp >> 12) & 0x3F) | 128);
            *(dst++) = (char) (((cp >> 6) & 0x3F) | 128);
            *(dst++) = (char) ((cp & 0x3F) | 128);
            len -= 4;
        } /* else */
    } /* else */

    *_dst = dst;
    *_len = len;
} /* utf8fromcodepoint */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   340
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   341
/*
 * Shared body for the "fixed-width source -> UTF-8" converters.
 *
 *   typ  Unsigned type each source element is cast to before it is widened
 *        to a PHYSFS_uint32 codepoint.
 *   src  Pointer lvalue to the null-terminated source; advanced as read.
 *   dst  char pointer lvalue for output; advanced as written.
 *   len  Integer lvalue: size of the output buffer in bytes.
 *
 * One byte of (len) is set aside for the terminator, then codepoints are
 *  encoded with utf8fromcodepoint() until a zero element is read or the
 *  space runs out, and a '\0' is stored at the final (dst).
 *
 * This expands to a bare "return;" when (len) is zero (nothing at all is
 *  written in that case), so it may only be used inside functions that
 *  return void. It is wrapped in do/while(0) so that the usual
 *  "UTF8FROMTYPE(...);" call form is exactly one statement.
 */
#define UTF8FROMTYPE(typ, src, dst, len) \
    do { \
        if ((len) == 0) return; \
        (len)--; \
        while (len) \
        { \
            const PHYSFS_uint32 cp = (PHYSFS_uint32) ((typ) (*((src)++))); \
            if (cp == 0) break; \
            utf8fromcodepoint(cp, &(dst), &(len)); \
        } \
        *(dst) = '\0'; \
    } while (0)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   351
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   352
/*
 * Convert a null-terminated UCS-4 string to UTF-8.
 *
 *   src  Source string; each element is taken as one codepoint.
 *   dst  Output buffer.
 *   len  Size of dst in bytes, including room for the null terminator.
 *
 * The work is done by UTF8FROMTYPE: when len is zero nothing is written;
 *  otherwise output stops at the source terminator or when dst runs out of
 *  space, and dst is always null-terminated.
 */
void PHYSFS_utf8FromUcs4(const PHYSFS_uint32 *src, char *dst, PHYSFS_uint64 len)
{
    UTF8FROMTYPE(PHYSFS_uint32, src, dst, len);
} /* PHYSFS_utf8FromUcs4 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   356
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   357
/*
 * Convert a null-terminated UCS-2 string to UTF-8.
 *
 *   src  Source string; each 16-bit element is taken as one codepoint
 *        (no surrogate pairing is done here).
 *   dst  Output buffer.
 *   len  Size of dst in bytes, including room for the null terminator.
 *
 * The work is done by UTF8FROMTYPE: when len is zero nothing is written;
 *  otherwise output stops at the source terminator or when dst runs out of
 *  space, and dst is always null-terminated.
 */
void PHYSFS_utf8FromUcs2(const PHYSFS_uint16 *src, char *dst, PHYSFS_uint64 len)
{
    /* The element type handed to the macro matches what src points at. */
    UTF8FROMTYPE(PHYSFS_uint16, src, dst, len);
} /* PHYSFS_utf8FromUcs2 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   361
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   362
/* latin1 maps to unicode codepoints directly, we just utf-8 encode it. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   363
void PHYSFS_utf8FromLatin1(const char *src, char *dst, PHYSFS_uint64 len)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   364
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   365
    UTF8FROMTYPE(PHYSFS_uint8, src, dst, len);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   366
} /* PHYSFS_utf8FromLatin1 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   367
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   368
#undef UTF8FROMTYPE
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   369
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   370
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   371
/*
 * Convert a null-terminated UTF-16 string to UTF-8.
 *
 *   src  Source string.
 *   dst  Output buffer.
 *   len  Size of dst in bytes, including room for the null terminator.
 *
 * A high surrogate followed by a low surrogate is combined into one
 *  supplementary-plane codepoint. A low surrogate with no preceding high
 *  surrogate, or a high surrogate not followed by a low one, is replaced
 *  with UNICODE_BOGUS_CHAR_CODEPOINT.
 *
 * When len is zero nothing is written; otherwise output stops at the source
 *  terminator or when dst runs out of space, and dst is always
 *  null-terminated.
 */
void PHYSFS_utf8FromUtf16(const PHYSFS_uint16 *src, char *dst, PHYSFS_uint64 len)
{
    if (len == 0)
        return;

    len--;  /* save room for the null terminator. */
    while (len)
    {
        PHYSFS_uint32 cp = (PHYSFS_uint32) *(src++);
        if (cp == 0)
            break;

        /* Orphaned second half of surrogate pair? */
        if ((cp >= 0xDC00) && (cp <= 0xDFFF))
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;
        else if ((cp >= 0xD800) && (cp <= 0xDBFF))  /* start surrogate pair! */
        {
            /*
             * Peek only: if this isn't a low surrogate (it may even be the
             *  terminator), leave it for the next pass through the loop.
             */
            const PHYSFS_uint32 pair = (PHYSFS_uint32) *src;
            if ((pair < 0xDC00) || (pair > 0xDFFF))
                cp = UNICODE_BOGUS_CHAR_CODEPOINT;
            else
            {
                src++;  /* eat the other surrogate. */
                /*
                 * The two halves carry a 20-bit offset from 0x10000; the
                 *  base must be added back to get the real codepoint.
                 */
                cp = 0x10000 + (((cp - 0xD800) << 10) | (pair - 0xDC00));
            } /* else */
        } /* else if */

        utf8fromcodepoint(cp, &dst, &len);
    } /* while */

    *dst = '\0';
} /* PHYSFS_utf8FromUtf16 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   403
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   404
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   405
typedef struct CaseFoldMapping
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   406
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   407
    PHYSFS_uint32 from;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   408
    PHYSFS_uint32 to0;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   409
    PHYSFS_uint32 to1;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   410
    PHYSFS_uint32 to2;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   411
} CaseFoldMapping;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   412
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   413
typedef struct CaseFoldHashBucket
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   414
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   415
    const PHYSFS_uint8 count;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   416
    const CaseFoldMapping *list;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   417
} CaseFoldHashBucket;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   418
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   419
#include "physfs_casefolding.h"
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   420
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   421
/*
 * Look up the case-folded form of a codepoint.
 *
 *   from  The codepoint to fold.
 *   to    Output: array of (at least) three codepoints. Receives the folded
 *         sequence from the matching table entry, or {from, 0, 0} when the
 *         table has no entry for (from).
 *
 * The bucket is picked by XORing the low byte of (from) with its second
 *  byte; the bucket's entries are then scanned linearly.
 */
static void locate_case_fold_mapping(const PHYSFS_uint32 from,
                                     PHYSFS_uint32 *to)
{
    const PHYSFS_uint8 slot = ((from ^ (from >> 8)) & 0xFF);
    const CaseFoldHashBucket *bucket = &case_fold_hash[slot];
    PHYSFS_uint32 idx = 0;

    while (idx < bucket->count)
    {
        const CaseFoldMapping *entry = &bucket->list[idx++];
        if (entry->from != from)
            continue;  /* not this one, keep looking. */

        to[0] = entry->to0;
        to[1] = entry->to1;
        to[2] = entry->to2;
        return;
    } /* while */

    /* No rule for this codepoint: it folds to itself. */
    to[0] = from;
    to[1] = 0;
    to[2] = 0;
} /* locate_case_fold_mapping */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   445
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   446
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   447
/*
 * Case-insensitive equality test for two codepoints.
 *
 * Both codepoints are case-folded and the folded sequences are compared
 *  element by element.
 *
 * Returns 1 if the folded forms are identical, 0 if they differ. Note that
 *  despite the "cmp" in the name, this is an equality test and not a
 *  three-way ordering.
 */
static int utf8codepointcmp(const PHYSFS_uint32 cp1, const PHYSFS_uint32 cp2)
{
    PHYSFS_uint32 lhs[3];
    PHYSFS_uint32 rhs[3];
    int i;

    locate_case_fold_mapping(cp1, lhs);
    locate_case_fold_mapping(cp2, rhs);

    for (i = 0; i < 3; i++)
    {
        if (lhs[i] != rhs[i])
            return 0;
    } /* for */

    return 1;
} /* utf8codepointcmp */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   456
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   457
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   458
int __PHYSFS_utf8stricmp(const char *str1, const char *str2)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   459
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   460
    while (1)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   461
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   462
        const PHYSFS_uint32 cp1 = utf8codepoint(&str1);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   463
        const PHYSFS_uint32 cp2 = utf8codepoint(&str2);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   464
        if (!utf8codepointcmp(cp1, cp2)) break;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   465
        if (cp1 == 0) return 1;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   466
    } /* while */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   467
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   468
    return 0;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   469
} /* __PHYSFS_utf8stricmp */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   470
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   471
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   472
/*
 * Case-insensitive comparison of at most (n) codepoints of two
 *  null-terminated UTF-8 strings.
 *
 * Note that (n) counts codepoints pulled by utf8codepoint(), not bytes.
 *
 * Returns 1 if the strings match for (n) codepoints or up to the
 *  terminator, whichever comes first (so n == 0 yields 1), and 0 at the
 *  first pair of codepoints that differ. Be careful: this is the opposite
 *  of the strncmp() convention, where 0 means "equal".
 */
int __PHYSFS_utf8strnicmp(const char *str1, const char *str2, PHYSFS_uint32 n)
{
    PHYSFS_uint32 remaining;

    for (remaining = n; remaining > 0; remaining--)
    {
        const PHYSFS_uint32 left = utf8codepoint(&str1);
        const PHYSFS_uint32 right = utf8codepoint(&str2);

        if (utf8codepointcmp(left, right) == 0)
            return 0;  /* mismatch. */

        if (left == 0)
            return 1;  /* both strings ended before n codepoints. */
    } /* for */

    return 1;  /* matched to n chars. */
} /* __PHYSFS_utf8strnicmp */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   485
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   486
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   487
/*
 * Case-insensitive comparison of two null-terminated strings that only
 *  treats 'A'..'Z' as having a lowercase form; every other byte is compared
 *  as-is.
 *
 * Returns -1 if str1 sorts before str2, 1 if it sorts after, and 0 if the
 *  strings are equal. Bytes are compared as plain char.
 */
int __PHYSFS_stricmpASCII(const char *str1, const char *str2)
{
    for (;; str1++, str2++)
    {
        char lhs = *str1;
        char rhs = *str2;

        /* Fold uppercase ASCII down to lowercase. */
        if ((lhs >= 'A') && (lhs <= 'Z'))
            lhs = (char) (lhs + 32);
        if ((rhs >= 'A') && (rhs <= 'Z'))
            rhs = (char) (rhs + 32);

        if (lhs != rhs)
            return (lhs < rhs) ? -1 : 1;

        if (lhs == 0)  /* equal, and both at the terminator. */
            return 0;
    } /* for */
} /* __PHYSFS_stricmpASCII */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   505
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   506
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   507
/*
 * Case-insensitive comparison of at most (n) bytes of two null-terminated
 *  strings. Only 'A'..'Z' are treated as having a lowercase form; every
 *  other byte is compared as-is.
 *
 * Returns -1 if str1 sorts before str2, 1 if it sorts after, and 0 if the
 *  strings are equal through (n) bytes or through the terminator, whichever
 *  comes first (so n == 0 yields 0). Bytes are compared as plain char.
 */
int __PHYSFS_strnicmpASCII(const char *str1, const char *str2, PHYSFS_uint32 n)
{
    PHYSFS_uint32 idx;

    for (idx = 0; idx < n; idx++)
    {
        char lhs = str1[idx];
        char rhs = str2[idx];

        /* Fold uppercase ASCII down to lowercase. */
        if ((lhs >= 'A') && (lhs <= 'Z'))
            lhs = (char) (lhs + 32);
        if ((rhs >= 'A') && (rhs <= 'Z'))
            rhs = (char) (rhs + 32);

        if (lhs != rhs)
            return (lhs < rhs) ? -1 : 1;

        if (lhs == 0)  /* equal, and both at the terminator. */
            return 0;
    } /* for */

    return 0;
} /* __PHYSFS_strnicmpASCII */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   525
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   526
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   527
/* end of physfs_unicode.c ... */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   528