/* 日本語文字コード関連関数 (v1.3)
 *
 * 著作権：(c) 1999-2002 Andrew Church <achurch@achurch.org>
 *
 * 他のソフトでの使用は（営利目的か否かに関わらず）完全に自由です。但し、
 * このソースコードの再配布、または他のソフトのソースコードと一緒での配
 * 布の場合は、必ず変更せずに元のままで配布して下さい。変更されたものの
 * 配布は禁じます。
 * また、万一不具合が見つかった場合、上記メールアドレスまでご連絡下さい。
 *
 * Copyright (c) 1999-2002 Andrew Church <achurch@achurch.org>
 *
 * These routines may be used freely in any software, commercial or
 * otherwise.  However, any distribution of this source code, whether
 * independently or as part of another program, must be of the original,
 * unmodified source code; distribution of modified versions of the source
 * code is prohibited.
 * Please report any bugs to the above address.
 */

/*** 注意：このファイルを編集する時、ＪＩＳコードで保存しないで下さい。***
 *** 　　　ソースがコンパイルできなくなる場合があります。              ***/

/*** Notice: When editing this file, do not save it with JIS encoding, ***
 ***         or it will become uncompilable.                           ***/

/*************************************************************************/

#include <stdlib.h>
#include <string.h>
#include "jcode.h"

/*************************************************************************/

/* jcode_local(str)
 * jcode_nlocal(str, max)
 *
 * プログラムが実行されているコンピュータの通常使用されるコードに変換する。
 * DOS/WindowsとMacの場合はSJIS、その他はEUC（いずれもエスケープシーケンス
 * の入っていないもの）。結果はそれぞれjcode_sjis()、jcode_euc()を呼び出し
 * た時と同様。
 *
 * jcode_nlocal()では、結果文字列（終端の'\0'を含む）の最大バイト数を指定で
 * きる。２バイト文字（全角文字）が分割されて文字化けすることはない。
 *
 * 結果文字列は静的バッファに格納されるので、連続で呼び出す場合やprintf()で
 * ２回以上利用する場合は各文字列を別に保存しておく必要がある。
 *
 * Converts the given string into the encoding used by the computer on
 * which the program is running.  This is SJIS on DOS/Windows and Macintosh
 * machines, EUC on others (in all cases the result will not contain escape
 * codes).  The behavior of the function is exactly the same as if
 * jcode_sjis() or jcode_euc() was called.
 *
 * jcode_nlocal() allows the caller to specify the maximum length in bytes
 * (including the trailing '\0') of the result string.  2-byte characters
 * will never be split as a result of reaching the maximum length.
 *
 * The returned string is stored in a static buffer, so it must be saved
 * elsewhere after each call when calling either function multiple times
 * in succession or inside a printf() or other function call.
 */

/* LOCAL_SJIS selects the native encoding at compile time: it is 1 when any
 * of _WIN32, DOS or MAC is defined (Shift-JIS is used) and 0 otherwise
 * (EUC is used). */
#if !defined(_WIN32) && !defined(DOS) && !defined(MAC)
# define LOCAL_SJIS 0
#else
# define LOCAL_SJIS 1
#endif

/* Convert str to the platform's native encoding.
 *
 * Dispatches to jcode_sjis() when LOCAL_SJIS is nonzero and to jcode_euc()
 * otherwise, and returns whatever that routine returns. */
char *jcode_local(const char *str)
{
    char *converted;

#if LOCAL_SJIS
    converted = jcode_sjis(str);
#else
    converted = jcode_euc(str);
#endif
    return converted;
}

/* Convert str to the platform's native encoding (Shift-JIS when LOCAL_SJIS
 * is nonzero, EUC otherwise) and truncate the result so that it occupies at
 * most max bytes including the trailing '\0'.  A two-byte character is
 * never split: if only its first byte would fit, the whole character is
 * dropped.
 *
 * Parameters:
 *   str - NUL-terminated input string; may be NULL.
 *   max - maximum size of the result in bytes, terminator included.  Values
 *         of 1 or less yield an empty string.
 *
 * Returns a pointer to a static buffer that is overwritten by later calls,
 * or NULL if str is NULL, the conversion failed, or memory ran out.  The
 * caller's string is never modified.
 */
char *jcode_nlocal(const char *str, int max)
{
    /* Private scratch copy, used when the converter returns str itself. */
    static char *copy = NULL;
    static size_t copy_size = 0;
    unsigned char *res, *s;

    if (!str)
        return NULL;

    max--;  /* reserve one byte for the trailing '\0' */
#if LOCAL_SJIS
    res = (unsigned char *) jcode_sjis(str);
#else
    res = (unsigned char *) jcode_euc(str);
#endif
    if (!res)
        return NULL;

    if ((const char *) res == str) {
        /* No conversion was needed and the converter handed back the
         * caller's own (const) string.  Truncating it in place would write
         * through a const pointer, so work on a private copy instead. */
        size_t need = strlen(str) + 1;

        if (need > copy_size) {
            char *tmp = (char *) realloc(copy, need);
            if (!tmp)
                return NULL;
            copy = tmp;
            copy_size = need;
        }
        /* memmove: str may be the buffer returned by an earlier call. */
        memmove(copy, str, need);
        res = (unsigned char *) copy;
    }

    s = res;
    while (*s && s - res < max) {
#if LOCAL_SJIS
        if (((*s >= 0x81 && *s <= 0x9F) || (*s >= 0xE0 && *s <= 0xFF))
            && s[1])
#else
        if (*s >= 0xA1 && *s <= 0xFE && s[1])
#endif
        {
            /* Lead byte of a two-byte character: keep it only if the
             * second byte fits as well. */
            if (s + 1 - res >= max)
                break;
            s++;
        }
        s++;
    }
    *s = 0;
    return (char *) res;
}

/*************************************************************************/

/* jcode_jis(str)
 * jcode_euc(str)
 * jcode_sjis(str)
 *
 * 文字列を各コードに変換して返す。jcode_local()と同様、結果文字列は静的バ
 * ッファに格納される。
 *
 * Converts a string to the given encoding and returns it.  As with
 * jcode_local(), the result string is stored in a static buffer.
 */

/* Return str converted to JIS.
 *
 * The encoding of str is guessed with jcode_hantei().  EUC and Shift-JIS
 * input is converted with jcode_euc2jis() / jcode_sjis2jis(); for any
 * other verdict (including JCODE_JIS) the input pointer itself is returned
 * unchanged.  NULL input yields NULL. */
char *jcode_jis(const char *str)
{
    int code;

    if (str == NULL)
        return NULL;

    code = jcode_hantei(str);
    if (code == JCODE_EUC)
        return jcode_euc2jis(str);
    if (code == JCODE_SJIS)
        return jcode_sjis2jis(str);

    /* Already JIS, or undetermined: hand the input back as is. */
    return (char *) str;
}

/* Return str converted to EUC.
 *
 * The encoding of str is guessed with jcode_hantei().  JIS and Shift-JIS
 * input is converted with jcode_jis2euc() / jcode_sjis2euc(); for any
 * other verdict (including JCODE_EUC) the input pointer itself is returned
 * unchanged.  NULL input yields NULL. */
char *jcode_euc(const char *str)
{
    int code;

    if (str == NULL)
        return NULL;

    code = jcode_hantei(str);
    if (code == JCODE_JIS)
        return jcode_jis2euc(str);
    if (code == JCODE_SJIS)
        return jcode_sjis2euc(str);

    /* Already EUC, or undetermined: hand the input back as is. */
    return (char *) str;
}

/* Return str converted to Shift-JIS.
 *
 * The encoding of str is guessed with jcode_hantei().  JIS and EUC input
 * is converted with jcode_jis2sjis() / jcode_euc2sjis(); for any other
 * verdict (including JCODE_SJIS) the input pointer itself is returned
 * unchanged.  NULL input yields NULL. */
char *jcode_sjis(const char *str)
{
    int code;

    if (str == NULL)
        return NULL;

    code = jcode_hantei(str);
    if (code == JCODE_JIS)
        return jcode_jis2sjis(str);
    if (code == JCODE_EUC)
        return jcode_euc2sjis(str);

    /* Already Shift-JIS, or undetermined: hand the input back as is. */
    return (char *) str;
}

/*************************************************************************/
/*************************************************************************/

/* jcode_hantei(str)
 *
 * strが書かれている文字コードを判定する。JCODE_*定数のいずれかを返す。
 *
 * Determines what Japanese encoding is used in the given string.  Returns
 * one of the JCODE_* constants.
 */

/* Guess which Japanese encoding str is written in.
 *
 * Checks are applied in this order:
 *   1. the escape sequence ESC $ B appears anywhere   -> JCODE_JIS
 *   2. any byte in the range 0x81-0x9F appears         -> JCODE_SJIS
 *   3. any byte in the range 0xA1-0xFE appears         -> JCODE_EUC
 *   4. none of the above, or str is NULL               -> JCODE_UNKNOWN
 */
int jcode_hantei(const char *str)
{
    const unsigned char *p;
    int saw_euc_byte = 0;

    if (str == NULL)
        return JCODE_UNKNOWN;
    if (strstr(str, "\033$B") != NULL)
        return JCODE_JIS;

    for (p = (const unsigned char *) str; *p != '\0'; p++) {
        /* A Shift-JIS-only byte anywhere in the string wins outright. */
        if (*p >= 0x81 && *p <= 0x9F)
            return JCODE_SJIS;
        /* Remember EUC candidates, but keep scanning for the above. */
        if (*p >= 0xA1 && *p <= 0xFE)
            saw_euc_byte = 1;
    }
    return saw_euc_byte ? JCODE_EUC : JCODE_UNKNOWN;
}

/*************************************************************************/
/*************************************************************************/

/* jcode_xxx2yyy(str)
 *
 * 各コード間に変換する。jcode_hantei()が失敗した場合、これらを直接呼び出し
 * 変換を行うことは出来るが、その場合はjcode_hanteiを直した方が望ましい。
 *
 * Converts between two particular encodings.  If jcode_hantei() fails,
 * these functions can be used to convert the string directly, but it is
 * preferable to fix jcode_hantei() instead.
 */

/* Shared output buffer used by the conversion routines in this file. */
static char *result = NULL;     /* heap buffer holding the converted text */
static int result_size = 0;     /* allocated size of result, in bytes */
static int result_len = 0;      /* number of bytes currently stored */

/* Lookup tables for the 0x40 single-byte codes 0xA0-0xDF (half-width
 * katakana and punctuation), indexed by (code - 0xA0).  Each code maps to
 * a two-byte character: hankana_map_2[] supplies the first byte and
 * hankana_map[] the second byte. */
static unsigned char hankana_map[0x40] = {
    /* 0xA0-0xA7: (middle dot), full stop, left corner bracket,
     *            right corner bracket, comma, middle dot, WO, small A */
    0xA6, 0xA3, 0xD6, 0xD7, 0xA2, 0xA6, 0xF2, 0xA1,
    /* 0xA8-0xAF: small I, small U, small E, small O,
     *            small YA, small YU, small YO, small TSU */
    0xA3, 0xA5, 0xA7, 0xA9, 0xE3, 0xE5, 0xE7, 0xC3,
    /* 0xB0-0xB7: prolonged sound mark, A, I, U, E, O, KA, KI */
    0xBC, 0xA2, 0xA4, 0xA6, 0xA8, 0xAA, 0xAB, 0xAD,
    /* 0xB8-0xBF: KU, KE, KO, SA, SHI, SU, SE, SO */
    0xAF, 0xB1, 0xB3, 0xB5, 0xB7, 0xB9, 0xBB, 0xBD,
    /* 0xC0-0xC7: TA, CHI, TSU, TE, TO, NA, NI, NU */
    0xBF, 0xC1, 0xC4, 0xC6, 0xC8, 0xCA, 0xCB, 0xCC,
    /* 0xC8-0xCF: NE, NO, HA, HI, FU, HE, HO, MA */
    0xCD, 0xCE, 0xCF, 0xD2, 0xD5, 0xD8, 0xDB, 0xDE,
    /* 0xD0-0xD7: MI, MU, ME, MO, YA, YU, YO, RA */
    0xDF, 0xE0, 0xE1, 0xE2, 0xE4, 0xE6, 0xE8, 0xE9,
    /* 0xD8-0xDF: RI, RU, RE, RO, WA, N,
     *            voiced sound mark, semi-voiced sound mark */
    0xEA, 0xEB, 0xEC, 0xED, 0xEF, 0xF3, 0xAB, 0xAC,
};

/* First byte for each entry above: 0xA1 for the punctuation and sound
 * marks, 0xA5 for the katakana. */
static unsigned char hankana_map_2[0x40] = {
    /* 0xA0-0xA7 */
    0xA1, 0xA1, 0xA1, 0xA1, 0xA1, 0xA1, 0xA5, 0xA5,
    /* 0xA8-0xAF */
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    /* 0xB0-0xB7 */
    0xA1, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    /* 0xB8-0xBF */
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    /* 0xC0-0xC7 */
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    /* 0xC8-0xCF */
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    /* 0xD0-0xD7 */
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    /* 0xD8-0xDF */
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA1, 0xA1,
};

/*************************************************************************/

/* 結果文字列に文字を追加し、必要な場合にバッファサイズを増やす。
 *
 * Add a character to the result string, lengthening the result buffer if
 * necessary.
 */

static void addchar(int c)
{
    if (result_len+2 >= result_size) {
	int new_size;
	char *new_result;

	if (result_size < 128)
	    new_size = 256;
	else
	    new_size = result_size * 2;
	new_result = (char *) malloc(new_size);
	if (!new_result)
	    return;
	if (result_len)
	    memcpy(new_result, result, result_len);
	if (result)
	    free(result);
	result = new_result;
	result_size = new_size;
    }
    result[result_len++] = c;
}

/*************************************************************************/

/* Convert a JIS (escape-sequence based) string to EUC.
 *
 * A three-byte sequence ESC $ x switches to two-byte mode and ESC ( x
 * switches back to single-byte mode; the escape sequences themselves are
 * not copied.  In two-byte mode each pair of bytes is emitted with the high
 * bit set; everything else is copied unchanged.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_jis2euc(const char *str)
{
    int twobyte = 0;    /* nonzero while in two-byte mode */

    if (!str)
        return NULL;

    result_len = 0;
    while (*str) {
        if (*str == 0x1B && str[1] == '$' && str[2]) {
            twobyte = 1;
            str += 3;
        } else if (*str == 0x1B && str[1] == '(' && str[2]) {
            twobyte = 0;
            str += 3;
        } else if (twobyte && str[1]) {
            addchar(*str++ | 0x80);
            addchar(*str++ | 0x80);
        } else {
            addchar(*str++);
        }
    }
    if (!result) {
        /* Nothing has ever been stored (e.g. empty input on first use):
         * force an allocation so that "" can be returned instead of
         * writing through a NULL pointer. */
        addchar(0);
        if (!result)
            return NULL;
        result_len = 0;
    }
    result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert an EUC string to JIS.
 *
 * A byte in the range 0xA1-0xFE that is followed by another byte is treated
 * as the start of a two-byte character: ESC $ B is emitted on entering a
 * run of such characters and both bytes are written with the high bit
 * cleared.  ESC ( B is emitted before the next single-byte character and at
 * the end of the string if a two-byte run is still open.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_euc2jis(const char *str)
{
    int twobyte = 0;    /* nonzero while the output is in two-byte mode */

    if (!str)
        return NULL;

    result_len = 0;
    while (*str) {
        if ((unsigned char)*str >= 0xA1 && (unsigned char)*str <= 0xFE
            && str[1]) {
            if (!twobyte) {
                addchar(0x1B);
                addchar('$');
                addchar('B');
                twobyte = 1;
            }
            addchar(*str++ & 0x7F);
            addchar(*str++ & 0x7F);
        } else {
            if (twobyte) {
                addchar(0x1B);
                addchar('(');
                addchar('B');
                twobyte = 0;
            }
            addchar(*str++);
        }
    }
    if (twobyte) {
        /* Leave the output in single-byte mode. */
        addchar(0x1B);
        addchar('(');
        addchar('B');
    }
    if (!result) {
        /* Nothing has ever been stored (e.g. empty input on first use):
         * force an allocation so that "" can be returned instead of
         * writing through a NULL pointer. */
        addchar(0);
        if (!result)
            return NULL;
        result_len = 0;
    }
    result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert a Shift-JIS string to EUC.
 *
 * - A lead byte (0x81-0x9F or 0xE0-0xFF) followed by a byte >= 0x40 is
 *   remapped arithmetically to the corresponding two-byte EUC code.
 * - A byte in 0xA0-0xDF (half-width katakana range) is replaced by the
 *   two-byte character given by hankana_map_2[] / hankana_map[].
 * - Any other byte is copied unchanged.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_sjis2euc(const char *str)
{
    if (!str)
        return NULL;

    result_len = 0;
    while (*str) {
        int a = (unsigned char)*str, b = (unsigned char)str[1];
        if (((a >= 0x81 && a <= 0x9F) || (a >= 0xE0 && a <= 0xFF))
            && b >= 0x40) {
            /* Close the gap between the two lead-byte ranges, then make
             * the lead byte zero-based. */
            if (a >= 0xE0)
                a -= 0x40;
            a -= 0x81;
            /* 0x7F is never a trail byte; close that gap too. */
            if (b >= 0x80)
                b--;
            b -= 0x40;
            /* Each lead byte covers two rows of 0x5E (94) cells. */
            addchar(0xA1 + a*2 + b/0x5E);
            addchar(0xA1 + b%0x5E);
            str += 2;
        } else if (a >= 0xA0 && a <= 0xDF) {
            addchar(hankana_map_2[a-0xA0]);
            addchar(hankana_map[a-0xA0]);
            str++;
        } else {
            addchar(*str++);
        }
    }
    if (!result) {
        /* Nothing has ever been stored (e.g. empty input on first use):
         * force an allocation so that "" can be returned instead of
         * writing through a NULL pointer. */
        addchar(0);
        if (!result)
            return NULL;
        result_len = 0;
    }
    result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert an EUC string to Shift-JIS.
 *
 * Two consecutive bytes that are both in the range 0xA1-0xFE are remapped
 * arithmetically to a two-byte Shift-JIS code; any other byte is copied
 * unchanged.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_euc2sjis(const char *str)
{
    if (!str)
        return NULL;

    result_len = 0;
    while (*str) {
        int a = (unsigned char)*str, b = (unsigned char)str[1], c;
        if ((a >= 0xA1 && a <= 0xFE) && (b >= 0xA1 && b <= 0xFE)) {
            a -= 0xA1;      /* zero-based row */
            b -= 0xA1;      /* zero-based cell */
            /* Step over the unused trail byte 0x7F; odd rows occupy the
             * upper half of the trail-byte range, which starts one
             * position later. */
            if (b+0x40 >= 0x7F || (a & 1))
                b++;
            /* Two rows share one lead byte; lead bytes skip 0xA0-0xDF. */
            c = 0x81 + a/2;
            if (c >= 0xA0)
                c += 0x40;
            addchar(c);
            addchar(0x40 + (a%2)*0x5E + b);
            str += 2;
        } else {
            addchar(*str++);
        }
    }
    if (!result) {
        /* Nothing has ever been stored (e.g. empty input on first use):
         * force an allocation so that "" can be returned instead of
         * writing through a NULL pointer. */
        addchar(0);
        if (!result)
            return NULL;
        result_len = 0;
    }
    result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert a JIS (escape-sequence based) string to Shift-JIS.
 *
 * A three-byte sequence ESC $ x switches to two-byte mode and ESC ( x
 * switches back to single-byte mode; the escape sequences themselves are
 * not copied.  In two-byte mode each pair of bytes is remapped
 * arithmetically to a two-byte Shift-JIS code; everything else is copied
 * unchanged.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_jis2sjis(const char *str)
{
    int twobyte = 0, a, b, c;

    if (!str)
        return NULL;

    result_len = 0;
    while (*str) {
        if (*str == 0x1B && str[1] == '$' && str[2]) {
            twobyte = 1;
            str += 3;
        } else if (*str == 0x1B && str[1] == '(' && str[2]) {
            twobyte = 0;
            str += 3;
        } else if (twobyte && str[1]) {
            a = (unsigned char)str[0] - 0x21;   /* zero-based row */
            b = (unsigned char)str[1] - 0x21;   /* zero-based cell */
            /* Step over the unused trail byte 0x7F; odd rows occupy the
             * upper half of the trail-byte range, which starts one
             * position later. */
            if (b+0x40 >= 0x7F || (a & 1))
                b++;
            /* Two rows share one lead byte; lead bytes skip 0xA0-0xDF. */
            c = 0x81 + a/2;
            if (c >= 0xA0)
                c += 0x40;
            addchar(c);
            addchar(0x40 + (a%2)*0x5E + b);
            str += 2;
        } else {
            addchar(*str++);
        }
    }
    if (!result) {
        /* Nothing has ever been stored (e.g. empty input on first use):
         * force an allocation so that "" can be returned instead of
         * writing through a NULL pointer. */
        addchar(0);
        if (!result)
            return NULL;
        result_len = 0;
    }
    result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert a Shift-JIS string to JIS.
 *
 * - A lead byte (0x81-0x9F or 0xE0-0xFF) followed by a byte >= 0x40 is
 *   remapped arithmetically to the corresponding two-byte JIS code.
 * - A byte in 0xA0-0xDF (half-width katakana range) is replaced by the
 *   two-byte character given by hankana_map_2[] / hankana_map[], with the
 *   high bit of each byte cleared.
 * - Any other byte is copied unchanged.
 * ESC $ B is emitted on entering a run of two-byte characters, and ESC ( B
 * before the next single-byte character and at the end of the string if a
 * two-byte run is still open.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_sjis2jis(const char *str)
{
    int twobyte = 0;    /* nonzero while the output is in two-byte mode */

    if (!str)
        return NULL;

    result_len = 0;
    while (*str) {
        int a = (unsigned char)*str, b = (unsigned char)str[1];
        /* Require a plausible trail byte (>= 0x40), as jcode_sjis2euc()
         * does; a smaller value would make the offset below negative. */
        if (((a >= 0x81 && a <= 0x9F) || (a >= 0xE0 && a <= 0xFF))
            && b >= 0x40) {
            if (!twobyte) {
                addchar(0x1B);
                addchar('$');
                addchar('B');
                twobyte = 1;
            }
            /* Close the gap between the two lead-byte ranges, then make
             * the lead byte zero-based. */
            if (a >= 0xE0)
                a -= 0x40;
            a -= 0x81;
            /* 0x7F is never a trail byte; close that gap too. */
            if (b >= 0x80)
                b--;
            b -= 0x40;
            /* Each lead byte covers two rows of 0x5E (94) cells. */
            addchar(0x21 + a*2 + b/0x5E);
            addchar(0x21 + b%0x5E);
            str += 2;   /* consume both input bytes */
        } else if (a >= 0xA0 && a <= 0xDF) {
            if (!twobyte) {
                addchar(0x1B);
                addchar('$');
                addchar('B');
                twobyte = 1;
            }
            addchar(hankana_map_2[a-0xA0] & 0x7F);
            addchar(hankana_map[a-0xA0] & 0x7F);
            str++;      /* consume the single input byte */
        } else {
            if (twobyte) {
                addchar(0x1B);
                addchar('(');
                addchar('B');
                twobyte = 0;
            }
            addchar(*str++);
        }
    }
    if (twobyte) {
        /* Leave the output in single-byte mode. */
        addchar(0x1B);
        addchar('(');
        addchar('B');
    }
    if (!result) {
        /* Nothing has ever been stored (e.g. empty input on first use):
         * force an allocation so that "" can be returned instead of
         * writing through a NULL pointer. */
        addchar(0);
        if (!result)
            return NULL;
        result_len = 0;
    }
    result[result_len] = 0;
    return result;
}

/*************************************************************************/