/* 日本語文字コード関連関数 (v1.4)
 *
 * 著作権：(c) 1999-2004 Andrew Church <achurch@achurch.org>
 *
 * 他のソフトでの使用は（営利目的か否かに関わらず）完全に自由です。但し、
 * このソースコードの再配布、または他のソフトのソースコードと一緒での配
 * 布の場合は、必ず変更せずに元のままで配布して下さい。変更されたものの
 * 配布は禁じます。
 * また、万一不具合が見つかった場合、上記メールアドレスまでご連絡下さい。
 *
 * Copyright (c) 1999-2004 Andrew Church <achurch@achurch.org>
 *
 * These routines may be used freely in any software, commercial or
 * otherwise.  However, any distribution of this source code, whether
 * independently or as part of another program, must be of the original,
 * unmodified source code; distribution of modified versions of the source
 * code is prohibited.
 * Please report any bugs to the above address.
 */

/*** 注意：このファイルを編集する時、ＪＩＳコードで保存しないで下さい。***
 *** 　　　ソースがコンパイルできなくなる場合があります。              ***/

/*** Notice: When editing this file, do not save it with JIS encoding, ***
 ***         or it will become uncompilable.                           ***/

/*************************************************************************/

#include <stdlib.h>
#include <string.h>
#include "jcode.h"

/*************************************************************************/

/* jcode_local(str)
 * jcode_nlocal(str, max)
 *
 * プログラムが実行されているコンピュータの通常使用されるコードに変換する。
 * LANG環境変数の値が「ja_JP.EUC」または「ja_JP.SJIS」に設定されている場合
 * はそれぞれEUC、SJISコードに変換する。LANGの値が未設定または上記のいずれ
 * でもない場合、OSTYPE環境変数を確認し、その値が「linux」または「darwin」
 * であればEUC、「cygwin」であればSJISに変換する。OSTYPEでも決まらない場合
 * は、コンパイルした環境がDOS/WindowsまたはMacの場合はSJIS、その他の場合は
 * EUCに変換する。
 *
 * jcode_local()の結果はそれぞれjcode_sjis()、jcode_euc()を呼び出した時と同
 * 様。エスケープシーケンスの挿入に関わる問題を避け、かつ以前のバージョンと
 * の互換性を保つため、LANGの設定に関わらずJISコードに変換することはない。
 * JISへの変換を望む場合はjcode_jis()を直接呼び出さなければならない。
 *
 * jcode_nlocal()では、結果文字列（終端の'\0'を含む）の最大バイト数を指定で
 * きる。２バイト文字（全角文字）が分割されて文字化けすることはない。
 *
 * 結果文字列は静的バッファに格納されるので、連続で呼び出す場合やprintf()で
 * ２回以上利用する場合は各文字列を別に保存しておく必要がある。
 *
 * Converts the given string into the encoding used by the computer on
 * which the program is running.  This is determined by the following tests,
 * in order of precedence (dollar signs represent environment variables;
 * quotes are for illustration only):
 *     - if $LANG is set to "ja_JP.EUC", then EUC
 *     - if $LANG is set to "ja_JP.SJIS", then SJIS
 *     - if $OSTYPE is set to either "linux" or "darwin", then EUC
 *     - if $OSTYPE is set to "cygwin", then SJIS
 *     - if the source was compiled on a DOS/Windows/Mac platform, then SJIS
 *     - otherwise, EUC
 *
 * The behavior of jcode_local() is exactly the same as if jcode_sjis() or
 * jcode_euc() was called.  Note that in order to avoid potential
 * difficulties with strings containing escape sequences and to maintain
 * compatibility with previous versions, this function will not convert
 * into JIS regardless of the setting of the LANG variable.  To convert
 * into JIS, call the jcode_jis() function directly.
 *
 * jcode_nlocal() allows the caller to specify the maximum length in bytes
 * (including the trailing '\0') of the result string.  2-byte characters
 * will never be split as a result of reaching the maximum length.
 *
 * The returned string is stored in a static buffer, so it must be saved
 * elsewhere after each call when calling either function multiple times
 * in succession or inside a printf() or other function call.
 */

#define LOCAL_EUC	0
#define LOCAL_SJIS	1

/* Decide which encoding jcode_local()/jcode_nlocal() should produce.
 *
 * Checks, in order of precedence:
 *     - $LANG equal to "ja_JP.EUC"        -> LOCAL_EUC
 *     - $LANG equal to "ja_JP.SJIS"       -> LOCAL_SJIS
 *     - $OSTYPE starting with "linux"     -> LOCAL_EUC
 *     - $OSTYPE equal to "darwin"         -> LOCAL_EUC
 *     - $OSTYPE equal to "cygwin"         -> LOCAL_SJIS
 *     - $OS equal to "Windows_NT"         -> LOCAL_SJIS
 *     - compile-time platform (_WIN32, DOS or MAC defined -> LOCAL_SJIS,
 *       anything else -> LOCAL_EUC)
 *
 * Returns LOCAL_EUC or LOCAL_SJIS.
 */
static int get_local_code(void)
{
    const char *LANG = getenv("LANG");
    const char *OSTYPE = getenv("OSTYPE");
    /* "Windows_NT" is the value of the OS variable, not of OSTYPE. */
    const char *OS = getenv("OS");

    if (LANG && strcmp(LANG, "ja_JP.EUC") == 0)
	return LOCAL_EUC;
    if (LANG && strcmp(LANG, "ja_JP.SJIS") == 0)
	return LOCAL_SJIS;
    /* Prefix match so that values such as "linux-gnu" are accepted too. */
    if (OSTYPE && strncmp(OSTYPE, "linux", 5) == 0)
	return LOCAL_EUC;
    if (OSTYPE && strcmp(OSTYPE, "darwin") == 0)
	return LOCAL_EUC;
    if (OSTYPE && strcmp(OSTYPE, "cygwin") == 0)
	return LOCAL_SJIS;
    if (OS && strcmp(OS, "Windows_NT") == 0)
	return LOCAL_SJIS;
#if defined(_WIN32) || defined(DOS) || defined(MAC)
    return LOCAL_SJIS;
#else
    return LOCAL_EUC;
#endif
}

/* Convert str to the local encoding chosen by get_local_code(): EUC when
 * it reports LOCAL_EUC, Shift_JIS otherwise.  The return value is whatever
 * jcode_euc() / jcode_sjis() returns for str.
 */
char *jcode_local(const char *str)
{
    return (get_local_code() == LOCAL_EUC) ? jcode_euc(str) : jcode_sjis(str);
}

/* Convert str to the local encoding and truncate the result to at most
 * max bytes, counting the trailing '\0'.  A two-byte character is never
 * split: if only its first byte would fit, the whole character is dropped.
 *
 * Returns the (possibly truncated) converted string, or NULL when the
 * conversion itself returned NULL (e.g. str is NULL).
 *
 * Note: jcode_euc()/jcode_sjis() return str itself when no conversion is
 * needed, so truncation may then be written into the caller's buffer.  The
 * terminator is only written when something actually has to be cut off.
 */
char *jcode_nlocal(const char *str, int max)
{
    int local_code = get_local_code();
    unsigned char *res, *s;

    if (local_code == LOCAL_EUC)
	res = (unsigned char *) jcode_euc(str);
    else
	res = (unsigned char *) jcode_sjis(str);
    if (!res)
	return NULL;

    max--;  /* reserve one byte for the trailing '\0' */
    s = res;
    /* Walk the string with s; res must keep pointing at its start. */
    while (*s && s-res < max) {
	int lead = (local_code == LOCAL_EUC)
	    ? (*s >= 0xA1 && *s <= 0xFE)
	    : ((*s >= 0x81 && *s <= 0x9F) || (*s >= 0xE0));
	/* A lead byte directly followed by the terminator is treated as a
	 * single byte so that s never steps past the end of the string. */
	if (lead && s[1]) {
	    if (s+1-res >= max)
		break;
	    s++;
	}
	s++;
    }
    if (*s)
	*s = 0;
    return (char *) res;
}

/*************************************************************************/

/* jcode_jis(str)
 * jcode_euc(str)
 * jcode_sjis(str)
 *
 * 文字列を各コードに変換して返す。jcode_local()と同様、結果文字列は静的バ
 * ッファに格納される。
 *
 * Converts a string to the given encoding and returns it.  As with
 * jcode_local(), the result string is stored in a static buffer.
 */

/* Convert str to JIS.  The source encoding is guessed with jcode_hantei();
 * EUC and SJIS input is converted, anything else (including input already
 * detected as JIS) is returned unchanged.  Returns NULL for a NULL str.
 */
char *jcode_jis(const char *str)
{
    int code;

    if (str == NULL)
	return NULL;

    code = jcode_hantei(str);
    if (code == JCODE_EUC)
	return jcode_euc2jis(str);
    if (code == JCODE_SJIS)
	return jcode_sjis2jis(str);
    return (char *) str;
}

/* Convert str to EUC.  The source encoding is guessed with jcode_hantei();
 * JIS and SJIS input is converted, anything else (including input already
 * detected as EUC) is returned unchanged.  Returns NULL for a NULL str.
 */
char *jcode_euc(const char *str)
{
    int code;

    if (str == NULL)
	return NULL;

    code = jcode_hantei(str);
    if (code == JCODE_JIS)
	return jcode_jis2euc(str);
    if (code == JCODE_SJIS)
	return jcode_sjis2euc(str);
    return (char *) str;
}

/* Convert str to SJIS.  The source encoding is guessed with jcode_hantei();
 * JIS and EUC input is converted, anything else (including input already
 * detected as SJIS) is returned unchanged.  Returns NULL for a NULL str.
 */
char *jcode_sjis(const char *str)
{
    int code;

    if (str == NULL)
	return NULL;

    code = jcode_hantei(str);
    if (code == JCODE_JIS)
	return jcode_jis2sjis(str);
    if (code == JCODE_EUC)
	return jcode_euc2sjis(str);
    return (char *) str;
}

/*************************************************************************/
/*************************************************************************/

/* jcode_hantei(str)
 *
 * strが書かれている文字コードを判定する。JCODE_*定数のいずれかを返す。
 *
 * Determines what Japanese encoding is used in the given string.  Returns
 * one of the JCODE_* constants.
 */

/* Guess the encoding of str.  Tests, in order of precedence:
 *     - the escape sequence ESC $ B appears anywhere     -> JCODE_JIS
 *     - any byte in the range 0x81-0x9F appears          -> JCODE_SJIS
 *     - any byte in the range 0xA1-0xFE appears          -> JCODE_EUC
 *     - otherwise (or str is NULL)                       -> JCODE_UNKNOWN
 */
int jcode_hantei(const char *str)
{
    const unsigned char *p;
    int seen_euc = 0;

    if (str == NULL)
	return JCODE_UNKNOWN;
    if (strstr(str, "\033$B") != NULL)
	return JCODE_JIS;

    /* One pass: an SJIS-only byte anywhere wins over an EUC-range byte,
     * so the EUC verdict is only given once the whole string was seen. */
    for (p = (const unsigned char *) str; *p != 0; p++) {
	if (*p >= 0x81 && *p <= 0x9F)
	    return JCODE_SJIS;
	if (*p >= 0xA1 && *p <= 0xFE)
	    seen_euc = 1;
    }
    return seen_euc ? JCODE_EUC : JCODE_UNKNOWN;
}

/*************************************************************************/
/*************************************************************************/

/* jcode_xxx2yyy(str)
 *
 * 各コード間に変換する。jcode_hantei()が失敗した場合、これらを直接呼び出し
 * 変換を行うことは出来るが、その場合はjcode_hanteiを直した方が望ましい。
 *
 * Converts between two particular encodings.  If jcode_hantei() fails,
 * these functions can be used to convert the string directly, but it is
 * preferable to fix jcode_hantei() instead.
 */

/* Output buffer shared by all jcode_xxx2yyy() converters:
 *     result       the buffer itself (NULL until first allocated)
 *     result_size  number of bytes allocated for result
 *     result_len   number of bytes currently stored in result
 */
static char *result = NULL;
static int result_size = 0;
static int result_len = 0;

/* Half-width kana -> full-width kana (EUC) conversion data.
 *
 * Every table below has 0x40 entries and is indexed by (byte - 0xA0), where
 * byte is a half-width kana byte in the range 0xA0-0xDF.  For a given index,
 * hankana_map_2[] supplies the first byte of the two-byte EUC character and
 * hankana_map[] supplies the second byte.
 */

/* Second EUC byte; the comment above each row shows the characters that
 * the row's entries stand for. */
static unsigned char hankana_map[0x40] = {
/*  "・", "。", "「", "」", "、", "・", "ヲ", "ァ", */
    0xA6, 0xA3, 0xD6, 0xD7, 0xA2, 0xA6, 0xF2, 0xA1,
/*  "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", */
    0xA3, 0xA5, 0xA7, 0xA9, 0xE3, 0xE5, 0xE7, 0xC3,
/*  "ー", "ア", "イ", "ウ", "エ", "オ", "カ", "キ", */
    0xBC, 0xA2, 0xA4, 0xA6, 0xA8, 0xAA, 0xAB, 0xAD,
/*  "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", */
    0xAF, 0xB1, 0xB3, 0xB5, 0xB7, 0xB9, 0xBB, 0xBD,
/*  "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", */
    0xBF, 0xC1, 0xC4, 0xC6, 0xC8, 0xCA, 0xCB, 0xCC,
/*  "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", */
    0xCD, 0xCE, 0xCF, 0xD2, 0xD5, 0xD8, 0xDB, 0xDE,
/*  "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", */
    0xDF, 0xE0, 0xE1, 0xE2, 0xE4, 0xE6, 0xE8, 0xE9,
/*  "リ", "ル", "レ", "ロ", "ワ", "ン", "゛", "゜", */
    0xEA, 0xEB, 0xEC, 0xED, 0xEF, 0xF3, 0xAB, 0xAC,
};

/* First EUC byte: 0xA1 for the punctuation/symbol entries, 0xA5 for the
 * katakana entries. */
static unsigned char hankana_map_2[0x40] = {
    0xA1, 0xA1, 0xA1, 0xA1, 0xA1, 0xA1, 0xA5, 0xA5,
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    0xA1, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5,
    0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA1, 0xA1,
};

/* Half-width kana followed by a voiced (0xDE) or semi-voiced (0xDF) sound
 * mark -> single full-width character.
 * Example: 0xB8 0xDE (KU + voiced mark) -> 0xA5B0 (GU)
 * All results are katakana, so the first EUC byte is always 0xA5; the
 * tables hold the second byte only.  A zero entry means the kana has no
 * combined form, and the mark is then converted as a separate character.
 */

/* Second EUC byte of the voiced form, or 0 if there is none. */
static unsigned char hankana_daku_map[0x40] = {
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0, 0xF4,    0,    0, 0xAC, 0xAE,
    0xB0, 0xB2, 0xB4, 0xB6, 0xB8, 0xBA, 0xBC, 0xBE,
    0xC0, 0xC2, 0xC5, 0xC7, 0xC9,    0,    0,    0,
       0,    0, 0xD0, 0xD3, 0xD6, 0xD9, 0xDC,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
};

/* Second EUC byte of the semi-voiced form, or 0 if there is none. */
static unsigned char hankana_handaku_map[0x40] = {
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0, 0xD1, 0xD4, 0xD7, 0xDA, 0xDD,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,
};

/*************************************************************************/

/* Add a character to the result string, lengthening the result buffer if
 * necessary.
 *
 * The buffer is grown whenever fewer than two spare bytes would remain, so
 * that after any successful call there is still room for the caller to
 * store a terminating '\0' at result[result_len].  The first allocation is
 * 256 bytes; after that the size doubles.
 *
 * If the buffer cannot be grown, the character is silently dropped and the
 * existing buffer (if any) is left untouched.
 */

static void addchar(int c)
{
    if (result_len+2 >= result_size) {
	int new_size;
	char *new_result;

	if (result_size < 128)
	    new_size = 256;
	else
	    new_size = result_size * 2;
	/* realloc(NULL, n) acts like malloc(n), and on failure the old
	 * block stays valid, so result is only replaced on success. */
	new_result = realloc(result, new_size);
	if (!new_result)
	    return;
	result = new_result;
	result_size = new_size;
    }
    result[result_len++] = c;
}

/*************************************************************************/

/* Convert a JIS string to EUC.
 *
 * A three-byte sequence ESC $ x switches to two-byte mode and ESC ( x
 * switches back to single-byte mode; the escape sequences themselves are
 * not copied.  In two-byte mode each pair of bytes is copied with the high
 * bit set; everything else is copied unchanged.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_jis2euc(const char *str)
{
    int twobyte = 0;

    if (!str)
	return NULL;
    result_len = 0;
    while (*str) {
	if (*str == 0x1B && str[1] == '$' && str[2]) {
	    twobyte = 1;
	    str += 3;
	} else if (*str == 0x1B && str[1] == '(' && str[2]) {
	    twobyte = 0;
	    str += 3;
	} else if (twobyte && str[1]) {
	    addchar(*str++ | 0x80);
	    addchar(*str++ | 0x80);
	} else {
	    addchar(*str++);
	}
    }
    /* With empty input and no earlier conversion, result is still NULL;
     * let addchar() allocate it and store the terminator. */
    if (!result)
	addchar(0);
    else
	result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert an EUC string to JIS.
 *
 * A byte in the range 0xA1-0xFE followed by a non-NUL byte is treated as a
 * two-byte character and emitted with the high bits cleared.  ESC $ B is
 * inserted before the first character of a two-byte run and ESC ( B when
 * returning to single-byte text or at the end of the string.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_euc2jis(const char *str)
{
    int twobyte = 0;

    if (!str)
	return NULL;
    result_len = 0;
    while (*str) {
	if ((unsigned char)*str >= 0xA1 && (unsigned char)*str <= 0xFE && str[1]) {
	    if (!twobyte) {
		addchar(0x1B);
		addchar('$');
		addchar('B');
		twobyte = 1;
	    }
	    addchar(*str++ & 0x7F);
	    addchar(*str++ & 0x7F);
	} else {
	    if (twobyte) {
		addchar(0x1B);
		addchar('(');
		addchar('B');
		twobyte = 0;
	    }
	    addchar(*str++);
	}
    }
    if (twobyte) {
	addchar(0x1B);
	addchar('(');
	addchar('B');
    }
    /* With empty input and no earlier conversion, result is still NULL;
     * let addchar() allocate it and store the terminator. */
    if (!result)
	addchar(0);
    else
	result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert an SJIS string to EUC.
 *
 * - A lead byte (0x81-0x9F or 0xE0-0xFF) followed by a byte >= 0x40 is
 *   converted arithmetically to the corresponding two-byte EUC character.
 * - A half-width kana byte (0xA0-0xDF) becomes a full-width character; if
 *   it is followed by a voiced (0xDE) or semi-voiced (0xDF) mark and a
 *   combined form exists, both bytes become one full-width character.
 * - Any other byte is copied unchanged.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_sjis2euc(const char *str)
{
    if (!str)
	return NULL;
    result_len = 0;
    while (*str) {
	int a = (unsigned char)*str, b = (unsigned char)str[1];
	if (((a >= 0x81 && a <= 0x9F) || (a >= 0xE0 && a <= 0xFF)) && b >= 0x40) {
	    /* Fold the two lead-byte ranges into one zero-based index. */
	    if (a >= 0xE0)
		a -= 0x40;
	    a -= 0x81;
	    /* Trail bytes skip 0x7F; close the gap, then make zero-based. */
	    if (b >= 0x80)
		b--;
	    b -= 0x40;
	    /* Each lead byte covers two rows of 0x5E (94) characters. */
	    addchar(0xA1 + a*2 + b/0x5E);
	    addchar(0xA1 + b%0x5E);
	    str += 2;
	} else if (a >= 0xA0 && a <= 0xDF) {
	    if (b == 0xDE && hankana_daku_map[a-0xA0]) {
		addchar(0xA5);
		addchar(hankana_daku_map[a-0xA0]);
		str += 2;
	    } else if (b == 0xDF && hankana_handaku_map[a-0xA0]) {
		addchar(0xA5);
		addchar(hankana_handaku_map[a-0xA0]);
		str += 2;
	    } else {
		addchar(hankana_map_2[a-0xA0]);
		addchar(hankana_map[a-0xA0]);
		str++;
	    }
	} else {
	    addchar(*str++);
	}
    }
    /* With empty input and no earlier conversion, result is still NULL;
     * let addchar() allocate it and store the terminator. */
    if (!result)
	addchar(0);
    else
	result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert an EUC string to SJIS.
 *
 * Two consecutive bytes that are both in the range 0xA1-0xFE are converted
 * arithmetically to a two-byte SJIS character; any other byte is copied
 * unchanged.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_euc2sjis(const char *str)
{
    if (!str)
	return NULL;
    result_len = 0;
    while (*str) {
	int a = (unsigned char)*str, b = (unsigned char)str[1], c;
	if ((a >= 0xA1 && a <= 0xFE) && (b >= 0xA1 && b <= 0xFE)) {
	    a -= 0xA1;
	    b -= 0xA1;
	    /* Even rows: step over trail byte 0x7F.  Odd rows: the second
	     * half of the trail range starts one position later. */
	    if (b+0x40 >= 0x7F || (a & 1))
		b++;
	    /* Two rows share one lead byte; skip the 0xA0-0xDF gap. */
	    c = 0x81 + a/2;
	    if (c >= 0xA0)
		c += 0x40;
	    addchar(c);
	    addchar(0x40 + (a%2)*0x5E + b);
	    str += 2;
	} else {
	    addchar(*str++);
	}
    }
    /* With empty input and no earlier conversion, result is still NULL;
     * let addchar() allocate it and store the terminator. */
    if (!result)
	addchar(0);
    else
	result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert a JIS string to SJIS.
 *
 * A three-byte sequence ESC $ x switches to two-byte mode and ESC ( x
 * switches back to single-byte mode; the escape sequences themselves are
 * not copied.  In two-byte mode each pair of bytes is converted
 * arithmetically to a two-byte SJIS character; everything else is copied
 * unchanged.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_jis2sjis(const char *str)
{
    int twobyte = 0, a, b, c;

    if (!str)
	return NULL;
    result_len = 0;
    while (*str) {
	if (*str == 0x1B && str[1] == '$' && str[2]) {
	    twobyte = 1;
	    str += 3;
	} else if (*str == 0x1B && str[1] == '(' && str[2]) {
	    twobyte = 0;
	    str += 3;
	} else if (twobyte && str[1]) {
	    a = (unsigned char)str[0] - 0x21;
	    b = (unsigned char)str[1] - 0x21;
	    /* Even rows: step over trail byte 0x7F.  Odd rows: the second
	     * half of the trail range starts one position later. */
	    if (b+0x40 >= 0x7F || (a & 1))
		b++;
	    /* Two rows share one lead byte; skip the 0xA0-0xDF gap. */
	    c = 0x81 + a/2;
	    if (c >= 0xA0)
		c += 0x40;
	    addchar(c);
	    addchar(0x40 + (a%2)*0x5E + b);
	    str += 2;
	} else {
	    addchar(*str++);
	}
    }
    /* With empty input and no earlier conversion, result is still NULL;
     * let addchar() allocate it and store the terminator. */
    if (!result)
	addchar(0);
    else
	result[result_len] = 0;
    return result;
}

/*************************************************************************/

/* Convert an SJIS string to JIS.
 *
 * - A lead byte (0x81-0x9F or 0xE0-0xFF) followed by a byte >= 0x40 is
 *   converted arithmetically to the corresponding two-byte JIS character.
 * - A half-width kana byte (0xA0-0xDF) becomes a full-width character; if
 *   it is followed by a voiced (0xDE) or semi-voiced (0xDF) mark and a
 *   combined form exists, both bytes become one full-width character.
 * - Any other byte is copied unchanged.
 * ESC $ B is inserted before the first character of a two-byte run and
 * ESC ( B when returning to single-byte text or at the end of the string.
 *
 * Returns the shared static result buffer (overwritten by the next
 * conversion), or NULL if str is NULL or no buffer could be allocated.
 */
char *jcode_sjis2jis(const char *str)
{
    int twobyte = 0;

    if (!str)
	return NULL;
    result_len = 0;
    while (*str) {
	int a = (unsigned char)*str, b = (unsigned char)str[1];
	/* Require a trail byte >= 0x40, as jcode_sjis2euc() does; smaller
	 * values would make the arithmetic below go negative. */
	if (((a >= 0x81 && a <= 0x9F) || (a >= 0xE0 && a <= 0xFF)) && b >= 0x40) {
	    if (!twobyte) {
		addchar(0x1B);
		addchar('$');
		addchar('B');
		twobyte = 1;
	    }
	    /* Fold the two lead-byte ranges into one zero-based index. */
	    if (a >= 0xE0)
		a -= 0x40;
	    a -= 0x81;
	    /* Trail bytes skip 0x7F; close the gap, then make zero-based. */
	    if (b >= 0x80)
		b--;
	    b -= 0x40;
	    /* Each lead byte covers two rows of 0x5E (94) characters. */
	    addchar(0x21 + a*2 + b/0x5E);
	    addchar(0x21 + b%0x5E);
	    /* Consume both input bytes; without this the loop never ends. */
	    str += 2;
	} else if (a >= 0xA0 && a <= 0xDF) {
	    if (!twobyte) {
		addchar(0x1B);
		addchar('$');
		addchar('B');
		twobyte = 1;
	    }
	    /* The kana tables hold EUC bytes; clear the high bit for JIS. */
	    if (b == 0xDE && hankana_daku_map[a-0xA0]) {
		addchar(0x25);
		addchar(hankana_daku_map[a-0xA0] & 0x7F);
		str += 2;
	    } else if (b == 0xDF && hankana_handaku_map[a-0xA0]) {
		addchar(0x25);
		addchar(hankana_handaku_map[a-0xA0] & 0x7F);
		str += 2;
	    } else {
		addchar(hankana_map_2[a-0xA0] & 0x7F);
		addchar(hankana_map[a-0xA0] & 0x7F);
		str++;
	    }
	} else {
	    if (twobyte) {
		addchar(0x1B);
		addchar('(');
		addchar('B');
		twobyte = 0;
	    }
	    addchar(*str++);
	}
    }
    if (twobyte) {
	addchar(0x1B);
	addchar('(');
	addchar('B');
    }
    /* With empty input and no earlier conversion, result is still NULL;
     * let addchar() allocate it and store the terminator. */
    if (!result)
	addchar(0);
    else
	result[result_len] = 0;
    return result;
}

/*************************************************************************/