#!/usr/bin/perl -w 

# Value subtracted from each byte of a two-byte GB code to obtain a
# zero-based row / column number.
$GBCODE_BYTE_OFFSET = 0xa1;
# Number of glyph slots per row; the zero-based first byte is multiplied by this.
$GBCODE_UNIT_LEN = 94;
# Bytes per glyph in the font file. Each byte is expanded to 8 pixels, so
# 18 bytes -> 144 pixels, i.e. one bit-packed 12x12 bitmap.
$GBCODE_CHAR_SIZE = 18;

#read from: GB font bitmap file (HZK16-style; NOTE: the code reads 18 bytes per glyph, not 32), input txt file, arrow image file
#generate: fox_msg_zh_data.c & fox_moji_zh_data.c
# process_kanji($in_db, $in_txt, $in_arrows)
#
# Converts a GB-encoded message file into two C source files.
#
#   $in_db     - font file holding $GBCODE_CHAR_SIZE bit-packed bytes per
#                glyph, indexed by (byte1-0xa1)*94 + (byte2-0xa1).
#   $in_txt    - message text.  A line starting with digits opens the array
#                "msg_<digits>" (the rest of that line is ignored); the
#                following characters, up to a '#', form the message body and
#                the remainder of the '#' line is discarded.
#   $in_arrows - image handed to pnmcat (twice) ahead of the generated glyphs.
#
# Writes fox_msg_zh_data.c (one "unsigned short" array per message) and
# fox_moji_zh_data.c (the packed font table "font_00").  The scratch file
# moji.pnm is created in the current directory and removed again.
#
# Message encoding: newline -> 0x01 (emitted lazily, so a newline directly
# before '#' or end of file is dropped), space -> 0x0c, the arrow symbols
# ^ @ $ % & ~ + * -> 0x10..0x17, A-Z and the range ' .. 9 -> their row-0xA3
# full-width forms, any other byte below 127 -> the placeholder 0x100 (and a
# "char = ..." note on STDOUT), everything else is the first byte of a
# two-byte GB code.  A glyph's code is its 1-based allocation number + 31.
#
# Returns the number of distinct glyphs allocated (punctuation included).
# Dies if a file cannot be opened/written or if pnmcat fails.
sub process_kanji {
    my ($in_db, $in_txt, $in_arrows) = @_;

    my $GLYPH_SIDE   = 12;     # glyphs are 12x12 pixels
    my $GLYPH_PIXELS = 144;    # one byte per pixel in the PGM raster

    my $cnt = 0;               # glyphs allocated so far
    my %glyph_no;              # GB index -> 1-based glyph number
    my $pixels = '';           # PGM raster of all allocated glyphs, in order

    my %arrows = (
        '^' => 0x10,
        '@' => 0x11,
        '$' => 0x12,
        '%' => 0x13,
        '&' => 0x14,
        '~' => 0x15,
        '+' => 0x16,
        '*' => 0x17,
    );

    # Punctuation that always gets the first glyph numbers, in this order.
    my @punc = (
        0xA1A3,
        0xA3A1,
        0xA3A8,
        0xA3A9,
        0xA3AC,
        0xA3AE,
        0xA3BF,
    );

    open(my $db_fh, '<', $in_db) or die "Can't open $in_db: $!";
    open(my $txt_fh, '<', $in_txt) or die "Can't open $in_txt: $!";
    open(my $msg_fh, '>', 'fox_msg_zh_data.c') or die "Can't open fox_msg_zh_data.c: $!";
    binmode($db_fh);
    binmode($txt_fh);

    # Returns the glyph number for a GB index, allocating it (and appending
    # its pixels to the raster) the first time the index is seen.  A hash is
    # used so that an out-of-range (negative) index cannot alias another glyph.
    my $glyph_for = sub {
        my ($idx) = @_;
        unless ($glyph_no{$idx}) {
            $glyph_no{$idx} = ++$cnt;
            $pixels .= _glyph_pixels($db_fh, $idx);
        }
        return $glyph_no{$idx};
    };

    foreach my $mark (@punc) {
        $glyph_for->(_gb_index($mark >> 8, $mark & 0xff));
    }

    while (defined(my $header = <$txt_fh>)) {
        my $linked = "";
        my $suffix = "";    # pending newline code, written before the next item

        if ($header =~ /^(\d+)/) {
            my $id = $1;
            printf {$msg_fh} "unsigned short msg_%s [] = {", $id;
            if ($id eq "00010" or
                $id eq "00020" or
                $id eq "00030" or
                $id eq "00040") {
                $linked = "0x0f, 0x01, ";
            }
        }

      CHAR:
        while (1) {
            my $c = getc($txt_fh);
            last CHAR unless defined $c;          # end of file ends the message
            if ($c eq '#') {
                my $discard = <$txt_fh>;          # skip the rest of the '#' line
                last CHAR;
            }

            if ($suffix ne "") {
                print {$msg_fh} $suffix;
                $suffix = "";
            }
            if ($c eq "\n") {
                $suffix = "0x01, ";
                next CHAR;
            }
            if ($c eq " ") {
                print {$msg_fh} "0x0c, ";
                next CHAR;
            }
            if ($arrows{$c}) {
                printf {$msg_fh} "0x%02x, ", $arrows{$c};
                next CHAR;
            }

            my ($hi, $lo);
            if ($c ge "A" and $c le "Z") {
                ($hi, $lo) = (0xA3, 0xC1 - ord("A") + ord($c));
            } elsif ($c ge "'" and $c le "9") {
                ($hi, $lo) = (0xA3, 0xA7 - ord("'") + ord($c));
            } elsif (ord($c) < 127) {
                # Unsupported single-byte character: report it and emit a placeholder.
                printf "char = %s\n", $c;
                print {$msg_fh} "0x100,";
                next CHAR;
            } else {
                my $c2 = getc($txt_fh);
                unless (defined $c2) {
                    warn "Truncated two-byte character at end of $in_txt; ignored\n";
                    last CHAR;
                }
                ($hi, $lo) = (ord($c), ord($c2));
            }

            my $code = $glyph_for->(_gb_index($hi, $lo)) + 31;
            printf {$msg_fh} "0x%02x%02x, ", ($code >> 8) & 0xff, $code & 0xff;
        }

        printf {$msg_fh} "%s0x00};\n", $linked;
    }

    close($db_fh);
    close($txt_fh);
    close($msg_fh) or die "Can't write fox_msg_zh_data.c: $!";

    # The font table packs four glyphs together, so pad the raster with blank
    # glyphs up to a multiple of four.
    my $slots = 4 * int(($cnt + 3) / 4);
    $pixels .= "\0" x ($GLYPH_PIXELS * ($slots - $cnt));

    open(my $pnm_out, '>', 'moji.pnm') or die "Can't open moji.pnm: $!";
    binmode($pnm_out);
    printf {$pnm_out} "P5\n12 %d\n255\n", 48*int(($cnt+3)/4);
    print {$pnm_out} $pixels;
    close($pnm_out) or die "Can't write moji.pnm: $!";

    # Stack arrows, arrows and the generated glyphs top to bottom.  The list
    # form of open runs pnmcat without a shell, so $in_arrows needs no quoting.
    open(my $cat_fh, '-|', 'pnmcat', '-tb', $in_arrows, $in_arrows, 'moji.pnm')
        or die "Can't run pnmcat: $!";
    binmode($cat_fh);
    my $image = do { local $/; <$cat_fh> };
    $image = '' unless defined $image;
    my $cat_ok = close($cat_fh);
    my $cat_status = $?;
    unlink('moji.pnm');
    die "pnmcat failed (wait status $cat_status)\n" unless $cat_ok;

    # Raw PGM header: magic, width, height, maxval, then exactly one
    # whitespace byte before the raster.
    unless ($image =~ /\AP5\s+(\d+)\s+(\d+)\s+\d+\s/) {
        die "Unexpected output from pnmcat (not a raw PGM image)\n";
    }
    my ($width, $height, $raster_at) = ($1, $2, $+[0]);
    my $data = substr($image, $raster_at, $width * $height);

    open(my $moji_fh, '>', 'fox_moji_zh_data.c') or die "Can't open fox_moji_zh_data.c: $!";
    print {$moji_fh} "unsigned char font_00[] = {\n";

    # Four consecutive glyphs share one table entry: bit N of every nibble
    # comes from glyph $base+N.  Each output byte carries two horizontally
    # adjacent pixels (high nibble = even column, low nibble = odd column).
    my $glyph_total = $width * $height / $GLYPH_PIXELS;
    for (my $base = 0; $base < $glyph_total; $base += 4) {
        for my $row (0 .. $GLYPH_SIDE - 1) {
            for my $pair (0 .. $GLYPH_SIDE / 2 - 1) {
                my ($bits1, $bits2) = (0, 0);
                for my $plane (0 .. 3) {
                    my $at = ($base + $plane) * $GLYPH_PIXELS
                           + $row * $GLYPH_SIDE + $pair * 2;
                    # vec() yields 0 past the end of $data, so a glyph count
                    # that is not a multiple of four reads as blank.
                    $bits1 |= (1 << $plane) if vec($data, $at, 8);
                    $bits2 |= (1 << $plane) if vec($data, $at + 1, 8);
                }
                printf {$moji_fh} "0x%01x%01x, ", $bits1, $bits2;
            }
            print {$moji_fh} "0x00, 0x00, ";
            print {$moji_fh} "\n";
        }
    }
    print {$moji_fh} "};\n";
    close($moji_fh) or die "Can't write fox_moji_zh_data.c: $!";

    return $cnt;
}

# Maps the two bytes of a GB code to the glyph's position in the font file.
sub _gb_index {
    my ($hi, $lo) = @_;
    return ($hi - $GBCODE_BYTE_OFFSET) * $GBCODE_UNIT_LEN + ($lo - $GBCODE_BYTE_OFFSET);
}

# Reads glyph $idx from the font file and returns it as PGM pixels: one byte
# per bit, most significant bit first, 0x00 for clear and 0xff for set.
# Missing or truncated font data is reported and rendered as blank pixels.
sub _glyph_pixels {
    my ($db_fh, $idx) = @_;

    my $packed = '';
    if ($idx >= 0 and seek($db_fh, $idx * $GBCODE_CHAR_SIZE, 0)) {
        read($db_fh, $packed, $GBCODE_CHAR_SIZE);
    }
    $packed = '' unless defined $packed;

    my $missing = $GBCODE_CHAR_SIZE - length($packed);
    if ($missing > 0) {
        warn "Font data for glyph index $idx is missing or truncated; using blank pixels\n";
        $packed .= "\0" x $missing;
    }

    # 'B*' unpacks to a string of '0'/'1' characters in descending bit order.
    (my $glyph = unpack('B*', $packed)) =~ tr/01/\x00\xff/;
    return $glyph;
}

# Script entry point: requires exactly three command-line arguments (font
# file, message text file, arrow image) and reports how many distinct glyphs
# process_kanji() allocated.
if (@ARGV != 3) {
    die "Usage: zh_kanji.pl <Chinese GB font map> <input Chinese text file> <arrow file>\n       See README for more detail.\n";
}

print "processing input Chinese text file\n";
my ($font_map, $text_file, $arrow_file) = @ARGV;
my $unique_glyphs = process_kanji($font_map, $text_file, $arrow_file);
print "number of unique Chinese characters used: ", $unique_glyphs, "\n";
