﻿use utf8;
use strict;
use Encode;
use File::Find;
use Storable qw(store_fd fd_retrieve);
use gmm;
use xls;

my $CACHE = 1;

binmode(STDOUT, "utf8");
binmode(STDERR, "encoding(shiftjis)");

main();
exit;

#===========================================================
# メイン処理。
#===========================================================
sub main {
	my (%glossary, $flist, %col_to_id, %spe, %exc, %out, $num, $num2, %count);
	my ($jiman_uraniwa, $uraniwa);

	$uraniwa = decode("utf16be", "\x{B4}\x{B7}\x{B9}\x{C8}\x{B2}\x{F9}");
	$jiman_uraniwa = decode("utf16be", "\x{C7}\x{90}\x{B7}\x{91}\x{C7}\x{58}\x{00}\x{20}");
	$jiman_uraniwa .= $uraniwa;

	if ($CACHE) {
		if (@ARGV != 1) {
			die "引数がおかしい。\n";
		}
		xls::start();
	}
	{
		my ($xls, @except, $data, $cnt, @tmp);

		if ($CACHE) {
			my ($xls);

			$xls = new xls($ARGV[0]);
			for my $i (1 .. 26) {
				my ($s_name, $d);

				$d->{s_name} = $xls->getSheetName($i);
				$d->{data} = $xls->getData($i, "B", 2);
				push @{$data}, $d;
			}
			$xls->close();
			if (open DT, ">glossary.bin") {
				store_fd($data, \*DT);
				close DT;
			}
		} else {
			if (open DT, "glossary.bin") {
				$data = fd_retrieve(\*DT);
				close DT;
			}
		}

		$cnt = 0;
		for my $i (@{$data}) {
			for my $j (@{$i->{data}}[1 .. $#{$i->{data}}]) {
				my ($jpn, $kor);

				($jpn, $kor) = ($j->[1], $j->[5]);
				$kor =~ s/[ 　]+$//;
				if ($cnt == 1) {
					$jpn .= "ポケモン";
					$kor .= decode("utf16be", "\x{d3}\x{ec}\x{cf}\x{13}\x{ba}\x{ac}");
				}
				if ($jpn ne "たいりょく") {
					add_glossary(\%glossary, $jpn, $kor, $i->{s_name});
				}
				if ($jpn eq "じまんのうらにわ" && $kor ne $jiman_uraniwa) {
					die "「じまんのうらにわ」が変更になりました。\n";
				}
			}
			$cnt++;
		}
		add_glossary(\%glossary, "じまんの　うらにわ", $jiman_uraniwa, "Original");
		add_glossary(\%glossary, "うらにわ", $uraniwa, "Original");

		@except = read_file_utf8("except.txt");
		chomp @except;
		for my $i (@except) {
			$i =~ s/\\n/\n/g;
			add_except(\%glossary, $i);
		}

		@tmp = read_file_utf8("except2.txt");
		chomp @tmp;
		for my $i (@tmp) {
			my ($msg_id, $jpn);

			($msg_id, $jpn) = split(/\t/, $i);
			$exc{$msg_id}{$jpn} = 1;
		}

		@tmp = read_file_utf8("special.txt");
		chomp @tmp;
		for my $i (@tmp) {
			my ($msg_id, $kor, $reg);

			($msg_id, $kor, $reg) = split(/\t/, $i);
			$spe{$msg_id}{$kor} = qr/$reg/;
		}
	}

	for (my ($col_id, $cnt) = ('A', 0); $col_id ne 'AA'; $col_id++, $cnt++) {
		$col_to_id{$col_id} = $cnt;
	}

	$flist = get_file_list("src");
	for my $i (@{$flist}) {
		my ($f_name);

		$i->{name} =~ /[\\\/]?([^\\\/]+)$/;
		$f_name = $1;
		if ($i->{type} eq "gmm") {
			my ($gmm, $data);

			if ($CACHE) {
				$gmm = new gmm($i->{name});
				for (my $iter = $gmm->newIter(); !$iter->over(); $iter->next()) {
					if ($iter->getWindowContext() ne "garbage") {
						push @{$data}, { f_name => $f_name, msg_id => $iter->getMsgId(),
							jpn => $iter->getText("LANG_JAPAN"), kor => $iter->getText("LANG_KOREA") };
					}
				}
				$gmm->delete();
				if (open DT, ">cache/$f_name") {
					store_fd($data, \*DT);
					close DT;
				}
			} else {
				if (open DT, "cache/$f_name") {
					$data = fd_retrieve(\*DT);
					close DT;
				}
			}
			for my $j (@{$data}) {
				if ($j->{msg_id} !~ /3-ZKN_COMMENT_02_NEW_\d{2}/ &&
					$j->{msg_id} !~ /[45]-ZKN_WORLD_(NAME|TYPE)_NEW_\d{2}/)
				{
					match_glossary($j->{jpn}, $j->{kor}, \%glossary, $j->{f_name}, $j->{msg_id}, \%spe, \%exc, \%out);
				}
			}
		} else {
			my %COL_INFO = (
				"DP_Areanames.xls"   => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" },
				],
				"DP_Berries.xls"     => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" },
					{ jpn => 'F', kor => 'H', suf => "(Info)" }
				],
				"DP_Characters.xls"  => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" }
				],
				"DP_Items.xls"       => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" },
					{ jpn => 'F', kor => 'H', suf => "(Info)" }
				],
				"DP_Moves.xls"       => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" },
					{ jpn => 'F', kor => 'H', suf => "(Info)" }
				],
				"DP_Pokedex.xls"     => [
					{ jpn => 'B', kor => 'D', suf => "(Name)" },
					{ jpn => 'G', kor => 'I', suf => "(Diamond)" },
					{ jpn => 'J', kor => 'L', suf => "(Pearl)" }
				],
				"DP_Simpletext.xls"  => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" }
				],
				"DP_SpeAbility.xls"  => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" },
					{ jpn => 'F', kor => 'H', suf => "(Info)" }
				],
				"DP_Stickers.xls"    => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" },
					{ jpn => 'F', kor => 'H', suf => "(Info)" }
				],
				"DP_Trainers.xls"    => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" }
				],
				"DP_Underground.xls" => [
					{ jpn => 'C', kor => 'E', suf => "(Name)" },
					{ jpn => 'F', kor => 'H', suf => "(Info)" }
				]
			);
			my ($xls, $data, $info);

			if ($CACHE) {
				$xls = new xls($i->{name});
				$data = $xls->getData(1, "A", 1);
				if (open DT, ">cache/$f_name") {
					store_fd($data, \*DT);
					close DT;
				}
			} else {
				if (open DT, "cache/$f_name") {
					$data = fd_retrieve(\*DT);
					close DT;
				}
			}
			$info = $COL_INFO{$f_name};
			for my $i (@{$data}[1 .. $#{$data}]) {
				for my $j (@{$info}) {
					match_glossary($i->[$col_to_id{$j->{jpn}}], $i->[$col_to_id{$j->{kor}}],
						\%glossary, $f_name, "$i->[0]$j->{suf}", \%spe, \%exc, \%out);
				}
			}
			if ($CACHE) {
				$xls->close();
			}
		}
	}
	if ($CACHE) {
		xls::end();
	}

	# 今回は、結局、使用しなかった。
#	if (open DT, ">out.bin") {
#		store_fd(\%out, \*DT);
#		close DT;
#	}
	for my $aa (1)
	{
		my $reg = qr/.*/;

		$num = 0;
		$num2 = 0;
		for my $i (sort keys %out) {
			$num += @{$out{$i}};
			for my $j (@{$out{$i}}) {
				my ($ok, $cnt);

				for my $k (@{$j->{diff}}) {
					my ($exc);

					if (0) {
					# 本番チェック。
					for my $m (keys %{$k->{str}{kor}}) {
						if ($spe{$j->{msg_id}}{$m}) {
							$exc = 1;
						}
					}
					}
					if ($exc) {
						$cnt++;
					}

					if (!$exc && $k->{str}{jpn} =~ /^$reg$/) {
						$ok = 1;
						$count{$k->{str}{jpn}}{str} = $k->{str};
						$count{$k->{str}{jpn}}{cnt}++;
					}
				}

				if ($cnt == @{$j->{diff}}) {
					$num--;
				}
				if ($ok) {
					$num2++;
					print "---\n";
					print "\"$i\", $j->{msg_id}\n";
					for my $k (@{$j->{diff}}) {
						if ($k->{str}{jpn} =~ /^$reg$/) {
							print ">>$k->{str}{jpn}";
							for my $m (sort keys %{$k->{str}{s_name}}) {
								print ", $m";
							}
							for my $m (sort keys %{$k->{str}{kor}}) {
								print ", $m";
							}
							print "\n";
						}
					}
					print "$j->{jpn}\n===\n";
					print "$j->{kor}\n\n";
				}
			}
		}
		print STDERR "$num, $num2\n";
	}
	for my $i (sort { $count{$b}{cnt} <=> $count{$a}{cnt} } (keys %count)) {
		print STDOUT "$i, $count{$i}{cnt}";
		for my $j (sort keys %{$count{$i}{str}{s_name}}) {
			print STDOUT ", $j";
		}
		print STDOUT "\n";
	}

	if (0) {
		my (@tmp);

		@tmp = read_file_utf8("imatake.txt");
		chomp @tmp;
		for my $i (@tmp) {
			my ($msg_id, $jpn);

			($msg_id, $jpn) = split(/\t/, $i);
			if (scalar(keys %{$glossary{list}{$jpn}{kor}}) == 1) {
				for my $j (keys %{$glossary{list}{$jpn}{kor}}) {
					print "$msg_id\t$j\t$j\t;今竹くんチェック済み\n";
				}
			} else {
				print "$msg_id\t$jpn●\n";
			}
		}
	}

#	for my $i (sort values %{$glossary{list}}) {
#		if (!$i->{check}) {
#			print "none: $i->{jpn}";
#			for my $j (sort keys %{$i->{s_name}}) {
#				print ", $j";
#			}
#			for my $j (sort keys %{$i->{kor}}) {
#				print ", $j";
#			}
#			print "\n";
#		}
#	}
}

#===========================================================
# 用語の登録の内容表示（デバッグ用）。
#===========================================================
sub show_glo {
	my ($n, $t) = @_;

	for my $i (sort keys %{$n}) {
		print "\t" x $t . "$i";
		if (exists $n->{$i}{str}) {
			print ", $n->{$i}{str}{jpn}";
			for my $j (sort keys %{$n->{$i}{str}{s_name}}) {
				print ", $j";
			}
			for my $j (sort keys %{$n->{$i}{str}{kor}}) {
				print ", $j";
			}
		} else {
			print "\n";
		}
		show_glo($n->{$i}{next}, $t + 1);
	}
}

#===========================================================
# 用語の登録。
#===========================================================
sub add_glossary {
	my ($glo, $jpn, $kor, $s_name) = @_;
	my ($pn, $t);

	$t = \%{$glo->{list}{$jpn}};
	$t->{jpn} = $jpn;
	$t->{kor}{$kor} = 1;
	$t->{s_name}{$s_name} = 1;
	$pn = add_help($jpn, \%{$glo->{tree}});
	$pn->{str} = $t;
}

#===========================================================
# 除外ワードの登録。
#===========================================================
sub add_except {
	my ($glo, $jpn) = @_;
	my ($pn);

	$pn = add_help($jpn, \%{$glo->{tree}});
	if ($pn->{str}) {
		die "用語として存在する, $jpn。\n";
	}
	$pn->{str} = { exc => 1, jpn => $jpn };
}

#===========================================================
# 登録。
#===========================================================
sub add_help {
	my ($str, $n) = @_;
	my ($c, $pn);

	while ($c = substr($str, 0, 1, "")) {
		$pn = \%{$n->{$c}};
		$n = \%{$n->{$c}{next}};
	}
	return $pn;
}

#===========================================================
# 用語のマッチングチェック。
#===========================================================
sub match_glossary {
	my ($jpn, $kor, $glo, $f_name, $msg_id, $spe, $exc, $out) = @_;
	my (%lst, @diff, $jp);

	$jp = $jpn;
	while ($jp) {
		my ($c, $n, $m, $str);

		$str = $jp;
		$n = $glo->{tree};
		while ($c = substr($str, 0, 1, "")) {
			if (!exists $n->{$c}) {
				last;
			}
			if (exists $n->{$c}{str}) {
				$m = $n->{$c}{str};
			}
			$n = \%{$n->{$c}{next}};
		}
		if ($m) {
			if (!$m->{exc} && !$exc->{$msg_id}{$m->{jpn}}) {
				$lst{$m->{jpn}}{str} = $m;
				$lst{$m->{jpn}}{cnt}++;
				$m->{check} = 1;
			}
			substr($jp, 0, length($m->{jpn}), "");
		} else {
			substr($jp, 0, 1, "");
		}
	}
	for my $i (sort keys %lst) {
		my ($ttl);

		for my $j (keys %{$lst{$i}{str}{kor}}) {
			my (@tmp);

			if ($spe->{$msg_id}{$j}) {
				@tmp = $kor =~ /$spe->{$msg_id}{$j}/g;
			} else {
				@tmp = $kor =~ /$j/g;
			}
			$ttl += scalar(@tmp);
		}
		if ($ttl < $lst{$i}{cnt}) {
			push @diff, { str => $lst{$i}{str}, cnt => $lst{$i}{cnt}, ttl => $ttl };
		}
	}
	if (@diff) {
		push @{$out->{$f_name}},
			{ msg_id => $msg_id, jpn => $jpn, kor => $kor, diff => \@diff };
	}
}

#===========================================================
# ファイルリスト取得。
#===========================================================
sub get_file_list {
	my ($dir) = @_;
	my $sub;
	my @list;

	$sub = sub {
		if ($File::Find::name =~ /\.gmm$/) {
			push @list, { name => $File::Find::name, type => "gmm" };
		} elsif ($File::Find::name =~ /\.xls$/) {
			push @list, { name => $File::Find::name, type => "xls" };
		}
	};
	find($sub, $dir);
	return \@list;
}

#===========================================================
# utf8ファイル読み込み。
#===========================================================
sub read_file_utf8 {
	my ($f_name) = @_;
	my (@line, $tmp);

	if (!open IN, $f_name) {
		die "ファイルが開けません。\n";
	}
	read(IN, $tmp, 3);
	binmode(IN, "utf8");
	@line = <IN>;
	close IN;

	return @line;
}
