#!/usr/bin/perl package Jtext; use Exporter; @ISA = (Exporter); @EXPORT = qw(index_sjis rindex_sjis index_euc rindex_euc substr_sjis substr_euc length_sjis length_euc); $VERSION = 0.01; # ------------------------------------------ sub index_sjis { my ($strall,$strpart,$pos) = @_; return &index_ja($strall,$strpart,$pos,"sjis"); } sub rindex_sjis { my ($strall,$strpart,$pos) = @_; return &index_ja($strall,$strpart,$pos,"sjis","rindex"); } sub index_euc { my ($strall,$strpart,$pos) = @_; return &index_ja($strall,$strpart,$pos,"euc"); } sub rindex_euc { my ($strall,$strpart,$pos) = @_; return &index_ja($strall,$strpart,$pos,"euc","rindex"); } sub index_ja { my ($strall,$strpart,$pos,$charset,$rindex) = @_; my $string; my $result; my @tmpall; my @tmppart; if($rindex eq "rindex"){ @tmpall = reverse &chars_ja($strall,$charset); @tmppart = reverse &chars_ja($strpart,$charset); } else { @tmpall = &chars_ja($strall,$charset); @tmppart = &chars_ja($strpart,$charset); } if(!defined($pos)) {$pos=0;} BIG: for ($h=$pos; $h<($#tmpall+1); $h++) { if($tmpall[$h] eq $tmppart[0]) { $j=0; SMALL: for ($i=$h; $i<($#tmpall+1); $i++) { if($tmpall[$i] eq $tmppart[$j]) { $j++; if($j == ($#tmppart+1)) { if($rindex eq "rindex"){ return ($#tmpall - $#tmppart - $h); } else { return $h; } } } else { next BIG; } } } } return -1; # No hit } # ------------------------------------------ sub substr_sjis { my ($str,$pos,$len,$replacestr) = @_; return &substr_ja($str,$pos,$len,$replacestr,"sjis"); } sub substr_euc { my ($str,$pos,$len,$replacestr) = @_; return &substr_ja($str,$pos,$len,$replacestr,"euc"); } sub substr_ja { my ($str,$pos,$len,$replacestr,$charset) = @_; my $string; my @result; if($pos<0) { @tmp = reverse &chars_ja($str,$charset); if(defined($replacestr)) { @replacearray = reverse &chars_ja($replacestr,$charset); } $pos = abs($pos); $reverse = 1; } else { @tmp = &chars_ja($str,$charset); if(defined($replacestr)) { @replacearray = &chars_ja($replacestr,$charset); } } if(defined($replacestr)) { for ($i=0; $i<$pos; $i++) { push(@result,$tmp[$i]); } push(@result,@replacearray); for (($i+$len) .. ($#tmp+1)) { push(@result,$tmp[$_]); } } else { if(defined($len)) { $len += $pos; } else { $len = $#tmp+1; } for ($i=$pos; $i<$len; $i++) { push(@result,$tmp[$i]); } } if(!$reverse) { $string = join("",@result); } else { $string = join("",reverse @result); } return $string; } # ------------------------------------------ sub length_sjis { my($string) = @_; my @tmp = &chars_ja($string,"sjis"); return ($#tmp+1); } sub length_euc { my($string) = @_; my @tmp = &chars_ja($string,"euc"); return ($#tmp+1); } # ------------------------------------------ sub chars_ja { my($string,$charset) = @_; my @chars; $_ = $string; if($charset eq "sjis"){ # sjis @chars = / [\x20-\x7E] # ASCII |[\xA1-\xDF] # HANKAKU-KATAKANA |[\x81-\x9F][\x40-\xFC] # 2bytes character |[\xE0-\xEF][\x40-\xFC] # 2bytes character /gox; } else { # euc @chars = / [\x20-\x7E] # ASCII |\x8E[\xA0-\xDF] # HANKAKU-KATAKANA |\x8E[\xA1-\xFE][\xA1-\xFE] # 3bytes character |[\xA1-\xFE][\xA1-\xFE] # 2bytes character /gox; } return @chars; } # ------------------------------------------ 1; __END__ =head1 NAME Jtext - substr(), index() and rindex() available for Japanese Characters. =head1 SYNOPSIS $line = jcode("これはeucテキストです。eucです。sjisではありません。")->euc; $length = length_euc($line); # 31 $text = substr_euc($line,13,6); # eucです。 $text = substr_euc($line,6,4,"文字"); # これはeuc文字です。... $text = substr_euc($line,-1,5,"ない")."\n"; # ...sjisではない。 $index = index_euc($line,jcode("テキスト")->euc); # 6 $index = index_euc($line,jcode("テキスト")->euc,9); # -1 $rindex = rindex_euc($line,jcode("euc")->euc); # 13 $rindex = rindex_euc($line,jcode("euc")->euc,30); # -1 =head1 DESCRIPTION Jtext is a module that makes length(), substr(), index() and rindex() available for Japanese Characters. =head1 METHOD =head2 length_euc / length_sjis length_euc EXPR length_sjis EXPR $length = length_euc($line); # 31 length() for Japanese Characters. =head2 substr_euc / substr_sjis substr_euc EXPR, OFFSET, LENGTH, REPLACEMENT substr_euc EXPR, OFFSET, LENGTH substr_euc EXPR, OFFSET substr_sjis EXPR, OFFSET, LENGTH, REPLACEMENT substr_sjis EXPR, OFFSET, LENGTH substr_sjis EXPR, OFFSET $text = substr_euc($line,13,6); # eucです。 $text = substr_euc($line,6,4,"文字"); # これはeuc文字です。... $text = substr_euc($line,-1,5,"ない")."\n"; # ...sjisではない。 substr() for Japanese Characters. =head2 index_euc / index_sjis index_euc STR, SUBSTR, OFFSET index_euc STR, SUBSTR index_sjis STR, SUBSTR, OFFSET index_sjis STR, SUBSTR $index = index_euc($line,jcode("テキスト")->euc); # 6 $index = index_euc($line,jcode("テキスト")->euc,9); # -1 index() for Japanese Characters. =head2 rindex_euc / rindex_sjis $rindex = rindex_euc($line,jcode("euc")->euc); # 13 $rindex = rindex_euc($line,jcode("euc")->euc,30); # -1 rindex() for Japanese Characters. =head1 AUTHOR Yuki SHIMAZU Ey.shimazu@nifty.comE =cut