#!/usr/bin/perl -w
###########################################################################
#
# Corpus Analysis and Programming, Homework #6.
# Conversion from numeric values to corresponding symbolic names.
#
# Data source for this program is adapted from the following:
#   1. CKIP tagset: http://www.sinica.edu.tw/~tdbproj/kiwi/use05.html
#   2. testing text: file "ev1" in Academia Sinica balanced corpus, v3.0.
#
# Programmed by William Yeh, 2002/10/16.
#
###########################################################################
#
# Overview:
#   Reads a table that maps tag numbers to tag names, then rewrites an
#   input file of "word <whitespace> number" lines as "word <TAB> tag"
#   lines in the output file.  Blank input lines are preserved as blank
#   lines in the output.
#

use strict;
use warnings;

#
# global definitions
#
my $Tagset_Filename = "ckip-tagset.txt";   # the CKIP tagset
my @Tag_Mapping;                           # number -> tag mapping

#
# first, check for command arguments
#
if (scalar(@ARGV) != 2) {
    die "Usage: s3_tag.pl [input filename] [output filename]";
}
my ($input_filename, $output_filename) = @ARGV;

#
# second, read in table of CKIP tagset
#
open(my $tagset_fh, '<', $Tagset_Filename)
    or die "Fail to open $Tagset_Filename: $!";

while (my $line = <$tagset_fh>) {
    chomp $line;
    next if $line =~ /^\s*$/;   # skip blank lines
    next if $line =~ /^\s*#/;   # skip comment lines

    # Each entry is "<number> <tag>".  The match result must be checked:
    # after a failed match $1/$2 still hold the previous line's captures.
    if ($line =~ /^(\d+)\s+(\S+)/) {
        my ($number, $tag) = ($1, $2);
        $Tag_Mapping[$number] = $tag;
    }
    else {
        warn "$Tagset_Filename line $.: malformed tagset entry skipped: $line\n";
    }
}

close($tagset_fh);

#
# third, process input text...
#
open(my $input_fh, '<', $input_filename)
    or die "Fail to open $input_filename: $!";
# Three-argument open so the output filename is never parsed as a mode.
open(my $output_fh, '>', $output_filename)
    or die "Fail to open $output_filename: $!";

while (my $line = <$input_fh>) {
    chomp $line;

    if ($line =~ /^\s*$/) {     # blank lines: print as-is
        print $output_fh "\n";
        next;
    }

    # Expected shape: "<word> <number>".  Lines that do not fit are passed
    # through unchanged rather than re-emitting the previous line's data.
    unless ($line =~ /^(\S+)\s+(\d+)/) {
        warn "$input_filename line $.: unrecognized line copied as-is: $line\n";
        print $output_fh $line, "\n";
        next;
    }
    my ($word, $number) = ($1, $2);

    # A number missing from the tagset yields an empty tag column (the
    # same output as before), but is now reported explicitly.
    my $tag = $Tag_Mapping[$number];
    unless (defined $tag) {
        warn "$input_filename line $.: no tag defined for number $number\n";
        $tag = '';
    }

    print $output_fh $word, "\t", $tag, "\n";
}

close($input_fh);
# Buffered write errors only surface at close, so check it.
close($output_fh) or die "Fail to close $output_filename: $!";