#!/usr/bin/perl
# tecindo.cgi
#
# Converts a user-supplied (Quenya) string into tengwar.
$|=1;
%charTable = (
tinco => '1',
parma => 'q',
calma => 'a',
quesse => 'z',
ando => '2',
umbar => 'w',
anga => 's',
ungwe => 'x',
thule => '3',
formen => 'e',
harma => 'd',
hwesta => 'c',
anto => '4',
ampa => 'r',
anca => 'f',
unque => 'v',
numen => '5',
malta => 't',
noldo => 'g',
nwalme => 'b',
ore => '6',
vala => 'y',
anna => 'h',
vilya => 'n',
romen => '7',
arda => 'u',
lambe => 'j',
alda => 'm',
silme => '8',
'silme nuquerna' => 'i',
are => 'k',
'are nuquerna' => ',',
hyarmen => '9',
'hwesta sindarinwa' => 'o',
yanta => 'l',
ure => '.',
halla => "\xBD",
'long carrier' => '~',
'short carrier' => '`',
'a tehta left' => '#',
'a tehta mid' => 'E',
'a tehta right' => 'D',
'a tehta narrow' => 'C',
'e tehta left' => '$',
'e tehta mid' => 'R',
'e tehta right' => 'F',
'e tehta narrow' => 'V',
'i tehta left' => '%',
'i tehta mid' => 'T',
'i tehta right' => 'G',
'i tehta narrow' => 'B',
'o tehta left' => '^',
'o tehta mid' => 'Y',
'o tehta right' => 'H',
'o tehta narrow' => 'N',
'u tehta left' => '&',
'u tehta mid' => 'U',
'u tehta right' => 'J',
'u tehta narrow' => 'M',
'A tehta' => '#E',
'E tehta' => '$F',
'I tehta' => '%G',
'O tehta' => '^H',
'U tehta' => '&J',
'following y left' => "\xCC",
'following y mid' => "\xCD",
'following y right' => "\xCE",
'following y narrow' => "\xCF",
'following y inside' => "\xB4",
'hook wide' => '+',
'hook narrow' => '_',
'hook reversed' => '|',
'doubler wide' => ':',
'doubler narrow' => ';',
'doubler wide low' => '?',
'doubler narrow low' => '/',
'doubler inside' => "\xB0",
'doubler narrow above' => 'p',
comma => '=',
period => '-- ',
semicolon => '-',
hyphen => '=',
ques => "\xC0".' ',
bang => "\xC1".' ',
paren => "\x9B",
);
my @calls;
# Get input, whether by GET or POST.
if ($ENV{REQUEST_METHOD} eq GET)
{
@pairs = split(/&/, $ENV{QUERY_STRING});
}
elsif ($ENV{REQUEST_METHOD} eq POST)
{
read(STDIN, $buffer, $ENV{CONTENT_LENGTH});
@pairs = split(/&/, $buffer);
}
else
{
print "Script $0 called with no CGI method! Aborting.\n";
exit 1;
}
foreach (@pairs)
{
($name, $value) = split(/=/);
$name =~ tr/+/ /;
$name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$value =~ tr/+/ /;
$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
${$name} = $value;
}
if (defined $quettar)
{
$input = $quettar;
$input .= ' ';
$input = lc($input);
# $english = '' unless ($english eq 'on');
# $diacritred = '' unless ($diacritred eq 'on');
}
elsif (defined $redo || $english ne '')
{
foreach (EnglishPunct, RedPunct, RedDiacrit, DblSilme, FinalOre, InitialHalla)
{
$$_ = ($$_ eq 'on') ? ' checked' : '';
}
foreach (0..2)
{
local $var = 'CarrierPref'.$_;
$$var = ($CarrierPref == $_) ? ' selected' : '';
}
$NoteOne = '';
local $file = ( ($english eq 'on') ? 'tecindo-english.html' : 'tecindo-input.html' );
HTMLspew($file);
exit 0;
}
else
{
$NoteOne = '(A sample phrase is already there for you.)';
$addQuettar = 'VALUE="Á tece sinome quettar Quenyainen."';
foreach (EnglishPunct, RedPunct, RedDiacrit, DblSilme, FinalOre, InitialHalla)
{
$$_ = '';
}
$CarrierPref0 = '';
$CarrierPref1 = ' selected';
$CarrierPref2 = '';
HTMLspew('tecindo-input.html');
exit 0;
}
# Make sure it's actually Quenya; otherwise, transliteration
# attempts will fail.
if ($input =~ /(\band\s|\bthe\b|\bof\b)/)
{
$disallowed = '"'.$1.'" as its own word';
HTMLspew('tecindo-ume-quenya.html');
exit 1;
}
if ($input =~ /(\b[db])/)
{
$disallowed = '"'.$1.'" at the beginning of a word';
HTMLspew('tecindo-ume-quenya.html');
exit 1;
}
if ($input =~ /(ng\s)/)
{
$disallowed = '"'.$1.'" at the end of a word';
HTMLspew('tecindo-ume-quenya.html');
exit 1;
}
if ($input =~ /(j|ch|sh)/)
{
$disallowed = '"'.$1."'";
HTMLspew('tecindo-ume-quenya.html');
exit 1;
}
# Begin transliteration.
# Start regularizing orthography.
$input =~ s/qu/q/g;
$input =~ s/c/k/g;
$input =~ s/x/ks/g;
$input =~ s/ng/N/g;
# Now, figure out what diacritical system the person used.
if ($input =~ /[\xC0-\xFF]/)
{
$dia_type = 'high-ASCII';
}
elsif ($input =~ /aa|ee|ii|oo|uu/)
{
$dia_type = 'doubled vowels';
}
elsif ($input =~ /[aeiou]['\"\:]/)
{
$dia_type = 'following mark';
}
else
{
$dia_type = 'unknown!';
}
#print "\nThis uses diacritical type: $dia_type\n";
if ($dia_type eq 'high-ASCII')
{
$input =~ s/[\xC1\xE1]/A/g;
$input =~ s/[\xC9\xE9]/E/g;
$input =~ s/[\xCB\xEB]/e/g;
$input =~ s/[\xCD\xED]/I/g;
$input =~ s/[\xD1\xF1]/N/g;
$input =~ s/[\xD3\xF3]/O/g;
$input =~ s/[\xDA\xFA]/U/g;
}
elsif ($dia_type eq 'doubled vowels')
{
$input =~ s/aa/A/g;
$input =~ s/ee/E/g;
$input =~ s/ii/I/g;
$input =~ s/oo/O/g;
$input =~ s/uu/U/g;
}
elsif ($dia_type eq 'following mark')
{
$input =~ s/a'/A/g;
$input =~ s/e'/E/g;
$input =~ s/i'/I/g;
$input =~ s/o'/O/g;
$input =~ s/u'/U/g;
$input =~ s/e[\:\"]/e/g;
}
$transform = $input;
######
#
# Note for future expansion: this is where we're going to have to
# perform the kind of morphological scan that will be necessary
# to properly handle the suule/silme problem. This will be a
# high-level scan that checks morphology and etymology, and turns
# any "s" that should be spelled with suule into "th".
#
# Ick.
#
######
# Initialize output string, and prepare to process through input
# character-by-character, transliterating as you go.
$output = '';
$prevChar = 'null';
$prevCharType = 'null';
$lastChar = 'null';
$thisChar = '';
$nextChar = '';
$thisCharType = 'null';
$nextCharType = 'null';
LOOP:
while ($input ne '')
{
getNextChar();
if ($thisCharType eq 'vowel')
{
# Find out if this is part of a diphthong.
$dipTest = $thisChar.$nextChar;
if ($dipTest eq 'ai' || $dipTest eq 'au' ||
$dipTest eq 'eu' || $dipTest eq 'iu' ||
$dipTest eq 'oi' || $dipTest eq 'ui')
{
$diphthong = $dipTest.' diphthong';
addChar($diphthong);
getNextChar();
}
elsif ($prevCharType eq 'consonant' || $prevCharType eq 'semivowel')
{
# It's not a diphthong. But the previous
# character was a consonant, so we can put
# a _tehta_ on it.
if ($thisChar =~ /[AEIOU]/)
{
# Whoops, this vowel is long. We may not be
# allowed to put a _tehta_ over the prev.
# consonant.
if ($CarrierPref == 1 && $thisChar =~ /[AI]/)
{
addChar('long carrier');
$tehta = lc($thisChar).' tehta';
addChar($tehta);
}
elsif ($CarrierPref == 2)
{
addChar('long carrier');
$tehta = lc($thisChar).' tehta';
addChar($tehta);
}
else
{
$tehta = $thisChar.' tehta';
addChar($tehta);
}
}
else
{
# Ah, this vowel is short (and we're still
# processing the case of just post-consonant).
# Just go ahead and put a single _tehta_ on
# the last character.
$tehta = $thisChar.' tehta';
addChar($tehta);
}
}
elsif ($prevCharType eq 'vowel' || $prevCharType eq 'null')
{
# The previous character wasn't a consonant; we
# flat-out *need* a carrier.
# Long or short carrier?
if (lc($thisChar) eq $thisChar)
{
$carrier = 'short carrier';
}
else
{
$carrier = 'long carrier';
}
addChar($carrier);
$tehta = lc($thisChar).' tehta';
addChar($tehta);
}
else
{
# The previous character was a semivowel.
# I'll figure out what to do later.
}
}
elsif ($thisCharType eq 'consonant')
{
# First, check to see if this is the second
# of a doubled consonant-pair. If so, just
# put a 'doubler' symbol under the previous
# one.
# (Note that doubled S is handled below, which
# is *earlier* in the string parsing!)
if ($thisChar eq $prevChar)
{
addChar('doubler');
next LOOP;
}
# Next... could this be a compound consonant?
$compound = $thisChar . $nextChar;
# print " Compound check: $compound\n";
# if ($compound eq 'ss' && ! $DblSilme)
# {
# getNextChar();
# if ($nextCharType eq 'vowel')
# {
# addChar('are nuquerna');
# }
# else
# {
# addChar('are');
# }
# next LOOP;
# }
if ($compound eq 'hw')
{
addChar('hwesta');
getNextChar();
$thisCharType = 'consonant';
next LOOP;
}
elsif ($compound eq 'ld')
{
addChar('alda');
getNextChar();
next LOOP;
}
elsif ($compound eq 'mb')
{
addChar('umbar');
getNextChar();
next LOOP;
}
elsif ($compound eq 'mp')
{
addChar('ampa');
getNextChar();
next LOOP;
}
elsif ($compound eq 'nd')
{
addChar('ando');
getNextChar();
next LOOP;
}
elsif ($compound eq 'nk')
{
addChar('anca');
getNextChar();
next LOOP;
}
elsif ($compound eq 'nq')
{
addChar('unque');
getNextChar();
next LOOP;
}
elsif ($compound eq 'nt')
{
addChar('anto');
getNextChar();
next LOOP;
}
elsif ($compound eq 'Nw')
{
addChar('ungwe');
getNextChar();
next LOOP;
}
elsif ($compound eq 'rd')
{
addChar('arda');
getNextChar();
next LOOP;
}
elsif ($compound eq 'th')
{
addChar('thule');
getNextChar();
next LOOP;
}
# Okay, if you got through that, then the consonant
# wasn't a compound type.
# If it's an s, it may be a following-s, or it may
# have a vowel after it (in which case it should be
# _nuquerna_ to make room for the _tehta_), or it
# may have a consonant or space following it (in
# which case it can be normal).
if ($thisChar eq 's')
{
if ($prevCharType eq 'consonant')
{
# Now we have some minor hell to go through.
# Because of the width of the hook, we have
# to apply a following vowel *first* -- but
# not if it's a diphthong!
local $twoCheck = $nextChar.substr($input, 1,1);
if ($twoCheck =~ /(ai|au|eu|iu|oi|ui)/)
{
addChar('hook');
next LOOP;
}
elsif ($nextCharType eq 'vowel')
{
$tehta = $nextChar.' tehta';
addChar($tehta);
addChar('hook');
next LOOP;
}
}
else
{
local $baseLetter;
if ($nextChar eq 's')
{
$baseLetter = ($DblSilme eq 'on') ? 'silme' : 'are';
if (shouldBeNuquerna(double))
{
addChar($baseLetter.' nuquerna');
}
else
{
addChar($baseLetter);
}
getNextChar() unless($DblSilme eq 'on');
next LOOP;
}
else
{
$baseLetter = 'silme';
if (shouldBeNuquerna())
{
addChar($baseLetter.' nuquerna');
}
else
{
addChar($baseLetter);
}
next LOOP;
}
}
# else
# {
# addRomanChar('?');
# next LOOP;
# }
}
if ($thisChar eq 't')
{
addChar('tinco');
next LOOP;
}
if ($thisChar eq 'p')
{
addChar('parma');
next LOOP;
}
if ($thisChar eq 'k')
{
addChar('calma');
next LOOP;
}
if ($thisChar eq 'q')
{
addChar('quesse');
next LOOP;
}
if ($thisChar eq 'N')
{
if ($nextChar eq 'o' || $nextChar eq 'O')
{
addChar('noldo');
}
else
{
addChar('anga');
}
next LOOP;
}
if ($thisChar eq 'f')
{
addChar('formen');
next LOOP;
}
if ($thisChar eq 'n')
{
addChar('numen');
next LOOP;
}
if ($thisChar eq 'm')
{
addChar('malta');
next LOOP;
}
if ($thisChar eq 'v')
{
addChar('vala');
next LOOP;
}
if ($thisChar eq 'w')
{
addChar('vilya');
next LOOP;
}
if ($thisChar eq 'r')
{
if ($nextCharType eq 'null' || $nextCharType eq 'consonant')
{
addChar('ore');
}
else
{
addChar('romen');
}
next LOOP;
}
if ($thisChar eq 'l')
{
addChar('lambe');
next LOOP;
}
if ($thisChar eq 'h')
{
if ($prevCharType eq 'null' && $InitialHalla && $nextCharType eq 'consonant')
{
addChar('halla');
}
else
{
addChar('hyarmen');
}
next LOOP;
}
}
elsif ($thisChar eq 'y')
{
if ($prevCharType eq 'null')
{
addChar('anna');
addChar('following y');
$thisCharType = 'consonant';
next LOOP;
}
elsif ($prevCharType eq 'consonant')
{
addChar('following y');
$thisCharType = 'consonant';
next LOOP;
}
elsif ($prevCharType eq 'vowel')
{
addChar('anna');
addChar('following y');
$thisCharType = 'consonant';
next LOOP;
}
}
elsif ($thisChar eq ' ')
{
addChar(' ');
# getNextChar();
next LOOP;
}
# elsif ($thisChar =~ /[\n\r]/)
# {
# $output .= "
\n";
# getNextChar();
# next LOOP;
# }
elsif ($thisCharType eq 'punct')
{
if ($EnglishPunct)
{
addRomanChar($thisChar);
}
else
{
if ($thisChar eq ',')
{
addChar('comma');
}
elsif ($thisChar eq '.')
{
addChar('period');
}
elsif ($thisChar eq ';')
{
addChar('semicolon');
}
elsif ($thisChar eq '?')
{
addChar('ques');
}
elsif ($thisChar eq '!')
{
addChar('bang');
}
elsif ($thisChar eq '-')
{
addChar('hyphen');
}
else
{
addChar('paren');
}
}
next LOOP;
}
elsif ($thisCharType eq 'numeral')
{
addChar(chr(240+$thisChar));
}
else
{
addRomanChar($thisChar);
}
}
chomp($dateStamp = `date +'%Y-%m-%d: %T'`);
$remoteIP = $ENV{REMOTE_ADDR};
$userAgent = $ENV{HTTP_USER_AGENT};
$log_file = 'tecindo.log';
$optString = 'CPref '.$CarrierPref.'; ';
$optString .= 'EPun, ' if ($EnglishPunct eq 'on');
$optString .= 'RPun, ' if ($RedPunct eq 'on');
$optString .= 'RDia, ' if ($RedDiacrit eq 'on');
$optString .= 'Silme, ' if ($DblSilme eq 'on');
$optString .= 'Ore, ' if ($FinalOre eq 'on');
$optString .= 'Halla, ' if ($InitialHalla eq 'on');
$optString =~ s/, $//;
unless ($remoteIP eq '64.81.50.199')
{
open LOG, ">>$log_file";
print LOG "$dateStamp [$remoteIP] $optString\n $userAgent\n $quettar\n";
close LOG;
}
pop @tengwar;
$tengwessi = join(', ', @tengwar);
$tengwessi =~ s/pace\),/pace\),
\n/g;
$calls_string = join("\n\t
At this point, I'm supposed to display the template file $spew_file (where "I" am the $ENV{SCRIPT_NAME} script). Unfortunately, I couldn't find that file. You may wish to contact the server administrator or otherwise let someone know that there's been a problem with me.