#
# DistillerInit
# Args: none
# Returns: DIstillerStatus code (int)
#
sub DistillerInit {
$ICON_STRING = "
";
0;
}
#
# DistillerMain
# Args: distiller type (MIME string), data (scalar), %args
# (%args is argID,value pairs for arguments to distiller)
# Returns: (new data, MIME type, optional distillerStatus [default distOk])
#
sub DistillerMain {
my $type = pop;
my $data = pop;
my $noframes = 0;
my $frameset = 0;
my $head = 0;
my $html = 0;
my $body = 0;
my $in_link = "";
my $outdata = "";
%args = @_;
while (!($data =~ /^$/)) {
my ($tag, $pretext, $type, $all);
# this is an evil regexp. EVIL... what it does is match all
# text before a tag and one tag. That tag can have >'s inside of
# quotes or single quotes because of javascript type stuff
# just right now I want to say that javascript sucks
$data =~ s/^(([^<]*)(<(\/?\w*)(?:(?:[^>"'])|(?:"(?:[^\\"]|(?:\\.))*")|(?:'(?:[^\\']|(?:\\.)*)*'))*[">']?)?)//s;
$all = $1;
$pretext = $2;
$tag = $3;
$type = $4;
if (!($type =~ /^(?:\/?a)|(?:img)|(?:input)$/i)) {
$outdata .= $all;
if ($type =~ /^body$/i) {
$body = 1;
} elsif ($type =~ /^\/head$/i) {
$head = 1;
} elsif ($type =~ /^html$/i) {
$html = 1;
} elsif ($type =~ /^frameset$/i) {
$frameset = 1;
} elsif ($type =~ /^noframes$/i) {
$noframes = 1;
}
} elsif ($type =~ /^a$/i) {
$outdata .= $all;
if ($tag =~ /href\s*=/i) {
$in_link = $tag;
}
} elsif ($type =~ /^img$/i) {
my $magicsrc;
my $oldsrc;
$tag =~ /src\s*=\s*['"]?([^ "']*)['"]?/i;
$magicsrc = $1;
$oldsrc = $magicsrc;
if ($oldsrc !~ /^$/) {
if ($tag =~ /(?:ismap)|(?:usemap)/i) {
$magicsrc = &tomagic($magicsrc, "i8=1");
}
if ($tag !~ /(?:width)|(?:height)/i) {
$magicsrc = &tomagic($magicsrc, "i2=1");
}
$tag =~ s/$oldsrc/$magicsrc/;
$magicsrc = $oldsrc;
$magicsrc = &tomagic($magicsrc, "i1=1");
$outdata .= $pretext;
if ($in_link !~ /^$/) {
$outdata .= "$tag&\#164;$in_link";
} else {
if ($tag =~ /(?:ismap)|(?:usemap)/i) {
$outdata .= "$tag&\#164;";
} else {
$tag =~ s/$type/$type border="0"/;
$outdata .= "$tag";
}
}
} else {
$outdata .= $all;
}
} elsif ($type =~ /^input$/i) {
if ($tag =~ /transendid\s*=\s*['"]?(.)([^ "']+)['"]?/i) {
my $type = $1;
my $id = $2;
my $inputtype;
my $transcendval;
if ($tag =~ /type\s*=\s*['"]?([^ "']+)['"]?/i) {
$inputtype = $1;
}
if ($tag =~ /transendval\s*=\s*['"]?([^ "']+)['"]?/i) {
$transendval = $1;
}
if (defined($args{"$id"})) {
my $val = $args{"$id"};
if (($inputtype !~ /^text$/i) && ($inputtype !~ /^$/)) {
if ($transendval !~ /^$/) {
if ($type =~ /s/) {
if ($transendval =~ $val) {
$tag =~ s/>/ CHECKED>/;
}
} else {
if ($transendval == $val) {
$tag =~ s/>/ CHECKED>/;
}
}
} else {
$tag =~ s/>/ VALUE=\"$val\">/;
}
}
}
$outdata .= $pretext . $tag;
} else {
$outdata .= $all;
}
} elsif ($type =~ /^\/a$/i) {
$in_link = "";
$outdata .= $all;
}
}
if ($args{'6'} == 1) {
if ($noframes) {
$outdata =~ s/(<\s*noframes[^>]*>)/\1$ICON_STRING/i;
} elsif ($frameset) {
# do nothing
1;
} elsif ($body) {
$outdata =~ s/(<\s*body[^>]*>)/\1$ICON_STRING/i;
} elsif ($head) {
$outdata =~ s/(<\s*\/head[^>]*>)/\1$ICON_STRING/i;
} elsif ($html) {
$outdata =~ s/(<\s*html[^>]*>)/\1$ICON_STRING/i;
} else {
$outdata = $ICON_STRING . $outdata;
}
}
return ($outdata, 'text/html');
}
sub tomagic {
my $src = shift;
my $magic = shift;
my $m;
if ($src =~ /\*\^(.*)\^(\?.*)?$/) {
$m = $1;
$m =~ s/([\[\\.*?$^()\]])/\\\1/g;
$src =~ s/$m/$m\^$magic/;
} elsif ($src =~ /\?(.*)$/) {
$m = $1;
$m =~ s/([\[\\.*?$^()\]])/\\\1/g;
$src =~ s/\?$m/\*\^$magic\^\?$m/;
} else {
$src .= "*^$magic^";
}
$src;
}
1;