Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix maketext interpolation in PG. #2213

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
292 changes: 246 additions & 46 deletions lib/WeBWorK/Localize.pm
Original file line number Diff line number Diff line change
@@ -1,70 +1,49 @@
package WeBWorK::Localize;
use parent 'Locale::Maketext';

use strict;
use warnings;

use File::Spec;
use Locale::Maketext;
use Locale::Maketext::Lexicon;

use WeBWorK::Utils qw(x);

my $path = "$ENV{WEBWORK_ROOT}/lib/WeBWorK/Localize";
my $pattern = File::Spec->catfile($path, '*.[pm]o');
my $decode = 1;
my $encoding = undef;

# For some reason this next stanza needs to be evaluated
# separately. I'm not sure why it can't be
# directly entered into the code.
# This code was cribbed from Locale::Maketext::Simple if I remember correctly
#

eval "
package WeBWorK::Localize::I18N;
use base 'Locale::Maketext';
%WeBWorK::Localize::I18N::Lexicon = ( '_AUTO' => 1 );
Locale::Maketext::Lexicon->import({
'i-default' => [ 'Auto' ],
'*' => [ Gettext => \$pattern ],
_decode => \$decode,
_encoding => \$encoding,
});
*tense = sub { \$_[1] . ((\$_[2] eq 'present') ? 'ing' : 'ed') };

" or die "Can't process eval in WeBWorK/Localize.pm: line 35: " . $@;

package WeBWorK::Localize;
Locale::Maketext::Lexicon->import({
'i-default' => ['Auto'],
'*' => [ Gettext => File::Spec->catfile("$ENV{WEBWORK_ROOT}/lib/WeBWorK/Localize", '*.[pm]o') ],
_decode => 1,
_encoding => undef,
});
*tense = sub { \$_[1] . ((\$_[2] eq 'present') ? 'ing' : 'ed') };

# This subroutine is shared with the safe compartment in PG to
# allow maketext() to be constructed in PG problems and macros
# It seems to be a little fragile -- possibly it breaks
# on perl 5.8.8
# allow maketext() to be used in PG problems and macros.
sub getLoc {
my $lang = shift;
my $lh = WeBWorK::Localize::I18N->get_handle($lang);
my $lh = WeBWorK::Localize->get_handle($lang);
return sub { $lh->maketext(@_) };
}

sub getLangHandle {
my $lang = shift;
my $lh = WeBWorK::Localize::I18N->get_handle($lang);
return $lh;
return WeBWorK::Localize->get_handle($lang);
}

# this is like [quant] but it doesn't write the number
# This is like [quant] but it doesn't write the number.
# usage: [quant,_1,<singular>,<plural>,<optional zero>]

sub plural {
my ($handle, $num, @forms) = @_;

return '' if @forms == 0;
return $forms[2] if @forms > 2 and $num == 0;
return $forms[2] if @forms > 2 && $num == 0;

# Normal case:
return ($handle->numerate($num, @forms));
return $handle->numerate($num, @forms);
}

# this is like [quant] but it also has -1 case
# usage: [negquant,_1,<neg case>,<singular>,<plural>,<optional zero>]

# This is like [quant] but it also has a negative case. (The one usage in the code interprets this as unlimited.)
# usage: [negquant,_1,<negative case>,<singular>,<plural>,<optional zero>]
sub negquant {
my ($handle, $num, @forms) = @_;

Expand All @@ -73,12 +52,11 @@ sub negquant {
my $negcase = shift @forms;
return $negcase if $num < 0;

return $forms[2] if @forms > 2 and $num == 0;
return ($handle->numf($num) . ' ' . $handle->numerate($num, @forms));
return $forms[2] if @forms > 2 && $num == 0;
return $handle->numf($num) . ' ' . $handle->numerate($num, @forms);
}

# we use x to mark the strings for translation
%Lexicon = (
our %Lexicon = (
'_AUTO' => 1,

'_ONE_COLUMN' => x('One Column'),
Expand All @@ -93,7 +71,229 @@ sub negquant {
'_STATUS' => [ x('Enrolled'), x('Audit'), x('Drop'), x('Proctor') ],
);

package WeBWorK::Localize::I18N;
use base(WeBWorK::Localize);
# Override the Locale::Maketext::_compile method. The only real difference is that this override
# method does not call "use strict" in the code eval. Thus it can be used in the safe zone.
sub _compile {
my ($handle, $string_to_compile) = @_;

# The while regex is more expensive than this check on strings that don't need a compile.
# This op causes a ~2% speed hit for strings that need compile and a 250% speed improvement
# on strings that don't need compiling.
return \"$string_to_compile" if $string_to_compile !~ m/[\[~\]]/ms;

my @code;
my (@c) = (''); # "chunks" -- scratch.
my $call_count = 0;
my $big_pile = '';
{
my $in_group = 0; # start out outside a group
my ($m, @params); # scratch

while (
$string_to_compile =~ # Iterate over chunks.
m/(
[^\~\[\]]+ # non-~[] stuff (Capture everything else here)
|
~. # ~[, ~], ~~, ~other
|
\[ # [ presumably opening a group
|
\] # ] presumably closing a group
|
~ # terminal ~ ?
|
$
)/xgs
)
{
if ($1 eq '[' || $1 eq '') {
# Whether this is "[" or end, force processing of any preceding literal.
if ($in_group) {
if ($1 eq '') {
$handle->_die_pointing($string_to_compile, 'Unterminated bracket group');
} else {
$handle->_die_pointing($string_to_compile, 'You can\'t nest bracket groups');
}
} else {
$in_group = 1 if ($1 ne '');

die "How come \@c is empty?? in <$string_to_compile>" unless @c; # sanity
if (length $c[-1]) {
# Now actually processing the preceding literal
$big_pile .= $c[-1];
if (
$Locale::Maketext::USE_LITERALS && (
(ord('A') == 65)
? $c[-1] !~ m/[^\x20-\x7E]/s
# ASCII very safe chars
: $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
# EBCDIC very safe chars
)
)
{
# Normal case -- all very safe chars
$c[-1] =~ s/'/\\'/g;
push @code, q{ '} . $c[-1] . "',\n";
$c[-1] = ''; # reuse this slot
} else {
$c[-1] =~ s/\\\\/\\/g;
push @code, ' $c[' . $#c . "],\n";
push @c, ''; # new chunk
}
}
# else just ignore the empty string.
}

} elsif ($1 eq ']') {
# Close group -- go back in-band
if ($in_group) {
$in_group = 0;

# And now process the group...

if (!length($c[-1]) || $c[-1] =~ m/^\s+$/s) {
$c[-1] = ''; # Reset out chunk
next;
}

($m, @params) = split(/,/, $c[-1], -1);

# A bit of a hack -- we've turned "~,"'s into DELs, so turn them into real commas here.
if (ord('A') == 65) {
# ASCII, etc
for ($m, @params) {tr/\x7F/,/}
} else {
# EBCDIC (1047, 0037, POSIX-BC)
for ($m, @params) {tr/\x07/,/}
}

# Special-case handling of some method names:
if ($m eq '_*' || $m =~ m/^_(-?\d+)$/s) {
# Treat [_1,...] as [,_1,...], etc.
unshift @params, $m;
$m = '';
} elsif ($m eq '*') {
$m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
} elsif ($m eq '#') {
$m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
}

# Most common case: a simple, legal-looking method name.
if ($m eq '') {
# 0-length method name means to just interpolate:
push @code, ' (';
} elsif ($m =~ /^\w+$/s
&& !$handle->{'blacklist'}{$m}
&& (!defined $handle->{'whitelist'} || $handle->{'whitelist'}{$m}))
{
# Exclude anything fancy and restrict to the whitelist/blacklist.
push @code, ' $_[0]->' . $m . '(';
} else {
# TODO: Implement something? Or just too icky to consider?
$handle->_die_pointing(
$string_to_compile,
"Can't use \"$m\" as a method name in bracket group",
2 + length($c[-1])
);
}

pop @c; # we don't need that chunk anymore
++$call_count;

for my $p (@params) {
if ($p eq '_*') {
# Meaning: all parameters except $_[0]
$code[-1] .= ' @_[1 .. $#_], ';
# and yes, that does the right thing for all @_ < 3
} elsif ($p =~ m/^_(-?\d+)$/s) {
# _3 meaning $_[3]
$code[-1] .= '$_[' . (0 + $1) . '], ';
} elsif (
$Locale::Maketext::USE_LITERALS && (
(ord('A') == 65)
? $p !~ m/[^\x20-\x7E]/s
# ASCII very safe chars
: $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
# EBCDIC very safe chars
)
)
{
# Normal case: a literal containing only safe characters
$p =~ s/'/\\'/g;
$code[-1] .= q{'} . $p . q{', };
} else {
# Stow it on the chunk-stack, and just refer to that.
push @c, $p;
push @code, ' $c[' . $#c . '], ';
}
}
$code[-1] .= "),\n";

push @c, '';
} else {
$handle->_die_pointing($string_to_compile, q{Unbalanced ']'});
}

} elsif (substr($1, 0, 1) ne '~') {
# It's stuff not containing "~" or "[" or "]", i.e., a literal blob.
my $text = $1;
$text =~ s/\\/\\\\/g;
$c[-1] .= $text;
} elsif ($1 eq '~~') {
$c[-1] .= '~';
} elsif ($1 eq '~[') {
$c[-1] .= '[';
} elsif ($1 eq '~]') {
$c[-1] .= ']';
} elsif ($1 eq '~,') {
if ($in_group) {
# This is a hack, based on the assumption that no one will actually
# want a DEL inside a bracket group. Let's hope that's it's true.
if (ord('A') == 65) {
# ASCII etc
$c[-1] .= "\x7F";
} else {
# EBCDIC (cp 1047, 0037, POSIX-BC)
$c[-1] .= "\x07";
}
} else {
$c[-1] .= '~,';
}
} elsif ($1 eq '~') {
# This is possible only at string end, it seems.
$c[-1] .= '~';
} else {
# It's a "~X" where X is not a special character.
# Consider it a literal ~ and X.
my $text = $1;
$text =~ s/\\/\\\\/g;
$c[-1] .= $text;
}
}
}

if ($call_count) {
undef $big_pile; # Well, nevermind that.
} else {
# It's all literals! So don't bother with the eval. Return a SCALAR reference.
return \$big_pile;
}

die q{Last chunk isn't null??} if @c && length $c[-1]; # sanity
if (@code == 0) {
# Not possible?
return \'';
} elsif (@code > 1) {
# Most cases, presumably!
unshift @code, "join '',\n";
}
unshift @code, "sub {\n";
push @code, "}\n";

my $sub = eval(join '', @code);
die "$@ while eval-ling " . join('', @code) if $@; # Should be impossible.

return $sub;
}

1;