-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1brc-mce-mestia.pl
97 lines (86 loc) · 2.31 KB
/
1brc-mce-mestia.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';
use MCE::Loop;
# Abort the program with a one-line synopsis of the expected command-line
# arguments.  The message ends with a newline, so die() does not append the
# script name and line number to it.
sub usage {
    die "$0 <filename> <numbers of forks>\n";
}
# --- Main script ------------------------------------------------------------
# Command line: <filename> [<number of workers>].  The worker count defaults
# to 8 when it is omitted.  Only a missing or empty filename is rejected, so
# a file literally named "0" is accepted.
usage() if !@ARGV || !length $ARGV[0];
my $file = shift;
my $proc = shift // 8;

# Global per-city aggregate, filled in by update_global_hash():
#   city => { min => ..., max => ..., sum => ..., cnt => ... }
my $data = {};

# If chunk_size is not defined it is set automatically to 5M by MCE.
# Alternatives that were tried:
#my $chunk_size = int( ( -s $file ) / $proc );
# optimal chunk_size for the 1-billion-row file
#my $chunk_size = 5242880;
MCE::Loop->init(
    max_workers => $proc,
    use_slurpio => 1,    # proc_chunk() receives each chunk as a scalar ref
    parallel_io => 1,
    # chunk_size => $chunk_size,
);

# Each worker aggregates its chunk and gathers one hash ref per chunk back
# into @results.
my @results = mce_loop_f {
    my ( $mce, $chunk_ref, $chunk_id ) = @_;
    MCE->gather( proc_chunk($chunk_ref) );
} $file;
MCE::Loop->finish;

# Fold the per-chunk aggregates into the global hash.
update_global_hash($_) for @results;

# Emit "{city=min/mean/max, ...}" sorted by city name.  join() keeps the
# separator strictly between entries (no dangling ", " before the closing
# brace) and say() supplies the single trailing newline.
say '{', join(
    ', ',
    map {
        my $cd = $data->{$_};
        sprintf '%s=%.1f/%.1f/%.1f',
            $_, $cd->{min}, $cd->{sum} / $cd->{cnt}, $cd->{max};
    } sort keys %$data
), '}';
# Aggregate one chunk of "city;temperature" records.
#
# Argument: a reference to a string of newline-separated records.
# Returns:  a reference to a fresh hash keyed by city name; every value is a
#           hash reference with the keys min, max, sum and cnt describing
#           that city's readings within this chunk only.
#
# Lines that yield no temperature field (blank lines, lines without a ';')
# are skipped, so they neither create bogus entries nor trigger
# uninitialized-value warnings.
sub proc_chunk {
    my ($chunk_ref) = @_;
    my %stats_for;

    for my $line ( split /\n/, ${$chunk_ref} ) {
        my ( $city, $temp ) = split /;/, $line;    # get city and temperature
        next if !defined $temp;                    # blank or malformed line

        if ( my $cd = $stats_for{$city} ) {

            # A single reading cannot be both a new maximum and a new
            # minimum (min <= max always holds), so elsif is safe here.
            if ( $temp > $cd->{max} ) {
                $cd->{max} = $temp;
            }
            elsif ( $temp < $cd->{min} ) {
                $cd->{min} = $temp;
            }
            $cd->{sum} += $temp;
            $cd->{cnt}++;
        }
        else {
            # First reading for this city in the chunk.
            $stats_for{$city} = {
                max => $temp,
                min => $temp,
                sum => $temp,
                cnt => 1,
            };
        }
    }

    return \%stats_for;
}
# Merge one per-chunk aggregate into the file-level $data hash.
#
# Argument: a hash reference shaped like the return value of proc_chunk():
#           city => { min, max, sum, cnt }.
# Returns:  nothing; $data is updated in place.
#
# max and min are checked independently: a chunk aggregate can carry both a
# higher maximum and a lower minimum than what has been seen so far, so an
# if/elsif chain would drop the minimum whenever the maximum was raised.
sub update_global_hash {
    my ($datast) = @_;

    for my $city ( keys %{$datast} ) {
        my $part = $datast->{$city};
        my $cd   = $data->{$city};

        if ( !$cd ) {
            # First time this city is seen: store a copy so later merges do
            # not modify the caller's per-chunk hash.
            $data->{$city} = { %{$part} };
            next;
        }

        $cd->{max} = $part->{max} if $part->{max} > $cd->{max};
        $cd->{min} = $part->{min} if $part->{min} < $cd->{min};
        $cd->{sum} += $part->{sum};
        $cd->{cnt} += $part->{cnt};
    }

    return;
}