Files
xtables-addons/geoip/xt_geoip_build
Jan Engelhardt 5e19871613 geoip: build tool should not rely on directory name
Fix this:

	GeoLite2-Country-CSV_20180905$ /usr/lib/xtables-addons/xt_geoip_build

	Use of uninitialized value $dir in concatenation (.) or string at
	/usr/lib/xtables-addons/xt_geoip_build line 59.
	Couldn't open list country names

Do not rely on any directory names (they change). Use the current
directory as the default source directory, similar to the older
xt_geoip_build (well, *.csv was passed as arguments).
2018-09-07 15:03:20 +02:00

266 lines
6.0 KiB
Perl
Executable File

#!/usr/bin/perl
#
# Converter for MaxMind (GeoLite2) CSV database to binary, for xt_geoip
# Copyright Jan Engelhardt, 2008-2011
# Copyright Philip Prindeville, 2018
#
use Getopt::Long;
use Net::CIDR::Lite;
use Socket qw(AF_INET AF_INET6 inet_pton);
use warnings;
use Text::CSV_XS; # or trade for Text::CSV
use strict;
my $csv = Text::CSV_XS->new({
allow_whitespace => 1,
binary => 1,
eol => $/,
}); # or Text::CSV
my $source_dir = ".";
my $target_dir = ".";
&Getopt::Long::Configure(qw(bundling));
&GetOptions(
"D=s" => \$target_dir,
"S=s" => \$source_dir,
);
if (!-d $source_dir) {
print STDERR "Source directory \"$source_dir\" does not exist.\n";
exit 1;
}
if (!-d $target_dir) {
print STDERR "Target directory \"$target_dir\" does not exist.\n";
exit 1;
}
my %countryId;
my %countryName;
&loadCountries();
&dump(&collect());
sub loadCountries
{
sub id; sub cc; sub long; sub ct; sub cn;
%countryId = ();
%countryName = ();
my $file = "$source_dir/GeoLite2-Country-Locations-en.csv";
open(my $fh, '<', $file) || die "Couldn't open list country names\n";
# first line is headers
my $row = $csv->getline($fh);
my %header = map { ($row->[$_], $_); } (0..$#{$row});
my %pairs = (
country_iso_code => 'ISO Country Code',
geoname_id => 'ID',
country_name => 'Country Name',
continent_code => 'Continent Code',
continent_name => 'Continent Name',
);
# verify that the columns we need are present
map { die "Table has no $pairs{$_} column\n" unless (exists $header{$_}); } keys %pairs;
my %remapping = (
id => 'geoname_id',
cc => 'country_iso_code',
long => 'country_name',
ct => 'continent_code',
cn => 'continent_name',
);
# now create a function which returns the value of that column #
map { eval "sub $_ () { \$header{\$remapping{$_}}; }" ; } keys %remapping;
while (my $row = $csv->getline($fh)) {
if ($row->[cc] eq '' && $row->[long] eq '') {
$countryId{$row->[id]} = $row->[ct];
$countryName{$row->[ct]} = $row->[cn];
} else {
$countryId{$row->[id]} = $row->[cc];
$countryName{$row->[cc]} = $row->[long];
}
}
$countryName{A1} = 'Anonymous Proxy';
$countryName{A2} = 'Satellite Provider';
$countryName{O1} = 'Other Country';
close($fh);
# clean up the namespace
undef &id; undef &cc; undef &long; undef &ct; undef &cn;
}
sub lookupCountry
{
my ($id, $rid, $proxy, $sat) = @_;
if ($proxy) {
return 'A1';
} elsif ($sat) {
return 'A2';
}
$id ||= $rid;
if ($id eq '') {
return 'O1';
}
die "Unknown id: $id line $.\n" unless (exists $countryId{$id});
return $countryId{$id};
}
sub collect
{
my ($file, $fh, $row);
my (%country, %header);
sub net; sub id; sub rid; sub proxy; sub sat;
my %pairs = (
network => 'Network',
registered_country_geoname_id => 'Registered Country ID',
geoname_id => 'Country ID',
is_anonymous_proxy => 'Anonymous Proxy',
is_satellite_provider => 'Satellite',
);
foreach (sort keys %countryName) {
$country{$_} = {
name => $countryName{$_},
pool_v4 => Net::CIDR::Lite->new(),
pool_v6 => Net::CIDR::Lite->new(),
};
}
$file = "$source_dir/GeoLite2-Country-Blocks-IPv4.csv";
open($fh, '<', $file) || die "Can't open IPv4 database\n";
# first line is headers
$row = $csv->getline($fh);
%header = map { ($row->[$_], $_); } (0..$#{$row});
# verify that the columns we need are present
map { die "Table has no %pairs{$_} column\n" unless (exists $header{$_}); } keys %pairs;
my %remapping = (
net => 'network',
id => 'geoname_id',
rid => 'registered_country_geoname_id',
proxy => 'is_anonymous_proxy',
sat => 'is_satellite_provider',
);
# now create a function which returns the value of that column #
map { eval "sub $_ () { \$header{\$remapping{$_}}; }" ; } keys %remapping;
while ($row = $csv->getline($fh)) {
my ($cc, $cidr);
$cc = lookupCountry($row->[id], $row->[rid], $row->[proxy], $row->[sat]);
$cidr = $row->[net];
$country{$cc}->{pool_v4}->add($cidr);
if ($. % 4096 == 0) {
print STDERR "\r\e[2K$. entries";
}
}
print STDERR "\r\e[2K$. entries total\n";
close($fh);
# clean up the namespace
undef &net; undef &id; undef &rid; undef &proxy; undef &sat;
$file = "$source_dir/GeoLite2-Country-Blocks-IPv6.csv";
open($fh, '<', $file) || die "Can't open IPv6 database\n";
# first line is headers
$row = $csv->getline($fh);
%header = map { ($row->[$_], $_); } (0..$#{$row});
# verify that the columns we need are present
map { die "Table has no %pairs{$_} column\n" unless (exists $header{$_}); } keys %pairs;
# unlikely the IPv6 table has different columns, but just to be sure
# create a function which returns the value of that column #
map { eval "sub $_ () { \$header{\$remapping{$_}}; }" ; } keys %remapping;
while ($row = $csv->getline($fh)) {
my ($cc, $cidr);
$cc = lookupCountry($row->[id], $row->[rid], $row->[proxy], $row->[sat]);
$cidr = $row->[net];
$country{$cc}->{pool_v6}->add($cidr);
if ($. % 4096 == 0) {
print STDERR "\r\e[2K$. entries";
}
}
print STDERR "\r\e[2K$. entries total\n";
close($fh);
# clean up the namespace
undef &net; undef &id; undef &rid; undef &proxy; undef &sat;
return \%country;
}
sub dump
{
my $country = shift @_;
foreach my $iso_code (sort keys %{$country}) {
&dump_one($iso_code, $country->{$iso_code});
}
}
sub dump_one
{
my($iso_code, $country) = @_;
my @ranges;
@ranges = $country->{pool_v4}->list_range();
writeCountry($iso_code, $country->{name}, AF_INET, @ranges);
@ranges = $country->{pool_v6}->list_range();
writeCountry($iso_code, $country->{name}, AF_INET6, @ranges);
}
sub writeCountry
{
my ($iso_code, $name, $family, @ranges) = @_;
my $fh;
printf "%5u IPv%s ranges for %s %s\n",
scalar(@ranges),
($family == AF_INET ? '4' : '6'),
$iso_code, $name;
my $file = "$target_dir/".uc($iso_code).".iv".($family == AF_INET ? '4' : '6');
if (!open($fh, '>', $file)) {
print STDERR "Error opening $file: $!\n";
exit 1;
}
binmode($fh);
foreach my $range (@ranges) {
my ($start, $end) = split('-', $range);
$start = inet_pton($family, $start);
$end = inet_pton($family, $end);
print $fh $start, $end;
}
close $fh;
}