From aaeb4a56fc4de9e95e201c6513f16f0f94ea3fa5 Mon Sep 17 00:00:00 2001 From: corwin Date: Tue, 21 Apr 2026 11:38:13 -0500 Subject: [PATCH 1/2] additional IP dup checking options The check (--check=# or -#) is now a bit field allowing a new check level of 3 (three, --check=3 or -3) to enable both parse time checks using a system call to ipset test per record and restore time checks. --- combine-saves.pl | 114 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 27 deletions(-) diff --git a/combine-saves.pl b/combine-saves.pl index 4ae5ac8..5dee707 100755 --- a/combine-saves.pl +++ b/combine-saves.pl @@ -22,6 +22,24 @@ # ################# +use strict; +use warnings; + +# if no DB file +our ($default_db_file) = q(max-counts.txt);; + +# specify as -# or --check=# (e.g. -0 or --check=0) +# 0, no; 1, -exist; 2, ipset test; 3, both -exist and test +our ($check); + +our @check_bits = map unpack("B*", pack("N", $_)), 0,1; + +# whether to print to STDERR for each add skipped +# this only affects $check = 2 or 3 (the ipset test check) +our ($checkwarn); + +our (%m,%h); + sub usage { my $message = shift; $message .= "\n" if $message; @@ -39,37 +57,69 @@ sub max2 { return $rv; } +sub db { + my %m; + my( $file ) = grep defined&&length,( + @_, + $default_db_file, + q(max-counts.txt) + ); #$default_db_file; + open my$FH, '<', $file + or usage( q(ERROR: cannot open DB) + . qq( "$file": $!) + . ' ('.( 0+$! 
).')' + ); + while (my $line = <$FH>) { + chomp $line; + if ($line) { + my($k,$v) = split /\s+/, $line; + if ($k and $v) { + $m{$k} = $v; + } + } + } + return %m; +} + BEGIN # program is a filter so we must wrap start-up processing { - # avoid extra "BEGIN faile--" messages - local $SIG{__DIE__} = sub {warn @_; exit 1}; + # avoid extra "BEGIN failed--" messages + $SIG{__DIE__} = sub {warn @_; exit 1}; # display usage if requested usage() if grep /^-+[?h]/, @ARGV; + # process options (must come first, must start with -) + while (@ARGV and $ARGV[0] =~ /^-+(.*)/) { + local $_ = $1; + if (/^(?:c(?:heck)?)?=?([0123]|t(?:est)?)$/) { + $check = $1; + $check = 2 # --check=test + if lc($1) =~ /^t/; + shift @ARGV; + } + elsif (/^w?(?:arn)?$/) { + $checkwarn = 1; + shift @ARGV; + } + else { + usage( qq(ERROR: unknown option "$ARGV[0]") ); + } + } + + # default: use -exist option to add (via ipset restore) + $checkwarn = 0 unless defined $checkwarn; + $check = 1 unless defined $check; + $check = unpack("B*", pack("N", $check)) if $check; + + # and unless we have input die usage( qq(ERROR: STDIN is non a pipe or redirection) ) if -t STDIN; - # take max-count-file from args, if any - my $SET_COUNTS = @ARGV ? shift : q(max-counts.txt); - - open my$FH,q(<),$SET_COUNTS - or usage( - q(ERROR: cannot open max-count-file) - . qq( "$SET_COUNTS": $!) - . 
' ('.(0+$!).')' - ); - - # load hash of set => max - while(<$FH>) - { - chomp; - my( $k, $v ) = split; - $m{ $k } = max2( $v ) - if $k and $v - } + # read db, check remaining command-line DB file + %m = db( @ARGV ); # use Data::Dumper; die Dumper( \%m ); } if( /^create (\S+)/ ) @@ -78,8 +128,8 @@ if( /^create (\S+)/ ) $_ = ''; # don't print again } elsif( exists $m{ $1 } ) { $h{$1} = 1; # ensure this is the only printing - $n = $1; # grab the name - $v = $m{$n}; # lookup max + my $n = $1; # grab the name + my $v = $m{$n}; # lookup max # mangle the create to inject maxelem from DB s/^create $n (.*?maxelem) \d+ (.*)$/create $n $1 $v $2/ #and warn qq[set $n=$v] @@ -89,10 +139,20 @@ if( /^create (\S+)/ ) } elsif( /^add (\S+)/ ) { - # $_ = '' unless exists $m{$1}; - if (exists $m{$1}) { - s/$/ -exist/; # make dup-safe - } else { - $_ = ''; # skip add for set we cannot create + my $set = $1; + $_ = '', next unless exists $m{$1}; + if ($check) { + if ($check & ( 1<< $check_bits[1])) { + my( $ip ) = /$set (\S+)/s; + unless (system( qq(ipset test "$set" "$ip" >/dev/null 2>&1) )) { + warn qq[skip $set $ip (ipset test -eq 0)\n] + if $checkwarn; + $_ = ''; + next; + } + } + if ($check & ( 1<< $check_bits[0])) { + s/$/ -exist/ + } } } From a402bc48827742209ee9e8357568dcfedefabd3b Mon Sep 17 00:00:00 2001 From: corwin Date: Tue, 21 Apr 2026 11:43:00 -0500 Subject: [PATCH 2/2] add IP deduplication options Add new $check variable taken from environment; it can be set to an integer to generate the appropriate option to combine-saves.pl --- save-restore.sh | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/save-restore.sh b/save-restore.sh index 8b990a1..0b5be59 100755 --- a/save-restore.sh +++ b/save-restore.sh @@ -25,14 +25,25 @@ # ################# +## +# options to control duplicate IP checks: +# 0 - no checking +# 1 - in restore, append -exist to ipset add +# 2 - in parse, run ipset test check +# 3 - in both restore and parse, exist & test 
+check=${check:-} # take default from combine-saves.pl + folder=${folder:-} savext=${savext:-.save} db=${db:-name-maxelem.db} + dbtmp=${dbtmp:-$db.tmp} + ipset=${ipset:-$(which ipset)} ipsetcmd=${ipsetcmd:-restore} + scriptdir=${scriptdir:-$(dirname $0)} savecnt="${scriptdir}/save-count.pl" sortcnt="${scriptdir}/combine-counts.pl" @@ -40,6 +51,10 @@ loadset="${scriptdir}/combine-saves.pl" files=${files:-"$@"} +if test -n "$check" ; then + check="--check=$check"; +fi + if test -z "$files" ; then files=$(ls -1 ${folder}*${savext} 2>/dev/null) fi @@ -53,7 +68,7 @@ cat "$files" | \ $savecnt >$dbtmp \ && $sortcnt <$dbtmp >$db \ && cat "$files" \ - | $loadset $check $db \ + | $loadset $check $db \ | $ipset $ipsetcmd RV=$? rm -f $dbtmp 2>/dev/null