ipset: use awk instead of grep to avoid wrong ip matches

This commit is contained in:
bol-van 2021-11-05 00:11:05 +03:00
parent 2bf47f4fff
commit 8adf03603c
5 changed files with 29 additions and 14 deletions

View File

@ -26,7 +26,7 @@ file_extract_lines()
# $2 - from line (starting with 0)
# $3 - line count
# awk "{ err=1 } NR < $(($2+1)) { next } { print; err=0 } NR == $(($2+$3)) { exit err } END {exit err}" "$1"
awk "NR < $(($2+1)) { next } { print } NR == $(($2+$3)) { exit }" "$1"
$AWK "NR < $(($2+1)) { next } { print } NR == $(($2+$3)) { exit }" "$1"
}
ipset_restore_chunked()
{
@ -160,7 +160,7 @@ elif exists ipset; then
# only /tmp is considered tmpfs. other locations mean tmpdir was redirected to a disk
SAVERAM=0
[ "$TMPDIR" = "/tmp" ] && {
RAMSIZE=$($GREP MemTotal /proc/meminfo | awk '{print $2}')
RAMSIZE=$($GREP MemTotal /proc/meminfo | $AWK '{print $2}')
[ "$RAMSIZE" -lt "110000" ] && SAVERAM=1
}
print_reloading_backend ipset

View File

@ -60,6 +60,12 @@ else
GREP=$(which grep)
fi
# GNU awk is faster
if exists gawk; then
AWK=gawk
else
AWK=awk
fi
grep_supports_b()
{
@ -68,17 +74,16 @@ grep_supports_b()
}
get_ip_regex()
{
REG_IPV4='((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(/([0-9]|[12][0-9]|3[012]))?'
REG_IPV6='[0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}|:)+(/([0-9][0-9]?|1[01][0-9]|12[0-8]))?'
REG_IPV4='((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\/([0-9]|[12][0-9]|3[012]))?'
REG_IPV6='[0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}|:)+(\/([0-9][0-9]?|1[01][0-9]|12[0-8]))?'
# good but too slow
# REG_IPV6='([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,7}:(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}(/[0-9]+)?|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})(/[0-9]+)?|:((:[0-9a-fA-F]{1,4}){1,7}|:)(/([0-9][0-9]?|1[01][0-9]|12[0-8]))?'
grep_supports_b && {
REG_IPV4="\b$REG_IPV4\b"
REG_IPV6="\b$REG_IPV6\b"
}
# grep_supports_b && {
# REG_IPV4="\b$REG_IPV4\b"
# REG_IPV6="\b$REG_IPV6\b"
# }
}
ip2net4()
{
if [ -x "$IP2NET" ]; then

View File

@ -9,6 +9,11 @@ ZREESTR="$TMPDIR/reestr.txt"
#ZURL_REESTR=https://reestr.rublacklist.net/api/current
ZURL_REESTR=https://raw.githubusercontent.com/zapret-info/z-i/master/dump.csv
awkgrep()
{
# $1 - pattern
nice -n 5 $AWK "{while ( match(\$0,/($1[ |;])/) ) { print substr(\$0,RSTART,RLENGTH-1); \$0=substr(\$0,RSTART+RLENGTH) } }"
}
dig_reestr()
{
@ -25,12 +30,12 @@ dig_reestr()
# find entries with https or without domain name - they should be banned by IP
# 2971-18 is TELEGRAM. lots of proxy IPs banned, list grows very large
(nice -n 5 $GREP -avE "$DOMMASK" "$ZREESTR" ; $GREP -a "https://" "$ZREESTR") |
nice -n 5 $GREP -oE "$1" | cut_local | sort -u >$TMP
awkgrep "$1" | cut_local | sort -u >$TMP
ip2net$4 <"$TMP" | zz "$3"
# other IPs go to regular zapret list
tail -n +2 "$ZREESTR" | nice -n 5 $GREP -oE "$1" | cut_local | nice -n 5 $GREP -xvFf "$TMP" | ip2net$4 | zz "$2"
tail -n +2 "$ZREESTR" | awkgrep "$1" | cut_local | nice -n 5 $GREP -xvFf "$TMP" | ip2net$4 | zz "$2"
rm -f "$TMP"
}

View File

@ -25,7 +25,7 @@ if test $dlsize -lt 204800; then
echo list file is too small. can be bad.
exit 2
fi
(LANG=C cut -s -f2 -d';' "$ZREESTR" | LANG=C sed -Ee 's/^\*\.(.+)$/\1/' -ne 's/^[a-z0-9A-Z._-]+$/&/p' | awk '{ print tolower($0) }' ; cat "$ZUSERLIST" ) | sort -u | zz "$ZHOSTLIST"
(LANG=C cut -s -f2 -d';' "$ZREESTR" | LANG=C sed -Ee 's/^\*\.(.+)$/\1/' -ne 's/^[a-z0-9A-Z._-]+$/&/p' | $AWK '{ print tolower($0) }' ; cat "$ZUSERLIST" ) | sort -u | zz "$ZHOSTLIST"
rm -f "$ZREESTR"
hup_zapret_daemons

View File

@ -10,6 +10,12 @@ ZREESTR="$TMPDIR/reestr.txt"
ZURL_REESTR=https://raw.githubusercontent.com/zapret-info/z-i/master/dump.csv
awkgrep()
{
# $1 - pattern
nice -n 5 $AWK "{while ( match(\$0,/($1[ |;])/) ) { print substr(\$0,RSTART,RLENGTH-1); \$0=substr(\$0,RSTART+RLENGTH) } }"
}
dig_reestr()
{
# $1 - grep ipmask
@ -18,10 +24,9 @@ dig_reestr()
echo processing reestr list $2
tail -n +2 "$ZREESTR" | nice -n 5 $GREP -oE "$1" | cut_local | ip2net$3 | zz "$2"
tail -n +2 "$ZREESTR" | awkgrep "$1" | cut_local | ip2net$3 | zz "$2"
}
getuser && {
# assume all https banned by ip
curl -k --fail --max-time 600 --connect-timeout 5 --retry 3 --max-filesize 251658240 "$ZURL_REESTR" -o "$ZREESTR" ||