New linter/ dir with pslint.py selftest
This commit is contained in:
parent
811513f17e
commit
2914afa8c7
|
@ -1,8 +1,9 @@
|
||||||
*.gz
|
|
||||||
*.o
|
|
||||||
*.lo
|
|
||||||
*.la
|
|
||||||
*.exe
|
*.exe
|
||||||
|
*.gz
|
||||||
|
*.la
|
||||||
|
*.lo
|
||||||
|
*.log
|
||||||
|
*.o
|
||||||
*~
|
*~
|
||||||
*/.deps
|
*/.deps
|
||||||
*/.libs
|
*/.libs
|
||||||
|
|
|
@ -14,6 +14,9 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS}
|
||||||
pkgconfigdir = $(libdir)/pkgconfig
|
pkgconfigdir = $(libdir)/pkgconfig
|
||||||
pkgconfig_DATA = libpsl.pc
|
pkgconfig_DATA = libpsl.pc
|
||||||
|
|
||||||
|
clean-local:
|
||||||
|
rm -rf $(srcdir)/linter/log
|
||||||
|
|
||||||
EXTRA_DIST = config.rpath LICENSE
|
EXTRA_DIST = config.rpath LICENSE
|
||||||
dist-hook:
|
dist-hook:
|
||||||
mkdir -p $(distdir)/list/tests
|
mkdir -p $(distdir)/list/tests
|
||||||
|
|
|
@ -27,18 +27,19 @@ import sys
|
||||||
|
|
||||||
nline = 0
|
nline = 0
|
||||||
line = ""
|
line = ""
|
||||||
|
orig_line = ""
|
||||||
warnings = 0
|
warnings = 0
|
||||||
errors = 0
|
errors = 0
|
||||||
skip_order_check = False
|
skip_order_check = False
|
||||||
|
|
||||||
def warning(msg):
|
def warning(msg):
|
||||||
global warnings, line, nline
|
global warnings, orig_line, nline
|
||||||
print('%d: warning: %s%s' % (nline, msg, ": \'" + line + "\'" if line else ""))
|
print('%d: warning: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else ""))
|
||||||
warnings += 1
|
warnings += 1
|
||||||
|
|
||||||
def error(msg):
|
def error(msg):
|
||||||
global errors, line, nline
|
global errors, orig_line, nline
|
||||||
print('%d: error: %s%s' % (nline, msg, ": \'" + line + "\'" if line else ""))
|
print('%d: error: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else ""))
|
||||||
errors += 1
|
errors += 1
|
||||||
# skip_order_check = True
|
# skip_order_check = True
|
||||||
|
|
||||||
|
@ -73,6 +74,7 @@ def check_order(group):
|
||||||
warning('Incorrectly sorted group of domains')
|
warning('Incorrectly sorted group of domains')
|
||||||
print(" " + str(group))
|
print(" " + str(group))
|
||||||
print(" " + str(sorted_group))
|
print(" " + str(sorted_group))
|
||||||
|
print("Correct sorting would be:")
|
||||||
print_psl(sorted_group)
|
print_psl(sorted_group)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
@ -81,7 +83,7 @@ def check_order(group):
|
||||||
|
|
||||||
def lint_psl(infile):
|
def lint_psl(infile):
|
||||||
"""Parses PSL file and performs syntax checking"""
|
"""Parses PSL file and performs syntax checking"""
|
||||||
global line, nline
|
global orig_line, nline
|
||||||
|
|
||||||
PSL_FLAG_EXCEPTION = (1<<0)
|
PSL_FLAG_EXCEPTION = (1<<0)
|
||||||
PSL_FLAG_WILDCARD = (1<<1)
|
PSL_FLAG_WILDCARD = (1<<1)
|
||||||
|
@ -91,10 +93,12 @@ def lint_psl(infile):
|
||||||
|
|
||||||
line2number = {}
|
line2number = {}
|
||||||
line2flag = {}
|
line2flag = {}
|
||||||
section = 0
|
|
||||||
group = []
|
group = []
|
||||||
|
section = 0
|
||||||
|
icann_sections = 0
|
||||||
|
private_sections = 0
|
||||||
|
|
||||||
lines = [line.strip('\r\n') for line in infile]
|
lines = [line.strip('\n') for line in infile]
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
nline += 1
|
nline += 1
|
||||||
|
@ -102,23 +106,28 @@ def lint_psl(infile):
|
||||||
# check for leading/trailing whitespace
|
# check for leading/trailing whitespace
|
||||||
stripped = line.strip()
|
stripped = line.strip()
|
||||||
if stripped != line:
|
if stripped != line:
|
||||||
|
line = line.replace('\t','\\t')
|
||||||
|
line = line.replace('\r','^M')
|
||||||
warning('Leading/Trailing whitespace')
|
warning('Leading/Trailing whitespace')
|
||||||
|
orig_line = line
|
||||||
line = stripped
|
line = stripped
|
||||||
|
|
||||||
# empty line (end of sorted domain group)
|
# empty line (end of sorted domain group)
|
||||||
if not line:
|
if not line:
|
||||||
check_order(group)
|
# check_order(group)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# check for section begin/end
|
# check for section begin/end
|
||||||
if line[0:2] == "//":
|
if line[0:2] == "//":
|
||||||
check_order(group)
|
# check_order(group)
|
||||||
|
|
||||||
if section == 0:
|
if section == 0:
|
||||||
if line == "// ===BEGIN ICANN DOMAINS===":
|
if line == "// ===BEGIN ICANN DOMAINS===":
|
||||||
section = PSL_FLAG_ICANN
|
section = PSL_FLAG_ICANN
|
||||||
|
icann_sections += 1
|
||||||
elif line == "// ===BEGIN PRIVATE DOMAINS===":
|
elif line == "// ===BEGIN PRIVATE DOMAINS===":
|
||||||
section = PSL_FLAG_PRIVATE
|
section = PSL_FLAG_PRIVATE
|
||||||
|
private_sections += 1
|
||||||
elif line[3:11] == "===BEGIN":
|
elif line[3:11] == "===BEGIN":
|
||||||
error('Unexpected begin of unknown section')
|
error('Unexpected begin of unknown section')
|
||||||
elif line[3:9] == "===END":
|
elif line[3:9] == "===END":
|
||||||
|
@ -148,7 +157,11 @@ def lint_psl(infile):
|
||||||
|
|
||||||
# decode UTF-8 input into unicode, needed only for python 2.x
|
# decode UTF-8 input into unicode, needed only for python 2.x
|
||||||
if sys.version_info[0] < 3:
|
if sys.version_info[0] < 3:
|
||||||
|
try:
|
||||||
line = line.decode('utf-8')
|
line = line.decode('utf-8')
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
error('Invalid UTF-8 character')
|
||||||
|
continue
|
||||||
|
|
||||||
# each rule must be lowercase (or more exactly: not uppercase and not titlecase)
|
# each rule must be lowercase (or more exactly: not uppercase and not titlecase)
|
||||||
if line != line.lower():
|
if line != line.lower():
|
||||||
|
@ -170,6 +183,7 @@ def lint_psl(infile):
|
||||||
# wildcard and exception must not combine
|
# wildcard and exception must not combine
|
||||||
if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION:
|
if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION:
|
||||||
error('Combination of wildcard and exception')
|
error('Combination of wildcard and exception')
|
||||||
|
continue
|
||||||
|
|
||||||
labels = line.split('.')
|
labels = line.split('.')
|
||||||
|
|
||||||
|
@ -179,6 +193,13 @@ def lint_psl(infile):
|
||||||
# else:
|
# else:
|
||||||
# group.append(list(reversed(line.split('.'))))
|
# group.append(list(reversed(line.split('.'))))
|
||||||
|
|
||||||
|
if flags & PSL_FLAG_EXCEPTION and len(labels) > 1:
|
||||||
|
domain = ".".join(str(label) for label in labels[1:])
|
||||||
|
if not domain in line2flag:
|
||||||
|
error('Exception without previous wildcard')
|
||||||
|
elif not line2flag[domain] & PSL_FLAG_WILDCARD:
|
||||||
|
error('Exception without previous wildcard')
|
||||||
|
|
||||||
for label in labels:
|
for label in labels:
|
||||||
if not label:
|
if not label:
|
||||||
error('Leading/trailing or multiple dot')
|
error('Leading/trailing or multiple dot')
|
||||||
|
@ -208,11 +229,26 @@ def lint_psl(infile):
|
||||||
!foo.bar + *.foo.bar
|
!foo.bar + *.foo.bar
|
||||||
'''
|
'''
|
||||||
error('Found doublette/ambiguity (previous line was %d)' % line2number[line])
|
error('Found doublette/ambiguity (previous line was %d)' % line2number[line])
|
||||||
continue
|
|
||||||
|
|
||||||
line2number[line] = nline
|
line2number[line] = nline
|
||||||
line2flag[line] = flags
|
line2flag[line] = flags
|
||||||
|
|
||||||
|
orig_line = None
|
||||||
|
|
||||||
|
if section == PSL_FLAG_ICANN:
|
||||||
|
error('ICANN section not closed')
|
||||||
|
elif section == PSL_FLAG_PRIVATE:
|
||||||
|
error('PRIVATE section not closed')
|
||||||
|
|
||||||
|
if icann_sections < 1:
|
||||||
|
warning('No ICANN section found')
|
||||||
|
elif icann_sections > 1:
|
||||||
|
warning('%d ICANN sections found' % icann_sections)
|
||||||
|
|
||||||
|
if private_sections < 1:
|
||||||
|
warning('No PRIVATE section found')
|
||||||
|
elif private_sections > 1:
|
||||||
|
warning('%d PRIVATE sections found' % private_sections)
|
||||||
|
|
||||||
def usage():
|
def usage():
|
||||||
"""Prints the usage"""
|
"""Prints the usage"""
|
|
@ -0,0 +1,20 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
rc=0
|
||||||
|
rm -rf log
|
||||||
|
mkdir -p log
|
||||||
|
|
||||||
|
for file in `ls *.input|cut -d'.' -f1`; do
|
||||||
|
echo -n "${file}: "
|
||||||
|
./pslint.py ${file}.input >log/${file}.log 2>&1
|
||||||
|
diff ${file}.expected log/${file}.log >log/${file}.diff
|
||||||
|
if [ $? -eq 0 ]; then
|
||||||
|
echo OK
|
||||||
|
rm log/${file}.diff
|
||||||
|
else
|
||||||
|
echo FAILED
|
||||||
|
rc=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
exit $rc
|
|
@ -0,0 +1,4 @@
|
||||||
|
10: error: Illegal character: 'a.exam#ple.com'
|
||||||
|
11: error: Illegal character: 'b.exam ple.com'
|
||||||
|
13: error: Invalid UTF-8 character: 'd.测è¯'
|
||||||
|
15: warning: No PRIVATE section found
|
|
@ -0,0 +1,15 @@
|
||||||
|
// test:
|
||||||
|
// - label contains illegal character
|
||||||
|
// - c. is valid UTF-8
|
||||||
|
// - d. has invalid UTF-8 code for the TLD
|
||||||
|
//
|
||||||
|
// best viewed with 'LC_ALL=C vi <filename>'
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
a.exam#ple.com
|
||||||
|
b.exam ple.com
|
||||||
|
c.测试
|
||||||
|
d.测è¯
|
||||||
|
|
||||||
|
// ===END ICANN DOMAINS===
|
|
@ -0,0 +1,4 @@
|
||||||
|
9: error: Leading/trailing or multiple dot: '.a.example.com'
|
||||||
|
10: error: Leading/trailing or multiple dot: 'b.example.com.'
|
||||||
|
11: error: Leading/trailing or multiple dot: 'c..example.com'
|
||||||
|
13: warning: No PRIVATE section found
|
|
@ -0,0 +1,13 @@
|
||||||
|
// test:
|
||||||
|
// - leading dot
|
||||||
|
// - trailing dot
|
||||||
|
// - consecutive dots
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
// example.com: https://www.iana.org/domains/reserved
|
||||||
|
.a.example.com
|
||||||
|
b.example.com.
|
||||||
|
c..example.com
|
||||||
|
|
||||||
|
// ===END ICANN DOMAINS===
|
|
@ -0,0 +1,6 @@
|
||||||
|
9: error: Found doublette/ambiguity (previous line was 8): '*.com'
|
||||||
|
13: error: Found doublette/ambiguity (previous line was 12): '!www.com'
|
||||||
|
17: error: Found doublette/ambiguity (previous line was 16): '*.example.com'
|
||||||
|
21: error: Found doublette/ambiguity (previous line was 20): 'example1.com'
|
||||||
|
24: error: Found doublette/ambiguity (previous line was 17): 'example.com'
|
||||||
|
26: warning: No PRIVATE section found
|
|
@ -0,0 +1,26 @@
|
||||||
|
// test:
|
||||||
|
// - duplicate rules (wildcard, exception, plain)
|
||||||
|
// - redundant/overlapping rules
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
// *.com implicitly includes .com
|
||||||
|
com
|
||||||
|
*.com
|
||||||
|
|
||||||
|
// double exception
|
||||||
|
!www.com
|
||||||
|
!www.com
|
||||||
|
|
||||||
|
// double wildcard
|
||||||
|
*.example.com
|
||||||
|
*.example.com
|
||||||
|
|
||||||
|
// double plain rule
|
||||||
|
example1.com
|
||||||
|
example1.com
|
||||||
|
|
||||||
|
// redundant/overlapping rule
|
||||||
|
example.com
|
||||||
|
|
||||||
|
// ===END ICANN DOMAINS===
|
|
@ -0,0 +1,6 @@
|
||||||
|
17: error: Leading/trailing or multiple dot: '!.example.com'
|
||||||
|
18: error: Illegal character: 'w!w.example.com'
|
||||||
|
19: error: Found doublette/ambiguity (previous line was 12): '!www.example.com'
|
||||||
|
20: error: Exception without previous wildcard: '!a.b.example.com'
|
||||||
|
21: error: Exception without previous wildcard: '!a.c.example.com'
|
||||||
|
23: warning: No PRIVATE section found
|
|
@ -0,0 +1,23 @@
|
||||||
|
// test:
|
||||||
|
// - valid exception
|
||||||
|
// - invalid exceptions
|
||||||
|
// - same exception twice
|
||||||
|
// - exception without wildcard
|
||||||
|
// - exception with prevailing '*' rule (!localhost)
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
// valid
|
||||||
|
*.example.com
|
||||||
|
!www.example.com
|
||||||
|
!localhost
|
||||||
|
c.example.com
|
||||||
|
|
||||||
|
// invalid
|
||||||
|
!.example.com
|
||||||
|
w!w.example.com
|
||||||
|
!www.example.com
|
||||||
|
!a.b.example.com
|
||||||
|
!a.c.example.com
|
||||||
|
|
||||||
|
// ===END ICANN DOMAINS===
|
|
@ -0,0 +1,3 @@
|
||||||
|
7: error: Punycode found: 'a.xn--0zwm56d'
|
||||||
|
8: error: Double minus found: 'a.ex--ample.com'
|
||||||
|
10: warning: No PRIVATE section found
|
|
@ -0,0 +1,10 @@
|
||||||
|
// test:
|
||||||
|
// - label is punycode
|
||||||
|
// - label has double minus
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
a.xn--0zwm56d
|
||||||
|
a.ex--ample.com
|
||||||
|
|
||||||
|
// ===END ICANN DOMAINS===
|
|
@ -0,0 +1,3 @@
|
||||||
|
4: error: Rule outside of section: 'example.com'
|
||||||
|
4: warning: No ICANN section found
|
||||||
|
4: warning: No PRIVATE section found
|
|
@ -0,0 +1,4 @@
|
||||||
|
// test:
|
||||||
|
// - no section at all
|
||||||
|
|
||||||
|
example.com
|
|
@ -0,0 +1,2 @@
|
||||||
|
11: warning: 2 ICANN sections found
|
||||||
|
11: warning: No PRIVATE section found
|
|
@ -0,0 +1,11 @@
|
||||||
|
// test:
|
||||||
|
// - two ICANN sections
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
example.com
|
||||||
|
|
||||||
|
// ===END ICANN DOMAINS===
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
// ===END ICANN DOMAINS===
|
|
@ -0,0 +1,2 @@
|
||||||
|
11: warning: No ICANN section found
|
||||||
|
11: warning: 2 PRIVATE sections found
|
|
@ -0,0 +1,11 @@
|
||||||
|
// test:
|
||||||
|
// - two PRIVATE sections
|
||||||
|
|
||||||
|
// ===BEGIN PRIVATE DOMAINS===
|
||||||
|
|
||||||
|
example.com
|
||||||
|
|
||||||
|
// ===END PRIVATE DOMAINS===
|
||||||
|
|
||||||
|
// ===BEGIN PRIVATE DOMAINS===
|
||||||
|
// ===END PRIVATE DOMAINS===
|
|
@ -0,0 +1,3 @@
|
||||||
|
8: error: Unexpected end of section: '// ===END PRIVATE DOMAINS==='
|
||||||
|
8: error: ICANN section not closed
|
||||||
|
8: warning: No PRIVATE section found
|
|
@ -0,0 +1,8 @@
|
||||||
|
// test:
|
||||||
|
// - ICANN section improperly closed
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
example.com
|
||||||
|
|
||||||
|
// ===END PRIVATE DOMAINS===
|
|
@ -0,0 +1,7 @@
|
||||||
|
12: warning: Leading/Trailing whitespace: '// example.com: https://www.iana.org/domains/reserved'
|
||||||
|
13: warning: Leading/Trailing whitespace: ' a.example.com'
|
||||||
|
14: warning: Leading/Trailing whitespace: 'b.example.com '
|
||||||
|
15: warning: Leading/Trailing whitespace: '\tc.example.com'
|
||||||
|
16: warning: Leading/Trailing whitespace: 'd.example.com\t'
|
||||||
|
17: warning: Leading/Trailing whitespace: 'e.example.com^M'
|
||||||
|
19: warning: No PRIVATE section found
|
|
@ -0,0 +1,19 @@
|
||||||
|
// test:
|
||||||
|
// - leading space
|
||||||
|
// - trailing space, empty line with spaces
|
||||||
|
// - leading tab
|
||||||
|
// - trailing tab
|
||||||
|
// - line ends with CRLF
|
||||||
|
// - empty line with spaces
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
// example.com: https://www.iana.org/domains/reserved
|
||||||
|
a.example.com
|
||||||
|
b.example.com
|
||||||
|
c.example.com
|
||||||
|
d.example.com
|
||||||
|
e.example.com
|
||||||
|
|
||||||
|
|
||||||
|
// ===END ICANN DOMAINS===
|
|
@ -0,0 +1,5 @@
|
||||||
|
11: error: Illegal character: '**.com'
|
||||||
|
12: error: Illegal character: 'a*.com'
|
||||||
|
13: error: Illegal character: 'b.*.com'
|
||||||
|
14: error: Illegal character: 'a.b.*'
|
||||||
|
16: warning: No PRIVATE section found
|
|
@ -0,0 +1,16 @@
|
||||||
|
// test:
|
||||||
|
// - valid wildcard usage
|
||||||
|
// - invalid wildcard usage
|
||||||
|
|
||||||
|
// ===BEGIN ICANN DOMAINS===
|
||||||
|
|
||||||
|
// valid
|
||||||
|
*.com
|
||||||
|
|
||||||
|
// invalid
|
||||||
|
**.com
|
||||||
|
a*.com
|
||||||
|
b.*.com
|
||||||
|
a.b.*
|
||||||
|
|
||||||
|
// ===END ICANN DOMAINS===
|
Loading…
Reference in New Issue