New linter/ dir with pslint.py selftest

This commit is contained in:
Tim Rühsen 2016-02-18 16:40:06 +01:00
parent 811513f17e
commit 2914afa8c7
26 changed files with 282 additions and 21 deletions

9
.gitignore vendored
View File

@ -1,8 +1,9 @@
*.gz
*.o
*.lo
*.la
*.exe *.exe
*.gz
*.la
*.lo
*.log
*.o
*~ *~
*/.deps */.deps
*/.libs */.libs

View File

@ -14,6 +14,9 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS}
pkgconfigdir = $(libdir)/pkgconfig pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libpsl.pc pkgconfig_DATA = libpsl.pc
clean-local:
rm -rf $(srcdir)/linter/log
EXTRA_DIST = config.rpath LICENSE EXTRA_DIST = config.rpath LICENSE
dist-hook: dist-hook:
mkdir -p $(distdir)/list/tests mkdir -p $(distdir)/list/tests

View File

@ -27,18 +27,19 @@ import sys
nline = 0 nline = 0
line = "" line = ""
orig_line = ""
warnings = 0 warnings = 0
errors = 0 errors = 0
skip_order_check = False skip_order_check = False
def warning(msg): def warning(msg):
global warnings, line, nline global warnings, orig_line, nline
print('%d: warning: %s%s' % (nline, msg, ": \'" + line + "\'" if line else "")) print('%d: warning: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else ""))
warnings += 1 warnings += 1
def error(msg): def error(msg):
global errors, line, nline global errors, orig_line, nline
print('%d: error: %s%s' % (nline, msg, ": \'" + line + "\'" if line else "")) print('%d: error: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else ""))
errors += 1 errors += 1
# skip_order_check = True # skip_order_check = True
@ -73,6 +74,7 @@ def check_order(group):
warning('Incorrectly sorted group of domains') warning('Incorrectly sorted group of domains')
print(" " + str(group)) print(" " + str(group))
print(" " + str(sorted_group)) print(" " + str(sorted_group))
print("Correct sorting would be:")
print_psl(sorted_group) print_psl(sorted_group)
finally: finally:
@ -81,7 +83,7 @@ def check_order(group):
def lint_psl(infile): def lint_psl(infile):
"""Parses PSL file and performs syntax checking""" """Parses PSL file and performs syntax checking"""
global line, nline global orig_line, nline
PSL_FLAG_EXCEPTION = (1<<0) PSL_FLAG_EXCEPTION = (1<<0)
PSL_FLAG_WILDCARD = (1<<1) PSL_FLAG_WILDCARD = (1<<1)
@ -91,10 +93,12 @@ def lint_psl(infile):
line2number = {} line2number = {}
line2flag = {} line2flag = {}
section = 0
group = [] group = []
section = 0
icann_sections = 0
private_sections = 0
lines = [line.strip('\r\n') for line in infile] lines = [line.strip('\n') for line in infile]
for line in lines: for line in lines:
nline += 1 nline += 1
@ -102,23 +106,28 @@ def lint_psl(infile):
# check for leading/trailing whitespace # check for leading/trailing whitespace
stripped = line.strip() stripped = line.strip()
if stripped != line: if stripped != line:
line = line.replace('\t','\\t')
line = line.replace('\r','^M')
warning('Leading/Trailing whitespace') warning('Leading/Trailing whitespace')
orig_line = line
line = stripped line = stripped
# empty line (end of sorted domain group) # empty line (end of sorted domain group)
if not line: if not line:
check_order(group) # check_order(group)
continue continue
# check for section begin/end # check for section begin/end
if line[0:2] == "//": if line[0:2] == "//":
check_order(group) # check_order(group)
if section == 0: if section == 0:
if line == "// ===BEGIN ICANN DOMAINS===": if line == "// ===BEGIN ICANN DOMAINS===":
section = PSL_FLAG_ICANN section = PSL_FLAG_ICANN
icann_sections += 1
elif line == "// ===BEGIN PRIVATE DOMAINS===": elif line == "// ===BEGIN PRIVATE DOMAINS===":
section = PSL_FLAG_PRIVATE section = PSL_FLAG_PRIVATE
private_sections += 1
elif line[3:11] == "===BEGIN": elif line[3:11] == "===BEGIN":
error('Unexpected begin of unknown section') error('Unexpected begin of unknown section')
elif line[3:9] == "===END": elif line[3:9] == "===END":
@ -148,7 +157,11 @@ def lint_psl(infile):
# decode UTF-8 input into unicode, needed only for python 2.x # decode UTF-8 input into unicode, needed only for python 2.x
if sys.version_info[0] < 3: if sys.version_info[0] < 3:
try:
line = line.decode('utf-8') line = line.decode('utf-8')
except UnicodeDecodeError:
error('Invalid UTF-8 character')
continue
# each rule must be lowercase (or more exactly: not uppercase and not titlecase) # each rule must be lowercase (or more exactly: not uppercase and not titlecase)
if line != line.lower(): if line != line.lower():
@ -170,6 +183,7 @@ def lint_psl(infile):
# wildcard and exception must not combine # wildcard and exception must not combine
if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION: if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION:
error('Combination of wildcard and exception') error('Combination of wildcard and exception')
continue
labels = line.split('.') labels = line.split('.')
@ -179,6 +193,13 @@ def lint_psl(infile):
# else: # else:
# group.append(list(reversed(line.split('.')))) # group.append(list(reversed(line.split('.'))))
if flags & PSL_FLAG_EXCEPTION and len(labels) > 1:
domain = ".".join(str(label) for label in labels[1:])
if not domain in line2flag:
error('Exception without previous wildcard')
elif not line2flag[domain] & PSL_FLAG_WILDCARD:
error('Exception without previous wildcard')
for label in labels: for label in labels:
if not label: if not label:
error('Leading/trailing or multiple dot') error('Leading/trailing or multiple dot')
@ -208,11 +229,26 @@ def lint_psl(infile):
!foo.bar + *.foo.bar !foo.bar + *.foo.bar
''' '''
error('Found doublette/ambiguity (previous line was %d)' % line2number[line]) error('Found doublette/ambiguity (previous line was %d)' % line2number[line])
continue
line2number[line] = nline line2number[line] = nline
line2flag[line] = flags line2flag[line] = flags
orig_line = None
if section == PSL_FLAG_ICANN:
error('ICANN section not closed')
elif section == PSL_FLAG_PRIVATE:
error('PRIVATE section not closed')
if icann_sections < 1:
warning('No ICANN section found')
elif icann_sections > 1:
warning('%d ICANN sections found' % icann_sections)
if private_sections < 1:
warning('No PRIVATE section found')
elif private_sections > 1:
warning('%d PRIVATE sections found' % private_sections)
def usage(): def usage():
"""Prints the usage""" """Prints the usage"""

20
linter/pslint_selftest.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/sh
# Self-test driver for pslint.py.
#
# For every <name>.input file in the current directory, runs ./pslint.py on
# it, captures stdout and stderr in log/<name>.log and compares that log
# against <name>.expected. Prints "<name>: OK" or "<name>: FAILED" per case.
# For a failed case the diff output is kept in log/<name>.diff.
#
# Exit status: 0 if every case matched, 1 if at least one case failed or the
# log directory could not be created.

rc=0

# Start every run from an empty log directory.
rm -rf log
mkdir -p log || exit 1

# Iterate with a glob instead of parsing `ls | cut`: names containing extra
# dots or whitespace stay intact.
for input in *.input; do
  # With no matches the pattern is left unexpanded; skip that literal word.
  [ -e "$input" ] || continue
  file=${input%.input}

  # printf instead of `echo -n`, which is not portable under /bin/sh.
  printf '%s: ' "$file"

  ./pslint.py "$input" >"log/$file.log" 2>&1
  if diff "$file.expected" "log/$file.log" >"log/$file.diff"; then
    echo OK
    rm -f "log/$file.diff"
  else
    echo FAILED
    rc=1
  fi
done

exit $rc

View File

@ -0,0 +1,4 @@
10: error: Illegal character: 'a.exam#ple.com'
11: error: Illegal character: 'b.exam ple.com'
13: error: Invalid UTF-8 character: 'd.测è¯'
15: warning: No PRIVATE section found

View File

@ -0,0 +1,15 @@
// test:
// - label contains illegal character
// - c. is valid UTF-8
// - d. has invalid UTF-8 code for the TLD
//
// best viewed with 'LC_ALL=C vi <filename>'
// ===BEGIN ICANN DOMAINS===
a.exam#ple.com
b.exam ple.com
c.测试
d.测è¯
// ===END ICANN DOMAINS===

View File

@ -0,0 +1,4 @@
9: error: Leading/trailing or multiple dot: '.a.example.com'
10: error: Leading/trailing or multiple dot: 'b.example.com.'
11: error: Leading/trailing or multiple dot: 'c..example.com'
13: warning: No PRIVATE section found

13
linter/test_dots.input Normal file
View File

@ -0,0 +1,13 @@
// test:
// - leading dot
// - trailing dot
// - consecutive dots
// ===BEGIN ICANN DOMAINS===
// example.com: https://www.iana.org/domains/reserved
.a.example.com
b.example.com.
c..example.com
// ===END ICANN DOMAINS===

View File

@ -0,0 +1,6 @@
9: error: Found doublette/ambiguity (previous line was 8): '*.com'
13: error: Found doublette/ambiguity (previous line was 12): '!www.com'
17: error: Found doublette/ambiguity (previous line was 16): '*.example.com'
21: error: Found doublette/ambiguity (previous line was 20): 'example1.com'
24: error: Found doublette/ambiguity (previous line was 17): 'example.com'
26: warning: No PRIVATE section found

View File

@ -0,0 +1,26 @@
// test:
// - valid wildcard usage
// - invalid wildcard usage
// ===BEGIN ICANN DOMAINS===
// *.com implicitly includes .com
com
*.com
// double exception
!www.com
!www.com
// double wildcard
*.example.com
*.example.com
// double plain rule
example1.com
example1.com
// redundant/overlapping rule
example.com
// ===END ICANN DOMAINS===

View File

@ -0,0 +1,6 @@
17: error: Leading/trailing or multiple dot: '!.example.com'
18: error: Illegal character: 'w!w.example.com'
19: error: Found doublette/ambiguity (previous line was 12): '!www.example.com'
20: error: Exception without previous wildcard: '!a.b.example.com'
21: error: Exception without previous wildcard: '!a.c.example.com'
23: warning: No PRIVATE section found

View File

@ -0,0 +1,23 @@
// test:
// - valid exception
// - invalid exceptions
// - same exception twice
// - exception without wildcard
// - exception with prevailing '*' rule (!localhost)
// ===BEGIN ICANN DOMAINS===
// valid
*.example.com
!www.example.com
!localhost
c.example.com
// invalid
!.example.com
w!w.example.com
!www.example.com
!a.b.example.com
!a.c.example.com
// ===END ICANN DOMAINS===

View File

@ -0,0 +1,3 @@
7: error: Punycode found: 'a.xn--0zwm56d'
8: error: Double minus found: 'a.ex--ample.com'
10: warning: No PRIVATE section found

View File

@ -0,0 +1,10 @@
// test:
// - label is punycode
// - label has double minus
// ===BEGIN ICANN DOMAINS===
a.xn--0zwm56d
a.ex--ample.com
// ===END ICANN DOMAINS===

View File

@ -0,0 +1,3 @@
4: error: Rule outside of section: 'example.com'
4: warning: No ICANN section found
4: warning: No PRIVATE section found

View File

@ -0,0 +1,4 @@
// test:
// - no section at all
example.com

View File

@ -0,0 +1,2 @@
11: warning: 2 ICANN sections found
11: warning: No PRIVATE section found

View File

@ -0,0 +1,11 @@
// test:
// - two ICANN sections
// ===BEGIN ICANN DOMAINS===
example.com
// ===END ICANN DOMAINS===
// ===BEGIN ICANN DOMAINS===
// ===END ICANN DOMAINS===

View File

@ -0,0 +1,2 @@
11: warning: No ICANN section found
11: warning: 2 PRIVATE sections found

View File

@ -0,0 +1,11 @@
// test:
// - two PRIVATE sections
// ===BEGIN PRIVATE DOMAINS===
example.com
// ===END PRIVATE DOMAINS===
// ===BEGIN PRIVATE DOMAINS===
// ===END PRIVATE DOMAINS===

View File

@ -0,0 +1,3 @@
8: error: Unexpected end of section: '// ===END PRIVATE DOMAINS==='
8: error: ICANN section not closed
8: warning: No PRIVATE section found

View File

@ -0,0 +1,8 @@
// test:
// - ICANN section improperly closed
// ===BEGIN ICANN DOMAINS===
example.com
// ===END PRIVATE DOMAINS===

View File

@ -0,0 +1,7 @@
12: warning: Leading/Trailing whitespace: '// example.com: https://www.iana.org/domains/reserved'
13: warning: Leading/Trailing whitespace: ' a.example.com'
14: warning: Leading/Trailing whitespace: 'b.example.com '
15: warning: Leading/Trailing whitespace: '\tc.example.com'
16: warning: Leading/Trailing whitespace: 'd.example.com\t'
17: warning: Leading/Trailing whitespace: 'e.example.com^M'
19: warning: No PRIVATE section found

19
linter/test_spaces.input Normal file
View File

@ -0,0 +1,19 @@
// test:
// - leading space
// - trailing space, empty line with spaces
// - leading tab
// - trailing tab
// - line ends with CRLF
// - empty line with spaces
// ===BEGIN ICANN DOMAINS===
// example.com: https://www.iana.org/domains/reserved
a.example.com
b.example.com
c.example.com
d.example.com
e.example.com
// ===END ICANN DOMAINS===

View File

@ -0,0 +1,5 @@
11: error: Illegal character: '**.com'
12: error: Illegal character: 'a*.com'
13: error: Illegal character: 'b.*.com'
14: error: Illegal character: 'a.b.*'
16: warning: No PRIVATE section found

View File

@ -0,0 +1,16 @@
// test:
// - valid wildcard usage
// - invalid wildcard usage
// ===BEGIN ICANN DOMAINS===
// valid
*.com
// invalid
**.com
a*.com
b.*.com
a.b.*
// ===END ICANN DOMAINS===