New linter/ dir with pslint.py selftest

2016-02-18 16:40:06 +01:00 · 2016-02-18 16:40:06 +01:00 · 2914afa8c7
parent 811513f17e
commit 2914afa8c7
26 changed files with 282 additions and 21 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,8 +1,9 @@
 *.gz
 *.o
 *.lo
 *.la
 *.exe
 *.gz
 *.la
 *.lo
 *.log
 *.o
 *~
 */.deps
 */.libs
--- a/Makefile.am
+++ b/Makefile.am
@ -14,6 +14,9 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS}
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = libpsl.pc
 clean-local:
 	rm -rf $(srcdir)/linter/log
 EXTRA_DIST = config.rpath LICENSE
 dist-hook:
 	mkdir -p $(distdir)/list/tests
--- a/linter/pslint.py
+++ b/linter/pslint.py
@ -27,18 +27,19 @@ import sys
 nline = 0
 line = ""
 orig_line = ""
 warnings = 0
 errors = 0
 skip_order_check = False
 def warning(msg):
-	global warnings, line, nline
+	global warnings, orig_line, nline
-	print('%d: warning: %s%s' % (nline, msg, ": \'" + line + "\'" if line else ""))
+	print('%d: warning: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else ""))
 	warnings += 1
 def error(msg):
-	global errors, line, nline
+	global errors, orig_line, nline
-	print('%d: error: %s%s' % (nline, msg, ": \'" + line + "\'" if line else ""))
+	print('%d: error: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else ""))
 	errors += 1
 #	skip_order_check = True
@ -73,6 +74,7 @@ def check_order(group):
 			warning('Incorrectly sorted group of domains')
 			print("  " + str(group))
 			print("  " + str(sorted_group))
 			print("Correct sorting would be:")
 			print_psl(sorted_group)
 	finally:
@ -81,7 +83,7 @@ def check_order(group):
 def lint_psl(infile):
 	"""Parses PSL file and performs syntax checking"""
-	global line, nline
+	global orig_line, nline
 	PSL_FLAG_EXCEPTION = (1<<0)
 	PSL_FLAG_WILDCARD = (1<<1)
@ -91,10 +93,12 @@ def lint_psl(infile):
 	line2number = {}
 	line2flag = {}
 	section = 0
 	group = []
 	section = 0
 	icann_sections = 0
 	private_sections = 0
-	lines = [line.strip('\r\n') for line in infile]
+	lines = [line.strip('\n') for line in infile]
 	for line in lines:
 		nline += 1
@ -102,23 +106,28 @@ def lint_psl(infile):
 		# check for leading/trailing whitespace
 		stripped = line.strip()
 		if stripped != line:
 			line = line.replace('\t','\\t')
 			line = line.replace('\r','^M')
 			warning('Leading/Trailing whitespace')
 		orig_line = line
 		line = stripped
 		# empty line (end of sorted domain group)
 		if not line:
-			check_order(group)
+			# check_order(group)
 			continue
 		# check for section begin/end
 		if line[0:2] == "//":
-			check_order(group)
+			# check_order(group)
 			if section == 0:
 				if line == "// ===BEGIN ICANN DOMAINS===":
 					section = PSL_FLAG_ICANN
 					icann_sections += 1
 				elif line == "// ===BEGIN PRIVATE DOMAINS===":
 					section = PSL_FLAG_PRIVATE
 					private_sections += 1
 				elif line[3:11] == "===BEGIN":
 					error('Unexpected begin of unknown section')
 				elif line[3:9] == "===END":
@ -148,7 +157,11 @@ def lint_psl(infile):
 		# decode UTF-8 input into unicode, needed only for python 2.x
 		if sys.version_info[0] < 3:
 			try:
 				line = line.decode('utf-8')
 			except UnicodeDecodeError:
 				error('Invalid UTF-8 character')
 				continue
 		# each rule must be lowercase (or more exactly: not uppercase and not titlecase)
 		if line != line.lower():
@ -170,6 +183,7 @@ def lint_psl(infile):
 		# wildcard and exception must not combine
 		if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION:
 			error('Combination of wildcard and exception')
 			continue
 		labels = line.split('.')
@ -179,6 +193,13 @@ def lint_psl(infile):
 #		else:
 #			group.append(list(reversed(line.split('.'))))
 		if flags & PSL_FLAG_EXCEPTION and len(labels) > 1:
 			domain = ".".join(str(label) for label in labels[1:])
 			if not domain in line2flag:
 				error('Exception without previous wildcard')
 			elif not line2flag[domain] & PSL_FLAG_WILDCARD:
 				error('Exception without previous wildcard')
 		for label in labels:
 			if not label:
 				error('Leading/trailing or multiple dot')
@ -208,11 +229,26 @@ def lint_psl(infile):
 			     !foo.bar + *.foo.bar
 			'''
 			error('Found doublette/ambiguity (previous line was %d)' % line2number[line])
 			continue
 		line2number[line] = nline
 		line2flag[line] = flags
 	orig_line = None
 	if section == PSL_FLAG_ICANN:
 		error('ICANN section not closed')
 	elif section == PSL_FLAG_PRIVATE:
 		error('PRIVATE section not closed')
 	if icann_sections < 1:
 		warning('No ICANN section found')
 	elif icann_sections > 1:
 		warning('%d ICANN sections found' % icann_sections)
 	if private_sections < 1:
 		warning('No PRIVATE section found')
 	elif private_sections > 1:
 		warning('%d PRIVATE sections found' % private_sections)
 def usage():
 	"""Prints the usage"""
--- a/linter/pslint_selftest.sh
+++ b/linter/pslint_selftest.sh
@ -0,0 +1,20 @@
 #!/bin/sh
 #
 # Self-test for pslint.py.
 #
 # Runs ./pslint.py on every *.input file in this directory, stores the
 # combined stdout/stderr in log/<name>.log and compares it with
 # <name>.expected. For a failing test the diff is kept in log/<name>.diff.
 #
 # Exit status: 0 if every test passed, 1 otherwise.
 #
 # Run from the script's own directory so the relative paths below resolve
 # no matter where the script was invoked from.
 cd "$(dirname "$0")" || exit 1
 rc=0
 rm -rf log
 mkdir -p log || exit 1
 # Iterate with a glob instead of parsing 'ls' output.
 for input in *.input; do
   # An unmatched glob is left as the literal pattern; skip it.
   [ -e "$input" ] || continue
   # Strip only the '.input' suffix so test names may contain dots.
   name=${input%.input}
   # 'echo -n' is not portable under /bin/sh; printf is.
   printf '%s: ' "$name"
   ./pslint.py "$input" >"log/$name.log" 2>&1
   if diff "$name.expected" "log/$name.log" >"log/$name.diff"; then
     echo OK
     rm -f "log/$name.diff"
   else
     echo FAILED
     rc=1
   fi
 done
 exit $rc
--- a/linter/test_allowedchars.expected
+++ b/linter/test_allowedchars.expected
@ -0,0 +1,4 @@
 10: error: Illegal character: 'a.exam#ple.com'
 11: error: Illegal character: 'b.exam ple.com'
 13: error: Invalid UTF-8 character: 'd.æµ‹è¯'
 15: warning: No PRIVATE section found
--- a/linter/test_allowedchars.input
+++ b/linter/test_allowedchars.input
@ -0,0 +1,15 @@
 // test:
 // - label contains illegal character
 // - c. is valid UTF-8
 // - d. has invalid UTF-8 code for the TLD
 //
 // best viewed with 'LC_ALL=C vi <filename>'
 // ===BEGIN ICANN DOMAINS===
 a.exam#ple.com
 b.exam ple.com
 c.测试
 d.æµ‹è¯
 // ===END ICANN DOMAINS===
--- a/linter/test_dots.expected
+++ b/linter/test_dots.expected
@ -0,0 +1,4 @@
 9: error: Leading/trailing or multiple dot: '.a.example.com'
 10: error: Leading/trailing or multiple dot: 'b.example.com.'
 11: error: Leading/trailing or multiple dot: 'c..example.com'
 13: warning: No PRIVATE section found
--- a/linter/test_dots.input
+++ b/linter/test_dots.input
@ -0,0 +1,13 @@
 // test:
 // - leading dot
 // - trailing dot
 // - consecutive dots
 // ===BEGIN ICANN DOMAINS===
 // example.com: https://www.iana.org/domains/reserved
 .a.example.com
 b.example.com.
 c..example.com
 // ===END ICANN DOMAINS===
--- a/linter/test_duplicate.expected
+++ b/linter/test_duplicate.expected
@ -0,0 +1,6 @@
 9: error: Found doublette/ambiguity (previous line was 8): '*.com'
 13: error: Found doublette/ambiguity (previous line was 12): '!www.com'
 17: error: Found doublette/ambiguity (previous line was 16): '*.example.com'
 21: error: Found doublette/ambiguity (previous line was 20): 'example1.com'
 24: error: Found doublette/ambiguity (previous line was 17): 'example.com'
 26: warning: No PRIVATE section found
--- a/linter/test_duplicate.input
+++ b/linter/test_duplicate.input
@ -0,0 +1,26 @@
 // test:
 // - valid wildcard usage
 // - invalid wildcard usage
 // ===BEGIN ICANN DOMAINS===
 // *.com implicitly includes .com
 com
 *.com
 // double exception
 !www.com
 !www.com
 // double wildcard
 *.example.com
 *.example.com
 // double plain rule
 example1.com
 example1.com
 // redundant/overlapping rule
 example.com
 // ===END ICANN DOMAINS===
--- a/linter/test_exception.expected
+++ b/linter/test_exception.expected
@ -0,0 +1,6 @@
 17: error: Leading/trailing or multiple dot: '!.example.com'
 18: error: Illegal character: 'w!w.example.com'
 19: error: Found doublette/ambiguity (previous line was 12): '!www.example.com'
 20: error: Exception without previous wildcard: '!a.b.example.com'
 21: error: Exception without previous wildcard: '!a.c.example.com'
 23: warning: No PRIVATE section found
--- a/linter/test_exception.input
+++ b/linter/test_exception.input
@ -0,0 +1,23 @@
 // test:
 // - valid exception
 // - invalid exceptions
 // - same exception twice
 // - exception without wildcard
 // - exception with prevailing '*' rule (!localhost)
 // ===BEGIN ICANN DOMAINS===
 // valid
 *.example.com
 !www.example.com
 !localhost
 c.example.com
 // invalid
 !.example.com
 w!w.example.com
 !www.example.com
 !a.b.example.com
 !a.c.example.com
 // ===END ICANN DOMAINS===
--- a/linter/test_punycode.expected
+++ b/linter/test_punycode.expected
@ -0,0 +1,3 @@
 7: error: Punycode found: 'a.xn--0zwm56d'
 8: error: Double minus found: 'a.ex--ample.com'
 10: warning: No PRIVATE section found
--- a/linter/test_punycode.input
+++ b/linter/test_punycode.input
@ -0,0 +1,10 @@
 // test:
 // - label is punycode
 // - label has double minus
 // ===BEGIN ICANN DOMAINS===
 a.xn--0zwm56d
 a.ex--ample.com
 // ===END ICANN DOMAINS===
--- a/linter/test_section1.expected
+++ b/linter/test_section1.expected
@ -0,0 +1,3 @@
 4: error: Rule outside of section: 'example.com'
 4: warning: No ICANN section found
 4: warning: No PRIVATE section found
--- a/linter/test_section1.input
+++ b/linter/test_section1.input
@ -0,0 +1,4 @@
 // test:
 // - no section at all
 example.com
--- a/linter/test_section2.expected
+++ b/linter/test_section2.expected
@ -0,0 +1,2 @@
 11: warning: 2 ICANN sections found
 11: warning: No PRIVATE section found
--- a/linter/test_section2.input
+++ b/linter/test_section2.input
@ -0,0 +1,11 @@
 // test:
 // - two ICANN sections
 // ===BEGIN ICANN DOMAINS===
 example.com
 // ===END ICANN DOMAINS===
 // ===BEGIN ICANN DOMAINS===
 // ===END ICANN DOMAINS===
--- a/linter/test_section3.expected
+++ b/linter/test_section3.expected
@ -0,0 +1,2 @@
 11: warning: No ICANN section found
 11: warning: 2 PRIVATE sections found
--- a/linter/test_section3.input
+++ b/linter/test_section3.input
@ -0,0 +1,11 @@
 // test:
 // - two PRIVATE sections
 // ===BEGIN PRIVATE DOMAINS===
 example.com
 // ===END PRIVATE DOMAINS===
 // ===BEGIN PRIVATE DOMAINS===
 // ===END PRIVATE DOMAINS===
--- a/linter/test_section4.expected
+++ b/linter/test_section4.expected
@ -0,0 +1,3 @@
 8: error: Unexpected end of section: '// ===END PRIVATE DOMAINS==='
 8: error: ICANN section not closed
 8: warning: No PRIVATE section found
--- a/linter/test_section4.input
+++ b/linter/test_section4.input
@ -0,0 +1,8 @@
 // test:
 // - ICANN section improperly closed
 // ===BEGIN ICANN DOMAINS===
 example.com
 // ===END PRIVATE DOMAINS===
--- a/linter/test_spaces.expected
+++ b/linter/test_spaces.expected
@ -0,0 +1,7 @@
 12: warning: Leading/Trailing whitespace: '// example.com: https://www.iana.org/domains/reserved'
 13: warning: Leading/Trailing whitespace: ' a.example.com'
 14: warning: Leading/Trailing whitespace: 'b.example.com '
 15: warning: Leading/Trailing whitespace: '\tc.example.com'
 16: warning: Leading/Trailing whitespace: 'd.example.com\t'
 17: warning: Leading/Trailing whitespace: 'e.example.com^M'
 19: warning: No PRIVATE section found
--- a/linter/test_spaces.input
+++ b/linter/test_spaces.input
@ -0,0 +1,19 @@
 // test:
 // - leading space
 // - trailing space, empty line with spaces
 // - leading tab
 // - trailing tab
 // - line ends with CRLF
 // - empty line with spaces
 // ===BEGIN ICANN DOMAINS===
 // example.com: https://www.iana.org/domains/reserved
 a.example.com
 b.example.com 
 	c.example.com
 d.example.com	
 e.example.com
 // ===END ICANN DOMAINS===
--- a/linter/test_wildcard.expected
+++ b/linter/test_wildcard.expected
@ -0,0 +1,5 @@
 11: error: Illegal character: '**.com'
 12: error: Illegal character: 'a*.com'
 13: error: Illegal character: 'b.*.com'
 14: error: Illegal character: 'a.b.*'
 16: warning: No PRIVATE section found
--- a/linter/test_wildcard.input
+++ b/linter/test_wildcard.input
@ -0,0 +1,16 @@
 // test:
 // - valid wildcard usage
 // - invalid wildcard usage
 // ===BEGIN ICANN DOMAINS===
 // valid
 *.com
 // invalid
 **.com
 a*.com
 b.*.com
 a.b.*
 // ===END ICANN DOMAINS===
		`@ -0,0 +1,2 @@`
							`11: warning: 2 ICANN sections found`
							`11: warning: No PRIVATE section found`
		`@ -0,0 +1,2 @@`
							`11: warning: No ICANN section found`
							`11: warning: 2 PRIVATE sections found`