diff --git a/Makefile.am b/Makefile.am index 482f2db..3904754 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,9 +14,6 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS} pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = libpsl.pc -clean-local: - rm -rf $(srcdir)/linter/log - EXTRA_DIST = config.rpath LICENSE dist-hook: mkdir -p $(distdir)/list/tests diff --git a/linter/pslint.py b/linter/pslint.py deleted file mode 100755 index f8fc2d6..0000000 --- a/linter/pslint.py +++ /dev/null @@ -1,272 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*-# -# -# PSL linter written in python -# -# Copyright 2016 Tim Rühsen (tim dot ruehsen at gmx dot de). All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import sys - -nline = 0 -line = "" -orig_line = "" -warnings = 0 -errors = 0 -skip_order_check = False - -def warning(msg): - global warnings, orig_line, nline - print('%d: warning: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else "")) - warnings += 1 - -def error(msg): - global errors, orig_line, nline - print('%d: error: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else "")) - errors += 1 -# skip_order_check = True - -def print_psl(list): - for domain in list: - print(".".join(str(label) for label in reversed(domain))) - -def psl_key(s): - if s[0] == '*': - return 0 - if s[0] == '!': - return 1 - return 2 - -def check_order(group): - """Check the correct order of a domain group""" - global skip_order_check - - try: - if skip_order_check or len(group) < 2: - skip_order_check = False - return - - # check if the TLD is the identical within the group - if any(group[0][0] != labels[0] for labels in group): - warning('Domain group TLD is not consistent') - - # sort by # of labels, label-by-label (labels are in reversed order) - sorted_group = sorted(group, key = lambda labels: (len(labels), psl_key(labels[-1][0]), labels)) - - if group != sorted_group: - warning('Incorrectly sorted group of domains') - print(" " + str(group)) - print(" " + str(sorted_group)) - print("Correct sorting would be:") - print_psl(sorted_group) - - finally: - del group[:] - - -def lint_psl(infile): - """Parses PSL file and performs syntax checking""" - global orig_line, nline - - PSL_FLAG_EXCEPTION = (1<<0) - PSL_FLAG_WILDCARD = (1<<1) - PSL_FLAG_ICANN = (1<<2) # entry of ICANN section - PSL_FLAG_PRIVATE = (1<<3) # entry of PRIVATE section - PSL_FLAG_PLAIN = (1<<4) #just used for PSL syntax checking - - line2number = {} - line2flag = {} - group = [] - section = 0 - icann_sections = 0 - private_sections = 0 - - lines = [line.strip('\n') for line in infile] - - for line in lines: - nline += 1 - - # check for leadind/trailing whitespace - stripped = line.strip() - if stripped != line: - line = line.replace('\t','\\t') - line = line.replace('\r','^M') - warning('Leading/Trailing whitespace') - orig_line = line - line = stripped - - # empty line (end of sorted domain group) - if not line: - # check_order(group) - continue - - # check for section begin/end - if line[0:2] == "//": - # check_order(group) - - if section == 0: - if line == "// ===BEGIN ICANN DOMAINS===": - section = PSL_FLAG_ICANN - icann_sections += 1 - elif line == "// ===BEGIN PRIVATE DOMAINS===": - section = PSL_FLAG_PRIVATE - private_sections += 1 - elif line[3:11] == "===BEGIN": - error('Unexpected begin of unknown section') - elif line[3:9] == "===END": - error('End of section without previous begin') - elif section == PSL_FLAG_ICANN: - if line == "// ===END ICANN DOMAINS===": - section = 0 - elif line[3:11] == "===BEGIN": - error('Unexpected begin of section: ') - elif line[3:9] == "===END": - error('Unexpected end of section') - elif section == PSL_FLAG_PRIVATE: - if line == "// ===END PRIVATE DOMAINS===": - section = 0 - elif line[3:11] == "===BEGIN": - error('Unexpected begin of section') - elif line[3:9] == "===END": - error('Unexpected end of section') - - continue # processing of comments ends here - - # No rule must be outside of a section - if section == 0: - error('Rule outside of section') - - group.append(list(reversed(line.split('.')))) - - # decode UTF-8 input into unicode, needed only for python 2.x - if sys.version_info[0] < 3: - try: - line = line.decode('utf-8') - except UnicodeDecodeError: - error('Invalid UTF-8 character') - continue - - # each rule must be lowercase (or more exactly: not uppercase and not titlecase) - if line != line.lower(): - error('Rule must be lowercase') - - # strip leading wildcards - flags = section - # while line[0:2] == '*.': - if line[0:2] == '*.': - flags |= PSL_FLAG_WILDCARD - line = line[2:] - - if line[0] == '!': - flags |= PSL_FLAG_EXCEPTION - line = line[1:] - else: - flags |= PSL_FLAG_PLAIN - - # wildcard and exception must not combine - if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION: - error('Combination of wildcard and exception') - continue - - labels = line.split('.') - - # collect reversed list of labels -# if sys.version_info[0] < 3: -# group.append(list(reversed(line.encode('utf-8').split('.')))) -# else: -# group.append(list(reversed(line.split('.')))) - - if flags & PSL_FLAG_EXCEPTION and len(labels) > 1: - domain = ".".join(str(label) for label in labels[1:]) - if not domain in line2flag: - error('Exception without previous wildcard') - elif not line2flag[domain] & PSL_FLAG_WILDCARD: - error('Exception without previous wildcard') - - for label in labels: - if not label: - error('Leading/trailing or multiple dot') - continue - - if label[0:4] == 'xn--': - error('Punycode found') - continue - - if '--' in label: - error('Double minus found') - continue - - # allowed are a-z,0-9,- and unicode >= 128 (maybe that can be finetuned a bit !?) - for c in label: - if not c.isalnum() and c != '-' and ord(c) < 128: - error('Illegal character') - break - - if line in line2flag: - '''Found existing entry: - Combination of exception and plain rule is contradictionary - !foo.bar + foo.bar - Doublette, since *.foo.bar implies foo.bar: - foo.bar + *.foo.bar - Allowed: - !foo.bar + *.foo.bar - ''' - error('Found doublette/ambiguity (previous line was %d)' % line2number[line]) - - line2number[line] = nline - line2flag[line] = flags - - orig_line = None - - if section == PSL_FLAG_ICANN: - error('ICANN section not closed') - elif section == PSL_FLAG_PRIVATE: - error('PRIVATE section not closed') - - if icann_sections < 1: - warning('No ICANN section found') - elif icann_sections > 1: - warning('%d ICANN sections found' % icann_sections) - - if private_sections < 1: - warning('No PRIVATE section found') - elif private_sections > 1: - warning('%d PRIVATE sections found' % private_sections) - -def usage(): - """Prints the usage""" - print('usage: %s PSLfile' % sys.argv[0]) - print('or %s - # To read PSL from STDIN' % sys.argv[0]) - exit(1) - - -def main(): - """Check syntax of a PSL file""" - if len(sys.argv) < 2: - usage() - - with sys.stdin if sys.argv[-1] == '-' else open(sys.argv[-1], 'r') as infile: - lint_psl(infile) - - return errors != 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/linter/pslint_selftest.sh b/linter/pslint_selftest.sh deleted file mode 100755 index 583e838..0000000 --- a/linter/pslint_selftest.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh - -rc=0 -rm -rf log -mkdir -p log - -for file in `ls *.input|cut -d'.' -f1`; do - echo -n "${file}: " - ./pslint.py ${file}.input >log/${file}.log 2>&1 - diff ${file}.expected log/${file}.log >log/${file}.diff - if [ $? -eq 0 ]; then - echo OK - rm log/${file}.diff - else - echo FAILED - rc=1 - fi -done - -exit $rc diff --git a/linter/test_allowedchars.expected b/linter/test_allowedchars.expected deleted file mode 100644 index 58e0b69..0000000 --- a/linter/test_allowedchars.expected +++ /dev/null @@ -1,4 +0,0 @@ -10: error: Illegal character: 'a.exam#ple.com' -11: error: Illegal character: 'b.exam ple.com' -13: error: Invalid UTF-8 character: 'd.测è¯' -15: warning: No PRIVATE section found diff --git a/linter/test_allowedchars.input b/linter/test_allowedchars.input deleted file mode 100644 index f6d8778..0000000 --- a/linter/test_allowedchars.input +++ /dev/null @@ -1,15 +0,0 @@ -// test: -// - label contains illegal character -// - c. is valid UTF-8 -// - d. has invalid UTF-8 code for the TLD -// -// best viewed with 'LC_ALL=C vi ' - -// ===BEGIN ICANN DOMAINS=== - -a.exam#ple.com -b.exam ple.com -c.测试 -d.æµ‹è¯ - -// ===END ICANN DOMAINS=== diff --git a/linter/test_dots.expected b/linter/test_dots.expected deleted file mode 100644 index 1987840..0000000 --- a/linter/test_dots.expected +++ /dev/null @@ -1,4 +0,0 @@ -9: error: Leading/trailing or multiple dot: '.a.example.com' -10: error: Leading/trailing or multiple dot: 'b.example.com.' -11: error: Leading/trailing or multiple dot: 'c..example.com' -13: warning: No PRIVATE section found diff --git a/linter/test_dots.input b/linter/test_dots.input deleted file mode 100644 index 3290441..0000000 --- a/linter/test_dots.input +++ /dev/null @@ -1,13 +0,0 @@ -// test: -// - leading dot -// - trailing dot -// - consecutive dots - -// ===BEGIN ICANN DOMAINS=== - -// example.com: https://www.iana.org/domains/reserved -.a.example.com -b.example.com. -c..example.com - -// ===END ICANN DOMAINS=== diff --git a/linter/test_duplicate.expected b/linter/test_duplicate.expected deleted file mode 100644 index 40a5e34..0000000 --- a/linter/test_duplicate.expected +++ /dev/null @@ -1,6 +0,0 @@ -9: error: Found doublette/ambiguity (previous line was 8): '*.com' -13: error: Found doublette/ambiguity (previous line was 12): '!www.com' -17: error: Found doublette/ambiguity (previous line was 16): '*.example.com' -21: error: Found doublette/ambiguity (previous line was 20): 'example1.com' -24: error: Found doublette/ambiguity (previous line was 17): 'example.com' -26: warning: No PRIVATE section found diff --git a/linter/test_duplicate.input b/linter/test_duplicate.input deleted file mode 100644 index 48cd9ff..0000000 --- a/linter/test_duplicate.input +++ /dev/null @@ -1,26 +0,0 @@ -// test: -// - valid wildcard usage -// - invalid wildcard usage - -// ===BEGIN ICANN DOMAINS=== - -// *.com implicitely includes .com -com -*.com - -// double exception -!www.com -!www.com - -// double wildcard -*.example.com -*.example.com - -// double plain rule -example1.com -example1.com - -// redundant/overlapping rule -example.com - -// ===END ICANN DOMAINS=== diff --git a/linter/test_exception.expected b/linter/test_exception.expected deleted file mode 100644 index f656d6f..0000000 --- a/linter/test_exception.expected +++ /dev/null @@ -1,6 +0,0 @@ -17: error: Leading/trailing or multiple dot: '!.example.com' -18: error: Illegal character: 'w!w.example.com' -19: error: Found doublette/ambiguity (previous line was 12): '!www.example.com' -20: error: Exception without previous wildcard: '!a.b.example.com' -21: error: Exception without previous wildcard: '!a.c.example.com' -23: warning: No PRIVATE section found diff --git a/linter/test_exception.input b/linter/test_exception.input deleted file mode 100644 index 96b1d7b..0000000 --- a/linter/test_exception.input +++ /dev/null @@ -1,23 +0,0 @@ -// test: -// - valid exception -// - invalid exceptions -// - same exception twice -// - exception without wildcard -// - exception with prevailing '*' rule (!localhost) - -// ===BEGIN ICANN DOMAINS=== - -// valid -*.example.com -!www.example.com -!localhost -c.example.com - -// invalid -!.example.com -w!w.example.com -!www.example.com -!a.b.example.com -!a.c.example.com - -// ===END ICANN DOMAINS=== diff --git a/linter/test_punycode.expected b/linter/test_punycode.expected deleted file mode 100644 index 1c4ab0e..0000000 --- a/linter/test_punycode.expected +++ /dev/null @@ -1,3 +0,0 @@ -7: error: Punycode found: 'a.xn--0zwm56d' -8: error: Double minus found: 'a.ex--ample.com' -10: warning: No PRIVATE section found diff --git a/linter/test_punycode.input b/linter/test_punycode.input deleted file mode 100644 index be49210..0000000 --- a/linter/test_punycode.input +++ /dev/null @@ -1,10 +0,0 @@ -// test: -// - label is punycode -// - label has double minus - -// ===BEGIN ICANN DOMAINS=== - -a.xn--0zwm56d -a.ex--ample.com - -// ===END ICANN DOMAINS=== diff --git a/linter/test_section1.expected b/linter/test_section1.expected deleted file mode 100644 index d5eedfb..0000000 --- a/linter/test_section1.expected +++ /dev/null @@ -1,3 +0,0 @@ -4: error: Rule outside of section: 'example.com' -4: warning: No ICANN section found -4: warning: No PRIVATE section found diff --git a/linter/test_section1.input b/linter/test_section1.input deleted file mode 100644 index 25f6b66..0000000 --- a/linter/test_section1.input +++ /dev/null @@ -1,4 +0,0 @@ -// test: -// - no section at all - -example.com diff --git a/linter/test_section2.expected b/linter/test_section2.expected deleted file mode 100644 index a5eafb9..0000000 --- a/linter/test_section2.expected +++ /dev/null @@ -1,2 +0,0 @@ -11: warning: 2 ICANN sections found -11: warning: No PRIVATE section found diff --git a/linter/test_section2.input b/linter/test_section2.input deleted file mode 100644 index 90f40a2..0000000 --- a/linter/test_section2.input +++ /dev/null @@ -1,11 +0,0 @@ -// test: -// - two ICANN sections - -// ===BEGIN ICANN DOMAINS=== - -example.com - -// ===END ICANN DOMAINS=== - -// ===BEGIN ICANN DOMAINS=== -// ===END ICANN DOMAINS=== diff --git a/linter/test_section3.expected b/linter/test_section3.expected deleted file mode 100644 index fd28121..0000000 --- a/linter/test_section3.expected +++ /dev/null @@ -1,2 +0,0 @@ -11: warning: No ICANN section found -11: warning: 2 PRIVATE sections found diff --git a/linter/test_section3.input b/linter/test_section3.input deleted file mode 100644 index f3af185..0000000 --- a/linter/test_section3.input +++ /dev/null @@ -1,11 +0,0 @@ -// test: -// - two PRIVATE sections - -// ===BEGIN PRIVATE DOMAINS=== - -example.com - -// ===END PRIVATE DOMAINS=== - -// ===BEGIN PRIVATE DOMAINS=== -// ===END PRIVATE DOMAINS=== diff --git a/linter/test_section4.expected b/linter/test_section4.expected deleted file mode 100644 index e8a0906..0000000 --- a/linter/test_section4.expected +++ /dev/null @@ -1,3 +0,0 @@ -8: error: Unexpected end of section: '// ===END PRIVATE DOMAINS===' -8: error: ICANN section not closed -8: warning: No PRIVATE section found diff --git a/linter/test_section4.input b/linter/test_section4.input deleted file mode 100644 index 98ca338..0000000 --- a/linter/test_section4.input +++ /dev/null @@ -1,8 +0,0 @@ -// test: -// - ICANN section improperly closed - -// ===BEGIN ICANN DOMAINS=== - -example.com - -// ===END PRIVATE DOMAINS=== diff --git a/linter/test_spaces.expected b/linter/test_spaces.expected deleted file mode 100644 index 63650ed..0000000 --- a/linter/test_spaces.expected +++ /dev/null @@ -1,7 +0,0 @@ -12: warning: Leading/Trailing whitespace: '// example.com: https://www.iana.org/domains/reserved' -13: warning: Leading/Trailing whitespace: ' a.example.com' -14: warning: Leading/Trailing whitespace: 'b.example.com ' -15: warning: Leading/Trailing whitespace: '\tc.example.com' -16: warning: Leading/Trailing whitespace: 'd.example.com\t' -17: warning: Leading/Trailing whitespace: 'e.example.com^M' -19: warning: No PRIVATE section found diff --git a/linter/test_spaces.input b/linter/test_spaces.input deleted file mode 100644 index f84f866..0000000 --- a/linter/test_spaces.input +++ /dev/null @@ -1,19 +0,0 @@ -// test: -// - leading space -// - trailing space, empty line with spaces -// - leading tab -// - trailing tab -// - line ends with CRLF -// - empty line with spaces - -// ===BEGIN ICANN DOMAINS=== - -// example.com: https://www.iana.org/domains/reserved - a.example.com -b.example.com - c.example.com -d.example.com -e.example.com - - -// ===END ICANN DOMAINS=== diff --git a/linter/test_wildcard.expected b/linter/test_wildcard.expected deleted file mode 100644 index 5457094..0000000 --- a/linter/test_wildcard.expected +++ /dev/null @@ -1,5 +0,0 @@ -11: error: Illegal character: '**.com' -12: error: Illegal character: 'a*.com' -13: error: Illegal character: 'b.*.com' -14: error: Illegal character: 'a.b.*' -16: warning: No PRIVATE section found diff --git a/linter/test_wildcard.input b/linter/test_wildcard.input deleted file mode 100644 index 1bfb05a..0000000 --- a/linter/test_wildcard.input +++ /dev/null @@ -1,16 +0,0 @@ -// test: -// - valid wildcard usage -// - invalid wildcard usage - -// ===BEGIN ICANN DOMAINS=== - -// valid -*.com - -// invalid -**.com -a*.com -b.*.com -a.b.* - -// ===END ICANN DOMAINS=== diff --git a/list b/list index 1f3ad51..e2f2f4b 160000 --- a/list +++ b/list @@ -1 +1 @@ -Subproject commit 1f3ad51171235aafe423435606e869f0161582e4 +Subproject commit e2f2f4bfe2ae57651afb7268bb9a0b53da5eb8cf