Remove python linter
It has been merged into https://github.com/publicsuffix/list and can be found in list/linter.
This commit is contained in:
parent
cb3fc3b502
commit
f097290c90
|
@ -14,9 +14,6 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS}
|
|||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
pkgconfig_DATA = libpsl.pc
|
||||
|
||||
clean-local:
|
||||
rm -rf $(srcdir)/linter/log
|
||||
|
||||
EXTRA_DIST = config.rpath LICENSE
|
||||
dist-hook:
|
||||
mkdir -p $(distdir)/list/tests
|
||||
|
|
272
linter/pslint.py
272
linter/pslint.py
|
@ -1,272 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-#
|
||||
#
|
||||
# PSL linter written in python
|
||||
#
|
||||
# Copyright 2016 Tim Rühsen (tim dot ruehsen at gmx dot de). All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import sys
|
||||
|
||||
# Module-level state shared by the reporting helpers and lint_psl().

# Number of the input line currently being checked; lint_psl() increments it
# once per line and warning()/error() prefix every message with it.
nline = 0

# 'orig_line' is the text quoted at the end of a message (nothing is quoted
# while it is empty/None).  The module-level 'line' is only initialised here.
line, orig_line = "", ""

# Running totals maintained by warning() and error().
warnings, errors = 0, 0

# When set, the next check_order() call skips its check and clears the flag.
skip_order_check = False
|
||||
|
||||
def warning(msg):
    """Print a warning for the current line and count it.

    The message is prefixed with the module-level line number `nline`; when
    `orig_line` is non-empty its text is appended in single quotes.
    """
    global warnings

    quoted = ": \'" + orig_line + "\'" if orig_line else ""
    report = '%d: warning: %s%s' % (nline, msg, quoted)
    print(report)
    warnings = warnings + 1
|
||||
|
||||
def error(msg):
    """Print an error for the current line and count it.

    The message is prefixed with the module-level line number `nline`; when
    `orig_line` is non-empty its text is appended in single quotes.
    """
    global errors

    quoted = ": \'" + orig_line + "\'" if orig_line else ""
    report = '%d: error: %s%s' % (nline, msg, quoted)
    print(report)
    errors = errors + 1
|
||||
# skip_order_check = True
|
||||
|
||||
def print_psl(list):
    """Print each domain of `list` on its own line in dotted notation.

    Every entry is a sequence of labels stored in reversed order (TLD
    first); the labels are put back into reading order and joined with '.'.
    """
    for labels in list:
        dotted = ".".join(map(str, reversed(labels)))
        print(dotted)
|
||||
|
||||
def psl_key(s):
    """Return the sort rank of a label based on its first character.

    Ranks: 0 when `s` starts with '*' (wildcard), 1 when it starts with '!'
    (exception), 2 for anything else.

    An empty `s` is ranked 2 as well.  A rule with a leading dot produces an
    empty label, and indexing it used to raise IndexError instead of letting
    the caller report the malformed rule.
    """
    if not s:
        return 2

    marker = s[0]
    if marker == '*':
        return 0
    if marker == '!':
        return 1
    return 2
|
||||
|
||||
def check_order(group):
    """Verify that a group of domains is consistently sorted.

    `group` is a list of label lists in reversed order (TLD first).  A
    warning is issued when the entries do not all share the first label, and
    another one, followed by the suggested ordering, when the group is not
    sorted.  The group is always emptied in place before returning.

    When the module-level `skip_order_check` flag is set, or the group has
    fewer than two entries, nothing is checked and the flag is cleared.
    """
    global skip_order_check

    def order_key(labels):
        # fewer labels first, then wildcard / exception / plain (decided by
        # the last label), then label-by-label (labels are in reversed order)
        return (len(labels), psl_key(labels[-1][0]), labels)

    try:
        if skip_order_check or len(group) < 2:
            skip_order_check = False
            return

        # every entry of the group has to share the same TLD
        tld = group[0][0]
        if any(tld != labels[0] for labels in group):
            warning('Domain group TLD is not consistent')

        sorted_group = sorted(group, key=order_key)
        if group == sorted_group:
            return

        warning('Incorrectly sorted group of domains')
        print(" " + str(group))
        print(" " + str(sorted_group))
        print("Correct sorting would be:")
        print_psl(sorted_group)

    finally:
        # the caller reuses the same list object for the next group
        del group[:]
|
||||
|
||||
|
||||
def lint_psl(infile):
    """Parse a PSL file and perform syntax checking.

    Each line of `infile` is checked for surrounding whitespace, section
    markers are tracked, and every rule is validated: it must be inside a
    section, lowercase, must not combine wildcard and exception, must have
    well-formed labels and must not repeat an earlier rule.  Findings are
    reported through warning() and error(), which print them and update the
    module-level counters.

    infile -- iterable yielding the lines of the PSL file (e.g. an open file)

    Nothing is returned.  The module-level `nline` is advanced once per line
    and `orig_line` is left as None when the function finishes.

    Note: the group ordering check (check_order) is not invoked from here.
    """
    global orig_line, nline

    PSL_FLAG_EXCEPTION = (1<<0)
    PSL_FLAG_WILDCARD = (1<<1)
    PSL_FLAG_ICANN = (1<<2)      # entry of ICANN section
    PSL_FLAG_PRIVATE = (1<<3)    # entry of PRIVATE section
    PSL_FLAG_PLAIN = (1<<4)      # just used for PSL syntax checking

    begin_markers = {
        "// ===BEGIN ICANN DOMAINS===": PSL_FLAG_ICANN,
        "// ===BEGIN PRIVATE DOMAINS===": PSL_FLAG_PRIVATE,
    }
    end_markers = {
        PSL_FLAG_ICANN: "// ===END ICANN DOMAINS===",
        PSL_FLAG_PRIVATE: "// ===END PRIVATE DOMAINS===",
    }
    section_count = {PSL_FLAG_ICANN: 0, PSL_FLAG_PRIVATE: 0}

    line2number = {}    # rule (without '*.' / '!' prefix) -> line number of its last occurrence
    line2flag = {}      # rule (without '*.' / '!' prefix) -> flags of its last occurrence
    section = 0         # 0 outside of any section, else PSL_FLAG_ICANN / PSL_FLAG_PRIVATE

    for line in infile:
        line = line.strip('\n')
        nline += 1

        # check for leading/trailing whitespace
        stripped = line.strip()
        if stripped != line:
            line = line.replace('\t', '\\t')
            line = line.replace('\r', '^M')
            # publish the current line *before* reporting, otherwise the
            # warning would quote the text of the previous line
            orig_line = line
            warning('Leading/Trailing whitespace')
        orig_line = line
        line = stripped

        # empty line
        if not line:
            continue

        # comments, including the section begin/end markers
        if line[0:2] == "//":
            is_begin = line[3:11] == "===BEGIN"
            is_end = line[3:9] == "===END"

            if section == 0:
                if line in begin_markers:
                    section = begin_markers[line]
                    section_count[section] += 1
                elif is_begin:
                    error('Unexpected begin of unknown section')
                elif is_end:
                    error('End of section without previous begin')
            elif line == end_markers[section]:
                section = 0
            elif is_begin:
                error('Unexpected begin of section')
            elif is_end:
                error('Unexpected end of section')

            continue    # processing of comments ends here

        # No rule must be outside of a section
        if section == 0:
            error('Rule outside of section')

        # decode UTF-8 input into unicode, needed only for python 2.x
        if sys.version_info[0] < 3:
            try:
                line = line.decode('utf-8')
            except UnicodeDecodeError:
                error('Invalid UTF-8 character')
                continue

        # each rule must be lowercase (or more exactly: not uppercase and not titlecase)
        if line != line.lower():
            error('Rule must be lowercase')

        # strip a leading wildcard
        flags = section
        if line[0:2] == '*.':
            flags |= PSL_FLAG_WILDCARD
            line = line[2:]

        # slice instead of index: the rule may be empty here (input '*.')
        if line[0:1] == '!':
            flags |= PSL_FLAG_EXCEPTION
            line = line[1:]
        else:
            flags |= PSL_FLAG_PLAIN

        # wildcard and exception must not combine
        if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION:
            error('Combination of wildcard and exception')
            continue

        labels = line.split('.')

        # an exception needs an earlier wildcard rule for its parent domain
        if flags & PSL_FLAG_EXCEPTION and len(labels) > 1:
            # join the labels as they are; str() would fail on non-ASCII
            # unicode labels under python 2.x
            domain = ".".join(labels[1:])
            if not line2flag.get(domain, 0) & PSL_FLAG_WILDCARD:
                error('Exception without previous wildcard')

        # at most one error is reported per label
        for label in labels:
            if not label:
                error('Leading/trailing or multiple dot')
                continue

            if label[0:4] == 'xn--':
                error('Punycode found')
                continue

            if '--' in label:
                error('Double minus found')
                continue

            # allowed are a-z,0-9,- and unicode >= 128 (maybe that can be finetuned a bit !?)
            if any(not c.isalnum() and c != '-' and ord(c) < 128 for c in label):
                error('Illegal character')

        # Found existing entry.  The lookup key is the rule with its '*.' or
        # '!' prefix removed, so 'foo.bar', '*.foo.bar' and '!foo.bar' all
        # collide with each other here:
        #   !foo.bar + foo.bar    is contradictory
        #   foo.bar + *.foo.bar   is a doublette, since *.foo.bar implies foo.bar
        if line in line2flag:
            error('Found doublette/ambiguity (previous line was %d)' % line2number[line])

        line2number[line] = nline
        line2flag[line] = flags

    # the remaining messages concern the whole file, not a single line
    orig_line = None

    if section == PSL_FLAG_ICANN:
        error('ICANN section not closed')
    elif section == PSL_FLAG_PRIVATE:
        error('PRIVATE section not closed')

    for flag, name in ((PSL_FLAG_ICANN, 'ICANN'), (PSL_FLAG_PRIVATE, 'PRIVATE')):
        count = section_count[flag]
        if count < 1:
            warning('No %s section found' % name)
        elif count > 1:
            warning('%d %s sections found' % (count, name))
|
||||
|
||||
def usage():
    """Print the command line usage and terminate with exit status 1.

    Raises SystemExit.  sys.exit() is used rather than the exit() helper,
    which is injected by the `site` module and is not guaranteed to exist
    (e.g. with `python -S`).
    """
    print('usage: %s PSLfile' % sys.argv[0])
    print('or %s - # To read PSL from STDIN' % sys.argv[0])
    sys.exit(1)
|
||||
|
||||
|
||||
def main():
    """Check the syntax of the PSL file named by the last command line argument.

    An argument of '-' reads the PSL from standard input.  Without any
    argument the usage is printed and the program exits.

    Returns True when at least one error was reported, else False.
    """
    if len(sys.argv) < 2:
        usage()

    source = sys.argv[-1]
    if source == '-':
        infile = sys.stdin
    else:
        infile = open(source, 'r')

    with infile:
        lint_psl(infile)

    return errors != 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # main() returns a bool; sys.exit() maps True to exit status 1 and
    # False to exit status 0
    found_errors = main()
    sys.exit(found_errors)
|
|
@ -1,20 +0,0 @@
|
|||
#!/bin/sh
#
# Runs pslint.py on every *.input file in the current directory and compares
# the output with the matching *.expected file.
# Output and diffs are written to ./log; the diff of a passing test is removed.
# Exit status is 0 if all tests passed, else 1.

rc=0
rm -rf log
mkdir -p log

# Glob directly instead of parsing 'ls' output, and strip only the '.input'
# suffix so that test names containing dots or spaces keep working.
for input in *.input; do
  # an unmatched glob stays literal - nothing to test in that case
  [ -e "$input" ] || continue

  file=${input%.input}

  # printf instead of the non-portable 'echo -n'
  printf '%s: ' "$file"
  ./pslint.py "$input" >"log/$file.log" 2>&1

  if diff "$file.expected" "log/$file.log" >"log/$file.diff"; then
    echo OK
    rm "log/$file.diff"
  else
    echo FAILED
    rc=1
  fi
done

exit $rc
|
|
@ -1,4 +0,0 @@
|
|||
10: error: Illegal character: 'a.exam#ple.com'
|
||||
11: error: Illegal character: 'b.exam ple.com'
|
||||
13: error: Invalid UTF-8 character: 'd.测è¯'
|
||||
15: warning: No PRIVATE section found
|
|
@ -1,15 +0,0 @@
|
|||
// test:
|
||||
// - label contains illegal character
|
||||
// - c. is valid UTF-8
|
||||
// - d. has invalid UTF-8 code for the TLD
|
||||
//
|
||||
// best viewed with 'LC_ALL=C vi <filename>'
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
a.exam#ple.com
|
||||
b.exam ple.com
|
||||
c.测试
|
||||
d.测è¯
|
||||
|
||||
// ===END ICANN DOMAINS===
|
|
@ -1,4 +0,0 @@
|
|||
9: error: Leading/trailing or multiple dot: '.a.example.com'
|
||||
10: error: Leading/trailing or multiple dot: 'b.example.com.'
|
||||
11: error: Leading/trailing or multiple dot: 'c..example.com'
|
||||
13: warning: No PRIVATE section found
|
|
@ -1,13 +0,0 @@
|
|||
// test:
|
||||
// - leading dot
|
||||
// - trailing dot
|
||||
// - consecutive dots
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
// example.com: https://www.iana.org/domains/reserved
|
||||
.a.example.com
|
||||
b.example.com.
|
||||
c..example.com
|
||||
|
||||
// ===END ICANN DOMAINS===
|
|
@ -1,6 +0,0 @@
|
|||
9: error: Found doublette/ambiguity (previous line was 8): '*.com'
|
||||
13: error: Found doublette/ambiguity (previous line was 12): '!www.com'
|
||||
17: error: Found doublette/ambiguity (previous line was 16): '*.example.com'
|
||||
21: error: Found doublette/ambiguity (previous line was 20): 'example1.com'
|
||||
24: error: Found doublette/ambiguity (previous line was 17): 'example.com'
|
||||
26: warning: No PRIVATE section found
|
|
@ -1,26 +0,0 @@
|
|||
// test:
|
||||
// - valid wildcard usage
|
||||
// - invalid wildcard usage
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
// *.com implicitly includes .com
|
||||
com
|
||||
*.com
|
||||
|
||||
// double exception
|
||||
!www.com
|
||||
!www.com
|
||||
|
||||
// double wildcard
|
||||
*.example.com
|
||||
*.example.com
|
||||
|
||||
// double plain rule
|
||||
example1.com
|
||||
example1.com
|
||||
|
||||
// redundant/overlapping rule
|
||||
example.com
|
||||
|
||||
// ===END ICANN DOMAINS===
|
|
@ -1,6 +0,0 @@
|
|||
17: error: Leading/trailing or multiple dot: '!.example.com'
|
||||
18: error: Illegal character: 'w!w.example.com'
|
||||
19: error: Found doublette/ambiguity (previous line was 12): '!www.example.com'
|
||||
20: error: Exception without previous wildcard: '!a.b.example.com'
|
||||
21: error: Exception without previous wildcard: '!a.c.example.com'
|
||||
23: warning: No PRIVATE section found
|
|
@ -1,23 +0,0 @@
|
|||
// test:
|
||||
// - valid exception
|
||||
// - invalid exceptions
|
||||
// - same exception twice
|
||||
// - exception without wildcard
|
||||
// - exception with prevailing '*' rule (!localhost)
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
// valid
|
||||
*.example.com
|
||||
!www.example.com
|
||||
!localhost
|
||||
c.example.com
|
||||
|
||||
// invalid
|
||||
!.example.com
|
||||
w!w.example.com
|
||||
!www.example.com
|
||||
!a.b.example.com
|
||||
!a.c.example.com
|
||||
|
||||
// ===END ICANN DOMAINS===
|
|
@ -1,3 +0,0 @@
|
|||
7: error: Punycode found: 'a.xn--0zwm56d'
|
||||
8: error: Double minus found: 'a.ex--ample.com'
|
||||
10: warning: No PRIVATE section found
|
|
@ -1,10 +0,0 @@
|
|||
// test:
|
||||
// - label is punycode
|
||||
// - label has double minus
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
a.xn--0zwm56d
|
||||
a.ex--ample.com
|
||||
|
||||
// ===END ICANN DOMAINS===
|
|
@ -1,3 +0,0 @@
|
|||
4: error: Rule outside of section: 'example.com'
|
||||
4: warning: No ICANN section found
|
||||
4: warning: No PRIVATE section found
|
|
@ -1,4 +0,0 @@
|
|||
// test:
|
||||
// - no section at all
|
||||
|
||||
example.com
|
|
@ -1,2 +0,0 @@
|
|||
11: warning: 2 ICANN sections found
|
||||
11: warning: No PRIVATE section found
|
|
@ -1,11 +0,0 @@
|
|||
// test:
|
||||
// - two ICANN sections
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
example.com
|
||||
|
||||
// ===END ICANN DOMAINS===
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
// ===END ICANN DOMAINS===
|
|
@ -1,2 +0,0 @@
|
|||
11: warning: No ICANN section found
|
||||
11: warning: 2 PRIVATE sections found
|
|
@ -1,11 +0,0 @@
|
|||
// test:
|
||||
// - two PRIVATE sections
|
||||
|
||||
// ===BEGIN PRIVATE DOMAINS===
|
||||
|
||||
example.com
|
||||
|
||||
// ===END PRIVATE DOMAINS===
|
||||
|
||||
// ===BEGIN PRIVATE DOMAINS===
|
||||
// ===END PRIVATE DOMAINS===
|
|
@ -1,3 +0,0 @@
|
|||
8: error: Unexpected end of section: '// ===END PRIVATE DOMAINS==='
|
||||
8: error: ICANN section not closed
|
||||
8: warning: No PRIVATE section found
|
|
@ -1,8 +0,0 @@
|
|||
// test:
|
||||
// - ICANN section improperly closed
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
example.com
|
||||
|
||||
// ===END PRIVATE DOMAINS===
|
|
@ -1,7 +0,0 @@
|
|||
12: warning: Leading/Trailing whitespace: '// example.com: https://www.iana.org/domains/reserved'
|
||||
13: warning: Leading/Trailing whitespace: ' a.example.com'
|
||||
14: warning: Leading/Trailing whitespace: 'b.example.com '
|
||||
15: warning: Leading/Trailing whitespace: '\tc.example.com'
|
||||
16: warning: Leading/Trailing whitespace: 'd.example.com\t'
|
||||
17: warning: Leading/Trailing whitespace: 'e.example.com^M'
|
||||
19: warning: No PRIVATE section found
|
|
@ -1,19 +0,0 @@
|
|||
// test:
|
||||
// - leading space
|
||||
// - trailing space, empty line with spaces
|
||||
// - leading tab
|
||||
// - trailing tab
|
||||
// - line ends with CRLF
|
||||
// - empty line with spaces
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
// example.com: https://www.iana.org/domains/reserved
|
||||
a.example.com
|
||||
b.example.com
|
||||
c.example.com
|
||||
d.example.com
|
||||
e.example.com
|
||||
|
||||
|
||||
// ===END ICANN DOMAINS===
|
|
@ -1,5 +0,0 @@
|
|||
11: error: Illegal character: '**.com'
|
||||
12: error: Illegal character: 'a*.com'
|
||||
13: error: Illegal character: 'b.*.com'
|
||||
14: error: Illegal character: 'a.b.*'
|
||||
16: warning: No PRIVATE section found
|
|
@ -1,16 +0,0 @@
|
|||
// test:
|
||||
// - valid wildcard usage
|
||||
// - invalid wildcard usage
|
||||
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
|
||||
// valid
|
||||
*.com
|
||||
|
||||
// invalid
|
||||
**.com
|
||||
a*.com
|
||||
b.*.com
|
||||
a.b.*
|
||||
|
||||
// ===END ICANN DOMAINS===
|
2
list
2
list
|
@ -1 +1 @@
|
|||
Subproject commit 1f3ad51171235aafe423435606e869f0161582e4
|
||||
Subproject commit e2f2f4bfe2ae57651afb7268bb9a0b53da5eb8cf
|
Loading…
Reference in New Issue