Add PSL linter written in Python
This commit is contained in:
parent
8c39291f55
commit
3ba8903915
|
@ -0,0 +1,185 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-#
|
||||||
|
#
|
||||||
|
# PSL linter written in python
|
||||||
|
#
|
||||||
|
# Copyright 2016 Tim Rühsen (tim dot ruehsen at gmx dot de). All rights reserved.
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
# copy of this software and associated documentation files (the "Software"),
|
||||||
|
# to deal in the Software without restriction, including without limitation
|
||||||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
# and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
# Software is furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
# DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Shared linter state. lint_psl() updates `line` and `nline` while it walks the
# input; warning() and error() read them to build their messages and bump the
# matching counter.
line = ""       # text of the input line currently being checked
nline = 0       # number of that line (incremented before each line is checked)
errors = 0      # how many errors have been reported so far
warnings = 0    # how many warnings have been reported so far
|
||||||
|
|
||||||
|
def warning(msg):
    """Report a warning about the line currently being linted.

    Prints ``<nline>: warning: <msg>: '<line>'`` to stdout, taking the line
    number and line text from the module-level ``nline`` and ``line``, and then
    increments the module-level ``warnings`` counter.
    """
    global warnings

    report = "%d: warning: %s: '%s'" % (nline, msg, line)
    print(report)
    warnings = warnings + 1
|
||||||
|
|
||||||
|
def error(msg):
    """Report an error about the line currently being linted.

    Prints ``<nline>: error: <msg>: '<line>'`` to stdout, taking the line
    number and line text from the module-level ``nline`` and ``line``, and then
    increments the module-level ``errors`` counter.
    """
    global errors

    report = "%d: error: %s: '%s'" % (nline, msg, line)
    print(report)
    errors = errors + 1
|
||||||
|
|
||||||
|
def _label_problem(label):
    """Return the error message for one dot-separated rule label, or None.

    Only the first problem of a label is reported. Allowed characters are
    letters, digits, '-' and any non-ASCII character; uppercase ASCII letters
    pass here because the caller rejects them with its own lowercase check.
    """
    if not label:
        return 'Leading/trailing or multiple dot'
    if label[0:4] == 'xn--':
        return 'Punycode found'
    if '--' in label:
        return 'Double minus found'
    for c in label:
        if ord(c) < 128 and c != '-' and not c.isalnum():
            return 'Illegal character'
    return None


def lint_psl(infile):
    """Check the syntax of a Public Suffix List read from *infile*.

    *infile* is any iterable of text lines (an open file, ``sys.stdin``, a
    list of strings). Findings are reported through ``warning()`` and
    ``error()``; the module-level ``line`` and ``nline`` are kept pointing at
    the input line being checked so that those messages can quote it.
    ``nline`` continues from its current value. Nothing is returned.

    Checks performed per line:

    * leading/trailing whitespace (warning; the line is then checked stripped)
    * ``// ===BEGIN ...===`` / ``// ===END ...===`` markers must open and
      close the ICANN and PRIVATE sections consistently
    * every rule must be inside a section and must be lowercase
    * ``*.`` (wildcard) and ``!`` (exception) must not be combined
    * every label must be non-empty, not Punycode, free of ``--`` and free of
      illegal ASCII characters
    * a domain must not be listed twice with the same kind of rule, nor as
      both a plain rule and an exception; a wildcard rule may coexist with
      either
    """
    global line, nline

    # Rule kinds.
    PSL_FLAG_EXCEPTION = (1 << 0)
    PSL_FLAG_WILDCARD = (1 << 1)
    PSL_FLAG_PLAIN = (1 << 4)
    # Section states; 0 means "outside of any section".
    PSL_FLAG_ICANN = (1 << 2)    # entry of ICANN section
    PSL_FLAG_PRIVATE = (1 << 3)  # entry of PRIVATE section

    begin_markers = {
        "// ===BEGIN ICANN DOMAINS===": PSL_FLAG_ICANN,
        "// ===BEGIN PRIVATE DOMAINS===": PSL_FLAG_PRIVATE,
    }
    end_markers = {
        PSL_FLAG_ICANN: "// ===END ICANN DOMAINS===",
        PSL_FLAG_PRIVATE: "// ===END PRIVATE DOMAINS===",
    }

    # For each rule kind, the kinds already seen for the same domain that
    # make the new rule a duplicate or an ambiguity:
    #   !foo.bar +   foo.bar   -> ambiguous
    #   !foo.bar + *.foo.bar   -> allowed
    #    foo.bar + *.foo.bar   -> allowed
    conflicts = {
        PSL_FLAG_WILDCARD: (PSL_FLAG_WILDCARD,),
        PSL_FLAG_EXCEPTION: (PSL_FLAG_EXCEPTION, PSL_FLAG_PLAIN),
        PSL_FLAG_PLAIN: (PSL_FLAG_PLAIN, PSL_FLAG_EXCEPTION),
    }

    psl = {}      # domain -> {rule kind: line number where it was defined}
    section = 0

    for raw in infile:
        nline += 1
        line = raw.strip('\r\n')

        # check for leading/trailing whitespace; the warning quotes the
        # unstripped line, everything after works on the stripped one
        stripped = line.strip()
        if stripped != line:
            warning('Leading/Trailing whitespace')
            line = stripped

        # empty line
        if not line:
            continue

        # comments, including the section begin/end markers
        if line[0:2] == "//":
            marker = line[3:]
            if section == 0 and line in begin_markers:
                section = begin_markers[line]
            elif section != 0 and line == end_markers[section]:
                section = 0
            elif marker.startswith("===BEGIN"):
                if section == 0:
                    error('Unexpected begin of unknown section')
                else:
                    error('Unexpected begin of section')
            elif marker.startswith("===END"):
                if section == 0:
                    error('End of section without previous begin')
                else:
                    error('Unexpected end of section')
            continue  # processing of comments ends here

        # No rule must be outside of a section
        if section == 0:
            error('Rule outside of section')

        # Decode UTF-8 byte strings into unicode (what Python 2 file objects
        # yield); text that is already unicode is left alone.
        if isinstance(line, bytes):
            line = line.decode('utf-8')

        # each rule must be lowercase (or more exactly: not uppercase and not titlecase)
        if line != line.lower():
            error('Rule must be lowercase')

        # Strip the wildcard/exception prefixes from a local copy so that
        # messages keep quoting the rule exactly as written. Slicing instead
        # of indexing keeps a bare '*.' from raising IndexError.
        domain = line
        is_wildcard = domain[0:2] == '*.'
        if is_wildcard:
            domain = domain[2:]
        is_exception = domain[0:1] == '!'
        if is_exception:
            domain = domain[1:]

        # wildcard and exception must not combine
        if is_wildcard and is_exception:
            error('Combination of wildcard and exception')
            continue

        if is_wildcard:
            kind = PSL_FLAG_WILDCARD
        elif is_exception:
            kind = PSL_FLAG_EXCEPTION
        else:
            kind = PSL_FLAG_PLAIN

        for label in domain.split('.'):
            problem = _label_problem(label)
            if problem:
                error(problem)

        seen = psl.setdefault(domain, {})
        previous = [seen[k] for k in conflicts[kind] if k in seen]
        if previous:
            error('Found doublette/ambiguity (previous line was %d)' % previous[0])
            continue

        seen[kind] = nline
|
||||||
|
|
||||||
|
|
||||||
|
def usage():
    """Print the command-line usage to stdout and exit with status 1.

    Raises SystemExit; it never returns to the caller.
    """
    print('usage: %s PSLfile' % sys.argv[0])
    print('or %s - # To read PSL from STDIN' % sys.argv[0])
    # sys.exit() rather than the exit() helper: the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Lint the PSL file named by the last command-line argument.

    With no argument the usage text is printed and the process exits. An
    argument of ``-`` reads the list from stdin; anything else is opened as a
    file. Returns True if at least one error was reported, False otherwise,
    so the result can be handed straight to ``sys.exit()``.
    """
    if len(sys.argv) < 2:
        usage()

    path = sys.argv[-1]
    if path == '-':
        lint_psl(sys.stdin)
    elif sys.version_info[0] < 3:
        # Python 2: read byte strings; lint_psl() decodes them itself.
        with open(path, 'r') as infile:
            lint_psl(infile)
    else:
        # The PSL is UTF-8; do not depend on the locale's default encoding.
        with open(path, 'r', encoding='utf-8') as infile:
            lint_psl(infile)

    return errors != 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the linter and hand its result to the interpreter
# as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
|
Loading…
Reference in New Issue