From e72367e0d5608794aeecf876a57b8e85bf3e3c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98istein=20S=C3=B8vik?= Date: Sun, 12 Aug 2018 18:39:45 +0200 Subject: [PATCH] Added check to match html blocks --- linter/linter_lib/linter_auto_md.py | 80 +++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 4 deletions(-) diff --git a/linter/linter_lib/linter_auto_md.py b/linter/linter_lib/linter_auto_md.py index aaa0cee92b..c6725fd69b 100644 --- a/linter/linter_lib/linter_auto_md.py +++ b/linter/linter_lib/linter_auto_md.py @@ -100,7 +100,7 @@ def generate_content(temp_content, title): content = levenshtein_lst(content, TAGS_['language'], error_line) # Changes all internal quotations to ' and ' while outer quotation is " and " content = re.sub(r'\"(.*)\"', r"'\1'", content) - if re.search(r"[^\w\sæøåÆØÅ]", content): + if re.search(r"[^\w\sæøåÆØÅ,-.?]", content): content = '"{}"'.format(content) return content @@ -435,7 +435,7 @@ def fix_headers(header): return fix_headers('## ' + header_3.group(1).strip()) # Makes sure every title has exactly one space after the last # - header = re.sub(r'^ *(#+) *(.)', r'\1 \2', header) + header = re.sub(r'^ *(#+) *(.*)', r'\1 \2', header).strip() # Removes all punctuation from titles header = re.sub(r'(#+ .*)(\.|\,|\;) *$', r'\1', header) @@ -489,7 +489,7 @@ def is_header(line): asks the user to confirm whether the line is an header. ''' - header = re.search(r'^(#+)( *)(.)', line) + header = re.search(r'^(#+)( *)([^#\r\n])', line) if not header: header_2 = re.search(r'^([^=\r\n]+)={3,}$', line) header_3 = re.search(r'^([^=\r\n]+)-{3,}$', line) @@ -672,6 +672,71 @@ def add_newline_end_of_file(md_data): return md_data +def is_html_closed(md_data_lst): + ''' This makes sure every opened html tag is properly closed, + if it is not the file should NOT be linted. + + This file returns a tuple of the form: + + (False, 410-460:[Missing]: ) + + This means there is an error in the file and that the file is missing + an somewhere from line 410 to line 460. + ''' + + html_keys = [] + last_html_line = 1 + for i, line in enumerate(md_data_lst): + line_number = i + 1 + # If we are in a html block, use regex to check for end of block, else append + # Searches for expressions of the form: + match = re.search(r'^ *< */ *(\w+).*?> *$', line) + if match: + last_closed_tag = match.group(1) + if not html_keys: + # If we find a closed tag before any open ones, return error + error_msg = ' {}-{}:[{}]: {}'.format(color_word(last_html_line+1, MAIN_2_CLR), + color_word(line_number, MAIN_2_CLR), + color_word('Missing', ERROR_CLR), + color_word(line, ERROR_CLR)) + return (False, error_msg) + if not html_keys[-1][-1] == last_closed_tag: + # This checks if the last closed tag matches the last opened tag + error_msg = ' {}-{}:[{}]: {}'.format(color_word(html_keys[-1][0]+1, MAIN_2_CLR), + color_word(line_number-1, MAIN_2_CLR), + color_word('Missing', ERROR_CLR), + color_word(''.format(html_keys[-1][-1]), ERROR_CLR)) + return (False, error_msg) + + last_html_line = line_number + del html_keys[-1] + + # This regex searches for html + match = re.search( + r'^ *(< *(\w+).*?>.*?< *\/\2.*?>|(< *(\w+).*?>)) *(\r?\n|$)', + line) + if not match: + continue + if not match.group(4): + continue + # Finds codeblocks with no end. Example: