-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_twin4j.py
46 lines (32 loc) · 1.18 KB
/
parse_twin4j.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import glob
import re
def find_featured_community_member(lines):
    """Locate the "Featured Community Member" heading in *lines*.

    A line qualifies when it begins with ``=== Featured Community Member:``
    or ``=== Featured Community Members:`` (the match is anchored at the
    start of the line).

    Returns the index of the first qualifying line, or -1 when no line
    qualifies.
    """
    heading_positions = (
        position
        for position, text in enumerate(lines)
        if re.match("=== Featured Community Member(s)?:", text)
    )
    # The generator is lazy, so scanning stops at the first heading found.
    return next(heading_positions, -1)
def filter_lines(lines):
    """Return *lines* with every ``++++``-delimited block removed.

    A line starting with ``++++`` toggles an "inside block" state. Lines
    seen while inside a block are dropped, and the delimiter lines
    themselves (both opening and closing) are dropped as well. If a block
    is never closed, everything after its opening delimiter is dropped.

    Args:
        lines: iterable of text lines.

    Returns:
        A new list holding the lines that lie outside any delimited block,
        in their original order.
    """
    filtered_lines = []
    exclude = False
    for line in lines:
        if line.startswith("++++"):
            # Flip the state and skip the delimiter itself. Without the
            # `continue`, the closing delimiter would be appended because
            # the state has already been reset by the time it is checked.
            exclude = not exclude
            continue
        if not exclude:
            filtered_lines.append(line)
    return filtered_lines
# Matches a link written as ``URL[text]`` or ``URL[text^]`` at the start of a
# line: group 1 is the URL, group 2 the link text. The text group is lazy so
# that an optional trailing ``^`` is left out of it rather than swallowed.
# A raw string is used because ``\[``, ``\^`` and ``\]`` are not valid Python
# string escapes.
LINK_PATTERN = re.compile(r"(https?://.*)\[(.*?)\^?\]")

# How many non-blank lines after the heading are inspected for links.
LINES_AFTER_HEADING = 9

paths = glob.glob("/Users/markneedham/projects/twin4j/adoc/*.adoc")
for path in paths:
    # NOTE(review): assumes the .adoc files are UTF-8 encoded - confirm.
    with open(path, "r", encoding="utf-8") as twin4j_file:
        lines = twin4j_file.readlines()

    # Drop ++++-delimited blocks and blank lines so that the window below
    # counts only lines with content.
    lines = [line for line in filter_lines(lines) if line.strip()]

    index = find_featured_community_member(lines)
    if index == -1:
        # No "Featured Community Member" heading in this file. Without this
        # guard the slice below would silently scan the first lines of the
        # file instead of the lines after a heading.
        continue

    potential = lines[index + 1: index + 1 + LINES_AFTER_HEADING]
    valid_lines = [
        line for line in potential
        if "twitter" in line
        or "linkedin" in line
        or "featured community member" in line
    ]

    for line in valid_lines:
        matches = LINK_PATTERN.match(line)
        print(line, matches)