Skip to content

Commit

Permalink
Use only alphanumeric characters in identifiers (remove accents)
Browse files Browse the repository at this point in the history
Correct regex
  • Loading branch information
chdemko committed Nov 12, 2015
1 parent ac9a096 commit c42b0b1
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 70 deletions.
12 changes: 6 additions & 6 deletions pandoc-numbering-sample.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@ This is the first section

Exercise #

This is the first exercise. Have also a look at the [](#second).
This is the first exercise. Have also a look at the [](#exercise:second).

> Theorem (Needed for the [second exercise](#second)) #theorem1
> Theorem (Needed for the [second exercise](#exercise:second)) #theorem:first
>
> This is the first theorem.
> Look at the [exercise](#second "Go to the exercise #").
> Look at the [exercise](#exercise:second "Go to the exercise #").
Exercise (This is the second exercise) #second
Exercise (This is the second exercise) #exercise:second

Use [_theorem #_](#theorem1)
Use [_theorem #_](#theorem:first)

This is the second section
==========================

> Theorem #
>
> Another theorem.
> Another theorem. Can be useful in [](#exercise:1)
Unnumbered ##

157 changes: 94 additions & 63 deletions pandoc_numbering.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
Pandoc filter to number all kinds of things.
"""

from pandocfilters import walk, stringify, Str, Space, Para, Strong, Span, Link, Emph
from pandocfilters import walk, stringify, Str, Space, Para, Strong, Span, Link, Emph, RawInline
from functools import reduce
import sys
import json
import io
import codecs
import re
import unicodedata

count = {}
numbers = {}
labels = {}
replace = None
information = {}

def toJSONFilters(actions):
"""Converts a list of actions into a filter
Expand All @@ -34,79 +34,110 @@ def toJSONFilters(actions):
format = ""

altered = reduce(lambda x, action: walk(x, action, format, doc[0]['unMeta']), actions, doc)

json.dump(altered, sys.stdout)

def removeAccents(string):
    """Return *string* with accents and other combining marks removed.

    The text is decomposed with Unicode NFKD normalization, which turns an
    accented letter into its base letter followed by combining marks (and
    expands compatibility characters such as ligatures). Every combining
    character is then dropped and the remaining characters are joined back
    into a unicode string.
    """
    nfkd_form = unicodedata.normalize('NFKD', string)
    # unicodedata.combining() returns 0 for non-combining characters
    return u"".join(c for c in nfkd_form if not unicodedata.combining(c))

def toIdentifier(string):
    """Convert *string* into an ASCII identifier.

    The text is lower-cased and stripped of accents (see removeAccents).
    Each run of characters other than ASCII letters, digits, '_' and '-'
    is replaced by a single dash, and every leading character that is not
    an ASCII letter is removed. The result may be empty when the input
    contains no ASCII letter.
    """
    # Replace each run of invalid characters by a single dash
    string = re.sub(r'[^0-9a-zA-Z_-]+', '-', removeAccents(string.lower()))

    # Strip all leading non-letters (digits, dashes, underscores), so a
    # non-empty result always starts with a letter
    string = re.sub(r'^[^a-zA-Z]+', '', string)

    return string

def numbering(key, value, format, meta):
if key == 'Para':
length = len(value)
if length >= 3 and value[length - 2] == Space() and value[length - 1]['t'] == 'Str':
last = value[length - 1]['c']
if last[0] == '#':
tag = last

if re.match('^#([a-zA-Z][\w:.-]*)?$', last):
# Is it a Para and the last element is an identifier beginning with '#'
if len(last) == 1 or last[1] != '#':
global count, numbers, labels

# Detect the title
title = None
if value[length - 3]['t'] == 'Str' and value[length - 3]['c'][-1:] == ')':
for (i, item) in enumerate(value):
if item['t'] == 'Str' and item['c'][0] == '(':
title = Emph(value[i - 1:length - 2])
value = value[:i - 1] + value[length - 2:]
length = i + 1
break

# Convert the value to a category (eliminating the '#')
category = stringify(value[:length - 2])
if category not in count:
count[category] = 0
count[category] = count[category] + 1

# Replace the '#' by the category count
value[length - 1]['c'] = str(count[category])

# Prepare the final text
text = [Strong(value)]

# Add the title to the final text
if title != None:
text.append(title)

if tag != '#':
# Store the numbers and the label for automatic numbering (See referencing function)
numbers[tag] = value[length - 1]['c']
labels[tag] = value

# Return the final text in a Span element embedded in a Para element
return Para([Span([tag[1:], [], []], text)])
else:
# Return the final text in a Para element
return Para(text)
global count, information

# Detect the title
title = None
if value[length - 3]['t'] == 'Str' and value[length - 3]['c'][-1:] == ')':
for (i, item) in enumerate(value):
if item['t'] == 'Str' and item['c'][0] == '(':
title = Emph(value[i:length - 2])
value = value[:i - 1] + value[length - 2:]
length = i + 1
break

# Convert the value to a name (eliminating the '#')
name = toIdentifier(stringify(value[:length - 2]))

# Is it a new category?
if name not in count:
count[name] = 0

count[name] = count[name] + 1

# Get the number
number = str(count[name])

# Determine the tag
if last == '#':
tag = name + ':' + number
else:
# Special case where the last element is '##...'
value[length - 1]['c'] = value[length - 1]['c'].replace('##', '#', 1)
return Para(value)
tag = last[1:]

# Replace the '#' by the name count
value[length - 1]['c'] = number

# Prepare the final text
text = [Strong(value)]

# Add the title to the final text
if title != None:
text.append(Space())
text.append(title)

# Store the numbers and the label for automatic numbering (See referencing function)
information[tag] = {'number': number, 'text': value}

# Prepare the contents
contents = [Span([tag, [], []], text)]

# Special case for LaTeX
if format == 'latex':
contents.insert(0, RawInline('tex', '\\phantomsection'))

# Return the contents in a Para element
return Para(contents)
elif re.match('^##([a-zA-Z][\w:.-]*)?$', last):
# Special case where the last element is '##...'
value[length - 1]['c'] = value[length - 1]['c'].replace('##', '#', 1)
return Para(value)

replace = None

def referencing(key, value, format, meta):
global numbers, labels, replace
global information, replace

# Is it a link with a right tag?
# Is it a link with a right reference?
if key == 'Link':
[text, [identifier, title]] = value
if identifier in numbers:
# Replace all '#' with the corresponding number in the title
value[1][1] = title.replace('#', numbers[identifier])

if text == []:
# The link text is empty, replace it with the default label
value[0] = labels[identifier]
else:
# The link text is not empty, replace all '#' with the corresponding number
replace = numbers[identifier]
value[0] = walk(text, replacing, format, meta)
[text, [reference, title]] = value
if re.match('^#([a-zA-Z][\w:.-]*)?$', reference):
# Compute the name
tag = reference[1:]

if tag in information:
# Replace all '#' with the corresponding number in the title
value[1][1] = title.replace('#', information[tag]['number'])

if text == []:
# The link text is empty, replace it with the default label
value[0] = information[tag]['text']
else:
# The link text is not empty, replace all '#' with the corresponding number
replace = information[tag]['number']
value[0] = walk(text, replacing, format, meta)

def replacing(key, value, format, meta):
global replace
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='0.3.1',
version='0.3.2',

# The project's description
description='A pandoc filter for automatic numbering',
Expand Down

0 comments on commit c42b0b1

Please sign in to comment.