diff options
| author | Joris Guyonvarch | 2025-12-26 18:41:26 +0100 |
|---|---|---|
| committer | Joris Guyonvarch | 2025-12-27 20:41:44 +0100 |
| commit | a110c200e86d2325af07167531fac0f61d9681a0 (patch) | |
| tree | 90e843f915a2e153ba735849afd83710d90560bf /src/str_format.py | |
| parent | a26d92ad5055fa057647158eb79511e7b1841162 (diff) | |
Switch to GUI to manage the library
Allows combining the CLI and the view into a single tool.
Diffstat (limited to 'src/str_format.py')
| -rw-r--r-- | src/str_format.py | 72 |
1 files changed, 72 insertions, 0 deletions
"""String helpers: path-safe slugs and typographic cleanup of text."""

import pathlib
import re
import unicodedata

# Non-breaking ("insecable") space, written as an escape so it stays visible
# in editors, diffs and web views instead of looking like a plain space.
# NOTE(review): assumes U+00A0 is the intended character; French typography
# sometimes uses the narrow variant U+202F -- confirm.
_NBSP = '\u00a0'

# (opening, closing) double-quote characters for each supported language.
_QUOTES = {
    'fr': ('«', '»'),
    'en': ('“', '”'),
}


def safe_path(name):
    """Return a lowercase, accent-free slug of *name* joined with hyphens.

    Every character that is not alphanumeric becomes a separator; runs of
    separators collapse into a single '-'. The result is '' when *name*
    contains no alphanumeric character.
    """
    simplified = ''.join(alnum_or_space(c) for c in unaccent(name.lower()))
    return '-'.join(simplified.split())


def unaccent(s):
    """Return *s* with combining marks (Unicode category Mn) removed.

    The string is decomposed with NFD first, so 'é' becomes 'e'.
    """
    decomposed = unicodedata.normalize('NFD', s)
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')


def alnum_or_space(c):
    """Return *c* if it is alphanumeric, otherwise a single space."""
    return c if c.isalnum() else ' '


def cleanup_text(s, lang):
    """Normalize the typography of *s* for language *lang*.

    Straight apostrophes become '’', three dots (with or without spaces
    between them) become '…', straight double quotes are paired into
    language-specific quotes and paragraphs are normalized. Language rules
    are then applied for 'fr' and 'en'; any other *lang* gets only the
    generic steps.
    """
    s = s.replace("'", '’')
    s = s.replace('...', '…')
    s = s.replace('. . .', '…')
    s = cleanup_double_quotes(s, lang)
    s = cleanup_paragraphs(s)

    if lang == 'fr':
        return _french_typography(s)
    if lang == 'en':
        return _english_typography(s)
    return s


def _french_typography(s):
    """Use guillemets and put a non-breaking space around French punctuation."""
    s = s.replace('“', '«').replace('”', '»')

    # Replace existing plain spaces by non-breaking spaces.
    s = re.sub(r' ([:?!])', _NBSP + r'\1', s)
    s = s.replace('« ', '«' + _NBSP)
    s = s.replace(' »', _NBSP + '»')

    # Add missing non-breaking spaces. Each mark gets its own pass, in this
    # order, so that a space inserted by one pass is seen by the next one.
    for mark in (':', '?', '!', '»'):
        s = re.sub(rf'([^{_NBSP}]){re.escape(mark)}', rf'\1{_NBSP}{mark}', s)
    s = re.sub(rf'«([^{_NBSP}])', rf'«{_NBSP}\1', s)
    return s


def _english_typography(s):
    """Turn guillemets into curly quotes, dropping the space inside them."""
    # The inner space may be plain or non-breaking; both are removed.
    s = re.sub(f'«[ {_NBSP}]?', '“', s)
    s = re.sub(f'[ {_NBSP}]?»', '”', s)
    return s


def cleanup_double_quotes(s, lang):
    """Replace straight double quotes in *s* by opening/closing quotes.

    Quotes are paired in order of appearance: the first '"' opens, the next
    one closes, and so on. For a *lang* other than 'fr' or 'en' the quotes
    are left unchanged rather than being dropped.
    """
    opening, closing = _QUOTES.get(lang, ('"', '"'))
    out = []
    quoted = False
    for c in s:
        if c == '"':
            out.append(closing if quoted else opening)
            quoted = not quoted
        else:
            out.append(c)
    return ''.join(out)


def cleanup_paragraphs(s):
    """Strip each non-blank line of *s* and join them as paragraphs.

    Blank lines are discarded, every remaining paragraph is prefixed with
    one space and paragraphs are separated by an empty line.
    """
    # NOTE(review): the one-character prefix is reproduced as a plain space;
    # confirm it was not meant to be another whitespace character.
    paragraphs = (p.strip() for p in s.split('\n'))
    return '\n\n'.join(f' {p}' for p in paragraphs if p)
