From 8bd8cb23204a8ee665c2c568a34cf8ba74e68e52 Mon Sep 17 00:00:00 2001 From: Nicolas Sceaux Date: Fri, 28 Aug 2015 18:47:12 +0200 Subject: [PATCH] =?utf8?q?syllabify=20:=20prise=20en=20compte=20langues=20?= =?utf8?q?sans=20espace=20ins=C3=A9cable=20avant=20les=20ponctuations=20do?= =?utf8?q?ubles?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- scripts/livret.py | 37 +++++++++++++++++++++++++++---------- scripts/paroles.py | 28 ++++++++++++++++++++++------ scripts/syllabify.py | 4 +++- 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/scripts/livret.py b/scripts/livret.py index ce84cfff..cda442cf 100644 --- a/scripts/livret.py +++ b/scripts/livret.py @@ -65,8 +65,9 @@ class LilyLine(): return self._text class Lilybretto(): - def __init__(self): + def __init__(self, language): self._lines = [] + self.language = language def add_line(self, line): self._lines.append(line) @@ -74,17 +75,20 @@ class Lilybretto(): def get_lines(self): return self._lines - def syllabify(self, - sign_tokenizer = SignTokenizer(), - syllable_tokenizer = SyllableTokenizerWithWordSeparation()): + def syllabify(self): + sign_tokenizer = SignTokenizer(language = self.language) + syllable_tokenizer = SyllableTokenizerWithWordSeparation() for line in self._lines: line.syllabify(sign_tokenizer, syllable_tokenizer) class RawLibrettoReader(): - def read(self, filename): - file = open(filename, 'r') - libretto = Lilybretto() + def __init__(self, language="fr"): + self.language = language + + def read(self, file): + #file = open(filename, 'r') + libretto = Lilybretto(self.language) verse_parts = [] for line in file: verse_match = re.match(r"^%#(\S*) (.*)$", line) @@ -119,9 +123,22 @@ class RawLibrettoReader(): return libretto if __name__ == '__main__': - for filename in sys.argv[1:]: - reader = RawLibrettoReader() - libretto = reader.read(filename) + parser = argparse.ArgumentParser( + description='LilPond libretto generation.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--language', + default='fr', + help='verse language (fr, it)') + parser.add_argument( + 'files', metavar='FILE', + type=argparse.FileType('r'), + nargs='+', + help='input files') + args = vars(parser.parse_args()) + for file in args['files']: + reader = RawLibrettoReader(args['language']) + libretto = reader.read(file) libretto.syllabify() for line in libretto.get_lines(): print(line.get_lily_text()) diff --git a/scripts/paroles.py b/scripts/paroles.py index b916217e..b295bad8 100644 --- a/scripts/paroles.py +++ b/scripts/paroles.py @@ -1,9 +1,12 @@ from syllabify import * class Lyricsifier(): - def read_and_write(self, filename): - file = open(filename, 'r') - sign_tokenizer = SignTokenizer() + def __init__(self, language='fr'): + self.language = language + + def read_and_write(self, file): + #file = open(filename, 'r') + sign_tokenizer = SignTokenizer(language=self.language) syllable_tokenizer = SyllableTokenizerWithWordSeparation() for line in file: verse_match = re.match(r"^%#(\S*) (.*)$", line) @@ -17,7 +20,20 @@ class Lyricsifier(): print("") if __name__ == '__main__': - for filename in sys.argv[1:]: - transformer = Lyricsifier() - transformer.read_and_write(filename) + parser = argparse.ArgumentParser( + description='LilPond lyrics generation.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--language', + default='fr', + help='verse language (fr, it)') + parser.add_argument( + 'files', metavar='FILE', + type=argparse.FileType('r'), + nargs='+', + help='input files') + args = vars(parser.parse_args()) + for file in args['files']: + transformer = Lyricsifier(args['language']) + transformer.read_and_write(file) diff --git a/scripts/syllabify.py b/scripts/syllabify.py index 74977374..28181d07 100644 --- a/scripts/syllabify.py +++ b/scripts/syllabify.py @@ -80,6 +80,7 @@ class SignTokenizer(): Can be overriden with ignored_markers constructor keyword """ def __init__(self, + language = "fr", word_separators = " -", word_separator_markers = "°", simple_punctuations = ".,", @@ -96,6 +97,7 @@ class SignTokenizer(): word_separator_markers)) self.simple_punctuations = simple_punctuations self.double_punctuations = double_punctuations + self.space_before_double_punctuations = (language == "fr") self.apostrophes = apostrophes self.forced_syllable_end_marker = forced_syllable_end_marker self.mute_character_marker = mute_character_marker @@ -160,7 +162,7 @@ class SignTokenizer(): # punctuation elif punctuation_match: punct = punctuation_match.group(1) - if punct in self.double_punctuations: + if self.space_before_double_punctuations and punct in self.double_punctuations: self._add_text("\u00A0") self._add_text(punct) i += len(punctuation_match.group(0)) -- 2.11.4.GIT