Fix chord alignment: snap to word boundaries

Improve merge_chord_lyric() to snap chord positions to the start
of the word they fall within, instead of splitting words mid-way.
Fixes artifacts like "Liebespaar \chord{C}e" → "\chord{C}Liebespaare".

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
shahondin1624
2026-04-02 16:36:06 +02:00
parent d875fd225b
commit 7b99778f67
295 changed files with 1804 additions and 1782 deletions

View File

@@ -273,7 +273,11 @@ def parse_chords_with_positions(chord_line: str) -> list:
def merge_chord_lyric(chord_line: str, lyric_line: str) -> str:
"""Merge a chord line and lyric line by inserting \\chord{X} at positions."""
"""Merge a chord line and lyric line by inserting \\chord{X} at positions.
Snaps chord positions to word boundaries: if a chord falls inside a word,
it is moved to the start of that word.
"""
chords = parse_chords_with_positions(chord_line)
if not chords:
@@ -281,27 +285,45 @@ def merge_chord_lyric(chord_line: str, lyric_line: str) -> str:
original_len = len(lyric_line)
# Separate chords into: within lyric range vs beyond
# Only keep chords within lyric range
in_range = [(pos, ch) for pos, ch in chords if pos < original_len]
beyond = [(pos, ch) for pos, ch in chords if pos >= original_len]
# Insert in-range chords from right to left to maintain positions
# Snap each chord position to the start of the word it falls in
snapped = []
for pos, chord in in_range:
# If position is inside a word (not at a space or start of word),
# scan backwards to find the word start
snap_pos = pos
if pos > 0 and pos < len(lyric_line) and lyric_line[pos] != ' ':
# Check if previous char is also non-space (mid-word)
if lyric_line[pos - 1] != ' ':
# Scan backwards to find word start
while snap_pos > 0 and lyric_line[snap_pos - 1] != ' ':
snap_pos -= 1
# If we're at a space, move forward to the next non-space
while snap_pos < len(lyric_line) and lyric_line[snap_pos] == ' ':
snap_pos += 1
snapped.append((snap_pos, chord))
# Deduplicate: if multiple chords snap to the same position, only the one
# processed first by the right-to-left insertion loop below is inserted; the
# others are skipped, so \chord{A}\chord{B} is never emitted at one position
# Insert from right to left to maintain positions
result = lyric_line
for pos, chord in reversed(in_range):
prev_insert_pos = len(result) + 1
for pos, chord in reversed(snapped):
chord_macro = f"\\chord{{{chord}}}"
result = result[:pos] + chord_macro + result[pos:]
# Append beyond-range chords (they have no corresponding lyric text)
# These are typically overflow chords from the PDF layout
# Skip them as they don't have corresponding lyrics on this line
# (they may belong to the next lyric line or be layout artifacts)
# Avoid inserting at same position as previous (from right)
if pos < prev_insert_pos:
result = result[:pos] + chord_macro + result[pos:]
prev_insert_pos = pos
result = result.strip()
# Collapse multiple spaces but preserve single spaces
result = re.sub(r' +', ' ', result)
# Ensure there's a space before \chord{} when preceded by non-space text
# This prevents leadsheets parsing issues with adjacent chord macros
result = re.sub(r'([^\s\\])\\chord\{', r'\1 \\chord{', result)
# For adjacent chords without text between them (e.g. \chord{A}\chord{B}),
# add a space to prevent leadsheets tabular nesting issues
result = re.sub(r'\}(\\chord\{)', r'} \1', result)
return result