Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 15 additions & 126 deletions confluence-mdx/bin/reverse_sync/list_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,71 +8,16 @@
from reverse_sync.lost_info_patcher import apply_lost_info
from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_inner_xhtml
from reverse_sync.text_transfer import transfer_text_changes
from mdx_to_storage.inline import convert_inline
from text_utils import normalize_mdx_to_plain, collapse_ws, strip_list_marker, strip_for_compare
from text_utils import normalize_mdx_to_plain


def _resolve_child_mapping(
    old_plain: str,
    parent_mapping: BlockMapping,
    id_to_mapping: Dict[str, BlockMapping],
) -> Optional[BlockMapping]:
    """Find the child of ``parent_mapping`` whose plain text matches ``old_plain``.

    Matching passes run from strictest to loosest. Within a pass, children
    are examined in ``parent_mapping.children`` order and the first child
    that satisfies the pass is returned:

    1. equal after ``collapse_ws``;
    2. equal after removing all whitespace;
    3. equal after ``strip_list_marker`` is applied to the child text
       (only when that call actually changed the child text);
    4. equal after ``strip_list_marker`` is applied to ``old_plain``
       (only when that call actually changed it);
    5. equal on the first 20 whitespace-free characters, provided
       ``old_plain`` is at most twice as long as the child text.

    Args:
        old_plain: Plain text of the old MDX block.
        parent_mapping: Mapping whose ``children`` ids are searched.
        id_to_mapping: Lookup from block id to mapping; child ids that are
            missing from it are skipped.

    Returns:
        The matching child mapping, or ``None`` when ``old_plain`` collapses
        to an empty string or no child matches any pass.
    """
    old_norm = collapse_ws(old_plain)
    if not old_norm:
        return None

    # Resolve the child ids once instead of once per pass.
    children = [
        child
        for child in map(id_to_mapping.get, parent_mapping.children)
        if child
    ]

    # Pass 1: exact match after whitespace collapsing.
    for child in children:
        if collapse_ws(child.xhtml_plain_text) == old_norm:
            return child

    # Every remaining pass compares whitespace-free text, so strip each
    # child's text a single time and reuse it.
    old_nospace = re.sub(r'\s+', '', old_norm)
    candidates = [
        (child, re.sub(r'\s+', '', child.xhtml_plain_text))
        for child in children
    ]

    # Pass 2: exact match ignoring all whitespace.
    for child, child_nospace in candidates:
        if child_nospace == old_nospace:
            return child

    # Pass 3: compare after removing the list marker from the XHTML side
    # (covers a child whose text has the form "- text").
    for child, child_nospace in candidates:
        child_unmarked = strip_list_marker(child_nospace)
        if child_unmarked != child_nospace and old_nospace == child_unmarked:
            return child

    # Pass 4: compare after removing the list marker from the MDX side.
    old_unmarked = strip_list_marker(old_nospace)
    if old_unmarked != old_nospace:
        for child, child_nospace in candidates:
            if old_unmarked == child_nospace:
                return child

    # Pass 5: leading-prefix match (tolerates emoticon / lost_info differences).
    # When an ac:emoticon in the XHTML is not replaced by text, a full-string
    # comparison can fail, so only the first 20 characters are compared.
    # If old_nospace is more than twice as long as the child, the match is
    # rejected: this prevents a whole callout's text from matching an inner
    # paragraph merely because they share the same first line.
    _PREFIX_LEN = 20
    if len(old_nospace) >= _PREFIX_LEN:
        old_prefix = old_nospace[:_PREFIX_LEN]
        for child, child_nospace in candidates:
            if (len(child_nospace) >= _PREFIX_LEN
                    and child_nospace[:_PREFIX_LEN] == old_prefix
                    and len(old_nospace) <= len(child_nospace) * 2):
                return child

    return None

def _get_ordered_list_start(content: str) -> Optional[int]:
"""MDX 리스트 콘텐츠에서 첫 번째 순서 번호를 반환한다."""
for line in content.split('\n'):
m = re.match(r'^\s*(\d+)\.\s+', line)
if m:
return int(m.group(1))
return None


Expand Down Expand Up @@ -158,10 +103,13 @@ def _regenerate_list_from_parent(
'old_plain_text': parent.xhtml_plain_text,
'new_inner_xhtml': new_inner,
}

# <ol start="N"> 속성 변경 감지: 시작 번호가 달라지면 ol_start 포함
old_start = _get_ordered_list_start(change.old_block.content)
new_start = _get_ordered_list_start(change.new_block.content)
if old_start is not None and new_start is not None and old_start != new_start:
patch['ol_start'] = new_start

return [patch]


Expand All @@ -174,10 +122,9 @@ def build_list_item_patches(
id_to_mapping: Optional[Dict[str, BlockMapping]] = None,
mapping_lost_info: Optional[Dict[str, dict]] = None,
) -> List[Dict[str, str]]:
"""리스트 블록의 각 항목을 개별 매핑과 대조하여 패치를 생성한다.
"""리스트 블록 변경을 XHTML에 패치한다.

R2: child 매칭 성공 시 항상 child inner XHTML 재생성,
child 매칭 실패 시 전체 리스트 inner XHTML 재생성.
sidecar에서 parent mapping을 찾아 전체 리스트 inner XHTML을 재생성한다.
"""
old_items = split_list_items(change.old_block.content)
new_items = split_list_items(change.new_block.content)
Expand All @@ -188,72 +135,14 @@ def build_list_item_patches(
parent_mapping = find_mapping_by_sidecar(
change.index, mdx_to_sidecar, xpath_to_mapping)

# sidecar에 없으면 텍스트 포함 검색으로 parent 찾기
if parent_mapping is None:
from reverse_sync.patch_builder import _find_containing_mapping
old_plain_all = normalize_mdx_to_plain(
change.old_block.content, 'list')
parent_mapping = _find_containing_mapping(
old_plain_all, mappings, used_ids or set())

# 항목 수 불일치 → 전체 리스트 재생성
# 항목 수 불일치 또는 내용 변경 → 전체 리스트 재생성
if len(old_items) != len(new_items):
return _regenerate_list_from_parent(
change, parent_mapping, used_ids, mapping_lost_info)

patches = []
for old_item, new_item in zip(old_items, new_items):
if old_item == new_item:
continue
old_plain = normalize_mdx_to_plain(old_item, 'list')

# parent mapping의 children에서 child 해석 시도
mapping = None
if parent_mapping is not None and parent_mapping.children and id_to_mapping is not None:
mapping = _resolve_child_mapping(
old_plain, parent_mapping, id_to_mapping)

if mapping is None:
# R2: child 매칭 실패 → 전체 리스트 재생성
if old_item != new_item:
return _regenerate_list_from_parent(
change, parent_mapping, used_ids, mapping_lost_info)

# child 매칭 성공: child inner XHTML 재생성
new_plain = normalize_mdx_to_plain(new_item, 'list')

# 멱등성 체크: push 후 XHTML이 이미 업데이트된 경우 건너뜀
if (collapse_ws(old_plain) != collapse_ws(mapping.xhtml_plain_text)
and collapse_ws(new_plain) == collapse_ws(mapping.xhtml_plain_text)):
continue

if used_ids is not None:
used_ids.add(mapping.block_id)

# 재생성 시 소실되는 XHTML 요소 포함 시 텍스트 전이로 폴백
if '<ac:image' in mapping.xhtml_text or '<span style=' in mapping.xhtml_text:
xhtml_text = transfer_text_changes(
old_plain, new_plain, mapping.xhtml_plain_text)
patches.append({
'xhtml_xpath': mapping.xhtml_xpath,
'old_plain_text': mapping.xhtml_plain_text,
'new_plain_text': xhtml_text,
})
continue

new_item_text = re.sub(r'^[-*+]\s+', '', new_item.strip())
new_item_text = re.sub(r'^\d+\.\s+', '', new_item_text)
new_inner = convert_inline(new_item_text)

# 블록 레벨 lost_info 적용
if mapping_lost_info:
block_lost = mapping_lost_info.get(mapping.block_id, {})
if block_lost:
new_inner = apply_lost_info(new_inner, block_lost)

patches.append({
'xhtml_xpath': mapping.xhtml_xpath,
'old_plain_text': mapping.xhtml_plain_text,
'new_inner_xhtml': new_inner,
})

return patches
return []
94 changes: 23 additions & 71 deletions confluence-mdx/bin/reverse_sync/patch_builder.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
"""패치 빌더 — MDX diff 변경과 XHTML 매핑을 결합하여 XHTML 패치를 생성."""
import re
from typing import Dict, List, Optional

from reverse_sync.block_diff import BlockChange, NON_CONTENT_TYPES
from reverse_sync.mapping_recorder import BlockMapping
from mdx_to_storage.parser import Block as MdxBlock
from text_utils import (
normalize_mdx_to_plain, collapse_ws,
strip_for_compare,
)
from reverse_sync.text_transfer import transfer_text_changes
from reverse_sync.sidecar import find_mapping_by_sidecar, SidecarEntry
from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings
from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_xhtml_element, mdx_block_to_inner_xhtml
from reverse_sync.list_patcher import (
build_list_item_patches,
_resolve_child_mapping,
)
from reverse_sync.table_patcher import (
build_table_row_patches,
Expand All @@ -25,41 +22,6 @@
)


_BLOCK_MARKER_RE = re.compile(r'#{1,6}|\d+\.')


def _strip_block_markers(text: str) -> str:
"""containment 비교를 위해 heading/list 마커를 제거한다."""
return _BLOCK_MARKER_RE.sub('', text)


def _find_containing_mapping(
    old_plain: str,
    mappings: List[BlockMapping],
    used_ids: set,
) -> Optional[BlockMapping]:
    """Find an XHTML mapping whose text contains ``old_plain`` (sidecar fallback).

    Two passes are made over the mappings whose ``block_id`` is not in
    ``used_ids``; the first mapping that matches is returned:

    1. the ``strip_for_compare`` form of ``old_plain`` is a substring of the
       mapping's ``strip_for_compare`` text;
    2. the same test after heading / list markers are removed from both
       sides with ``_strip_block_markers``.

    A pass is skipped when its needle is empty, because an empty string is a
    substring of every string and would otherwise select the first unused
    mapping regardless of content.

    Args:
        old_plain: Plain text of the old MDX block.
        mappings: Candidate mappings, in priority order.
        used_ids: Block ids that have already been consumed and must be skipped.

    Returns:
        The first containing mapping, or ``None`` when ``old_plain`` is shorter
        than 5 characters after ``collapse_ws`` or nothing contains it.
    """
    old_norm = collapse_ws(old_plain)
    # Very short text would match almost anything; refuse to guess.
    if len(old_norm) < 5:
        return None

    old_nospace = strip_for_compare(old_norm)
    if not old_nospace:
        return None

    # Pass 1: direct containment. Keep each unused mapping's comparison text
    # so the fallback pass does not have to recompute it.
    unused = []
    for m in mappings:
        if m.block_id in used_ids:
            continue
        m_nospace = strip_for_compare(m.xhtml_plain_text)
        if m_nospace and old_nospace in m_nospace:
            return m
        unused.append((m, m_nospace))

    # Pass 2 (fallback): retry with heading / list markers removed.
    old_stripped = _strip_block_markers(old_nospace)
    if not old_stripped:
        return None
    for m, m_nospace in unused:
        m_stripped = _strip_block_markers(m_nospace)
        if m_stripped and old_stripped in m_stripped:
            return m
    return None


def _flush_containing_changes(
containing_changes: dict,
used_ids: 'set | None' = None,
Expand Down Expand Up @@ -92,7 +54,6 @@ def _resolve_mapping_for_change(
used_ids: set,
mdx_to_sidecar: Dict[int, SidecarEntry],
xpath_to_mapping: Dict[str, 'BlockMapping'],
id_to_mapping: Dict[str, BlockMapping],
) -> tuple:
"""변경에 대한 매핑과 처리 전략을 결정한다.

Expand All @@ -106,44 +67,23 @@ def _resolve_mapping_for_change(
mapping = find_mapping_by_sidecar(
change.index, mdx_to_sidecar, xpath_to_mapping)

# Parent mapping → child 해석 시도
if mapping is not None and mapping.children:
child = _resolve_child_mapping(old_plain, mapping, id_to_mapping)
if child is not None:
# callout 블록은 direct 전략 시 _convert_callout_inner가
# <li><p> 구조를 생성할 수 없으므로 containing 전략 사용
if change.old_block.type == 'callout':
return ('containing', mapping)
return ('direct', child)
# 블록 텍스트가 parent에 포함되는지 확인
_old_ns = strip_for_compare(old_plain)
_map_ns = strip_for_compare(mapping.xhtml_plain_text)
if _old_ns and _map_ns and _old_ns not in _map_ns:
if change.old_block.type == 'list':
return ('list', mapping)
return ('containing', mapping)

if mapping is None:
# 폴백: 텍스트 포함 검색으로 containing mapping 찾기
containing = _find_containing_mapping(old_plain, mappings, used_ids)
if containing is not None:
return ('containing', containing)
if change.old_block.type == 'list':
return ('list', None)
if is_markdown_table(change.old_block.content):
return ('table', None)
return ('skip', None)

# 매핑 텍스트에 old_plain이 포함되지 않으면 더 나은 매핑 찾기
if not mapping.children:
old_nospace = strip_for_compare(old_plain)
map_nospace = strip_for_compare(mapping.xhtml_plain_text)
if old_nospace and map_nospace and old_nospace not in map_nospace:
better = _find_containing_mapping(old_plain, mappings, used_ids)
if better is not None:
return ('containing', better)
if change.old_block.type == 'list':
return ('list', mapping)
# callout 블록은 항상 containing 전략 사용
# (_convert_callout_inner가 <li><p> 구조를 생성할 수 없으므로)
if change.old_block.type == 'callout':
return ('containing', mapping)

# Parent mapping이 children을 가지면 containing 전략으로 위임
if mapping.children:
if change.old_block.type == 'list':
return ('list', mapping)
return ('containing', mapping)

# list 블록은 list 전략 사용 (direct 교체 시 <ac:image> 등 Confluence 태그 손실 방지)
if change.old_block.type == 'list':
Expand Down Expand Up @@ -257,7 +197,7 @@ def _mark_used(block_id: str, m: BlockMapping):

strategy, mapping = _resolve_mapping_for_change(
change, old_plain, mappings, used_ids,
mdx_to_sidecar, xpath_to_mapping, id_to_mapping)
mdx_to_sidecar, xpath_to_mapping)

if strategy == 'skip':
continue
Expand Down Expand Up @@ -290,6 +230,18 @@ def _mark_used(block_id: str, m: BlockMapping):
# strategy == 'direct'
_mark_used(mapping.block_id, mapping)

# 헤딩의 경우: 레벨과 텍스트가 동일하면 XHTML을 변경하지 않음
# (### foo → ### foo 같은 prefix 공백 차이만 있는 변경은 XHTML에 전파하지 않음)
if change.old_block.type == 'heading':
old_content = change.old_block.content.strip()
new_content = change.new_block.content.strip()
old_level = len(old_content) - len(old_content.lstrip('#'))
new_level = len(new_content) - len(new_content.lstrip('#'))
old_text = old_content.lstrip('#').strip()
new_text = new_content.lstrip('#').strip()
if old_level == new_level and old_text == new_text:
continue

# 멱등성 체크: push 후 XHTML이 이미 업데이트된 경우 건너뜀
# (old != xhtml 이고 new == xhtml → 이미 적용된 변경)
if (collapse_ws(old_plain) != collapse_ws(mapping.xhtml_plain_text)
Expand Down
21 changes: 15 additions & 6 deletions confluence-mdx/bin/reverse_sync/roundtrip_verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,29 @@ def _normalize_trailing_blank_lines(text: str) -> str:
return stripped + '\n' if stripped else text


def _apply_minimal_normalizations(text: str) -> str:
"""항상 적용하는 최소 정규화 (strict/lenient 모드 공통).
def _normalize_blank_line_after_blockquote(text: str) -> str:
"""blockquote(>) 줄 바로 다음의 단일 빈 줄을 제거한다.

Forward converter가 blockquote 다음에 빈 줄을 추가하는 경우가 있으므로,
비교 시 제거한다.
"""
return re.sub(r'^(>[ \t]*.+)\n\n', r'\1\n', text, flags=re.MULTILINE)

forward converter의 체계적 출력 특성에 의한 차이만 처리한다:
- 인라인 이중 공백 → 단일 공백 (_normalize_consecutive_spaces_in_text)
- <br/> 앞 공백 제거 (_normalize_br_space)

lenient 모드에서는 이 정규화 이후 _apply_normalizations가 추가로 적용된다.

def _apply_minimal_normalizations(text: str) -> str:
    """Apply the minimal normalizations that are always performed.

    Only differences that stem from the forward converter's output
    characteristics are handled here.

    Trailing whitespace is not handled here. In strict mode a trailing
    whitespace difference must cause a failure, so that normalization is done
    only in ``_apply_normalizations`` (lenient mode only).

    Args:
        text: Text to normalize.

    Returns:
        The text after every step below has been applied in order.
    """
    # Order matters: each step receives the output of the previous one.
    steps = (
        _normalize_consecutive_spaces_in_text,
        _normalize_br_space,
        _normalize_table_cell_padding,
        _strip_first_heading,
        lambda s: s.lstrip('\n'),
        _normalize_blank_line_after_blockquote,
        _normalize_trailing_blank_lines,
    )
    for step in steps:
        text = step(text)
    return text

Expand Down
Loading
Loading