{escape(child.content)}")
+ case "html_inline":
+ parts.append(str(escape(child.content)))
+ return "".join(parts)
+
+
+def render_inline_text(children: list[SyntaxTreeNode]) -> str:
+ """Render inline AST nodes to plain text (links become their text)."""
+ parts: list[str] = []
+ for child in children:
+ match child.type:
+ case "text":
+ parts.append(child.content)
+ case "softbreak":
+ parts.append(" ")
+ case "code_inline":
+ parts.append(child.content)
+ case "em" | "strong" | "link":
+ parts.append(render_inline_text(child.children))
+ return "".join(parts)
+
+
+# --- AST helpers -------------------------------------------------------------
+
+
+def _heading_text(node: SyntaxTreeNode) -> str:
+ """Extract plain text from a heading node."""
+ for child in node.children:
+ if child.type == "inline":
+ return render_inline_text(child.children)
+ return ""
+
+
+def _extract_description(nodes: list[SyntaxTreeNode]) -> str:
+ """Extract description from the first paragraph if it's a single block.
+
+ Pattern: _Libraries for foo._ -> "Libraries for foo."
+ """
+ if not nodes:
+ return ""
+ first = nodes[0]
+ if first.type != "paragraph":
+ return ""
+ for child in first.children:
+ if child.type == "inline" and len(child.children) == 1:
+ em = child.children[0]
+ if em.type == "em":
+ return render_inline_text(em.children)
+ return ""
+
+
+# --- Entry extraction --------------------------------------------------------
+
# Leading separator between an entry's link and its description: optional
# whitespace, one hyphen, en dash (U+2013) or em dash (U+2014), then optional
# whitespace, anchored at the start of the string.
_DESC_SEP_RE = re.compile(r"^\s*[-\u2013\u2014]\s*")
+
+
+def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
+ """Find first direct child of a given type."""
+ for child in node.children:
+ if child.type == child_type:
+ return child
+ return None
+
+
def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
    """Return the inline node of a list_item's first paragraph, or None if either is missing."""
    paragraph = _find_child(node, "paragraph")
    return None if paragraph is None else _find_child(paragraph, "inline")
+
+
+def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None:
+ """Find the first link node among inline children."""
+ for child in inline.children:
+ if child.type == "link":
+ return child
+ return None
+
+
+def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool:
+ """Check if the link is the first child of inline (a real entry, not a subcategory label)."""
+ return bool(inline.children) and inline.children[0] is link
+
+
+def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str:
+ """Extract description HTML from inline content after the first link.
+
+ AST: [link("name"), text(" - Description.")] -> "Description."
+ The separator (- / en-dash / em-dash) is stripped.
+ """
+ link_idx = next((i for i, c in enumerate(inline.children) if c is first_link), None)
+ if link_idx is None:
+ return ""
+ desc_children = inline.children[link_idx + 1 :]
+ if not desc_children:
+ return ""
+ html = render_inline_html(desc_children)
+ return _DESC_SEP_RE.sub("", html)
+
+
+def _parse_list_entries(bullet_list: SyntaxTreeNode) -> list[ParsedEntry]:
+ """Extract entries from a bullet_list AST node.
+
+ Handles three patterns:
+ - Text-only list_item -> subcategory label -> recurse into nested list
+ - Link list_item with nested link-only items -> entry with also_see
+ - Link list_item without nesting -> simple entry
+ """
+ entries: list[ParsedEntry] = []
+
+ for list_item in bullet_list.children:
+ if list_item.type != "list_item":
+ continue
+
+ inline = _find_inline(list_item)
+ if inline is None:
+ continue
+
+ first_link = _find_first_link(inline)
+
+ if first_link is None or not _is_leading_link(inline, first_link):
+ # Subcategory label (plain text or text-before-link) — recurse into nested list
+ nested = _find_child(list_item, "bullet_list")
+ if nested:
+ entries.extend(_parse_list_entries(nested))
+ continue
+
+ # Entry with a link
+ name = render_inline_text(first_link.children)
+ url = first_link.attrGet("href") or ""
+ desc_html = _extract_description_html(inline, first_link)
+
+ # Collect also_see from nested bullet_list
+ also_see: list[AlsoSee] = []
+ nested = _find_child(list_item, "bullet_list")
+ if nested:
+ for sub_item in nested.children:
+ if sub_item.type != "list_item":
+ continue
+ sub_inline = _find_inline(sub_item)
+ if sub_inline:
+ sub_link = _find_first_link(sub_inline)
+ if sub_link:
+ also_see.append(AlsoSee(
+ name=render_inline_text(sub_link.children),
+ url=sub_link.attrGet("href") or "",
+ ))
+
+ entries.append(ParsedEntry(
+ name=name,
+ url=url,
+ description=desc_html,
+ also_see=also_see,
+ ))
+
+ return entries
+
+
+def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEntry]:
+ """Extract all entries from a section's content nodes."""
+ entries: list[ParsedEntry] = []
+ for node in content_nodes:
+ if node.type == "bullet_list":
+ entries.extend(_parse_list_entries(node))
+ return entries
+
+
+# --- Content HTML rendering --------------------------------------------------
+
+
+def _render_bullet_list_html(
+ bullet_list: SyntaxTreeNode,
+ *,
+ is_sub: bool = False,
+) -> str:
+ """Render a bullet_list node to HTML with entry/entry-sub/subcat classes."""
+ out: list[str] = []
+
+ for list_item in bullet_list.children:
+ if list_item.type != "list_item":
+ continue
+
+ inline = _find_inline(list_item)
+ if inline is None:
+ continue
+
+ first_link = _find_first_link(inline)
+
+ if first_link is None or not _is_leading_link(inline, first_link):
+ # Subcategory label (plain text or text-before-link)
+ label = str(escape(render_inline_text(inline.children)))
+ out.append(f'
+ {{ subtitle }}
Maintained by
+ @vinta
+ and
+ @JinyangWang27.
+
| # | +Project Name | +GitHub Stars | +Last Commit | +Category | +Details | +
|---|---|---|---|---|---|
| {{ loop.index }} | ++ {{ entry.name }} + | ++ {% if entry.stars is not none %}{{ "{:,}".format(entry.stars) }}{% + else %}—{% endif %} + | ++ {% if entry.last_commit_at %}{% else %}—{% endif %} + | ++ {% for cat in entry.categories %} + + {% endfor %} + + | +→ | +
| + | + + | ++ | |||
some code" in render_inline_html(children)
+
+ def test_mixed_link_and_text(self):
+ children = _parse_inline("See [foo](https://x.com) for details.")
+ html = render_inline_html(children)
+ assert "See " in html
+ assert ">foo" in html
+ assert " for details." in html
+
+
class TestRenderInlineText:
    """Plain-text rendering of parsed inline markdown."""

    def test_plain_text(self):
        assert render_inline_text(_parse_inline("Hello world")) == "Hello world"

    def test_link_becomes_text(self):
        nodes = _parse_inline("See [awesome-algos](https://github.com/x/y).")
        rendered = render_inline_text(nodes)
        assert rendered == "See awesome-algos."

    def test_emphasis_stripped(self):
        assert render_inline_text(_parse_inline("*italic* text")) == "italic text"

    def test_code_inline_kept(self):
        assert render_inline_text(_parse_inline("`code` here")) == "code here"
+
+
# Fixture README without group markers: an intro, a thematic break, two
# categories (Alpha, Beta) each with an italic description, a "Resources"
# section with two sub-sections, and a closing "Contributing" section.
MINIMAL_README = textwrap.dedent("""\
    # Awesome Python

    Some intro text.

    ---

    ## Alpha

    _Libraries for alpha stuff._

    - [lib-a](https://example.com/a) - Does A.
    - [lib-b](https://example.com/b) - Does B.

    ## Beta

    _Tools for beta._

    - [lib-c](https://example.com/c) - Does C.

    # Resources

    Where to discover resources.

    ## Newsletters

    - [News One](https://example.com/n1)
    - [News Two](https://example.com/n2)

    ## Podcasts

    - [Pod One](https://example.com/p1)

    # Contributing

    Please contribute!
""")
+
+
# Fixture README with bold-only group marker paragraphs: "Group One" precedes
# the Alpha category, "Group Two" precedes Beta and Gamma; followed by a
# "Resources" section with one sub-section and a "Contributing" section.
GROUPED_README = textwrap.dedent("""\
    # Awesome Python

    Some intro text.

    ---

    **Group One**

    ## Alpha

    _Libraries for alpha stuff._

    - [lib-a](https://example.com/a) - Does A.
    - [lib-b](https://example.com/b) - Does B.

    **Group Two**

    ## Beta

    _Tools for beta._

    - [lib-c](https://example.com/c) - Does C.

    ## Gamma

    - [lib-d](https://example.com/d) - Does D.

    # Resources

    Where to discover resources.

    ## Newsletters

    - [News One](https://example.com/n1)

    # Contributing

    Please contribute!
""")
+
+
class TestParseReadmeSections:
    """parse_readme on READMEs that contain no group markers."""

    def test_ungrouped_categories_go_to_other(self):
        parsed_groups, _ = parse_readme(MINIMAL_README)
        assert len(parsed_groups) == 1
        only = parsed_groups[0]
        assert only["name"] == "Other"
        assert len(only["categories"]) == 2

    def test_ungrouped_category_names(self):
        parsed_groups, _ = parse_readme(MINIMAL_README)
        names = [cat["name"] for cat in parsed_groups[0]["categories"]]
        assert names[:2] == ["Alpha", "Beta"]

    def test_resource_count(self):
        _, parsed_resources = parse_readme(MINIMAL_README)
        assert len(parsed_resources) == 2

    def test_category_slugs(self):
        parsed_groups, _ = parse_readme(MINIMAL_README)
        slugs = [cat["slug"] for cat in parsed_groups[0]["categories"]]
        assert slugs[:2] == ["alpha", "beta"]

    def test_category_description(self):
        parsed_groups, _ = parse_readme(MINIMAL_README)
        descriptions = [cat["description"] for cat in parsed_groups[0]["categories"]]
        assert descriptions[:2] == ["Libraries for alpha stuff.", "Tools for beta."]

    def test_resource_names(self):
        _, parsed_resources = parse_readme(MINIMAL_README)
        names = [res["name"] for res in parsed_resources]
        assert names[:2] == ["Newsletters", "Podcasts"]

    def test_contributing_skipped(self):
        parsed_groups, parsed_resources = parse_readme(MINIMAL_README)
        category_names = [
            cat["name"] for group in parsed_groups for cat in group["categories"]
        ]
        resource_names = [res["name"] for res in parsed_resources]
        assert "Contributing" not in category_names + resource_names

    def test_no_separator(self):
        parsed_groups, parsed_resources = parse_readme("# Just a heading\n\nSome text.\n")
        assert parsed_groups == []
        assert parsed_resources == []

    def test_no_description(self):
        source = textwrap.dedent("""\
            # Title

            ---

            ## NullDesc

            - [item](https://x.com) - Thing.

            # Resources

            ## Tips

            - [tip](https://x.com)

            # Contributing

            Done.
        """)
        parsed_groups, _ = parse_readme(source)
        first_cat = parsed_groups[0]["categories"][0]
        assert first_cat["description"] == ""
        assert first_cat["entries"][0]["name"] == "item"

    def test_description_with_link_stripped(self):
        source = textwrap.dedent("""\
            # T

            ---

            ## Algos

            _Algorithms. Also see [awesome-algos](https://example.com)._

            - [lib](https://x.com) - Lib.

            # Contributing

            Done.
        """)
        parsed_groups, _ = parse_readme(source)
        first_cat = parsed_groups[0]["categories"][0]
        assert first_cat["description"] == "Algorithms. Also see awesome-algos."
+
+
class TestParseGroupedReadme:
    """parse_readme on READMEs that use bold paragraphs as group markers."""

    def test_group_count(self):
        parsed_groups, _ = parse_readme(GROUPED_README)
        assert len(parsed_groups) == 2

    def test_group_names(self):
        parsed_groups, _ = parse_readme(GROUPED_README)
        names = [group["name"] for group in parsed_groups]
        assert names[:2] == ["Group One", "Group Two"]

    def test_group_slugs(self):
        parsed_groups, _ = parse_readme(GROUPED_README)
        slugs = [group["slug"] for group in parsed_groups]
        assert slugs[:2] == ["group-one", "group-two"]

    def test_group_one_has_one_category(self):
        parsed_groups, _ = parse_readme(GROUPED_README)
        first_group_cats = parsed_groups[0]["categories"]
        assert len(first_group_cats) == 1
        assert first_group_cats[0]["name"] == "Alpha"

    def test_group_two_has_two_categories(self):
        parsed_groups, _ = parse_readme(GROUPED_README)
        second_group_cats = parsed_groups[1]["categories"]
        assert len(second_group_cats) == 2
        assert [cat["name"] for cat in second_group_cats] == ["Beta", "Gamma"]

    def test_resources_still_parsed(self):
        _, parsed_resources = parse_readme(GROUPED_README)
        assert len(parsed_resources) == 1
        assert parsed_resources[0]["name"] == "Newsletters"

    def test_empty_group_skipped(self):
        source = textwrap.dedent("""\
            # T

            ---

            **Empty**

            **HasCats**

            ## Cat

            - [x](https://x.com) - X.

            # Contributing

            Done.
        """)
        parsed_groups, _ = parse_readme(source)
        assert len(parsed_groups) == 1
        assert parsed_groups[0]["name"] == "HasCats"

    def test_bold_with_extra_text_not_group_marker(self):
        source = textwrap.dedent("""\
            # T

            ---

            **Note:** This is not a group marker.

            ## Cat

            - [x](https://x.com) - X.

            # Contributing

            Done.
        """)
        parsed_groups, _ = parse_readme(source)
        # "Note:" has text after the strong node, so it's not a group marker;
        # the category therefore lands in "Other".
        assert len(parsed_groups) == 1
        assert parsed_groups[0]["name"] == "Other"

    def test_categories_before_any_group_marker(self):
        source = textwrap.dedent("""\
            # T

            ---

            ## Orphan

            - [x](https://x.com) - X.

            **A Group**

            ## Grouped

            - [y](https://x.com) - Y.

            # Contributing

            Done.
        """)
        parsed_groups, _ = parse_readme(source)
        assert len(parsed_groups) == 2
        leading, marked = parsed_groups
        assert leading["name"] == "Other"
        assert leading["categories"][0]["name"] == "Orphan"
        assert marked["name"] == "A Group"
        assert marked["categories"][0]["name"] == "Grouped"
+
+
def _content_nodes(md_text: str) -> list[SyntaxTreeNode]:
    """Helper: parse markdown with the "commonmark" preset and return the root's child nodes."""
    tokens = MarkdownIt("commonmark").parse(md_text)
    return SyntaxTreeNode(tokens).children
+
+
+class TestParseSectionEntries:
+ def test_flat_entries(self):
+ nodes = _content_nodes(
+ "- [django](https://example.com/d) - A web framework.\n"
+ "- [flask](https://example.com/f) - A micro framework.\n"
+ )
+ entries = _parse_section_entries(nodes)
+ assert len(entries) == 2
+ assert entries[0]["name"] == "django"
+ assert entries[0]["url"] == "https://example.com/d"
+ assert "web framework" in entries[0]["description"]
+ assert entries[0]["also_see"] == []
+ assert entries[1]["name"] == "flask"
+
+ def test_link_only_entry(self):
+ nodes = _content_nodes("- [tool](https://x.com)\n")
+ entries = _parse_section_entries(nodes)
+ assert len(entries) == 1
+ assert entries[0]["name"] == "tool"
+ assert entries[0]["description"] == ""
+
+ def test_subcategorized_entries(self):
+ nodes = _content_nodes(
+ "- Algorithms\n"
+ " - [algos](https://x.com/a) - Algo lib.\n"
+ " - [sorts](https://x.com/s) - Sort lib.\n"
+ "- Design Patterns\n"
+ " - [patterns](https://x.com/p) - Pattern lib.\n"
+ )
+ entries = _parse_section_entries(nodes)
+ assert len(entries) == 3
+ assert entries[0]["name"] == "algos"
+ assert entries[2]["name"] == "patterns"
+
+ def test_text_before_link_is_subcategory(self):
+ nodes = _content_nodes(
+ "- MySQL - [awesome-mysql](http://example.com/awesome-mysql/)\n"
+ " - [mysqlclient](https://example.com/mysqlclient) - MySQL connector.\n"
+ " - [pymysql](https://example.com/pymysql) - Pure Python MySQL driver.\n"
+ )
+ entries = _parse_section_entries(nodes)
+ # awesome-mysql is a subcategory label, not an entry
+ assert len(entries) == 2
+ names = [e["name"] for e in entries]
+ assert "awesome-mysql" not in names
+ assert "mysqlclient" in names
+ assert "pymysql" in names
+
+ def test_also_see_sub_entries(self):
+ nodes = _content_nodes(
+ "- [asyncio](https://docs.python.org/3/library/asyncio.html) - Async I/O.\n"
+ " - [awesome-asyncio](https://github.com/timofurrer/awesome-asyncio)\n"
+ "- [trio](https://github.com/python-trio/trio) - Friendly async.\n"
+ )
+ entries = _parse_section_entries(nodes)
+ assert len(entries) == 2
+ assert entries[0]["name"] == "asyncio"
+ assert len(entries[0]["also_see"]) == 1
+ assert entries[0]["also_see"][0]["name"] == "awesome-asyncio"
+ assert entries[1]["name"] == "trio"
+ assert entries[1]["also_see"] == []
+
+ def test_entry_count_includes_also_see(self):
+ readme = textwrap.dedent("""\
+ # T
+
+ ---
+
+ ## Async
+
+ - [asyncio](https://x.com) - Async I/O.
+ - [awesome-asyncio](https://y.com)
+ - [trio](https://z.com) - Friendly async.
+
+ # Contributing
+
+ Done.
+ """)
+ groups, _ = parse_readme(readme)
+ cats = groups[0]["categories"]
+ # 2 main entries + 1 also_see = 3
+ assert cats[0]["entry_count"] == 3
+
+ def test_preview_first_four_names(self):
+ readme = textwrap.dedent("""\
+ # T
+
+ ---
+
+ ## Libs
+
+ - [alpha](https://x.com) - A.
+ - [beta](https://x.com) - B.
+ - [gamma](https://x.com) - C.
+ - [delta](https://x.com) - D.
+ - [epsilon](https://x.com) - E.
+
+ # Contributing
+
+ Done.
+ """)
+ groups, _ = parse_readme(readme)
+ cats = groups[0]["categories"]
+ assert cats[0]["preview"] == "alpha, beta, gamma, delta"
+
+ def test_description_html_escapes_xss(self):
+ nodes = _content_nodes('- [lib](https://x.com) - A lib.\n')
+ entries = _parse_section_entries(nodes)
+ assert "\n")
+ html = _render_section_html(nodes)
+ assert "