jsvine · AbdullahMehmoodAwan · Oct 26, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format
 
+## [Unreleased]
+
+### Added
+- Add `PDF.table_of_contents` and `Page.table_of_contents` properties, which expose the document's outline (bookmarks) directly through pdfplumber.
+  This gives easy access to a document's Table of Contents for navigation or metadata extraction (h/t @AbdullahMehmoodAwan).
+  ([#1034](https://github.com/jsvine/pdfplumber/issues/1034))
+
 ## [0.11.7] - 2025-06-12
 
 ### Added
 - Add access to `Page.trimbox`, `Page.bleedbox`, and `Page.artbox` (h/t @samuelbradshaw). ([#1313](https://github.com/jsvine/pdfplumber/issues/1313) + [7e364e6](https://github.com/jsvine/pdfplumber/commit/7e364e6193c6e8bafa9b46587c0fdd4a46405399))
 

diff --git a/pdfplumber/page.py b/pdfplumber/page.py
@@ -251,6 +251,15 @@ def structure_tree(self) -> List[Dict[str, Any]]:
             return [elem.to_dict() for elem in PDFStructTree(self.pdf, self)]
         except StructTreeMissing:
             return []
+
+    @property
+    def table_of_contents(self) -> List[Dict[str, Any]]:
+        """
+        Return the document-level Table of Contents.
+
+        Delegates to `self.pdf.table_of_contents`; the result is not page-specific.
+        """
+        return self.pdf.table_of_contents
 
     @property
     def layout(self) -> LTPage:

diff --git a/pdfplumber/pdf.py b/pdfplumber/pdf.py
@@ -203,3 +203,31 @@ def to_dict(self, object_types: Optional[List[str]] = None) -> Dict[str, Any]:
             "metadata": self.metadata,
             "pages": [page.to_dict(object_types) for page in self.pages],
         }
+
+    @property
+    def table_of_contents(self) -> List[Dict[str, Any]]:
+        """
+        Return the document's outline (bookmarks / Table of Contents).
+
+        Each entry is a dictionary of the form
+        {"title": str, "page_number": int or None, "level": int}, in the order
+        yielded by `self.doc.get_outlines()`.
+
+        `page_number` is the `page_number` of the page in `self.pages` that the
+        entry points to. It is None when the entry has no "go to" destination,
+        when the destination cannot be resolved, or when the target page is
+        not among `self.pages`.
+
+        Extraction is best-effort: a document without an outline yields an
+        empty list, and an unexpected failure is logged at debug level, with
+        the entries collected up to that point being returned.
+        """
+        # Imported here so the property only relies on the pdfminer package
+        # being installed, not on particular module-level imports.
+        from pdfminer.pdfdocument import PDFNoOutlines
+        from pdfminer.pdftypes import PDFObjRef, resolve1
+        from pdfminer.psparser import LIT, PSException, PSLiteral
+
+        def resolve_page_number(
+            dest: Any, action: Any, page_numbers: Dict[int, int]
+        ) -> Optional[int]:
+            # Map one outline entry's destination/action to a page number.
+            target = resolve1(dest)
+            if target is None:
+                # No direct destination: fall back to the "D" entry of a
+                # GoTo action, if that is what the entry carries.
+                action = resolve1(action)
+                if isinstance(action, dict):
+                    if resolve1(action.get("S")) is LIT("GoTo"):
+                        target = resolve1(action.get("D"))
+            # Named destinations (string or name object) must be looked up
+            # in the document's destination tables first.
+            if isinstance(target, (str, bytes)):
+                target = resolve1(self.doc.get_dest(target))
+            elif isinstance(target, PSLiteral):
+                target = resolve1(self.doc.get_dest(target.name))
+            # A destination dictionary wraps the explicit array under "D".
+            if isinstance(target, dict):
+                target = resolve1(target.get("D"))
+            # Explicit destination array: its first item references the page.
+            if isinstance(target, list) and target:
+                page_ref = target[0]
+                if isinstance(page_ref, PDFObjRef):
+                    return page_numbers.get(page_ref.objid)
+            return None
+
+        outlines: List[Dict[str, Any]] = []
+        try:
+            entries = self.doc.get_outlines()
+            # Page object id -> page number, for the pages this PDF exposes.
+            page_numbers = {
+                page.page_obj.pageid: page.page_number for page in self.pages
+            }
+            for level, title, dest, action, _se in entries:
+                try:
+                    page_number = resolve_page_number(dest, action, page_numbers)
+                except (PSException, KeyError, TypeError) as e:
+                    # Keep the entry even if its target cannot be resolved.
+                    logger.debug(
+                        f"Unable to resolve outline destination for {title!r}: {e}"
+                    )
+                    page_number = None
+                outlines.append(
+                    {
+                        "title": title,
+                        "page_number": page_number,
+                        "level": level,
+                    }
+                )
+        except PDFNoOutlines:
+            # The document simply has no outline; that is not an error.
+            pass
+        except Exception as e:
+            # Deliberately broad: malformed outline trees must not break
+            # callers that only want whatever outline data is available.
+            logger.debug(f"Unable to extract outlines: {e}")
+        return outlines
+
+
diff --git a/tests/pdfs/toc-sample.pdf b/tests/pdfs/toc-sample.pdf
diff --git a/tests/test_table_of_contents.py b/tests/test_table_of_contents.py
@@ -0,0 +1,24 @@
+"""
+Test for PDF.table_of_contents and Page.table_of_contents properties
+"""
+import os
+
+import pdfplumber
+
+
+def test_table_of_contents_property():
+    """Check the shape of `PDF.table_of_contents` and its per-page alias."""
+    # Resolve the fixture relative to this file so the test does not depend
+    # on the directory pytest is launched from.
+    here = os.path.abspath(os.path.dirname(__file__))
+    sample_pdf_path = os.path.join(here, "pdfs", "toc-sample.pdf")
+
+    with pdfplumber.open(sample_pdf_path) as pdf:
+        toc = pdf.table_of_contents
+
+        # 1. The property returns a list. The fixture is presumed to contain
+        #    an outline (TODO confirm); without this check an empty list
+        #    would make every per-entry assertion below pass vacuously.
+        assert isinstance(toc, list)
+        assert toc
+
+        # 2. Every entry carries the documented keys with sensible types.
+        for entry in toc:
+            assert {"title", "level", "page_number"} <= set(entry)
+            assert isinstance(entry["title"], str)
+            assert isinstance(entry["level"], int)
+            page_number = entry["page_number"]
+            assert page_number is None or isinstance(page_number, int)
+
+        # 3. Page.table_of_contents matches PDF.table_of_contents.
+        assert toc == pdf.pages[0].table_of_contents