From 7b93822d773d062dd6205acca2756c66b0c77f6a Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 26 Mar 2026 17:23:07 +0000 Subject: [PATCH 1/3] feat(helm): Create Opensearch E2E tutorial for cogstack CE --- .../examples/medcat-opensearch-e2e.ipynb | 363 ++++++++++++++++++ .../examples/medcat-service-tutorial.ipynb | 6 +- .../provisioning/base_index_settings.json | 4 +- .../provisioning/dashboards.ndjson | 3 +- .../generate_synthetic_bulk_ndjson.py | 257 ++++++++++++- 5 files changed, 616 insertions(+), 17 deletions(-) create mode 100644 helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb new file mode 100644 index 0000000..8c337cd --- /dev/null +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a7844791", + "metadata": {}, + "source": [ + "# MedCAT + OpenSearch End-to-End Demo\n", + "\n", + "This is a short, practical walkthrough showing how to turn one clinical note into searchable concepts.\n", + "\n", + "It is an end to end example of how you can use CogStack to unlock the power of your healthcare data.\n", + "\n", + "## Who this is for\n", + "This is for developers, data engineers, and analysts who want to see a practical example of how CogStack, MedCAT and Opensearch can be integrated to let you perform advanced search on your notes. \n", + "\n", + "## What this notebook does\n", + "1. Index one sample note into `discharge`\n", + "2. Search that note back using free text\n", + "3. Search that note back even when we have typos\n", + "4. Perform Named Entity Resolution (NER) by calling MedCAT Service and index them\n", + "5. 
Search notes by concept\n", + "\n", + "The goal is to show that this process is straightforward: call one API, index results, and query them." + ] + }, + { + "cell_type": "markdown", + "id": "875a64db", + "metadata": {}, + "source": [ + "# Initialisation: Define the inputs and services\n", + "\n", + "### Input Data\n", + "We define a short input for this tutorial. This represents your free text patient data, for example a discharge summary or long doctor's note.\n", + "\n", + "The sample sentence contains concepts that the example demo packs used by medcat service have been trained for. \n", + "\n", + "### Service definitions\n", + "We will set up a client for OpenSearch, and define the HTTP endpoint for medcat service.\n", + "\n", + "If using the cogstack community edition helm chart, these should all be set up for you automatically using kubernetes services and env vars. Otherwise change these accordingly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17deaa5c", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from datetime import datetime, timezone\n", + "from urllib.parse import urlparse\n", + "\n", + "import pandas as pd\n", + "import requests\n", + "import urllib3\n", + "from IPython.display import display\n", + "from opensearchpy import OpenSearch\n", + "\n", + "# The sample note that we will work with, and the synthetic patient id it belongs to\n", + "sample_text = \"John was diagnosed with Kidney Failure\"\n", + "subject_id = 99999001\n", + "# Service URLs from environment variables\n", + "medcat_base_url = os.getenv(\"MEDCAT_URL\", \"http://cogstack-medcat-service:5000\").rstrip(\"/\")\n", + "medcat_url = medcat_base_url + \"/api/process\"\n", + "\n", + "opensearch_url = os.getenv(\"OPENSEARCH_URL\", \"https://opensearch-cluster-master:9200\")\n", + "opensearch_username = os.getenv(\"OPENSEARCH_USERNAME\", \"admin\")\n", + "opensearch_password = os.getenv(\"OPENSEARCH_PASSWORD\", \"admin\")\n", + "\n", + "parsed = urlparse(opensearch_url)\n", + "\n", + 
"urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)\n", + "\n", + "host_cfg = {\n", + " \"host\": parsed.hostname,\n", + " \"port\": parsed.port or (443 if parsed.scheme == \"https\" else 80),\n", + "}\n", + "if parsed.path and parsed.path != \"/\":\n", + " host_cfg[\"url_prefix\"] = parsed.path.lstrip(\"/\")\n", + "\n", + "client = OpenSearch(\n", + " hosts=[host_cfg],\n", + " http_auth=(opensearch_username, opensearch_password),\n", + " use_ssl=(parsed.scheme == \"https\"),\n", + " verify_certs=False,\n", + ")\n", + "\n", + "# Hardcoded demo indices\n", + "discharge_index = \"discharge\"\n", + "annotations_index = \"discharge_annotations\"\n", + "\n", + "# Static demo note id used across all steps\n", + "note_id = \"demo-note-kidney-failure-001\"" + ] + }, + { + "cell_type": "markdown", + "id": "19340174", + "metadata": {}, + "source": [ + "## 1) Index the note into OpenSearch\n", + "\n", + "We write the note into `discharge`, then immediately run a free-text query (`kidney failure`) to prove it is searchable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3eb85731", + "metadata": {}, + "outputs": [], + "source": [ + "note_doc = {\n", + " \"note_id\": note_id,\n", + " \"subject_id\": subject_id,\n", + " \"text\": sample_text,\n", + " \"storetime\": datetime.now(timezone.utc).strftime(\"%Y-%m-%d %H:%M:%S\"),\n", + "}\n", + "\n", + "client.index(index=discharge_index, id=note_id, body=note_doc, refresh=True)\n" + ] + }, + { + "cell_type": "markdown", + "id": "edad8661", + "metadata": {}, + "source": [ + "## 2) Search that note back using free text\n", + "\n", + "This query uses `match` search, so we can find notes by important words (for example `John kidney`) without requiring an exact full-string match.\n", + "\n", + "In a traditional relational query, you would usually rely on exact equality or simple wildcard `LIKE` patterns. Here, OpenSearch handles tokenized full-text search for us." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25613475", + "metadata": {}, + "outputs": [], + "source": [ + "query_text = \"John kidney\"\n", + "free_text_resp = client.search(\n", + " index=discharge_index,\n", + " body={\"query\": {\"match\": {\"text\": query_text}}},\n", + ")\n", + "hits = free_text_resp[\"hits\"][\"hits\"]\n", + "print(f\"Free-text query used: {query_text}\")\n", + "print(\"This still returns the note even though it is not an exact full sentence match.\")\n", + "print(\"Results from OpenSearch free-text search:\")\n", + "display(pd.DataFrame([hits[0][\"_source\"]]))" + ] + }, + { + "cell_type": "markdown", + "id": "c8aaa960", + "metadata": {}, + "source": [ + "## 3) Fuzzy full-text search (not exact matching)\n", + "\n", + "Now we intentionally misspell the query (`kidny falur`) and still retrieve results.\n", + "\n", + "This demonstrates why OpenSearch is useful for user-entered text and typo-tolerant retrieval." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "595a313e", + "metadata": {}, + "outputs": [], + "source": [ + "fuzzy_query = \"kidny falur\"\n", + "fuzzy_resp = client.search(\n", + " index=discharge_index,\n", + " body={\n", + " \"query\": {\n", + " \"match\": {\n", + " \"text\": {\n", + " \"query\": fuzzy_query,\n", + " \"fuzziness\": \"AUTO\"\n", + " }\n", + " }\n", + " }\n", + " },\n", + ")\n", + "\n", + "fuzzy_hits = fuzzy_resp[\"hits\"][\"hits\"]\n", + "print(f\"Fuzzy query: {fuzzy_query}\")\n", + "print(f\"fuzzy_hits={len(fuzzy_hits)}\")\n", + "display(pd.DataFrame([hit[\"_source\"] for hit in fuzzy_hits]))" + ] + }, + { + "cell_type": "markdown", + "id": "690cd9a2", + "metadata": {}, + "source": [ + "## 4) Perform Named Entity Resolution with MedCAT\n", + "\n", + "We can see that we are able to search with free text, and fuzzy match. 
However, what happens if we want to search across notes using common terminology?\n", + "\n", + "We can solve this by using named entity resolution (NER) and NLP.\n", + "\n", + "To do this we will call MedCAT at `/api/process` with the same note text.\n", + "\n", + "MedCAT returns structured entities (for example CUI and concept name). This is named entity resolution in one API call." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1428daf", + "metadata": {}, + "outputs": [], + "source": [ + "medcat_payload = {\"content\": {\"text\": sample_text}}\n", + "medcat_result = requests.post(medcat_url, json=medcat_payload, timeout=30).json()\n", + "raw_annotations = medcat_result.get(\"result\", {}).get(\"annotations\", [])\n", + "\n", + "annotations = [\n", + " next(iter(ann.values())) if isinstance(ann, dict) and len(ann) == 1 else ann\n", + " for ann in raw_annotations\n", + " if isinstance(ann, dict)\n", + "]\n", + "\n", + "print(f\"annotations_found={len(annotations)}\")\n", + "print(\"Results from MedCAT named entity extraction:\")\n", + "display(pd.DataFrame(annotations))" + ] + }, + { + "cell_type": "markdown", + "id": "1dc6dbf6", + "metadata": {}, + "source": [ + "### 4.1) Index MedCAT entities into `discharge_annotations`\n", + "\n", + "Here we take each MedCAT entity and store it in OpenSearch.\n", + "\n", + "We prefix MedCAT fields with `nlp.` and add `meta.note_id` / `meta.subject_id` so each entity stays linked to its source note." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86152c7b", + "metadata": {}, + "outputs": [], + "source": [ + "indexed = 0\n", + "now_ts = datetime.now(timezone.utc).isoformat()\n", + "\n", + "for i, ann in enumerate(annotations):\n", + " nlp_fields = {f\"nlp.{k}\": v for k, v in ann.items()}\n", + "\n", + " ann_doc = {\n", + " **nlp_fields,\n", + " \"meta.note_id\": note_id,\n", + " \"meta.subject_id\": subject_id,\n", + " \"timestamp\": now_ts,\n", + " }\n", + "\n", + " client.index(\n", + " index=annotations_index,\n", + " id=f\"{note_id}-ann-{i}\",\n", + " body=ann_doc,\n", + " refresh=False,\n", + " )\n", + " indexed += 1\n", + "\n", + "client.indices.refresh(index=annotations_index)\n", + "print(f\"indexed_annotations={indexed}\")" + ] + }, + { + "cell_type": "markdown", + "id": "f0067cc1", + "metadata": {}, + "source": [ + "## 5) Search by concept\n", + "\n", + "Finally, we query `discharge_annotations` using the extracted concept (`nlp.cui` / `nlp.pretty_name`).\n", + "\n", + "This is the main value: instead of searching raw strings, we can retrieve notes by normalized clinical concepts." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a6edeb8", + "metadata": {}, + "outputs": [], + "source": [ + "concept_cui = str(annotations[0].get(\"cui\", \"\"))\n", + "\n", + "concept_query = {\n", + " \"query\": {\n", + " \"term\": {\n", + " \"nlp.cui.keyword\": concept_cui\n", + " }\n", + " }\n", + "}\n", + "\n", + "concept_resp = client.search(index=annotations_index, body=concept_query)\n", + "concept_hits = concept_resp[\"hits\"][\"hits\"]\n", + "\n", + "print(f\"Concept CUI search used: {concept_cui}\")\n", + "print(f\"concept_hits={len(concept_hits)}\")\n", + "\n", + "display(pd.DataFrame([h.get(\"_source\", {}) for h in concept_hits]))" + ] + }, + { + "cell_type": "markdown", + "id": "28cf3ed2", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "You have now seen the full end-to-end CogStack flow in a few simple steps:\n", + "- index notes into OpenSearch\n", + "- run free-text and fuzzy search over clinical text\n", + "- call MedCAT to perform named entity resolution\n", + "- index entity outputs\n", + "- retrieve notes by normalized concept (CUI)\n", + "\n", + "This is the core building block for turning unstructured clinical text into searchable, analysable, and operational data.\n", + "\n", + "## What to do next\n", + "- **Visualise the data with OpenSearch Dashboards** \n", + " If you've setup with the CogStack Community Edition and are running on localhost, visit http://localhost:5601/ to now see reports and drill down on this data with the UI\n", + "\n", + "- **Scale this into production ETL** \n", + " Use these exact blocks in your pipelines: ingest note text -> index to OpenSearch -> call MedCAT -> index annotations -> query/serve downstream applications.\n", + "\n", + "- **Use a real MedCAT model** \n", + " Replace the demo model with a domain-appropriate model pack and configuration: [MedCAT v2 README](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/README.md).\n", + "\n", + "- **Explore the 
platform docs and examples** \n", + " See full docs at [docs.cogstack.org](https://docs.cogstack.org/) and repositories/examples at [github.com/CogStack](https://github.com/CogStack).\n", + "\n", + "- **Add supervised learning with MedCAT Trainer (MLOps)** \n", + " Set up a training and feedback loop to improve extraction quality over time using MedCAT Trainer (annotation -> train -> evaluate -> redeploy)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb index 09124d7..d439f90 100644 --- a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "a5f15845-5a0f-414e-9db7-f414d12bde48", "metadata": {}, "outputs": [], @@ -49,11 +49,11 @@ "sample_text = \"John was diagnosed with Kidney Failure\"\n", "\n", "medcat_base_url = os.getenv(\n", - " \"MEDCAT_URL\", \"http://cogstack-helm-ce-medcat-service:5000\"\n", + " \"MEDCAT_URL\", \"http://cogstack-medcat-service:5000\"\n", ").rstrip(\"/\")\n", "\n", "anoncat_base_url = os.getenv(\n", - " \"ANONCAT_URL\", \"http://cogstack-helm-ce-anoncat-service:5000\"\n", + " \"ANONCAT_URL\", \"http://cogstack-ce-anoncat-service:5000\"\n", ").rstrip(\"/\")\n", "\n", "medcat_url = medcat_base_url + \"/api/process\"\n", diff --git a/helm-charts/cogstack-helm-ce/provisioning/base_index_settings.json b/helm-charts/cogstack-helm-ce/provisioning/base_index_settings.json index 949426e..99d2b78 100644 --- a/helm-charts/cogstack-helm-ce/provisioning/base_index_settings.json +++ 
b/helm-charts/cogstack-helm-ce/provisioning/base_index_settings.json @@ -5,7 +5,9 @@ "emar", "icustays", "patients", - "poe" + "poe", + "discharge", + "discharge_annotations" ], "template": { "mappings": { diff --git a/helm-charts/cogstack-helm-ce/provisioning/dashboards.ndjson b/helm-charts/cogstack-helm-ce/provisioning/dashboards.ndjson index 4e241db..fedf08b 100644 --- a/helm-charts/cogstack-helm-ce/provisioning/dashboards.ndjson +++ b/helm-charts/cogstack-helm-ce/provisioning/dashboards.ndjson @@ -18,4 +18,5 @@ {"attributes":{"fields":"[{\"count\":0,\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_score\",\"type\":\"number\",\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_type\",\"type\":\"string\",\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"anchor_age\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"anchor_year\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"anchor_year_group\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"anchor_year_group.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggre
gatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"anchor_year_group\"}}},{\"count\":0,\"name\":\"comments\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":1,\"name\":\"dod\",\"type\":\"date\",\"esTypes\":[\"date\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"dose_val_rx\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"gen_dom\",\"type\":\"geo_point\",\"esTypes\":[\"geo_point\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"gen_loc\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"gen_loc.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"gen_loc\"}}},{\"count\":0,\"name\":\"gender\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"gender.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"gender\"}}},{\"count\":0,\"name\":\"subject_id\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","title":"patients"},"id":"e6ff0d40-11f1-11ef-ad94-374d81247b5b","migrationVersion":{"index-pattern":"7.6.0"},"references":[],"type":"index-pattern","updated_at":"2024-05-24T17:32:47.161Z","version":"WzUwLDFd"} 
{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"lucene\",\"query\":\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Patient_Age","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"Patient_Age\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"params\":{},\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"params\":{\"field\":\"anchor_age\",\"interval\":\"auto\",\"min_doc_count\":false,\"has_extended_bounds\":false,\"extended_bounds\":{\"max\":\"\",\"min\":\"\"},\"customLabel\":\"Age\"},\"schema\":\"segment\"}],\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"labels\":{\"filter\":true,\"show\":true,\"truncate\":100},\"position\":\"bottom\",\"scale\":{\"type\":\"linear\"},\"show\":true,\"style\":{},\"title\":{},\"type\":\"category\"}],\"grid\":{\"categoryLines\":false},\"labels\":{\"show\":false},\"legendPosition\":\"right\",\"seriesParams\":[{\"data\":{\"id\":\"1\",\"label\":\"Count\"},\"drawLinesBetweenPoints\":true,\"lineWidth\":2,\"mode\":\"stacked\",\"show\":true,\"showCircles\":true,\"type\":\"histogram\",\"valueAxis\":\"ValueAxis-1\"}],\"thresholdLine\":{\"color\":\"#E7664C\",\"show\":false,\"style\":\"full\",\"value\":10,\"width\":1},\"times\":[],\"type\":\"histogram\",\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"labels\":{\"filter\":false,\"rotate\":0,\"show\":true,\"truncate\":100},\"name\":\"LeftAxis-1\",\"position\":\"left\",\"scale\":{\"mode\":\"normal\",\"type\":\"linear\"},\"show\":true,\"style\":{},\"title\":{\"text\":\"Count\"},\"type\":\"value\"}]}}"},"id":"9babc1b0-185b-11ef-8167-970bdc38cc41","migrationVersion":{"visualization":"7.10.0"},"references":[{"id":"e6ff0d40-11f1-11ef-ad94-374d81247b5b","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2
024-05-22T16:52:00.458Z","version":"WzQzLDFd"} {"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"\"},\"filter\":[]}"},"optionsJSON":"{\"hidePanelTitles\":false,\"useMargins\":true}","panelsJSON":"[{\"version\":\"2.19.0\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":15,\"i\":\"f3b28bcf-f3a0-4f97-93fe-2173dd6ae800\"},\"panelIndex\":\"f3b28bcf-f3a0-4f97-93fe-2173dd6ae800\",\"embeddableConfig\":{},\"panelRefName\":\"panel_0\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":24,\"y\":0,\"w\":24,\"h\":15,\"i\":\"b745abf1-caed-4c86-9691-52debc610257\"},\"panelIndex\":\"b745abf1-caed-4c86-9691-52debc610257\",\"embeddableConfig\":{},\"panelRefName\":\"panel_1\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":0,\"y\":15,\"w\":12,\"h\":12,\"i\":\"ddb64d89-b811-41ce-92c1-25bab88c6d6b\"},\"panelIndex\":\"ddb64d89-b811-41ce-92c1-25bab88c6d6b\",\"embeddableConfig\":{},\"panelRefName\":\"panel_2\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":12,\"y\":15,\"w\":12,\"h\":12,\"i\":\"a970b0d9-dff5-4b87-bef4-1d7788e6a4b1\"},\"panelIndex\":\"a970b0d9-dff5-4b87-bef4-1d7788e6a4b1\",\"embeddableConfig\":{},\"panelRefName\":\"panel_3\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":24,\"y\":15,\"w\":12,\"h\":12,\"i\":\"b857c534-28da-4a43-9c41-e36353aa86b3\"},\"panelIndex\":\"b857c534-28da-4a43-9c41-e36353aa86b3\",\"embeddableConfig\":{},\"panelRefName\":\"panel_4\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":36,\"y\":15,\"w\":12,\"h\":12,\"i\":\"9b043ef6-98a1-4e08-9ed4-7989e7776939\"},\"panelIndex\":\"9b043ef6-98a1-4e08-9ed4-7989e7776939\",\"embeddableConfig\":{},\"panelRefName\":\"panel_5\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":0,\"y\":27,\"w\":26,\"h\":14,\"i\":\"c5b3a525-a1b8-4878-8bb1-61a852adb016\"},\"panelIndex\":\"c5b3a525-a1b8-4878-8bb1-61a852adb016\",\"embeddableConfig\":{},\"panelRefName\":\"panel_6\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":26,\"y\":27,\"w\":22,\"h\":14,\"i\":\"323fbcc4-6724-4767-
8b5a-af0fe2111d16\"},\"panelIndex\":\"323fbcc4-6724-4767-8b5a-af0fe2111d16\",\"embeddableConfig\":{},\"panelRefName\":\"panel_7\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":0,\"y\":41,\"w\":24,\"h\":15,\"i\":\"b511c52f-1fd0-4391-bd3a-c34abc0c8f6e\"},\"panelIndex\":\"b511c52f-1fd0-4391-bd3a-c34abc0c8f6e\",\"embeddableConfig\":{},\"panelRefName\":\"panel_8\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":24,\"y\":41,\"w\":24,\"h\":15,\"i\":\"e88e9d91-452e-49a8-8d43-4b37b22c9444\"},\"panelIndex\":\"e88e9d91-452e-49a8-8d43-4b37b22c9444\",\"embeddableConfig\":{},\"panelRefName\":\"panel_9\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":0,\"y\":56,\"w\":24,\"h\":15,\"i\":\"55842b9a-3ae3-4313-9298-35693e14ef2a\"},\"panelIndex\":\"55842b9a-3ae3-4313-9298-35693e14ef2a\",\"embeddableConfig\":{\"vis\":null},\"panelRefName\":\"panel_10\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":24,\"y\":56,\"w\":24,\"h\":15,\"i\":\"71eeee2e-f551-4d2e-bdd6-6bf9fa578dd8\"},\"panelIndex\":\"71eeee2e-f551-4d2e-bdd6-6bf9fa578dd8\",\"embeddableConfig\":{},\"panelRefName\":\"panel_11\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":0,\"y\":71,\"w\":24,\"h\":15,\"i\":\"383dad7a-3c80-4afc-9835-443e576f7dc4\"},\"panelIndex\":\"383dad7a-3c80-4afc-9835-443e576f7dc4\",\"embeddableConfig\":{},\"panelRefName\":\"panel_12\"},{\"version\":\"2.19.0\",\"gridData\":{\"x\":24,\"y\":71,\"w\":24,\"h\":15,\"i\":\"eac18176-42d0-463d-88a5-d52cbba3f21c\"},\"panelIndex\":\"eac18176-42d0-463d-88a5-d52cbba3f21c\",\"embeddableConfig\":{},\"panelRefName\":\"panel_13\"}]","timeRestore":false,"title":"Demo 
Dashboard","version":1},"id":"69908e20-e7a7-11ee-99af-876c5d67a178","migrationVersion":{"dashboard":"7.9.3"},"references":[{"id":"31302c60-16bf-11ef-9606-3fa2532fcc62","name":"panel_0","type":"visualization"},{"id":"d5b67c20-16b6-11ef-9606-3fa2532fcc62","name":"panel_1","type":"visualization"},{"id":"77448ac0-16bf-11ef-9606-3fa2532fcc62","name":"panel_2","type":"visualization"},{"id":"263f9ff0-1793-11ef-8167-970bdc38cc41","name":"panel_3","type":"visualization"},{"id":"77448ac0-16bf-11ef-9606-3fa2532fcc62","name":"panel_4","type":"visualization"},{"id":"c5ba7510-1792-11ef-8167-970bdc38cc41","name":"panel_5","type":"visualization"},{"id":"b3253970-1856-11ef-8167-970bdc38cc41","name":"panel_6","type":"visualization"},{"id":"ac17f0d0-185d-11ef-8167-970bdc38cc41","name":"panel_7","type":"visualization"},{"id":"0616a360-1390-11ef-9606-3fa2532fcc62","name":"panel_8","type":"map"},{"id":"821e5fc0-0d78-11ef-a513-a5083c720401","name":"panel_9","type":"map"},{"id":"584cd8d0-e7a7-11ee-99af-876c5d67a178","name":"panel_10","type":"visualization"},{"id":"92855e10-1857-11ef-8167-970bdc38cc41","name":"panel_11","type":"visualization"},{"id":"ced096e0-1858-11ef-8167-970bdc38cc41","name":"panel_12","type":"visualization"},{"id":"9babc1b0-185b-11ef-8167-970bdc38cc41","name":"panel_13","type":"visualization"}],"type":"dashboard","updated_at":"2025-07-08T13:47:18.459Z","version":"WzEwMywyMl0="} -{"exportedCount":20,"missingRefCount":0,"missingReferences":[]} \ No newline at end of file 
+{"attributes":{"fields":"[{\"count\":1,\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_score\",\"type\":\"number\",\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_type\",\"type\":\"string\",\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"charttime\",\"type\":\"date\",\"esTypes\":[\"date\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"comments\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"dose_val_rx\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"hadm_id\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":1,\"name\":\"note_id\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"note_id.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"note_id\"}}},{\"count\":0,\"name\":\"note_seq\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":tr
ue,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"note_type\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"note_type.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"note_type\"}}},{\"count\":1,\"name\":\"storetime\",\"type\":\"date\",\"esTypes\":[\"date\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":2,\"name\":\"subject_id\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":2,\"name\":\"text\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"text.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"text\"}}}]","title":"discharge"},"id":"e4eb4e60-063e-11ef-a513-a5083c720401","migrationVersion":{"index-pattern":"7.6.0"},"references":[],"type":"index-pattern","updated_at":"2025-07-04T13:16:09.804Z","version":"Wzk5LDIyXQ=="} 
+{"attributes":{"fields":"[{\"count\":0,\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_score\",\"type\":\"number\",\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"_type\",\"type\":\"string\",\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"comments\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"dose_val_rx\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"enrich_top_level_concept\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"meta.note_id\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"meta.note_id.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"meta.note_id\"}}},{\"count\":0,\"name\":\"meta.subject_id\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"meta.subject_id.keyword\",\"type\":\"string\",\"
esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"meta.subject_id\"}}},{\"count\":0,\"name\":\"nlp.acc\",\"type\":\"number\",\"esTypes\":[\"float\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"nlp.context_similarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"nlp.cui\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.cui.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.cui\"}}},{\"count\":0,\"name\":\"nlp.detected_name\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.detected_name.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.detected_name\"}}},{\"count\":0,\"name\":\"nlp.end\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"nlp.icd10\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.icd10.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.icd10\"}}},{\"count\":0,\"name\":\"nlp.id\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"
searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":1,\"name\":\"nlp.meta_anns.Presence.confidence\",\"type\":\"number\",\"esTypes\":[\"float\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":1,\"name\":\"nlp.meta_anns.Presence.name\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.meta_anns.Presence.name.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.meta_anns.Presence.name\"}}},{\"count\":1,\"name\":\"nlp.meta_anns.Presence.value\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.meta_anns.Presence.value.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.meta_anns.Presence.value\"}}},{\"count\":0,\"name\":\"nlp.meta_anns.Subject.confidence\",\"type\":\"number\",\"esTypes\":[\"float\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"nlp.meta_anns.Subject.name\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.meta_anns.Subject.name.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.meta_anns.Subject.name\"}}},{\"count\":0,\"name\":\"nlp.meta_anns.Subject.value\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":fals
e},{\"count\":0,\"name\":\"nlp.meta_anns.Subject.value.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.meta_anns.Subject.value\"}}},{\"count\":0,\"name\":\"nlp.meta_anns.Time.confidence\",\"type\":\"number\",\"esTypes\":[\"float\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"nlp.meta_anns.Time.name\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.meta_anns.Time.name.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.meta_anns.Time.name\"}}},{\"count\":0,\"name\":\"nlp.meta_anns.Time.value\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.meta_anns.Time.value.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.meta_anns.Time.value\"}}},{\"count\":0,\"name\":\"nlp.ontologies\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.ontologies.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.ontologies\"}}},{\"count\":0,\"name\":\"nlp.pretty_name\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.pretty_name.keyword\",\"type\":\"str
ing\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.pretty_name\"}}},{\"count\":0,\"name\":\"nlp.source_value\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.source_value.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.source_value\"}}},{\"count\":0,\"name\":\"nlp.start\",\"type\":\"number\",\"esTypes\":[\"long\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"count\":0,\"name\":\"nlp.type_ids\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.type_ids.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.type_ids\"}}},{\"count\":0,\"name\":\"nlp.types\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"nlp.types.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"nlp.types\"}}},{\"count\":0,\"name\":\"service_model\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"service_model.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"service_model\"}}},{\"count\":0
,\"name\":\"service_version\",\"type\":\"string\",\"esTypes\":[\"text\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"count\":0,\"name\":\"service_version.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"service_version\"}}},{\"count\":0,\"name\":\"timestamp\",\"type\":\"date\",\"esTypes\":[\"date\"],\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","title":"discharge_annotations"},"id":"5e5f7820-003f-11f0-922f-1b0cde0c046d","migrationVersion":{"index-pattern":"7.6.0"},"references":[],"type":"index-pattern","updated_at":"2025-11-21T14:33:42.793Z","version":"WzEwOCwzMl0="} \ No newline at end of file diff --git a/helm-charts/cogstack-helm-ce/provisioning/generate_synthetic_bulk_ndjson.py b/helm-charts/cogstack-helm-ce/provisioning/generate_synthetic_bulk_ndjson.py index 46c924b..389a38e 100644 --- a/helm-charts/cogstack-helm-ce/provisioning/generate_synthetic_bulk_ndjson.py +++ b/helm-charts/cogstack-helm-ce/provisioning/generate_synthetic_bulk_ndjson.py @@ -39,6 +39,8 @@ "icustays", "patients", "poe", + "discharge", + "discharge_annotations", ) @@ -301,6 +303,205 @@ def emar_id(self) -> str: return f"{self.subject_id}-{self.emar_seq}" +@dataclass(frozen=True) +class DischargeNote: + note_id: str + subject_id: int + hadm_id: int + note_type: str + note_seq: int + charttime: datetime + storetime: datetime + text: str + + +def make_discharge_notes(rng: random.Random, admissions: Sequence[Admission]) -> List[DischargeNote]: + notes: List[DischargeNote] = [] + for adm in admissions: + note_type = "DS" + note_seq = bounded_int(rng, 1, 20) + note_id = f"{adm.subject_id}-{note_type}-{note_seq}" + + charttime = rand_datetime(rng, adm.admittime, adm.dischtime, resolution_seconds=60) + storetime = min(adm.dischtime, charttime + 
timedelta(hours=bounded_int(rng, 0, 24), minutes=bounded_int(rng, 0, 59))) + + # Synthetic discharge-note-like template (non-derivative, intentionally generic). + problems = ( + "Chest pain", + "Shortness of breath", + "Abdominal pain", + "Dizziness", + "Headache", + "Fever", + "Fatigue", + ) + services = ("MEDICINE", "SURGERY", "CARDIOLOGY", "NEUROLOGY", "RESPIRATORY", "RENAL") + meds = ("acetaminophen", "heparin", "pantoprazole", "insulin", "furosemide", "saline flush") + diag = choose(rng, problems) + service = choose(rng, services) + discharge_to = adm.discharge_location + med_list = ", ".join(rng.sample(meds, k=bounded_int(rng, 2, 4))) + + text = ( + "\n" + "Discharge Summary\n" + "=================\n" + f"Subject: {adm.subject_id}\n" + f"Admission ID: {adm.hadm_id}\n" + f"Service: {service}\n" + "\n" + "Chief Complaint:\n" + f"{diag}\n" + "\n" + "Hospital Course:\n" + "The patient was evaluated and treated during this admission. Symptoms improved with supportive care.\n" + "Vital signs remained stable. 
No acute complications were documented.\n" + "\n" + "Discharge Diagnoses:\n" + f"- {diag}\n" + "\n" + "Discharge Medications:\n" + f"- {med_list}\n" + "\n" + "Follow-up:\n" + "- Primary care follow-up in 1-2 weeks.\n" + "- Return to care if symptoms worsen.\n" + "\n" + "Disposition:\n" + f"{discharge_to}\n" + "\n" + ) + + notes.append( + DischargeNote( + note_id=note_id, + subject_id=adm.subject_id, + hadm_id=adm.hadm_id, + note_type=note_type, + note_seq=note_seq, + charttime=charttime, + storetime=storetime, + text=text, + ) + ) + return notes + + +def discharge_doc(n: DischargeNote) -> Dict[str, object]: + return { + "note_id": n.note_id, + "subject_id": n.subject_id, + "hadm_id": n.hadm_id, + "note_type": n.note_type, + "note_seq": n.note_seq, + "charttime": fmt_dt(n.charttime), + "storetime": fmt_dt(n.storetime), + "text": n.text, + } + + +def _rand_timestamp_utc_iso(rng: random.Random, base: datetime) -> str: + # Example: 2024-04-13T16:17:02.103+00:00 + dt = base + timedelta(seconds=bounded_int(rng, -3600 * 24 * 30, 3600 * 24 * 30)) + ms = bounded_int(rng, 0, 999) + return dt.strftime("%Y-%m-%dT%H:%M:%S") + f".{ms:03d}+00:00" + + +def make_discharge_annotations(rng: random.Random, notes: Sequence[DischargeNote]) -> List[Dict[str, object]]: + ann: List[Dict[str, object]] = [] + pretty_names = ( + "Usage", + "Dose", + "Route", + "Frequency", + "Condition", + "Procedure", + "Symptom", + ) + detected_terms = ( + "use", + "dose", + "oral", + "daily", + "pain", + "fever", + "follow-up", + "medication", + ) + # Keep ontology/model identifiers generic (avoid licensed/sensitive names). + ontologies = ( + "ONTOLOGY_A", + "ONTOLOGY_B", + "ONTOLOGY_C", + "ONTOLOGY_D", + ) + service_models = ("demo_model_a", "demo_model_b", "demo_model_c") + service_versions = ("1.10.2", "1.11.0", "2.0.0") + + for n in notes: + # Seeded random 1–20 annotations per note. 
+ per_note = bounded_int(rng, 1, 20) + text_len = len(n.text) + for _ in range(per_note): + # Pick a safe span inside the text. + if text_len < 10: + start = 0 + end = text_len + else: + start = bounded_int(rng, 0, max(0, text_len - 2)) + span = bounded_int(rng, 1, min(25, max(1, text_len - start))) + end = min(text_len, start + span) + if end <= start: + end = min(text_len, start + 1) + + detected = choose(rng, detected_terms) + pretty = choose(rng, pretty_names) + cui = str(bounded_int(rng, 100_000_000, 999_999_999)) + + doc: Dict[str, object] = { + "nlp.cui": cui, + "enrich_top_level_concept": bounded_int(rng, 100_000, 999_999_999), + "nlp.pretty_name": pretty, + "nlp.end": end, + "nlp.types": [""], + "nlp.detected_name": detected, + "nlp.meta_anns": { + "Presence": { + "confidence": rng.random(), + "name": "Presence", + "value": choose(rng, ("Present", "Absent", "Hypothetical")), + }, + "Time": { + "confidence": rng.random(), + "name": "Time", + "value": choose(rng, ("Recent", "Historical", "Planned")), + }, + "Subject": { + "confidence": rng.random(), + "name": "Subject", + "value": choose(rng, ("Patient", "Family", "Clinician")), + }, + }, + "service_version": choose(rng, service_versions), + "nlp.start": start, + "nlp.source_value": detected, + "nlp.id": bounded_int(rng, 1, 10_000), + "meta.note_id": n.note_id, + "service_model": choose(rng, service_models), + "meta.subject_id": str(n.subject_id), + "nlp.icd10": [], + "nlp.snomed": [], + "nlp.acc": rng.random(), + "nlp.type_ids": [str(bounded_int(rng, 1_000_000, 99_999_999))], + "nlp.context_similarity": rng.random(), + "nlp.ontologies": [choose(rng, ontologies)], + "timestamp": _rand_timestamp_utc_iso(rng, n.storetime), + } + ann.append(doc) + + return ann + + def make_patients(rng: random.Random, n: int) -> List[Patient]: patients: List[Patient] = [] for subject_id in range(n): @@ -637,6 +838,8 @@ def iter_bulk_rows( icustays: Sequence[IcuStay], patients: Sequence[Patient], poe: Sequence[PoeOrder], + 
discharge: Sequence[DischargeNote], + discharge_annotations: Sequence[Dict[str, object]], ) -> Iterator[Row]: # Deterministic order by index, with _id 1..N per index. for i, a in enumerate(admissions, start=1): @@ -651,6 +854,10 @@ def iter_bulk_rows( yield ("patients", str(i), patient_doc(p)) for i, o in enumerate(poe, start=1): yield ("poe", str(i), poe_doc(o)) + for i, n in enumerate(discharge, start=1): + yield ("discharge", str(i), discharge_doc(n)) + for i, d in enumerate(discharge_annotations, start=1): + yield ("discharge_annotations", str(i), d) def write_bulk_ndjson(path: Path, rows: Iterable[Row]) -> None: @@ -662,10 +869,10 @@ def write_bulk_ndjson(path: Path, rows: Iterable[Row]) -> None: f.write(json.dumps(doc, ensure_ascii=False) + "\n") -def validate_bulk_ndjson(path: Path, expected_n: int) -> None: +def validate_bulk_ndjson(path: Path, expected_counts: Dict[str, int]) -> None: # Lightweight structural validation: alternating meta/doc, correct index names, correct counts. 
- expected_lines = 2 * (len(INDEX_ORDER) * expected_n) - index_counts: Dict[str, int] = {idx: 0 for idx in INDEX_ORDER} + expected_lines = 2 * sum(expected_counts.values()) + index_counts: Dict[str, int] = {idx: 0 for idx in expected_counts.keys()} with path.open("r", encoding="utf-8") as f: lines = f.readlines() @@ -685,19 +892,33 @@ def validate_bulk_ndjson(path: Path, expected_n: int) -> None: raise SystemExit(f"Validation failed: doc is not an object at line {i+2}") index_counts[idx] += 1 - for idx, count in index_counts.items(): - if count != expected_n: - raise SystemExit(f"Validation failed: index '{idx}' expected {expected_n} docs, got {count}") - - -def build_dataset(rng: random.Random, n: int) -> Tuple[List[Patient], List[Admission], List[IcuStay], List[PoeOrder], List[EmarEvent], List[dict]]: + for idx, expected in expected_counts.items(): + got = index_counts.get(idx, 0) + if got != expected: + raise SystemExit(f"Validation failed: index '{idx}' expected {expected} docs, got {got}") + + +def build_dataset( + rng: random.Random, n: int +) -> Tuple[ + List[Patient], + List[Admission], + List[IcuStay], + List[PoeOrder], + List[EmarEvent], + List[dict], + List[DischargeNote], + List[Dict[str, object]], +]: patients = make_patients(rng, n) admissions = make_admissions(rng, patients) icustays = make_icustays(rng, admissions) poe_orders = make_poe_orders(rng, admissions) emar_events = make_emar_events(rng, admissions, poe_orders) drg_docs = make_drgcodes(rng, admissions) - return patients, admissions, icustays, poe_orders, emar_events, drg_docs + discharge_notes = make_discharge_notes(rng, admissions) + discharge_anns = make_discharge_annotations(rng, discharge_notes) + return patients, admissions, icustays, poe_orders, emar_events, drg_docs, discharge_notes, discharge_anns def parse_args(argv: Sequence[str]) -> argparse.Namespace: @@ -721,7 +942,7 @@ def main(argv: Sequence[str]) -> int: raise SystemExit("--n must be > 0") rng = random.Random(args.seed) - 
patients, admissions, icustays, poe_orders, emar_events, drg_docs = build_dataset(rng, args.n) + patients, admissions, icustays, poe_orders, emar_events, drg_docs, discharge_notes, discharge_anns = build_dataset(rng, args.n) rows = iter_bulk_rows( admissions=admissions, @@ -730,11 +951,23 @@ def main(argv: Sequence[str]) -> int: icustays=icustays, patients=patients, poe=poe_orders, + discharge=discharge_notes, + discharge_annotations=discharge_anns, ) write_bulk_ndjson(args.out, rows) if args.validate: - validate_bulk_ndjson(args.out, args.n) + expected_counts: Dict[str, int] = { + "admissions": args.n, + "drgcodes": args.n, + "emar": args.n, + "icustays": args.n, + "patients": args.n, + "poe": args.n, + "discharge": len(discharge_notes), + "discharge_annotations": len(discharge_anns), + } + validate_bulk_ndjson(args.out, expected_counts) print(f"Completed synthetic data genration. File written to {args.out}") return 0 From 9ccc1b6467ed2dd59b008b9f67920d35648d1c02 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 26 Mar 2026 17:51:07 +0000 Subject: [PATCH 2/3] feat(helm): Create Opensearch E2E tutorial for cogstack CE --- .../examples/medcat-opensearch-e2e.ipynb | 383 +++++++++++++++++- 1 file changed, 365 insertions(+), 18 deletions(-) diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb index 8c337cd..8a01f38 100644 --- a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb @@ -44,10 +44,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "id": "17deaa5c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/opt/conda/envs/py311/lib/python3.11/site-packages/opensearchpy/connection/http_urllib3.py:214: UserWarning: Connecting to https://opensearch-cluster-master:9200 using SSL with verify_certs=False is insecure.\n", + " warnings.warn(\n" + ] + } + ], "source": [ "import os\n", "from datetime import datetime, timezone\n", @@ -68,7 +77,7 @@ "\n", "opensearch_url = os.getenv(\"OPENSEARCH_URL\", \"https://opensearch-cluster-master:9200\")\n", "opensearch_username = os.getenv(\"OPENSEARCH_USERNAME\", \"admin\")\n", - "opensearch_password = os.getenv(\"OPENSEARCH_PASSWORD\", \"admin\")\n", + "opensearch_password = os.getenv(\"OPENSEARCH_PASSWORD\", \"opensearch-312$A\")\n", "\n", "parsed = urlparse(opensearch_url)\n", "\n", @@ -108,10 +117,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "id": "3eb85731", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'_index': 'discharge',\n", + " '_id': 'demo-note-kidney-failure-001',\n", + " '_version': 10,\n", + " 'result': 'updated',\n", + " 'forced_refresh': True,\n", + " '_shards': {'total': 2, 'successful': 1, 'failed': 0},\n", + " '_seq_no': 1009,\n", + " '_primary_term': 1}" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "note_doc = {\n", " \"note_id\": note_id,\n", @@ -137,10 +164,70 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, "id": "25613475", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Free-text query used: John kidney\n", + "This still returns the note even though it is not an exact full sentence match.\n", + "Results from OpenSearch free-text search:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
note_idsubject_idtextstoretime
0demo-note-kidney-failure-0011John was diagnosed with Kidney Failure2026-03-26 17:49:59
\n", + "
" + ], + "text/plain": [ + " note_id subject_id \\\n", + "0 demo-note-kidney-failure-001 1 \n", + "\n", + " text storetime \n", + "0 John was diagnosed with Kidney Failure 2026-03-26 17:49:59 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "query_text = \"John kidney\"\n", "free_text_resp = client.search(\n", @@ -168,10 +255,69 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 70, "id": "595a313e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fuzzy query: kidny falur\n", + "fuzzy_hits=1\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
note_idsubject_idtextstoretime
0demo-note-kidney-failure-0011John was diagnosed with Kidney Failure2026-03-26 17:49:59
\n", + "
" + ], + "text/plain": [ + " note_id subject_id \\\n", + "0 demo-note-kidney-failure-001 1 \n", + "\n", + " text storetime \n", + "0 John was diagnosed with Kidney Failure 2026-03-26 17:49:59 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fuzzy_query = \"kidny falur\"\n", "fuzzy_resp = client.search(\n", @@ -212,10 +358,92 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "id": "f1428daf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "annotations_found=1\n", + "Results from MedCAT named entity extraction:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pretty_namecuitype_idssource_valuedetected_nameacccontext_similaritystartendidmeta_annscontext_leftcontext_centercontext_right
0Kidney Failure1[T047]Kidney Failurekidney~failure1124380{}[][][]
\n", + "
" + ], + "text/plain": [ + " pretty_name cui type_ids source_value detected_name acc \\\n", + "0 Kidney Failure 1 [T047] Kidney Failure kidney~failure 1 \n", + "\n", + " context_similarity start end id meta_anns context_left context_center \\\n", + "0 1 24 38 0 {} [] [] \n", + "\n", + " context_right \n", + "0 [] " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "medcat_payload = {\"content\": {\"text\": sample_text}}\n", "medcat_result = requests.post(medcat_url, json=medcat_payload, timeout=30).json()\n", @@ -246,10 +474,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 72, "id": "86152c7b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "indexed_annotations=1\n" + ] + } + ], "source": [ "indexed = 0\n", "now_ts = datetime.now(timezone.utc).isoformat()\n", @@ -290,10 +526,104 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 73, "id": "3a6edeb8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concept CUI search used: 1\n", + "concept_hits=1\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nlp.pretty_namenlp.cuinlp.type_idsnlp.source_valuenlp.detected_namenlp.accnlp.context_similaritynlp.startnlp.endnlp.idnlp.meta_annsnlp.context_leftnlp.context_centernlp.context_rightmeta.note_idmeta.subject_idtimestamp
0Kidney Failure1[T047]Kidney Failurekidney~failure1124380{}[][][]demo-note-kidney-failure-00112026-03-26T17:50:17.140165+00:00
\n", + "
" + ], + "text/plain": [ + " nlp.pretty_name nlp.cui nlp.type_ids nlp.source_value nlp.detected_name \\\n", + "0 Kidney Failure 1 [T047] Kidney Failure kidney~failure \n", + "\n", + " nlp.acc nlp.context_similarity nlp.start nlp.end nlp.id nlp.meta_anns \\\n", + "0 1 1 24 38 0 {} \n", + "\n", + " nlp.context_left nlp.context_center nlp.context_right \\\n", + "0 [] [] [] \n", + "\n", + " meta.note_id meta.subject_id \\\n", + "0 demo-note-kidney-failure-001 1 \n", + "\n", + " timestamp \n", + "0 2026-03-26T17:50:17.140165+00:00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "concept_cui = str(annotations[0].get(\"cui\", \"\"))\n", "\n", @@ -346,18 +676,35 @@ "- **Add supervised learning with MedCAT Trainer (MLOps)** \n", " Set up a training and feedback loop to improve extraction quality over time using MedCAT Trainer (annotation -> train -> evaluate -> redeploy)." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "207e2a36-72b2-4906-a304-4dae0836a5a2", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" } }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From fa1d071e4b965fab4199802061ddfeca077b48bf Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 26 Mar 2026 18:06:12 +0000 Subject: [PATCH 3/3] feat(helm): Create Opensearch E2E tutorial for cogstack CE - add to docs --- .../tutorial/end-to-end-jupyterhub.md | 18 ++++++++----- docs/mkdocs.yml | 3 ++- docs/scripts/copy_files_from_repo.py | 7 +++-- 
.../examples/medcat-opensearch-e2e.ipynb | 27 +++++++++---------- .../examples/medcat-service-tutorial.ipynb | 11 +++++--- 5 files changed, 38 insertions(+), 28 deletions(-) diff --git a/docs/docs/cogstack-ce/tutorial/end-to-end-jupyterhub.md b/docs/docs/cogstack-ce/tutorial/end-to-end-jupyterhub.md index 35b3402..f0b53f2 100644 --- a/docs/docs/cogstack-ce/tutorial/end-to-end-jupyterhub.md +++ b/docs/docs/cogstack-ce/tutorial/end-to-end-jupyterhub.md @@ -13,7 +13,10 @@ By the end, you will have completed an end-to-end user flow: !!! tip The following tutorial will use your CogStack CE installation and let you run real code against your environment. - To see a non-interactive version of the tutorial notebook, refer to [the MedCAT Service Tutorial notebook](../../platform/cogstack-ai/medcat-service-tutorial.ipynb). + To see a non-interactive version of the tutorial notebook, refer to: + + - [the MedCAT Service Tutorial notebook](../../platform/cogstack-ai/medcat-service-tutorial.ipynb). + - [the OpenSearch E2E notebook](./medcat-opensearch-e2e.ipynb). ## Before you start @@ -46,11 +49,12 @@ Log in with: After login, JupyterLab opens for your user. -## Step 3: Open the bundled notebook +## Step 3: Open the bundled notebooks -The chart includes an example notebook: +The chart includes example notebooks to interact with CogStack CE: - `medcat-service-tutorial.ipynb` +- `medcat-opensearch-e2e.ipynb` You can open it directly: @@ -62,10 +66,11 @@ Or navigate to it in JupyterLab and click to open it. Run each cell in order from top to bottom. -The notebook demonstrates service calls to: +The notebooks demonstrate service calls to: - `medcat-service` at `/api/process` for named entity extraction - `anoncat-service` at `/api/process` for de-identification +- `OpenSearch` for indexing and searching data. It uses environment variables for service URLs where available, so the default CogStack CE setup should work without edits. 
@@ -83,13 +88,12 @@ If those outputs appear, you have validated the full end-to-end flow from Jupyte - If JupyterHub does not load, ensure port-forwarding is running. - If notebook requests fail, verify the cluster services are up and re-run: - - `helm get notes | bash` -- For production deployments, replace dummy authentication with secure auth configuration. +- For production deployments, replace dummy authentication with secure auth configuration. ## Next Steps - See the [full deployment documentation](../../platform/deployment/_index.md) for more details on scaling, production security, and advanced configuration options. - See full install instructions of the cogstack CE chart[CogStack CE Helm chart (install + customization)](../../platform/deployment/helm/charts/cogstack-ce-helm.md) -- See further tutorials on medcat on [GitHub](https://github.com/CogStack/cogstack-nlp/tree/79f00cfc204f4ae559b56c8e397bbcaf82d44274/medcat-v2-tutorials) \ No newline at end of file +- See further tutorials on medcat on [GitHub](https://github.com/CogStack/cogstack-nlp/tree/79f00cfc204f4ae559b56c8e397bbcaf82d44274/medcat-v2-tutorials) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index d24429b..dfbdbfd 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -39,7 +39,8 @@ nav: - Overview: cogstack-ce/_index.md - Quickstart: cogstack-ce/tutorial/quickstart-installation.md - Tutorials: - - End To End Tutorial: cogstack-ce/tutorial/end-to-end-jupyterhub.md + - Using JupyterHub: cogstack-ce/tutorial/end-to-end-jupyterhub.md + - End To End Tutorial: cogstack-ce/tutorial/medcat-opensearch-e2e.ipynb - CogStack AI: - CogStack AI: cogstack-ai/index.md - Natural Language Processing: overview/Natural Language Processing.md diff --git a/docs/scripts/copy_files_from_repo.py b/docs/scripts/copy_files_from_repo.py index 014eb96..2a36454 100644 --- a/docs/scripts/copy_files_from_repo.py +++ b/docs/scripts/copy_files_from_repo.py @@ -24,7 +24,11 @@ { "sourceFilePath": 
"helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb", "outputFilePath": "platform/cogstack-ai/medcat-service-tutorial.ipynb", - } + }, + { + "sourceFilePath": "helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb", + "outputFilePath": "cogstack-ce/tutorial/medcat-opensearch-e2e.ipynb", + }, ] @@ -46,4 +50,3 @@ def main() -> None: main() - diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb index 8a01f38..845c943 100644 --- a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-opensearch-e2e.ipynb @@ -11,17 +11,22 @@ "\n", "It is an end to end example of how you can use CogStack to unlock the power of your healthcare data.\n", "\n", - "## Who this is for\n", + "## Overview\n", + "\n", + "### Who this is for\n", "This is for developers, data engineers, and analysts who want to see a practical example of how CogStack, MedCAT and Opensearch can be integrated to let you perform advanced search on your notes. \n", "\n", - "## What this notebook does\n", + "### What this notebook does\n", "1. Index one sample note into `discharge`\n", "2. Search that note back using free text\n", "3. Search that note back even when we have typos\n", "4. Perform Named Entity Resolution (NER) by calling MedCAT Service and index them\n", "5. Search notes by concept\n", "\n", - "The goal is to show that this process is straightforward: call one API, index results, and query them." + "The goal is to show that this process is straightforward: call one API, index results, and query them.\n", + "\n", + "### Prerequisites\n", + "The best way to run this notebook interactively is to run the CogStack Community Edition with Helm. Look at https://docs.cogstack.org/ to get started." 
] }, { @@ -29,7 +34,7 @@ "id": "875a64db", "metadata": {}, "source": [ - "# Initialisation: Define the inputs and services\n", + "## Initialisation: Define the inputs and services\n", "\n", "### Input Data\n", "We define a short input for this tutorial. This represents your free text patient data, for example a discharge summary or long doctors note.\n", @@ -465,9 +470,9 @@ "id": "1dc6dbf6", "metadata": {}, "source": [ - "### 4.1) Index MedCAT entities into `discharge_annotations`\n", + "### 4.1) Index MedCAT entities into OpenSearch\n", "\n", - "Here we take each MedCAT entity and store it in OpenSearch.\n", + "Here we take each MedCAT entity and store it in OpenSearch in the `discharge_annotations` index.\n", "\n", "We prefix MedCAT fields with `nlp.` and add `meta.note_id` / `meta.subject_id` so each entity stays linked to its source note." ] @@ -676,14 +681,6 @@ "- **Add supervised learning with MedCAT Trainer (MLOps)** \n", " Set up a training and feedback loop to improve extraction quality over time using MedCAT Trainer (annotation -> train -> evaluate -> redeploy)." 
] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "207e2a36-72b2-4906-a304-4dae0836a5a2", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -707,4 +704,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb index d439f90..da883e9 100644 --- a/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/examples/medcat-service-tutorial.ipynb @@ -10,16 +10,21 @@ "This is a step-by-step walkthrough that shows how to call two CogStack services over HTTP:\n", "MedCAT (entity extraction) and AnonCAT (de-identification).\n", "\n", - "## Who it is for:\n", + "## Overview\n", + "\n", + "### Who it is for:\n", "This is for developers, data engineers, and analysts who want a quick, practical example of how\n", "to integrate MedCAT/AnonCAT into a Python workflow (and later into a notebook-based analysis).\n", "\n", - "## What it will do:\n", + "### What it will do:\n", "1) Define a sample clinical sentence and the service URLs.\n", "2) Extract Entities, by calling the medcat-service API\n", "3) Print the extracted entity annotations from the MedCAT response.\n", "4) Deidentify text by calling the anoncat-service API\n", - "5) Print the de-identified text (and show the full JSON response for inspection).\n" + "5) Print the de-identified text (and show the full JSON response for inspection).\n", + "\n", + "### Prerequisites\n", + "The best way to run this notebook interactively is to run the CogStack Community Edition with Helm. Look at https://docs.cogstack.org/ to get started." ] }, {