diff --git a/.dockerignore b/.dockerignore index 4093c0a..3d422e8 100644 --- a/.dockerignore +++ b/.dockerignore @@ -147,11 +147,4 @@ data/ reports/ # Synthetic data conversations -src/agents/utils/example_inputs/ -src/agents/utils/synthetic_conversations/ -src/agents/utils/synthetic_conversation_generation.py -src/agents/utils/testbench_prompts.py -src/agents/utils/langgraph_viz.py - -# development agents -src/agents/student_agent/ \ No newline at end of file +src/agents/utils/example_inputs/ \ No newline at end of file diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 1191a86..1236ba3 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -50,6 +50,7 @@ jobs: if: always() run: | source .venv/bin/activate + export PYTHONPATH=$PYTHONPATH:. pytest --junit-xml=./reports/pytest.xml --tb=auto -v - name: Upload test results diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8400ca3..1da0493 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -50,6 +50,7 @@ jobs: if: always() run: | source .venv/bin/activate + export PYTHONPATH=$PYTHONPATH:. pytest --junit-xml=./reports/pytest.xml --tb=auto -v - name: Upload test results diff --git a/.gitignore b/.gitignore index 4b52234..11f861b 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml *.py,cover .hypothesis/ .pytest_cache/ +reports/ # Translations *.mo diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..514764f --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,88 @@ +# AGENTS.md + +This file provides guidance to AI agents when working with code in this repository. + +## Project Overview + +This is a boilerplate for creating AI educational chatbots that integrate with the **Lambda-Feedback** educational platform. It deploys as an AWS Lambda function (containerized via Docker) that receives student chat messages with educational context and returns LLM-powered chatbot responses. 
+ +## Commands + +**Testing:** +```bash +pytest # Run all unit tests +python tests/manual_agent_run.py # Test agent locally with example inputs +python tests/manual_agent_requests.py # Test running Docker container +``` + +**Docker:** +```bash +docker build -t llm_chat . +docker run --env-file .env -p 8080:8080 llm_chat +``` + +**Manual API test (while Docker is running):** +```bash +curl -X POST http://localhost:8080/2015-03-31/functions/function/invocations \ + -H 'Content-Type: application/json' \ + -d '{"body":"{\"conversationId\": \"12345Test\", \"messages\": [{\"role\": \"USER\", \"content\": \"hi\"}], \"user\": {\"type\": \"LEARNER\"}}"}' +``` + +**Run a single test:** +```bash +pytest tests/test_module.py # Run specific test file +pytest tests/test_index.py::test_function_name # Run specific test +``` + +## Architecture + +### Request Flow + +``` +Lambda event → index.py (handler) + → validates via lf_toolkit ChatRequest schema + → src/module.py (chat_module) + → extracts muEd API context (messages, conversationId, question context, user type) + → parses educational context to prompt text via src/agent/context.py + → src/agent/agent.py (BaseAgent / LangGraph) + → routes to call_llm or summarize_conversation node + → calls LLM provider (OpenAI / Google / Azure / Ollama) + → returns ChatResponse (output, summary, conversationalStyle, processingTime) +``` + +### Key Files + +| File | Role | +|------|------| +| `index.py` | AWS Lambda entry point; parses event body, validates schema | +| `src/module.py` | Transforms muEd API request → invokes agent → builds ChatResponse | +| `src/agent/agent.py` | LangGraph stateful graph; manages message history and summarization | +| `src/agent/prompts.py` | System prompts for tutor behavior, summarization, style detection | +| `src/agent/llm_factory.py` | Factory classes for each LLM provider (OpenAI, Google, Azure, Ollama) | +| `src/agent/context.py` | Converts muEd question/submission context dicts to LLM prompt text | +| 
`tests/utils.py` | Shared test helpers: `assert_valid_chat_request`, `assert_valid_chat_response` | +| `tests/example_inputs/` | Real muEd payloads used for end-to-end tests | + +### Agent Logic (LangGraph) + +`BaseAgent` maintains a state graph with two nodes: +- **`call_llm`**: Invokes the LLM with system prompt + conversation summary + conversational style preference +- **`summarize_conversation`**: Triggered when message count exceeds ~11; summarizes history and also extracts the student's preferred conversational style + +Messages are trimmed after summarization to keep context window manageable. The `summary` and `conversationalStyle` fields persist across calls via the `ChatRequest` metadata. + +### muEd API Format + +`src/module.py` handles the muEd request format (https://mued.org/). The `context` field in `ChatRequest` contains nested educational data (question parts, student submissions, task info), and the `user` field contains user-specific information (e.g., user type, preferences, task progress); both are parsed into a tutoring prompt via `src/agent/context.py`. + +### LLM Configuration + +LLM provider and model are set via environment variables (see `.env.example`). The `llm_factory.py` selects the provider at runtime. The Lambda function name/identity is set in `config.json`. + +The agent uses **two separate LLM instances** — `self.llm` for chat responses and `self.summarisation_llm` for conversation summarisation and style analysis. By default both use the same provider, but you can point them at different models (e.g. a cheaper model for summarisation) by changing the class in `agent.py`.
+ +## Deployment + +- Pushing to `dev` branch triggers the dev deployment GitHub Actions workflow +- Pushing to `main` triggers staging deployment, with manual approval required for production +- All environment variables (API keys, model names) are injected via GitHub Actions secrets/variables — do not hardcode them diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..9c8ebda --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,88 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is a boilerplate for creating AI educational chatbots that integrate with the **Lambda-Feedback** educational platform. It deploys as an AWS Lambda function (containerized via Docker) that receives student chat messages with educational context and returns LLM-powered chatbot responses. + +## Commands + +**Testing:** +```bash +pytest # Run all unit tests +python tests/manual_agent_run.py # Test agent locally with example inputs +python tests/manual_agent_requests.py # Test running Docker container +``` + +**Docker:** +```bash +docker build -t llm_chat . 
+docker run --env-file .env -p 8080:8080 llm_chat +``` + +**Manual API test (while Docker is running):** +```bash +curl -X POST http://localhost:8080/2015-03-31/functions/function/invocations \ + -H 'Content-Type: application/json' \ + -d '{"body":"{\"conversationId\": \"12345Test\", \"messages\": [{\"role\": \"USER\", \"content\": \"hi\"}], \"user\": {\"type\": \"LEARNER\"}}"}' +``` + +**Run a single test:** +```bash +pytest tests/test_module.py # Run specific test file +pytest tests/test_index.py::test_function_name # Run specific test +``` + +## Architecture + +### Request Flow + +``` +Lambda event → index.py (handler) + → validates via lf_toolkit ChatRequest schema + → src/module.py (chat_module) + → extracts muEd API context (messages, conversationId, question context, user type) + → parses educational context to prompt text via src/agent/context.py + → src/agent/agent.py (BaseAgent / LangGraph) + → routes to call_llm or summarize_conversation node + → calls LLM provider (OpenAI / Google / Azure / Ollama) + → returns ChatResponse (output, summary, conversationalStyle, processingTime) +``` + +### Key Files + +| File | Role | +|------|------| +| `index.py` | AWS Lambda entry point; parses event body, validates schema | +| `src/module.py` | Transforms muEd API request → invokes agent → builds ChatResponse | +| `src/agent/agent.py` | LangGraph stateful graph; manages message history and summarization | +| `src/agent/prompts.py` | System prompts for tutor behavior, summarization, style detection | +| `src/agent/llm_factory.py` | Factory classes for each LLM provider (OpenAI, Google, Azure, Ollama) | +| `src/agent/context.py` | Converts muEd question/submission context dicts to LLM prompt text | +| `tests/utils.py` | Shared test helpers: `assert_valid_chat_request`, `assert_valid_chat_response` | +| `tests/example_inputs/` | Real muEd payloads used for end-to-end tests | + +### Agent Logic (LangGraph) + +`BaseAgent` maintains a state graph with two nodes: +- 
**`call_llm`**: Invokes the LLM with system prompt + conversation summary + conversational style preference +- **`summarize_conversation`**: Triggered when message count exceeds ~11; summarizes history and also extracts the student's preferred conversational style + +Messages are trimmed after summarization to keep context window manageable. The `summary` and `conversationalStyle` fields persist across calls via the `ChatRequest` metadata. + +### muEd API Format + +`src/module.py` handles the muEd request format (https://mued.org/). The `context` field in `ChatRequest` contains nested educational data (question parts, student submissions, task info), and the `user` field contains user-specific information (e.g., user type, preferences, task progress); both are parsed into a tutoring prompt via `src/agent/context.py`. + +### LLM Configuration + +LLM provider and model are set via environment variables (see `.env.example`). The `llm_factory.py` selects the provider at runtime. The Lambda function name/identity is set in `config.json`. + +The agent uses **two separate LLM instances** — `self.llm` for chat responses and `self.summarisation_llm` for conversation summarisation and style analysis. By default both use the same provider, but you can point them at different models (e.g. a cheaper model for summarisation) by changing the class in `agent.py`. + +## Deployment + +- Pushing to `dev` branch triggers the dev deployment GitHub Actions workflow +- Pushing to `main` triggers staging deployment, with manual approval required for production +- All environment variables (API keys, model names) are injected via GitHub Actions secrets/variables — do not hardcode them diff --git a/Dockerfile b/Dockerfile index 9150687..38276cc 100755 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,7 @@ COPY src ./src COPY index.py . -COPY index_test.py .
+COPY tests ./tests # Set the Lambda function handler CMD ["index.handler"] \ No newline at end of file diff --git a/README.md b/README.md index 692e6f3..6c7a2f6 100755 --- a/README.md +++ b/README.md @@ -43,11 +43,11 @@ In GitHub, choose Use this template > Create a new repository in the repository Choose the owner, and pick a name for the new repository. -> [!IMPORTANT] If you want to deploy the evaluation function to Lambda Feedback, make sure to choose the Lambda Feedback organization as the owner. +> [!IMPORTANT] If you want to deploy the chat function to Lambda Feedback, make sure to choose the `Lambda Feedback` organization as the owner. -Set the visibility to Public or Private. +Set the visibility to `Public` or `Private`. -> [!IMPORTANT] If you want to use GitHub deployment protection rules, make sure to set the visibility to Public. +> [!IMPORTANT] If you want to use GitHub deployment protection rules, make sure to set the visibility to `Public`. Click on Create repository. @@ -78,9 +78,11 @@ Also, don't forget to update or delete the Quickstart chapter from the `README.m ## Development -You can create your own invocation to your own agents hosted anywhere. Copy or update the `base_agent` from `src/agents/` and edit it to match your LLM agent requirements. Import the new invocation in the `module.py` file. +You can create your own invocation to your own agents hosted anywhere. Copy or update the `agent.py` from `src/agent/` and edit it to match your LLM agent requirements. Import the new invocation in the `module.py` file. -You agent can be based on an LLM hosted anywhere, you have available currently OpenAI, AzureOpenAI, and Ollama models but you can introduce your own API call in the `src/agents/llm_factory.py`. +Your agent can be based on an LLM hosted anywhere. OpenAI, Google AI, Azure OpenAI, and Ollama are available out of the box via `src/agent/llm_factory.py`, and you can add your own provider there too. 
+ +The agent uses **two separate LLM instances** — `self.llm` for chat responses and `self.summarisation_llm` for conversation summarisation and style analysis. By default both use the same provider, but you can point them at different models (e.g. a cheaper or faster model for summarisation) by changing the class in `agent.py`. ### Prerequisites @@ -90,23 +92,41 @@ You agent can be based on an LLM hosted anywhere, you have available currently O ### Repository Structure ```bash -.github/workflows/ - dev.yml # deploys the DEV function to Lambda Feedback - main.yml # deploys the STAGING function to Lambda Feedback - test-report.yml # gathers Pytest Report of function tests - -docs/ # docs for devs and users - -src/module.py # chat_module function implementation -src/module_test.py # chat_module function tests -src/agents/ # find all agents developed for the chat functionality -src/agents/utils/test_prompts.py # allows testing of any LLM agent on a couple of example inputs containing Lambda Feedback Questions and synthetic student conversations +. 
+├── .github/workflows/ +│ ├── dev.yml # deploys the DEV function to Lambda Feedback +│ ├── main.yml # deploys the STAGING and PROD functions to Lambda Feedback +│ └── test-report.yml # gathers Pytest Report of function tests +├── docs/ # docs for devs and users +├── src/ +│ ├── agent/ +│ │ ├── agent.py # LangGraph stateful agent logic +│ │ ├── context.py # converts muEd context dicts to LLM prompt text +│ │ ├── llm_factory.py # factory classes for each LLM provider +│ │ └── prompts.py # system prompts defining the behaviour of the chatbot +│ └── module.py +└── tests/ # contains all tests for the chat function + ├── example_inputs/ # muEd example payloads for end-to-end tests + ├── manual_agent_requests.py # allows testing of the docker container through API requests + ├── manual_agent_run.py # allows testing of any LLM agent on a couple of example inputs + ├── utils.py # shared test helpers + ├── test_example_inputs.py # pytests for the example input files + ├── test_index.py # pytests + └── test_module.py # pytests ``` ## Testing the Chat Function -To test your function, you can either call the code directly through a python script. Or you can build the respective chat function docker container locally and call it through an API request. Below you can find details on those processes. +To test your function, you can run the unit tests, call the code directly through a python script, or build the respective chat function docker container locally and call it through an API request. Below you can find details on those processes. + +### Run Unit Tests + +You can run the unit tests using `pytest`. + +```bash +pytest +``` ### Run the Chat Script @@ -116,9 +136,9 @@ You can run the Python function itself. Make sure to have a main function in eit python src/module.py ``` -You can also use the `testbench_agents.py` script to test the agents with example inputs from Lambda Feedback questions and synthetic conversations. 
+You can also use the `manual_agent_run.py` script to test the agents with example inputs from Lambda Feedback questions and synthetic conversations. ```bash -python src/agents/utils/testbench_agents.py +python tests/manual_agent_run.py ``` ### Calling the Docker Image Locally @@ -150,13 +170,13 @@ This will start the chat function and expose it on port `8080` and it will be op ```bash curl --location 'http://localhost:8080/2015-03-31/functions/function/invocations' \ --header 'Content-Type: application/json' \ ---data '{"body":"{\"message\": \"hi\", \"params\": {\"conversation_id\": \"12345Test\", \"conversation_history\": [{\"type\": \"user\", +--data '{"body":"{\"conversationId\": \"12345Test\", \"messages\": [{\"role\": \"USER\", \"content\": \"hi\"}], \"user\": {\"type\": \"LEARNER\"}}"}' ``` #### Call Docker Container ##### A. Call Docker with Python Requests -In the `src/agents/utils` folder you can find the `requests_testscript.py` script that calls the POST URL of the running docker container. It reads any kind of input files with the expected schema. You can use this to test your curl calls of the chatbot. +In the `tests/` folder you can find the `manual_agent_requests.py` script that calls the POST URL of the running docker container. It reads input files that follow the expected schema. You can use this to test your curl calls of the chatbot. ##### B.
Call Docker Container through API request @@ -169,22 +189,98 @@ http://localhost:8080/2015-03-31/functions/function/invocations Body (stringified within body for API request): ```JSON -{"body":"{\"message\": \"hi\", \"params\": {\"conversation_id\": \"12345Test\", \"conversation_history\": [{\"type\": \"user\", \"content\": \"hi\"}]}}"} +{"body":"{\"conversationId\": \"12345Test\", \"messages\": [{\"role\": \"USER\", \"content\": \"hi\"}], \"user\": {\"type\": \"LEARNER\"}}"} ``` -Body with optional Params: -```JSON +Body with optional fields: +```json { - "message":"hi", - "params":{ - "conversation_id":"12345Test", - "conversation_history":[{"type":"user","content":"hi"}], - "summary":" ", - "conversational_style":" ", - "question_response_details": "", - "include_test_data": true, - "agent_type": {agent_name} + "conversationId": "", + "messages": [ + { "role": "USER", "content": "" }, + { "role": "ASSISTANT", "content": "" }, + { "role": "USER", "content": "" } + ], + "user": { + "type": "LEARNER", + "preference": { + "conversationalStyle": "" + }, + "taskProgress": { + "timeSpentOnQuestion": "30 minutes", + "accessStatus": "a good amount of time spent on this question today.", + "markedDone": "This question is still being worked on.", + "currentPart": { + "position": 0, + "timeSpentOnPart": "10 minutes", + "markedDone": "This part is not marked done.", + "responseAreas": [ + { + "responseType": "EXPRESSION", + "totalSubmissions": 3, + "wrongSubmissions": 2, + "latestSubmission": { + "submission": "", + "feedback": "", + "answer": "" + } + } + ] + } + } + }, + "context": { + "summary": "", + "set": { + "title": "Fundamentals", + "number": 2, + "description": "" + }, + "question": { + "title": "Understanding Polymorphism", + "number": 3, + "guidance": "", + "content": "", + "estimatedTime": "15-25 minutes", + "parts": [ + { + "position": 0, + "content": "", + "answerContent": "", + "workedSolutionSections": [ + { "position": 0, "title": "Step 1", "content": "..." 
} + ], + "structuredTutorialSections": [ + { "position": 0, "title": "Hint", "content": "..." } + ], + "responseAreas": [ + { + "position": 0, + "responseType": "EXPRESSION", + "answer": "", + "preResponseText": "