From f04ed541b1cd502c08655d49442e2bbf8b1c84c1 Mon Sep 17 00:00:00 2001
From: "g. nicholas d'andrea" <nick@gnidan.org>
Date: Mon, 9 Mar 2026 16:22:44 -0400
Subject: [PATCH 1/2] Add calldata, returndata, code, and transient storage
 support to EVM codegen

The read/write instruction handlers in storage.ts previously only
supported storage and memory locations. This adds full support for:

- calldata reads via CALLDATALOAD (with shift+mask for partial reads)
- returndata reads via RETURNDATACOPY to scratch memory + MLOAD
- code reads via CODECOPY to scratch memory + MLOAD
- transient storage reads via TLOAD
- transient storage writes via TSTORE

Includes tests covering all new read/write paths.
---
 .../generation/instructions/storage.test.ts   | 206 ++++++++++
 .../evmgen/generation/instructions/storage.ts | 374 ++++++++++++------
 2 files changed, 467 insertions(+), 113 deletions(-)
 create mode 100644 packages/bugc/src/evmgen/generation/instructions/storage.test.ts

diff --git a/packages/bugc/src/evmgen/generation/instructions/storage.test.ts b/packages/bugc/src/evmgen/generation/instructions/storage.test.ts
new file mode 100644
index 000000000..7ccf72cfb
--- /dev/null
+++ b/packages/bugc/src/evmgen/generation/instructions/storage.test.ts
@@ -0,0 +1,206 @@
+import { describe, it, expect } from "vitest";
+
+import * as Ir from "#ir";
+import { Memory, Layout } from "#evmgen/analysis";
+
+import { generate } from "../function.js";
+
+/**
+ * Helper to generate bytecode for a function with given
+ * instructions and return the mnemonic sequence.
+ */
+function mnemonicsFor(
+  instructions: Ir.Instruction[],
+  allocations: Record<string, { offset: number; size: number }> = {},
+): string[] {
+  const func: Ir.Function = {
+    name: "test",
+    parameters: [],
+    entry: "entry",
+    blocks: new Map([
+      [
+        "entry",
+        {
+          id: "entry",
+          phis: [],
+          instructions,
+          terminator: { kind: "return", operationDebug: {} },
+          predecessors: new Set(),
+          debug: {},
+        } as Ir.Block,
+      ],
+    ]),
+  };
+
+  const memory: Memory.Function.Info = {
+    allocations,
+    nextStaticOffset: 0x80,
+  };
+
+  const layout: Layout.Function.Info = {
+    order: ["entry"],
+    offsets: new Map(),
+  };
+
+  const { instructions: evmInstructions } = generate(func, memory, layout);
+
+  return evmInstructions.map((i) => i.mnemonic);
+}
+
+describe("generateRead", () => {
+  describe("calldata reads", () => {
+    it("should use CALLDATALOAD for full 32-byte read", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "read",
+          location: "calldata",
+          offset: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          length: {
+            kind: "const",
+            value: 32n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          type: Ir.Type.Scalar.uint256,
+          dest: "%1",
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("CALLDATALOAD");
+      expect(mnemonics).not.toContain("CALLDATACOPY");
+    });
+
+    it("should use CALLDATALOAD + shift/mask for partial read", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "read",
+          location: "calldata",
+          offset: {
+            kind: "const",
+            value: 4n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          length: {
+            kind: "const",
+            value: 20n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          type: Ir.Type.Scalar.address,
+          dest: "%1",
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("CALLDATALOAD");
+      expect(mnemonics).toContain("SHR");
+      expect(mnemonics).toContain("AND");
+    });
+  });
+
+  describe("returndata reads", () => {
+    it("should use RETURNDATACOPY + MLOAD", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "read",
+          location: "returndata",
+          offset: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          length: {
+            kind: "const",
+            value: 32n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          type: Ir.Type.Scalar.uint256,
+          dest: "%1",
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("RETURNDATACOPY");
+      expect(mnemonics).toContain("MLOAD");
+      // Should zero scratch memory first
+      expect(mnemonics).toContain("MSTORE");
+    });
+  });
+
+  describe("code reads", () => {
+    it("should use CODECOPY + MLOAD", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "read",
+          location: "code",
+          offset: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          length: {
+            kind: "const",
+            value: 32n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          type: Ir.Type.Scalar.uint256,
+          dest: "%1",
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("CODECOPY");
+      expect(mnemonics).toContain("MLOAD");
+    });
+  });
+
+  describe("transient storage reads", () => {
+    it("should use TLOAD", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "read",
+          location: "transient",
+          slot: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          type: Ir.Type.Scalar.uint256,
+          dest: "%1",
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("TLOAD");
+    });
+  });
+});
+
+describe("generateWrite", () => {
+  describe("transient storage writes", () => {
+    it("should use TSTORE", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "write",
+          location: "transient",
+          slot: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          value: {
+            kind: "const",
+            value: 42n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("TSTORE");
+    });
+  });
+});
diff --git a/packages/bugc/src/evmgen/generation/instructions/storage.ts b/packages/bugc/src/evmgen/generation/instructions/storage.ts
index 49ad7b15c..a7dcb3914 100644
--- a/packages/bugc/src/evmgen/generation/instructions/storage.ts
+++ b/packages/bugc/src/evmgen/generation/instructions/storage.ts
@@ -19,8 +19,17 @@ const {
   NOT,
   SUB,
   DUP1,
+  CALLDATALOAD,
+  RETURNDATACOPY,
+  CODECOPY,
+  TLOAD,
+  TSTORE,
 } = operations;
 
+// Scratch memory address for copy-based reads (returndata, code).
+// Uses the "zero slot" at 0x60, which is safe for temporary use.
+const SCRATCH_OFFSET = 0x60n;
+
 /**
  * Generate code for the new unified read instruction
  */
@@ -31,48 +40,16 @@ export function generateRead<S extends Stack>(
 
   // Handle storage reads
   if (inst.location === "storage" && inst.slot) {
-    const offset = inst.offset?.kind === "const" ? inst.offset.value : 0n;
-    const length = inst.length?.kind === "const" ? inst.length.value : 32n;
-
-    if (offset === 0n && length === 32n) {
-      // Full slot read - simple SLOAD
-      return pipe<S>()
-        .then(loadValue(inst.slot, { debug }), { as: "key" })
-        .then(SLOAD({ debug }), { as: "value" })
-        .then(storeValueIfNeeded(inst.dest, { debug }))
-        .done();
-    } else {
-      // Partial read - need to extract specific bytes
-      return (
-        pipe<S>()
-          .then(loadValue(inst.slot, { debug }), { as: "key" })
-          .then(SLOAD({ debug }), { as: "value" })
-
-          // Shift right to move desired bytes to the right (low) end
-          // We shift by (32 - offset - length) * 8 bits
-          .then(
-            PUSHn((32n - BigInt(offset) - BigInt(length)) * 8n, { debug }),
-            {
-              as: "shift",
-            },
-          )
-          .then(SHR({ debug }), { as: "shiftedValue" })
-          .then(PUSHn(1n, { debug }), { as: "b" })
-
-          // Mask to keep only the desired length
-          // mask = (1 << (length * 8)) - 1
-          .then(PUSHn(1n, { debug }), { as: "value" })
-          .then(PUSHn(BigInt(length) * 8n, { debug }), { as: "shift" })
-          .then(SHL({ debug }), { as: "a" }) // (1 << (length * 8))
-          .then(SUB({ debug }), { as: "mask" }) // ((1 << (length * 8)) - 1)
-          .then(rebrand<"mask", "a", "shiftedValue", "b">({ 1: "a", 2: "b" }))
-
-          // Apply mask: shiftedValue & mask
-          .then(AND({ debug }), { as: "value" })
-          .then(storeValueIfNeeded(inst.dest, { debug }))
-          .done()
-      );
-    }
+    return generateStorageRead(inst, debug);
+  }
+
+  // Handle transient storage reads
+  if (inst.location === "transient" && inst.slot) {
+    return pipe<S>()
+      .then(loadValue(inst.slot, { debug }), { as: "key" })
+      .then(TLOAD({ debug }), { as: "value" })
+      .then(storeValueIfNeeded(inst.dest, { debug }))
+      .done();
   }
 
   // Handle memory reads
@@ -84,11 +61,161 @@ export function generateRead<S extends Stack>(
       .done();
   }
 
-  // TODO: Handle other locations (calldata, returndata)
-  // For unsupported locations, push a dummy value to maintain stack typing
+  // Handle calldata reads
+  if (inst.location === "calldata" && inst.offset) {
+    return generateCalldataRead(inst, debug);
+  }
+
+  // Handle returndata reads (copy to scratch memory, then MLOAD)
+  if (inst.location === "returndata" && inst.offset) {
+    return generateCopyBasedRead(inst, debug, RETURNDATACOPY);
+  }
+
+  // Handle code reads (copy to scratch memory, then MLOAD)
+  if (inst.location === "code" && inst.offset) {
+    return generateCopyBasedRead(inst, debug, CODECOPY);
+  }
+
+  // Unsupported location — push zero to maintain stack typing
   return pipe<S>().then(PUSHn(0n, { debug }), { as: "value" }).done();
 }
 
+/**
+ * Storage read: SLOAD with optional partial-slot extraction
+ */
+function generateStorageRead<S extends Stack>(
+  inst: Ir.Instruction.Read,
+  debug: Ir.Instruction.Debug,
+): Transition<S, readonly ["value", ...S]> {
+  const offset = inst.offset?.kind === "const" ? inst.offset.value : 0n;
+  const length = inst.length?.kind === "const" ? inst.length.value : 32n;
+
+  if (offset === 0n && length === 32n) {
+    // Full slot read - simple SLOAD
+    return pipe<S>()
+      .then(loadValue(inst.slot!, { debug }), { as: "key" })
+      .then(SLOAD({ debug }), { as: "value" })
+      .then(storeValueIfNeeded(inst.dest, { debug }))
+      .done();
+  }
+
+  // Partial read - extract specific bytes via shift+mask
+  return (
+    pipe<S>()
+      .then(loadValue(inst.slot!, { debug }), { as: "key" })
+      .then(SLOAD({ debug }), { as: "value" })
+
+      // Shift right by (32 - offset - length) * 8 bits
+      .then(PUSHn((32n - BigInt(offset) - BigInt(length)) * 8n, { debug }), {
+        as: "shift",
+      })
+      .then(SHR({ debug }), { as: "shiftedValue" })
+      .then(PUSHn(1n, { debug }), { as: "b" })
+
+      // mask = (1 << (length * 8)) - 1
+      .then(PUSHn(1n, { debug }), { as: "value" })
+      .then(PUSHn(BigInt(length) * 8n, { debug }), { as: "shift" })
+      .then(SHL({ debug }), { as: "a" })
+      .then(SUB({ debug }), { as: "mask" })
+      .then(
+        rebrand<"mask", "a", "shiftedValue", "b">({
+          1: "a",
+          2: "b",
+        }),
+      )
+
+      // shiftedValue & mask
+      .then(AND({ debug }), { as: "value" })
+      .then(storeValueIfNeeded(inst.dest, { debug }))
+      .done()
+  );
+}
+
+/**
+ * Calldata read: CALLDATALOAD reads 32 bytes at a given offset.
+ * For partial reads, shift+mask to extract the desired bytes.
+ */
+function generateCalldataRead<S extends Stack>(
+  inst: Ir.Instruction.Read,
+  debug: Ir.Instruction.Debug,
+): Transition<S, readonly ["value", ...S]> {
+  const length = inst.length?.kind === "const" ? inst.length.value : 32n;
+
+  if (length === 32n) {
+    // Full 32-byte read
+    return pipe<S>()
+      .then(loadValue(inst.offset!, { debug }), { as: "i" })
+      .then(CALLDATALOAD({ debug }), { as: "value" })
+      .then(storeValueIfNeeded(inst.dest, { debug }))
+      .done();
+  }
+
+  // Partial read: CALLDATALOAD returns 32 bytes left-aligned,
+  // so shift right by (32 - length) * 8 bits to right-align,
+  // then mask.
+  return (
+    pipe<S>()
+      .then(loadValue(inst.offset!, { debug }), { as: "i" })
+      .then(CALLDATALOAD({ debug }), { as: "value" })
+      .then(PUSHn((32n - BigInt(length)) * 8n, { debug }), { as: "shift" })
+      .then(SHR({ debug }), { as: "shiftedValue" })
+      .then(PUSHn(1n, { debug }), { as: "b" })
+
+      // mask = (1 << (length * 8)) - 1
+      .then(PUSHn(1n, { debug }), { as: "value" })
+      .then(PUSHn(BigInt(length) * 8n, { debug }), { as: "shift" })
+      .then(SHL({ debug }), { as: "a" })
+      .then(SUB({ debug }), { as: "mask" })
+      .then(
+        rebrand<"mask", "a", "shiftedValue", "b">({
+          1: "a",
+          2: "b",
+        }),
+      )
+
+      .then(AND({ debug }), { as: "value" })
+      .then(storeValueIfNeeded(inst.dest, { debug }))
+      .done()
+  );
+}
+
+/**
+ * Copy-based read for returndata and code locations.
+ * Uses RETURNDATACOPY or CODECOPY to copy data to scratch
+ * memory at 0x60, then MLOAD to read.
+ *
+ * Stack effect: copies `length` bytes from `offset` in the
+ * source to memory[0x60], then loads the 32-byte word.
+ */
+function generateCopyBasedRead<S extends Stack>(
+  inst: Ir.Instruction.Read,
+  debug: Ir.Instruction.Debug,
+  copyOp: typeof RETURNDATACOPY | typeof CODECOPY,
+): Transition<S, readonly ["value", ...S]> {
+  const length = inst.length?.kind === "const" ? inst.length.value : 32n;
+
+  // Clear scratch memory first so partial copies are zero-padded
+  return (
+    pipe<S>()
+      // Zero out scratch: MSTORE(0x60, 0)
+      .then(PUSHn(0n, { debug }), { as: "value" })
+      .then(PUSHn(SCRATCH_OFFSET, { debug }), { as: "offset" })
+      .then(MSTORE({ debug }))
+
+      // COPY(destOffset=0x60, offset, size=length)
+      .then(PUSHn(BigInt(length), { debug }), { as: "size" })
+      .then(loadValue(inst.offset!, { debug }), { as: "offset" })
+      .then(PUSHn(SCRATCH_OFFSET, { debug }), { as: "destOffset" })
+      .then(copyOp({ debug }))
+
+      // MLOAD from scratch
+      .then(PUSHn(SCRATCH_OFFSET, { debug }), { as: "offset" })
+      .then(MLOAD({ debug }), { as: "value" })
+      .then(storeValueIfNeeded(inst.dest, { debug }))
+      .done()
+  );
+}
+
 /**
  * Generate code for the new unified write instruction
  */
@@ -99,74 +226,16 @@ export function generateWrite<S extends Stack>(
 
   // Handle storage writes
   if (inst.location === "storage" && inst.slot && inst.value) {
-    // Check if this is a partial write (offset != 0 or length != 32)
-    const offset = inst.offset?.kind === "const" ? inst.offset.value : 0n;
-    const length = inst.length?.kind === "const" ? inst.length.value : 32n;
-
-    if (offset === 0n && length === 32n) {
-      // Full slot write - simple SSTORE
-      return pipe<S>()
-        .then(loadValue(inst.value, { debug }), { as: "value" })
-        .then(loadValue(inst.slot, { debug }), { as: "key" })
-        .then(SSTORE({ debug }))
-        .done();
-    } else {
-      // Partial write - need to do read-modify-write with masking
-      return (
-        pipe<S>()
-          // Load the slot key and duplicate for later SSTORE
-          .then(loadValue(inst.slot, { debug }), { as: "key" })
-          .then(DUP1({ debug }))
-
-          // Load current value from storage
-          .then(SLOAD({ debug }), { as: "current" })
-
-          // Create mask to clear the bits we're updating
-          // First create: (1 << (length * 8)) - 1
-          .then(PUSHn(1n, { debug }), { as: "b" })
-          .then(PUSHn(1n, { debug }), { as: "value" })
-          .then(PUSHn(BigInt(length) * 8n, { debug }), { as: "shift" })
-          .then(SHL({ debug }), { as: "a" }) // (1 << (length * 8))
-          .then(SUB({ debug }), { as: "lengthMask" }) // ((1 << (length * 8)) - 1)
-
-          // Then shift it left by offset: ((1 << (length * 8)) - 1) << (offset * 8)
-          .then(PUSHn(BigInt(offset) * 8n, { debug }), { as: "bitOffset" })
-          .then(
-            rebrand<"bitOffset", "shift", "lengthMask", "value">({
-              1: "shift",
-              2: "value",
-            }),
-          )
-          .then(SHL({ debug }), { as: "a" })
-
-          // Invert to get clear mask: ~(((1 << (length * 8)) - 1) << (offset * 8))
-          .then(NOT({ debug }), { as: "clearMask" })
-          .then(rebrand<"clearMask", "a", "current", "b">({ 1: "a", 2: "b" }))
-
-          // Clear the bits in the current value: current & clearMask
-          .then(AND({ debug }), { as: "clearedCurrent" })
-
-          // Prepare the new value (shift to correct position)
-          .then(loadValue(inst.value, { debug }), { as: "value" })
-          .then(PUSHn(BigInt(offset) * 8n, { debug }), { as: "shift" })
-          .then(SHL({ debug }), { as: "shiftedValue" })
-
-          .then(
-            rebrand<"shiftedValue", "a", "clearedCurrent", "b">({
-              1: "a",
-              2: "b",
-            }),
-          )
-
-          // Combine: clearedCurrent | shiftedValue
-          .then(OR({ debug }), { as: "value" })
-          .then(SWAP1({ debug }))
-
-          // Store the result (key is already on stack from DUP1)
-          .then(SSTORE({ debug }))
-          .done()
-      );
-    }
+    return generateStorageWrite(inst, debug);
+  }
+
+  // Handle transient storage writes
+  if (inst.location === "transient" && inst.slot && inst.value) {
+    return pipe<S>()
+      .then(loadValue(inst.value, { debug }), { as: "value" })
+      .then(loadValue(inst.slot, { debug }), { as: "key" })
+      .then(TSTORE({ debug }))
+      .done();
   }
 
   // Handle memory writes
@@ -178,6 +247,85 @@ export function generateWrite<S extends Stack>(
       .done();
   }
 
-  // TODO: Handle other locations
+  // Other locations (local, etc.) - no-op
   return (state) => state;
 }
+
+/**
+ * Storage write: SSTORE with optional partial-slot masking
+ */
+function generateStorageWrite<S extends Stack>(
+  inst: Ir.Instruction.Write,
+  debug: Ir.Instruction.Debug,
+): Transition<S, S> {
+  const offset = inst.offset?.kind === "const" ? inst.offset.value : 0n;
+  const length = inst.length?.kind === "const" ? inst.length.value : 32n;
+
+  if (offset === 0n && length === 32n) {
+    // Full slot write - simple SSTORE
+    return pipe<S>()
+      .then(loadValue(inst.value!, { debug }), { as: "value" })
+      .then(loadValue(inst.slot!, { debug }), { as: "key" })
+      .then(SSTORE({ debug }))
+      .done();
+  }
+
+  // Partial write - read-modify-write with masking
+  return (
+    pipe<S>()
+      .then(loadValue(inst.slot!, { debug }), { as: "key" })
+      .then(DUP1({ debug }))
+
+      .then(SLOAD({ debug }), { as: "current" })
+
+      // (1 << (length * 8)) - 1
+      .then(PUSHn(1n, { debug }), { as: "b" })
+      .then(PUSHn(1n, { debug }), { as: "value" })
+      .then(PUSHn(BigInt(length) * 8n, { debug }), { as: "shift" })
+      .then(SHL({ debug }), { as: "a" })
+      .then(SUB({ debug }), { as: "lengthMask" })
+
+      // Shift mask to offset position
+      .then(PUSHn(BigInt(offset) * 8n, { debug }), {
+        as: "bitOffset",
+      })
+      .then(
+        rebrand<"bitOffset", "shift", "lengthMask", "value">({
+          1: "shift",
+          2: "value",
+        }),
+      )
+      .then(SHL({ debug }), { as: "a" })
+
+      // Invert for clear mask
+      .then(NOT({ debug }), { as: "clearMask" })
+      .then(
+        rebrand<"clearMask", "a", "current", "b">({
+          1: "a",
+          2: "b",
+        }),
+      )
+
+      // current & clearMask
+      .then(AND({ debug }), { as: "clearedCurrent" })
+
+      // Prepare new value at offset
+      .then(loadValue(inst.value!, { debug }), { as: "value" })
+      .then(PUSHn(BigInt(offset) * 8n, { debug }), { as: "shift" })
+      .then(SHL({ debug }), { as: "shiftedValue" })
+
+      .then(
+        rebrand<"shiftedValue", "a", "clearedCurrent", "b">({
+          1: "a",
+          2: "b",
+        }),
+      )
+
+      // clearedCurrent | shiftedValue
+      .then(OR({ debug }), { as: "value" })
+      .then(SWAP1({ debug }))
+
+      .then(SSTORE({ debug }))
+      .done()
+  );
+}

From 60925f944c8e180f5dbd7ef0abe0418100fbea0c Mon Sep 17 00:00:00 2001
From: "g. nicholas d'andrea" <nick@gnidan.org>
Date: Mon, 9 Mar 2026 16:34:01 -0400
Subject: [PATCH 2/2] Fix partial reads for returndata, code, and transient
 storage

Copy-based reads (returndata, code) returned left-aligned values
from MLOAD without right-aligning for partial reads (length < 32).
Add shift+mask path matching calldata's partial read handling.

Transient storage reads used TLOAD without offset/length handling.
Add the same partial-slot extraction as regular storage reads.
---
 .../generation/instructions/storage.test.ts   |  95 ++++++++++++-
 .../evmgen/generation/instructions/storage.ts | 125 ++++++++++++++++--
 2 files changed, 207 insertions(+), 13 deletions(-)

diff --git a/packages/bugc/src/evmgen/generation/instructions/storage.test.ts b/packages/bugc/src/evmgen/generation/instructions/storage.test.ts
index 7ccf72cfb..758f9cd76 100644
--- a/packages/bugc/src/evmgen/generation/instructions/storage.test.ts
+++ b/packages/bugc/src/evmgen/generation/instructions/storage.test.ts
@@ -102,7 +102,7 @@ describe("generateRead", () => {
   });
 
   describe("returndata reads", () => {
-    it("should use RETURNDATACOPY + MLOAD", () => {
+    it("should use RETURNDATACOPY + MLOAD for full read", () => {
       const mnemonics = mnemonicsFor([
         {
           kind: "read",
@@ -127,11 +127,40 @@ describe("generateRead", () => {
       expect(mnemonics).toContain("MLOAD");
       // Should zero scratch memory first
       expect(mnemonics).toContain("MSTORE");
+      // Full read — no shift/mask needed
+      expect(mnemonics).not.toContain("SHR");
+    });
+
+    it("should shift+mask for partial returndata read", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "read",
+          location: "returndata",
+          offset: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          length: {
+            kind: "const",
+            value: 20n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          type: Ir.Type.Scalar.address,
+          dest: "%1",
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("RETURNDATACOPY");
+      expect(mnemonics).toContain("MLOAD");
+      expect(mnemonics).toContain("SHR");
+      expect(mnemonics).toContain("AND");
     });
   });
 
   describe("code reads", () => {
-    it("should use CODECOPY + MLOAD", () => {
+    it("should use CODECOPY + MLOAD for full read", () => {
       const mnemonics = mnemonicsFor([
         {
           kind: "read",
@@ -154,11 +183,59 @@ describe("generateRead", () => {
 
       expect(mnemonics).toContain("CODECOPY");
       expect(mnemonics).toContain("MLOAD");
+      expect(mnemonics).not.toContain("SHR");
+    });
+
+    it("should shift+mask for partial code read", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "read",
+          location: "code",
+          offset: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          length: {
+            kind: "const",
+            value: 4n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          type: Ir.Type.Scalar.uint256,
+          dest: "%1",
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("CODECOPY");
+      expect(mnemonics).toContain("MLOAD");
+      expect(mnemonics).toContain("SHR");
+      expect(mnemonics).toContain("AND");
     });
   });
 
   describe("transient storage reads", () => {
-    it("should use TLOAD", () => {
+    it("should use TLOAD for full read", () => {
+      const mnemonics = mnemonicsFor([
+        {
+          kind: "read",
+          location: "transient",
+          slot: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          type: Ir.Type.Scalar.uint256,
+          dest: "%1",
+          operationDebug: {},
+        },
+      ]);
+
+      expect(mnemonics).toContain("TLOAD");
+      expect(mnemonics).not.toContain("SHR");
+    });
+
+    it("should shift+mask for partial transient read", () => {
       const mnemonics = mnemonicsFor([
         {
           kind: "read",
@@ -168,6 +245,16 @@ describe("generateRead", () => {
             value: 0n,
             type: Ir.Type.Scalar.uint256,
           },
+          offset: {
+            kind: "const",
+            value: 0n,
+            type: Ir.Type.Scalar.uint256,
+          },
+          length: {
+            kind: "const",
+            value: 1n,
+            type: Ir.Type.Scalar.uint256,
+          },
           type: Ir.Type.Scalar.uint256,
           dest: "%1",
           operationDebug: {},
@@ -175,6 +262,8 @@ describe("generateRead", () => {
       ]);
 
       expect(mnemonics).toContain("TLOAD");
+      expect(mnemonics).toContain("SHR");
+      expect(mnemonics).toContain("AND");
     });
   });
 });
diff --git a/packages/bugc/src/evmgen/generation/instructions/storage.ts b/packages/bugc/src/evmgen/generation/instructions/storage.ts
index a7dcb3914..c49ce52a6 100644
--- a/packages/bugc/src/evmgen/generation/instructions/storage.ts
+++ b/packages/bugc/src/evmgen/generation/instructions/storage.ts
@@ -45,11 +45,7 @@ export function generateRead<S extends Stack>(
 
   // Handle transient storage reads
   if (inst.location === "transient" && inst.slot) {
-    return pipe<S>()
-      .then(loadValue(inst.slot, { debug }), { as: "key" })
-      .then(TLOAD({ debug }), { as: "value" })
-      .then(storeValueIfNeeded(inst.dest, { debug }))
-      .done();
+    return generateTransientRead(inst, debug);
   }
 
   // Handle memory reads
@@ -131,6 +127,57 @@ function generateStorageRead<S extends Stack>(
   );
 }
 
+/**
+ * Transient storage read: TLOAD with optional partial extraction.
+ * Same shift+mask logic as regular storage reads.
+ */
+function generateTransientRead<S extends Stack>(
+  inst: Ir.Instruction.Read,
+  debug: Ir.Instruction.Debug,
+): Transition<S, readonly ["value", ...S]> {
+  const offset = inst.offset?.kind === "const" ? inst.offset.value : 0n;
+  const length = inst.length?.kind === "const" ? inst.length.value : 32n;
+
+  if (offset === 0n && length === 32n) {
+    return pipe<S>()
+      .then(loadValue(inst.slot!, { debug }), { as: "key" })
+      .then(TLOAD({ debug }), { as: "value" })
+      .then(storeValueIfNeeded(inst.dest, { debug }))
+      .done();
+  }
+
+  // Partial read - same shift+mask as storage
+  return (
+    pipe<S>()
+      .then(loadValue(inst.slot!, { debug }), { as: "key" })
+      .then(TLOAD({ debug }), { as: "value" })
+
+      .then(PUSHn((32n - BigInt(offset) - BigInt(length)) * 8n, { debug }), {
+        as: "shift",
+      })
+      .then(SHR({ debug }), { as: "shiftedValue" })
+      .then(PUSHn(1n, { debug }), { as: "b" })
+
+      // mask = (1 << (length * 8)) - 1
+      .then(PUSHn(1n, { debug }), { as: "value" })
+      .then(PUSHn(BigInt(length) * 8n, { debug }), {
+        as: "shift",
+      })
+      .then(SHL({ debug }), { as: "a" })
+      .then(SUB({ debug }), { as: "mask" })
+      .then(
+        rebrand<"mask", "a", "shiftedValue", "b">({
+          1: "a",
+          2: "b",
+        }),
+      )
+
+      .then(AND({ debug }), { as: "value" })
+      .then(storeValueIfNeeded(inst.dest, { debug }))
+      .done()
+  );
+}
+
 /**
  * Calldata read: CALLDATALOAD reads 32 bytes at a given offset.
  * For partial reads, shift+mask to extract the desired bytes.
@@ -194,7 +241,38 @@ function generateCopyBasedRead<S extends Stack>(
 ): Transition<S, readonly ["value", ...S]> {
   const length = inst.length?.kind === "const" ? inst.length.value : 32n;
 
-  // Clear scratch memory first so partial copies are zero-padded
+  if (length === 32n) {
+    // Full 32-byte read — copy and load directly
+    return (
+      pipe<S>()
+        // Zero out scratch: MSTORE(0x60, 0)
+        .then(PUSHn(0n, { debug }), { as: "value" })
+        .then(PUSHn(SCRATCH_OFFSET, { debug }), { as: "offset" })
+        .then(MSTORE({ debug }))
+
+        // COPY(destOffset=0x60, offset, size=32)
+        .then(PUSHn(32n, { debug }), { as: "size" })
+        .then(loadValue(inst.offset!, { debug }), {
+          as: "offset",
+        })
+        .then(PUSHn(SCRATCH_OFFSET, { debug }), {
+          as: "destOffset",
+        })
+        .then(copyOp({ debug }))
+
+        // MLOAD from scratch
+        .then(PUSHn(SCRATCH_OFFSET, { debug }), {
+          as: "offset",
+        })
+        .then(MLOAD({ debug }), { as: "value" })
+        .then(storeValueIfNeeded(inst.dest, { debug }))
+        .done()
+    );
+  }
+
+  // Partial read: copy `length` bytes to scratch, MLOAD
+  // returns left-aligned data, shift right to right-align,
+  // then mask.
   return (
     pipe<S>()
       // Zero out scratch: MSTORE(0x60, 0)
@@ -204,13 +282,40 @@ function generateCopyBasedRead<S extends Stack>(
 
       // COPY(destOffset=0x60, offset, size=length)
       .then(PUSHn(BigInt(length), { debug }), { as: "size" })
-      .then(loadValue(inst.offset!, { debug }), { as: "offset" })
-      .then(PUSHn(SCRATCH_OFFSET, { debug }), { as: "destOffset" })
+      .then(loadValue(inst.offset!, { debug }), {
+        as: "offset",
+      })
+      .then(PUSHn(SCRATCH_OFFSET, { debug }), {
+        as: "destOffset",
+      })
       .then(copyOp({ debug }))
 
-      // MLOAD from scratch
-      .then(PUSHn(SCRATCH_OFFSET, { debug }), { as: "offset" })
+      // MLOAD from scratch — value is left-aligned
+      .then(PUSHn(SCRATCH_OFFSET, { debug }), {
+        as: "offset",
+      })
       .then(MLOAD({ debug }), { as: "value" })
+
+      // Shift right to right-align
+      .then(PUSHn((32n - BigInt(length)) * 8n, { debug }), { as: "shift" })
+      .then(SHR({ debug }), { as: "shiftedValue" })
+      .then(PUSHn(1n, { debug }), { as: "b" })
+
+      // mask = (1 << (length * 8)) - 1
+      .then(PUSHn(1n, { debug }), { as: "value" })
+      .then(PUSHn(BigInt(length) * 8n, { debug }), {
+        as: "shift",
+      })
+      .then(SHL({ debug }), { as: "a" })
+      .then(SUB({ debug }), { as: "mask" })
+      .then(
+        rebrand<"mask", "a", "shiftedValue", "b">({
+          1: "a",
+          2: "b",
+        }),
+      )
+
+      .then(AND({ debug }), { as: "value" })
       .then(storeValueIfNeeded(inst.dest, { debug }))
       .done()
   );