/**
 * mcp/skills/diskScan.ts — disk_scan skill
 *
 * Scans a directory and returns the size of every immediate child entry,
 * sorted largest first.  Helps Claude answer "what is using my disk space?"
 *
 * Platform strategy
 * -----------------
 * darwin  `du -sk` over the directory's immediate children — fast, OS-native,
 *         gives recursive dir sizes (also used for every other non-win32 platform)
 * win32   PowerShell via -EncodedCommand — recursive Measure-Object per child
 *
 * Smoke test
 *   npx tsx -r dotenv/config mcp/skills/diskScan.ts [/optional/path]
 */
 
import * as fs       from "fs/promises";
import * as os       from "os";
import * as nodePath from "path";
import { z }         from "zod";
 
import { loggedExec } from "./_shared/platform";
 
// -- Meta ---------------------------------------------------------------------
 
/**
 * Skill descriptor for `disk_scan`.
 *
 * Bundles the skill name, the natural-language description, a set of
 * risk/consent flags, and the zod schema for the single optional `path`
 * argument. The object is frozen with `as const` so every value keeps its
 * literal type.
 *
 * NOTE(review): how the host consumes these fields (registration, consent
 * gating, auditing) is not visible in this file — confirm against the loader.
 */
export const meta = {
  name: "disk_scan",
  description:
    "Scans a directory and returns the size of each immediate child entry " +
    "(files and sub-folders), sorted largest first. " +
    "Use when the user wants to find what is consuming disk space.",
  // Read-only skill: every flag below declares the least invasive setting.
  riskLevel:       "low",
  destructive:     false,
  requiresConsent: false,
  supportsDryRun:  false,
  affectedScope:   ["user"],
  auditRequired:   false,
  // Input schema: a single optional string; `run` falls back to os.homedir()
  // when it is omitted.
  schema: {
    path: z
      .string()
      .optional()
      .describe(
        "Absolute path of the directory to scan. " +
        "Defaults to the user home directory.",
      ),
  },
} as const;
 
// -- Shared helpers -----------------------------------------------------------
 
/**
 * Renders a byte count as a short human-readable string using binary
 * (1024-based) units, e.g. `1536` → `"1.5 KB"`.
 *
 * The unit is chosen by repeatedly dividing by 1024, which is exact in
 * binary floating point, instead of a `log` ratio that can land a hair
 * below an integer at exact powers of 1024.
 *
 * @param bytes Size in bytes. Zero, `NaN` and `±Infinity` render as `"0 B"`;
 *              negative values keep their sign (e.g. `-2048` → `"-2.0 KB"`).
 * @returns The value with one decimal place and a unit from B to TB; values
 *          beyond the TB range stay expressed in TB.
 */
export function formatBytes(bytes: number): string {
  // Non-finite input (e.g. a failed parseInt upstream) must not leak out as
  // "NaN undefined".
  if (!Number.isFinite(bytes) || bytes === 0) return "0 B";

  const units = ["B", "KB", "MB", "GB", "TB"] as const;
  const sign = bytes < 0 ? "-" : "";

  let value = Math.abs(bytes);
  let unit = 0;
  // Stop at the last unit so the index can never run past the table;
  // sub-byte fractions never enter the loop and stay in "B".
  while (value >= 1024 && unit < units.length - 1) {
    value /= 1024;
    unit += 1;
  }

  return `${sign}${value.toFixed(1)} ${units[unit]}`;
}
 
/** One immediate child of the scanned directory, as listed in the scan result. */
interface Entry {
  /** Base name of the child, without any directory component. */
  name:      string;
  /** Full path of the child. */
  path:      string;
  /**
   * Size in bytes. The du and PowerShell scanners report recursive totals
   * for directories; the stat-based fallback reports only the entry's own
   * stat size.
   */
  size:      number;
  /** Human-readable rendering of `size` (e.g. "1.5 GB"). */
  sizeHuman: string;
  /** Entry kind as reported by the platform scanner. */
  type:      "file" | "directory";
}
 
// -- PowerShell helper --------------------------------------------------------
 
/**
 * Runs a PowerShell script through `loggedExec` and returns its trimmed stdout.
 *
 * The script is handed over via `-EncodedCommand` (Base64 of the UTF-16LE
 * text), so its content never has to survive shell quoting.
 *
 * @param script PowerShell source to execute.
 * @param tag    Suffix for the log tag; the tag becomes `disk_scan:<tag>`.
 * @returns Standard output of the command with surrounding whitespace removed.
 */
export async function runPS(script: string, tag = "ps"): Promise<string> {
  const payload = Buffer.from(script, "utf16le").toString("base64");
  const command = `powershell.exe -NoProfile -NonInteractive -EncodedCommand ${payload}`;
  const execOptions = {
    tag: `disk_scan:${tag}`,
    maxBuffer: 20 * 1024 * 1024,
    timeoutMs: 30_000,
  };

  const result = await loggedExec(command, execOptions);
  return result.stdout.trim();
}
 
// -- darwin implementation ----------------------------------------------------
 
/**
 * Fallback scanner: lists the immediate children of `scanPath` and sizes each
 * one with a single `lstat` call (used when du output is empty).
 *
 * Sizes are NOT recursive — a directory contributes only its own stat size.
 * Children that cannot be stat'ed are left out of the result (best effort).
 *
 * @param scanPath Directory whose children are listed.
 * @returns Entries sorted by size, largest first.
 * @throws If `scanPath` itself cannot be read by `readdir`.
 */
async function statChildren(scanPath: string): Promise<Entry[]> {
  const dirents = await fs.readdir(scanPath, { withFileTypes: true });
  const settled = await Promise.allSettled(
    dirents.map(async (dirent): Promise<Entry> => {
      const full = nodePath.join(scanPath, dirent.name);
      // lstat rather than stat, so a symlink is measured as the link itself:
      //  - a dangling link no longer rejects and vanishes from the listing,
      //  - space that lives at the link target is not attributed to this folder,
      //  - the size agrees with `type`, which comes from the (non-following) Dirent.
      const info = await fs.lstat(full);
      return {
        name:      dirent.name,
        path:      full,
        size:      info.size,
        sizeHuman: formatBytes(info.size),
        type:      dirent.isDirectory() ? "directory" : "file",
      };
    }),
  );
  return settled
    .filter((r): r is PromiseFulfilledResult<Entry> => r.status === "fulfilled")
    .map((r) => r.value)
    .sort((a, b) => b.size - a.size);
}
 
/**
 * Sizes every immediate child of `scanPath` with `du -sk` (recursive totals).
 * Despite the name, `run` uses this scanner for every non-win32 platform.
 *
 * Children are enumerated with `find` instead of a shell `*` glob: the glob
 * skips dot-entries (`.cache`, `.Trash`, …), which are often the largest
 * items, and their absence made the child-count comparison in `run` look
 * like a permissions failure. `find … -exec du {} +` also batches arguments
 * itself, so very large directories do not overflow the command line.
 *
 * @param scanPath Absolute path of the directory to scan.
 * @returns Entries sorted by size, largest first. Falls back to
 *          `statChildren` when du produced no output at all.
 */
async function scanDarwin(scanPath: string): Promise<Entry[]> {
  // Security: use single-quoted path to prevent shell injection.
  // Single-quoted strings cannot contain command substitution ($(), ``)
  // or variable expansion. Escape any literal single quotes by ending
  // the string, inserting a backslash-quoted ', then restarting.
  const safePath = scanPath.replace(/'/g, `'\\''`);

  // du exits non-zero when some children are permission-denied — stdout is
  // still useful, so a rejected exec is mined for whatever it printed.
  // stderr is deliberately NOT redirected to /dev/null: per the original
  // design note, loggedExec captures it and detects TCC patterns
  // ("Operation not permitted", EPERM, etc.) so partial results from missing
  // Full Disk Access become visible in idemeum-agent.log.
  let stdout = "";
  try {
    ({ stdout } = await loggedExec(
      // -H: follow scanPath itself if it is a symlink (as the old glob did);
      // -mindepth/-maxdepth 1: immediate children only, dot-entries included.
      `find -H '${safePath}' -mindepth 1 -maxdepth 1 -exec du -sk {} +`,
      { tag: "disk_scan:du", maxBuffer: 20 * 1024 * 1024, timeoutMs: 30_000 },
    ));
  } catch (err) {
    stdout = (err as { stdout?: string }).stdout ?? "";
  }

  if (!stdout.trim()) return statChildren(scanPath);

  // du does not say whether a row is a file or a directory, so look the kinds
  // up once via readdir. Best effort: if the listing fails, rows keep the
  // historical default of "directory".
  const typeByName = new Map<string, Entry["type"]>();
  try {
    const dirents = await fs.readdir(scanPath, { withFileTypes: true });
    for (const dirent of dirents) {
      typeByName.set(dirent.name, dirent.isDirectory() ? "directory" : "file");
    }
  } catch {
    // Typing is cosmetic; the sizes from du are still valid.
  }

  const entries: Entry[] = [];
  for (const line of stdout.split("\n")) {
    // Each row is "<kilobytes>\t<path>". Skip anything else (blank lines,
    // continuation lines from names containing a newline) rather than
    // letting a NaN size poison the sort.
    const tab = line.indexOf("\t");
    if (tab <= 0) continue;
    const kb   = Number.parseInt(line.slice(0, tab), 10);
    const full = line.slice(tab + 1);
    if (!Number.isFinite(kb) || full.length === 0) continue;

    const name = nodePath.basename(full);
    const size = kb * 1024; // du -k reports 1024-byte blocks
    entries.push({
      name,
      path:      full,
      size,
      sizeHuman: formatBytes(size),
      type:      typeByName.get(name) ?? "directory",
    });
  }

  return entries.sort((a, b) => b.size - a.size);
}
 
// -- win32 implementation -----------------------------------------------------
 
/**
 * Sizes every immediate child of `scanPath` on Windows via PowerShell.
 * Directories get the recursive sum of their files' `Length`; files report
 * their own `Length`.
 *
 * NOTE(review): `Get-ChildItem` is called without `-Force`, so hidden and
 * system items (e.g. `AppData`) are neither listed nor counted. Adding
 * `-Force` would surface them but lengthens the scan against the 30 s
 * timeout in `runPS` — decide deliberately.
 *
 * @param scanPath Absolute path of the directory to scan.
 * @returns Entries sorted by size, largest first; empty when PowerShell
 *          printed nothing.
 * @throws SyntaxError if PowerShell's output is not valid JSON, plus whatever
 *         `runPS` rejects with.
 */
async function scanWin32(scanPath: string): Promise<Entry[]> {
  // PowerShell accepts the typographic quotes U+2018/2019/201A/201B as
  // single-quote delimiters too, so a folder such as "John’s Files" would end
  // the literal early (parse error, or worse, injection). Double every one.
  const literalPath = scanPath.replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&");

  // sizeHuman is intentionally not produced here: '{0:N1}' follows the
  // machine's culture ("1,5 GB") and had no TB unit. It is derived from
  // `size` below so both platforms format identically.
  const ps = `
[Console]::OutputEncoding = New-Object System.Text.UTF8Encoding $false
$ErrorActionPreference = 'SilentlyContinue'
$items = Get-ChildItem -LiteralPath '${literalPath}'
$out = foreach ($item in $items) {
  if ($item.PSIsContainer) {
    $bytes = (Get-ChildItem -LiteralPath $item.FullName -Recurse -File |
              Measure-Object -Property Length -Sum).Sum
    if ($null -eq $bytes) { $bytes = 0 }
  } else {
    $bytes = $item.Length
  }
  [PSCustomObject]@{
    name = $item.Name
    path = $item.FullName
    size = [long]$bytes
    type = if ($item.PSIsContainer) { 'directory' } else { 'file' }
  }
}
$out | Sort-Object size -Descending | ConvertTo-Json -Depth 2 -Compress
`.trim();
  // The first script line forces UTF-8 (no BOM) on stdout; otherwise
  // non-ASCII names arrive in the OEM code page and are garbled when decoded
  // as UTF-8.

  const raw = await runPS(ps);
  if (!raw) return [];

  // ConvertTo-Json emits a bare object (not an array) for a single item.
  const parsed: unknown = JSON.parse(raw);
  const rows: unknown[] = Array.isArray(parsed) ? parsed : [parsed];

  const entries: Entry[] = [];
  for (const row of rows) {
    // Validate instead of trusting the shape of external process output.
    if (typeof row !== "object" || row === null) continue;
    const { name, path, size, type } = row as Record<string, unknown>;
    if (typeof name !== "string" || typeof path !== "string") continue;

    const bytes = typeof size === "number" && Number.isFinite(size) ? size : 0;
    entries.push({
      name,
      path,
      size:      bytes,
      sizeHuman: formatBytes(bytes),
      type:      type === "directory" ? "directory" : "file",
    });
  }
  return entries;
}
 
// -- Exported run function ----------------------------------------------------
 
/**
 * Entry point of the `disk_scan` skill: validates the target directory, runs
 * the platform scanner and reports the per-child sizes.
 *
 * @param args.path Directory to scan; defaults to the user's home directory.
 * @returns `scannedPath`, `platform`, `entryCount`, `entries`, and — only when
 *          the scan looks incomplete — a `warning` string.
 * @throws Error when the path lies outside the home directory or is not
 *         accessible.
 */
export async function run({ path: inputPath = os.homedir() }: { path?: string }) {
  const scanPath = nodePath.resolve(inputPath);

  // Security: restrict scanning to within the user home directory.
  // Prevents Claude from being directed to scan /etc, /var, or other
  // system paths that could leak sensitive file names to the LLM context.
  const home = os.homedir();
  const rel  = nodePath.relative(home, scanPath);
  // Compare whole path segments: a plain startsWith("..") would also reject
  // legitimate in-home directories whose name merely begins with two dots
  // (e.g. "~/..cache").
  const outsideHome =
    rel === ".." ||
    rel.startsWith(`..${nodePath.sep}`) ||
    nodePath.isAbsolute(rel); // e.g. a different drive on win32
  if (outsideHome) {
    throw new Error(
      `[disk_scan] Path must be within home directory (${home}): ${scanPath}`,
    );
  }
  // NOTE(review): nodePath.resolve is purely lexical. A symlink that lives
  // inside home but points elsewhere passes this check and its target gets
  // scanned. Tightening this with fs.realpath would also block intentional
  // links (e.g. to an external volume) — decide before changing.

  try {
    await fs.access(scanPath);
  } catch {
    throw new Error(`[disk_scan] Path not accessible: ${scanPath}`);
  }

  const platform = os.platform();
  const entries  = platform === "win32"
    ? await scanWin32(scanPath)
    : await scanDarwin(scanPath);

  // ── Partial-result detection ────────────────────────────────────────────────
  // Compare the entry count we got back from `du`/PowerShell against what
  // fs.readdir reports for the same directory. On macOS a significant
  // shortfall is almost always a TCC denial — without Full Disk Access, du
  // can list a path it cannot recurse into, so children disappear silently.
  // Surface this so the user knows the scan is incomplete instead of trusting
  // partial sizes for cleanup decisions.
  let warning: string | undefined;
  try {
    const expected = (await fs.readdir(scanPath)).filter((n) => n !== ".DS_Store");
    if (expected.length > 0) {
      const skipped = Math.max(0, expected.length - entries.length);
      const ratio   = skipped / expected.length;
      if (ratio > 0.2) {
        // The Full Disk Access instructions only exist on macOS; showing
        // them on Windows or Linux sends the user to a nonexistent setting.
        warning = platform === "darwin"
          ? `Scan results are incomplete: ${skipped} of ${expected.length} children ` +
            `could not be read (likely missing Full Disk Access). ` +
            `Open System Settings → Privacy & Security → Full Disk Access, ` +
            `enable AI Support Agent, then quit and relaunch.`
          : `Scan results are incomplete: ${skipped} of ${expected.length} children ` +
            `could not be read (they may be hidden, protected, or inaccessible ` +
            `to the current user).`;
      }
    }
  } catch {
    // readdir itself failed — main scan likely already failed too; let the
    // empty entries result speak for itself.
  }

  return {
    scannedPath: scanPath,
    platform,
    entryCount:  entries.length,
    entries,
    ...(warning ? { warning } : {}),
  };
}
 
// -- CLI smoke test -----------------------------------------------------------
 
// Runs only when this file is executed directly: scans the path given as the
// first CLI argument (or the default) and prints the ten largest entries.
if (require.main === module) {
  const smokeTest = async (): Promise<void> => {
    try {
      const report = await run({ path: process.argv[2] });
      console.log(`\nScanned: ${report.scannedPath}  (${report.entryCount} entries)\n`);
      for (const entry of report.entries.slice(0, 10)) {
        const label = entry.type === "directory" ? "[DIR]" : "[FILE]";
        console.log(`  ${entry.sizeHuman.padStart(10)}  ${label}  ${entry.name}`);
      }
    } catch (err) {
      // Any failure (scan or printing) ends the process with a non-zero code.
      console.error((err as Error).message);
      process.exit(1);
    }
  };
  void smokeTest();
}