shinpr
diff --git a/‎README.md‎
Lines changed: 4 additions & 0 deletions b/‎README.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 1 addition & 1 deletion b/‎package.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎server.json‎
Lines changed: 9 additions & 2 deletions b/‎server.json‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎src/__tests__/cli/cli-error-shim.ts‎
Lines changed: 27 additions & 0 deletions b/‎src/__tests__/cli/cli-error-shim.ts‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎src/__tests__/cli/common.test.ts‎
Lines changed: 82 additions & 1 deletion b/‎src/__tests__/cli/common.test.ts‎
Lines changed: 82 additions & 1 deletion
diff --git a/‎src/__tests__/cli/delete.test.ts‎
Lines changed: 4 additions & 3 deletions b/‎src/__tests__/cli/delete.test.ts‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/__tests__/cli/ingest.test.ts‎
Lines changed: 8 additions & 4 deletions b/‎src/__tests__/cli/ingest.test.ts‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎src/__tests__/cli/list.test.ts‎
Lines changed: 4 additions & 3 deletions b/‎src/__tests__/cli/list.test.ts‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/__tests__/cli/options.test.ts‎
Lines changed: 38 additions & 0 deletions b/‎src/__tests__/cli/options.test.ts‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎src/__tests__/cli/query.test.ts‎
Lines changed: 5 additions & 3 deletions b/‎src/__tests__/cli/query.test.ts‎
Lines changed: 5 additions & 3 deletions
@@ -383,6 +383,7 @@ The MCP server is configured by environment variables only — pass them through
 | `MAX_FILE_SIZE` | `--max-file-size` | `104857600` (100MB) | Maximum file size in bytes |
 | `CHUNK_MIN_LENGTH` | `--chunk-min-length` | `50` | Minimum chunk length in characters (1–10000) |
 | `RAG_DEVICE` | — | `cpu` | Execution device. Passed straight to ONNX Runtime. See the [Transformers.js device source code](https://github.com/huggingface/transformers.js/blob/main/packages/transformers/src/utils/devices.js) for the live list of supported backend names. If initialization fails, the server throws an error. |
+| `RAG_DTYPE` | — | `fp32` | Embedding quantization dtype. Opt-in and passed straight through; accepts any dtype the chosen model provides (`fp32`, `fp16`, `q8`, `int8`, …). If the model lacks the requested variant, the server throws an error naming the dtypes it does provide. Changing `RAG_DEVICE`/`RAG_DTYPE` changes the embedding space — re-ingest existing data. |
 
 **Model choice tips:**
 - Multilingual docs → e.g., `onnx-community/embeddinggemma-300m-ONNX` (100+ languages)
@@ -607,6 +608,9 @@ Yes, but you must delete your database and re-ingest all documents. Different mo
 **GPU acceleration?**
 Opt-in via `RAG_DEVICE`. Devices are passed straight to ONNX Runtime. GPU support is highly dependent on your system, Node.js version, and the underlying ONNX backend. See the [Transformers.js device source code](https://github.com/huggingface/transformers.js/blob/main/packages/transformers/src/utils/devices.js) for the live list of supported backend names. If the requested device fails to initialize, the server throws an error — set `RAG_DEVICE=cpu` to revert.
 
+**Can I change the embedding precision (dtype)?**
+Opt-in via `RAG_DTYPE` (default `fp32`); accepted values are in the env-var table above. If you request a recognized dtype that the model does not provide, the server throws an error listing the dtypes it does provide; an unrecognized value (e.g., a typo) silently falls back to `fp32`. Changing `RAG_DEVICE`/`RAG_DTYPE` changes the embedding space — delete `DB_PATH` and re-ingest.
+
 **Multi-user support?**
 No. Designed for single-user, local access. Multi-user would require authentication/access control.
 
 
@@ -1,6 +1,6 @@
 {
   "name": "mcp-local-rag",
-  "version": "0.15.0",
+  "version": "0.15.1",
   "description": "Local RAG MCP Server - Easy-to-setup document search with minimal configuration",
   "type": "module",
   "main": "dist/index.js",
 
@@ -8,13 +8,13 @@
     "url": "https://github.com/shinpr/mcp-local-rag",
     "source": "github"
   },
-  "version": "0.15.0",
+  "version": "0.15.1",
   "packages": [
     {
       "registryType": "npm",
       "registryBaseUrl": "https://registry.npmjs.org",
       "identifier": "mcp-local-rag",
-      "version": "0.15.0",
+      "version": "0.15.1",
       "transport": {
         "type": "stdio"
       },
@@ -96,6 +96,13 @@
           "format": "string",
           "isSecret": false
         },
+        {
+          "name": "RAG_DTYPE",
+          "description": "Embedding quantization dtype for the embedder (defaults to fp32). Opt-in and pass-through; accepts any dtype the chosen model provides (fp32, fp16, q8, int8, ...). If the model has no variant for the requested dtype, the server throws an error. Changing this changes the embedding space — re-ingest existing data.",
+          "isRequired": false,
+          "format": "string",
+          "isSecret": false
+        },
         {
           "name": "RAG_HYBRID_WEIGHT",
           "description": "Keyword boost factor for hybrid search (0.0-1.0, defaults to 0.6). 0 means semantic similarity only; higher values increase the keyword-match contribution to the final score.",
 
@@ -0,0 +1,27 @@
+// Faithful test double for `formatCliError` (src/cli/common.ts).
+//
+// CLI subcommand tests mock `../../cli/common.js` wholesale, so they must
+// supply their own `formatCliError`. This shim mirrors the production
+// rendering (full `.cause` chain + stacks, deeper links prefixed
+// `Caused by: `) so failure-path assertions exercise real behavior — the
+// cause chain reaches stderr, exactly as the Contract-Delta CLI row requires —
+// rather than a message-only stub.
+
+export function formatCliErrorShim(error: unknown): string { // renders `error` plus its `.cause` chain as one newline-joined string
+  const err = error instanceof Error ? error : new Error(String(error)) // wrap non-Error throws so they render like any other link
+  const chain: Error[] = [] // collected links, outermost error first
+  const seen = new Set<Error>() // visited links; stops the walk if a cause chain loops back on itself
+  let current: Error | undefined = err
+  while (current !== undefined && !seen.has(current)) {
+    chain.push(current)
+    seen.add(current)
+    const next: unknown = current.cause
+    current = next instanceof Error ? next : undefined // a missing or non-Error cause ends the chain
+  }
+  return chain
+    .map((link, index) => {
+      const header = index === 0 ? '' : 'Caused by: ' // only links after the first get the prefix
+      return `${header}${link.stack || `${link.name}: ${link.message}`}` // use the stack when non-empty, else fall back to `name: message`
+    })
+    .join('\n')
+}
@@ -34,6 +34,7 @@ const MOCKED_PATHS = ['../../vectordb/index.js', '../../embedder/index.js'] as c
 
 let createEmbedder: typeof import('../../cli/common.js').createEmbedder
 let createVectorStore: typeof import('../../cli/common.js').createVectorStore
+let formatCliError: typeof import('../../cli/common.js').formatCliError
 type ResolvedGlobalConfig = import('../../cli/options.js').ResolvedGlobalConfig
 
 // ============================================
@@ -58,7 +59,7 @@ describe('cli/common', () => {
     vi.resetModules()
     vi.doMock('../../vectordb/index.js', vectordbFactory)
     vi.doMock('../../embedder/index.js', embedderFactory)
-    ;({ createEmbedder, createVectorStore } = await import('../../cli/common.js'))
+    ;({ createEmbedder, createVectorStore, formatCliError } = await import('../../cli/common.js'))
   })
 
   afterAll(() => {
@@ -82,8 +83,56 @@ describe('cli/common', () => {
     })
   })
 
+  describe('formatCliError', () => {
+    it('renders the full cause chain with stacks for a nested error', () => {
+      // Build a deterministic 3-link chain: outer → mid → root.
+      const root = new Error('root disk failure')
+      const mid = new Error('vector store write failed', { cause: root })
+      const outer = new Error('Failed to ingest file', { cause: mid })
+
+      const rendered = formatCliError(outer)
+
+      // Every link's message appears.
+      expect(rendered).toContain('Failed to ingest file')
+      expect(rendered).toContain('vector store write failed')
+      expect(rendered).toContain('root disk failure')
+      // Deeper links are attributed as causes; the outer link is not.
+      expect(rendered).toContain('Caused by: ')
+      expect(rendered.indexOf('Caused by: ')).toBeGreaterThan(
+        rendered.indexOf('Failed to ingest file')
+      )
+      // The chain is ordered outer → cause → cause.
+      expect(rendered.indexOf('Failed to ingest file')).toBeLessThan(
+        rendered.indexOf('vector store write failed')
+      )
+      expect(rendered.indexOf('vector store write failed')).toBeLessThan(
+        rendered.indexOf('root disk failure')
+      )
+      // Stack frames are included for diagnostics (operator-facing).
+      expect(rendered).toContain('at ')
+    })
+
+    it('renders message and stack for a single Error without a cause', () => {
+      const err = new Error('lonely failure')
+
+      const rendered = formatCliError(err)
+
+      expect(rendered).toContain('lonely failure')
+      expect(rendered).not.toContain('Caused by: ')
+      expect(rendered).toContain('at ')
+    })
+
+    it('stringifies a non-Error thrown value', () => {
+      const rendered = formatCliError('plain string failure')
+
+      expect(rendered).toContain('plain string failure')
+      expect(rendered).not.toContain('Caused by: ')
+    })
+  })
+
   describe('createEmbedder', () => {
     const originalDevice = process.env['RAG_DEVICE']
+    const originalDtype = process.env['RAG_DTYPE']
 
     afterEach(() => {
       mocks.Embedder.mockReset()
@@ -92,6 +141,11 @@ describe('cli/common', () => {
       } else {
         process.env['RAG_DEVICE'] = originalDevice
       }
+      if (originalDtype === undefined) {
+        delete process.env['RAG_DTYPE']
+      } else {
+        process.env['RAG_DTYPE'] = originalDtype
+      }
     })
 
     it('defaults device to cpu when RAG_DEVICE is unset', () => {
@@ -115,5 +169,32 @@ describe('cli/common', () => {
 
       expect(mocks.Embedder).toHaveBeenCalledWith(expect.objectContaining({ device: 'webgpu' }))
     })
+
+    it('omits dtype from the Embedder config when RAG_DTYPE is unset', () => {
+      delete process.env['RAG_DTYPE']
+
+      createEmbedder(makeConfig({ modelName: 'custom/model', cacheDir: '/custom/cache' }))
+
+      expect(mocks.Embedder).toHaveBeenCalledOnce()
+      const passedConfig = mocks.Embedder.mock.calls[0]?.[0]
+      expect(passedConfig).not.toHaveProperty('dtype')
+    })
+
+    it('passes RAG_DTYPE through to the Embedder when set', () => {
+      process.env['RAG_DTYPE'] = 'q8'
+
+      createEmbedder(makeConfig({ modelName: 'custom/model', cacheDir: '/custom/cache' }))
+
+      expect(mocks.Embedder).toHaveBeenCalledWith(expect.objectContaining({ dtype: 'q8' }))
+    })
+
+    it('omits dtype when RAG_DTYPE is whitespace-only', () => {
+      process.env['RAG_DTYPE'] = '   '
+
+      createEmbedder(makeConfig({ modelName: 'custom/model', cacheDir: '/custom/cache' }))
+
+      const passedConfig = mocks.Embedder.mock.calls[0]?.[0]
+      expect(passedConfig).not.toHaveProperty('dtype')
+    })
   })
 })
@@ -28,9 +28,9 @@ const cliCommonFactory = () => ({
     deleteChunks: mocks.deleteChunks,
     optimize: mocks.optimize,
   })),
-  // Pure helper used by the catch block; real implementation preserves the
-  // `Error: <message>` stderr behavior the tests assert.
-  toErrorMessage: (error: unknown) => (error instanceof Error ? error.message : String(error)),
+  // Catch-block renderer; faithful shim preserves the `Error: <message>`
+  // stderr behavior the tests assert.
+  formatCliError: formatCliErrorShim,
 })
 
 const fsPromisesFactory = async (
@@ -47,6 +47,7 @@ const MOCKED_PATHS = ['../../cli/common.js', 'node:fs/promises'] as const
 
 import { mkdir, rm, writeFile } from 'node:fs/promises'
 import { resolve } from 'node:path'
+import { formatCliErrorShim } from './cli-error-shim.js'
 
 let runDelete: typeof import('../../cli/delete.js').runDelete
 
 
@@ -103,9 +103,9 @@ const cliCommonFactory = () => ({
   resolveCliBaseDirsOrExit: vi
     .fn()
     .mockImplementation((cliRoots: string[]) => mocks.resolveCliBaseDirs(cliRoots)),
-  // Pure helper used by the catch block; real implementation preserves the
-  // per-file `... FAILED: <message>` stderr behavior the tests assert.
-  toErrorMessage: (error: unknown) => (error instanceof Error ? error.message : String(error)),
+  // Catch-block renderer; faithful shim preserves the per-file
+  // `... FAILED: <message>` stderr behavior the tests assert.
+  formatCliError: formatCliErrorShim,
 })
 
 const MOCKED_PATHS = [
@@ -124,6 +124,7 @@ const MOCKED_PATHS = [
 // (e.g., ../../cli/common.js) can win the module-registry race and bind
 // runIngest's closures to that file's factories instead of this file's.
 import { resolve } from 'node:path'
+import { formatCliErrorShim } from './cli-error-shim.js'
 
 let runIngest: typeof import('../../cli/ingest.js').runIngest
 let parseArgs: typeof import('../../cli/ingest.js').parseArgs
@@ -709,7 +710,10 @@ describe('CLI ingest', () => {
     expect(process.exitCode).toBe(1)
 
     const joined = output.join('\n')
-    expect(joined).toContain('FAILED: Parse error: corrupted file')
+    // formatCliError now renders the failing file's diagnostic (message + stack)
+    // on the per-file FAILED line; the original message is still present.
+    expect(joined).toContain('FAILED:')
+    expect(joined).toContain('Parse error: corrupted file')
     expect(joined).toContain('Succeeded: 2')
     expect(joined).toContain('Failed:    1')
   })
 
@@ -45,14 +45,15 @@ const cliCommonFactory = () => ({
   resolveCliBaseDirsOrExit: vi
     .fn()
     .mockImplementation((cliRoots: string[]) => mocks.resolveCliBaseDirs(cliRoots)),
-  // Pure helper used by the catch block; real implementation preserves the
-  // `Error: <message>` stderr behavior the tests assert.
-  toErrorMessage: (error: unknown) => (error instanceof Error ? error.message : String(error)),
+  // Catch-block renderer; faithful shim preserves the
+  // `Failed to list files: <message>` stderr behavior the tests assert.
+  formatCliError: formatCliErrorShim,
 })
 
 const MOCKED_PATHS = ['node:fs/promises', '../../cli/common.js'] as const
 
 import { resolve } from 'node:path'
+import { formatCliErrorShim } from './cli-error-shim.js'
 
 let parseArgs: typeof import('../../cli/list.js').parseArgs
 let runList: typeof import('../../cli/list.js').runList
 
@@ -6,6 +6,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
 import {
   parseGlobalOptions,
   ROOT_HELP_TEXT,
+  resolveDtype,
   resolveGlobalConfig,
   validateMaxFileSize,
   validateModelName,
@@ -428,3 +429,40 @@ describe('CLI global options', () => {
     })
   })
 })
+
+// ============================================
+// resolveDtype
+// ============================================
+// Unlike resolveDevice (which defaults unset/whitespace to 'cpu'), resolveDtype
+// returns `undefined` for unset/whitespace. This divergence is load-bearing: it
+// is the only signal that distinguishes "RAG_DTYPE unset" from an explicit
+// RAG_DTYPE=fp32, which gates the Phase 2 enrichment.
+describe('resolveDtype', () => {
+  it('returns undefined when value is undefined', () => {
+    expect(resolveDtype(undefined)).toBeUndefined()
+  })
+
+  it('returns undefined for an empty string', () => {
+    expect(resolveDtype('')).toBeUndefined()
+  })
+
+  it('returns undefined for a whitespace-only string', () => {
+    expect(resolveDtype('   ')).toBeUndefined()
+  })
+
+  it('passes an explicit fp32 through unchanged (not coerced to a default)', () => {
+    expect(resolveDtype('fp32')).toBe('fp32')
+  })
+
+  it('passes fp16 through unchanged', () => {
+    expect(resolveDtype('fp16')).toBe('fp16')
+  })
+
+  it('passes q8 through unchanged', () => {
+    expect(resolveDtype('q8')).toBe('q8')
+  })
+
+  it('trims surrounding whitespace from a value', () => {
+    expect(resolveDtype('  q8  ')).toBe('q8')
+  })
+})
@@ -32,9 +32,9 @@ const cliCommonFactory = () => ({
     search: mocks.search,
     close: vi.fn(),
   })),
-  // Pure helper used by the catch block; real implementation preserves the
-  // `Error: <message>` stderr behavior the tests assert.
-  toErrorMessage: (error: unknown) => (error instanceof Error ? error.message : String(error)),
+  // Catch-block renderer; faithful shim preserves the `Error: <message>`
+  // stderr behavior the tests assert.
+  formatCliError: formatCliErrorShim,
 })
 
 // NOTE: the mock factory below mirrors the NEW raw-data-utils contract.
@@ -54,6 +54,8 @@ const rawDataUtilsFactory = () => ({
 
 const MOCKED_PATHS = ['../../cli/common.js', '../../utils/raw-data-utils.js'] as const
 
+import { formatCliErrorShim } from './cli-error-shim.js'
+
 let parseArgs: typeof import('../../cli/query.js').parseArgs
 let runQuery: typeof import('../../cli/query.js').runQuery
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "mcp-local-rag",`
`3`		`- "version": "0.15.0",`
	`3`	`+ "version": "0.15.1",`
`4`	`4`	`"description": "Local RAG MCP Server - Easy-to-setup document search with minimal configuration",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"main": "dist/index.js",`