Skip to content

Commit b1bbcfd

Browse files
authored
Merge pull request #147 from shinpr/feat/configurable-embedding-dtype
feat: configurable embedding dtype (RAG_DTYPE) + error classification & boundary
2 parents 330aee8 + 20596c1 commit b1bbcfd

42 files changed

Lines changed: 2678 additions & 796 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ The MCP server is configured by environment variables only — pass them through
383383
| `MAX_FILE_SIZE` | `--max-file-size` | `104857600` (100MB) | Maximum file size in bytes |
384384
| `CHUNK_MIN_LENGTH` | `--chunk-min-length` | `50` | Minimum chunk length in characters (1–10000) |
385385
| `RAG_DEVICE` || `cpu` | Execution device. Passed straight to ONNX Runtime. See the [Transformers.js device source code](https://github.com/huggingface/transformers.js/blob/main/packages/transformers/src/utils/devices.js) for the live list of supported backend names. If initialization fails, the server throws an error. |
386+
| `RAG_DTYPE` || `fp32` | Embedding quantization dtype. Opt-in and passed straight through; accepts any dtype the chosen model provides (`fp32`, `fp16`, `q8`, `int8`, …). If the model lacks the requested variant, the server throws an error naming the dtypes it does provide. Changing `RAG_DEVICE`/`RAG_DTYPE` changes the embedding space — re-ingest existing data. |
386387

387388
**Model choice tips:**
388389
- Multilingual docs → e.g., `onnx-community/embeddinggemma-300m-ONNX` (100+ languages)
@@ -607,6 +608,9 @@ Yes, but you must delete your database and re-ingest all documents. Different mo
607608
**GPU acceleration?**
608609
Opt-in via `RAG_DEVICE`. Devices are passed straight to ONNX Runtime. GPU support is highly dependent on your system, Node.js version, and the underlying ONNX backend. See the [Transformers.js device source code](https://github.com/huggingface/transformers.js/blob/main/packages/transformers/src/utils/devices.js) for the live list of supported backend names. If the requested device fails to initialize, the server throws an error — set `RAG_DEVICE=cpu` to revert.
609610

611+
**Can I change the embedding precision (dtype)?**
612+
Opt-in via `RAG_DTYPE` (default `fp32`); accepted values are listed in the env-var table above. If you request a recognized dtype that the model does not provide, the server throws an error listing the dtypes that are available; an unrecognized value (e.g., a typo) silently falls back to `fp32`. Changing `RAG_DEVICE`/`RAG_DTYPE` changes the embedding space — delete `DB_PATH` and re-ingest.
613+
610614
**Multi-user support?**
611615
No. Designed for single-user, local access. Multi-user would require authentication/access control.
612616

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "mcp-local-rag",
3-
"version": "0.15.0",
3+
"version": "0.15.1",
44
"description": "Local RAG MCP Server - Easy-to-setup document search with minimal configuration",
55
"type": "module",
66
"main": "dist/index.js",

server.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@
88
"url": "https://github.com/shinpr/mcp-local-rag",
99
"source": "github"
1010
},
11-
"version": "0.15.0",
11+
"version": "0.15.1",
1212
"packages": [
1313
{
1414
"registryType": "npm",
1515
"registryBaseUrl": "https://registry.npmjs.org",
1616
"identifier": "mcp-local-rag",
17-
"version": "0.15.0",
17+
"version": "0.15.1",
1818
"transport": {
1919
"type": "stdio"
2020
},
@@ -96,6 +96,13 @@
9696
"format": "string",
9797
"isSecret": false
9898
},
99+
{
100+
"name": "RAG_DTYPE",
101+
"description": "Embedding quantization dtype for the embedder (defaults to fp32). Opt-in and pass-through; accepts any dtype the chosen model provides (fp32, fp16, q8, int8, ...). If the model has no variant for the requested dtype, the server throws an error. Changing this changes the embedding space — re-ingest existing data.",
102+
"isRequired": false,
103+
"format": "string",
104+
"isSecret": false
105+
},
99106
{
100107
"name": "RAG_HYBRID_WEIGHT",
101108
"description": "Keyword boost factor for hybrid search (0.0-1.0, defaults to 0.6). 0 means semantic similarity only; higher values increase the keyword-match contribution to the final score.",
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Faithful test double for `formatCliError` (src/cli/common.ts).
2+
//
3+
// CLI subcommand tests mock `../../cli/common.js` wholesale, so they must
4+
// supply their own `formatCliError`. This shim mirrors the production
5+
// rendering (full `.cause` chain + stacks, deeper links prefixed
6+
// `Caused by: `) so failure-path assertions exercise real behavior — the
7+
// cause chain reaches stderr, exactly as the Contract-Delta CLI row requires —
8+
// rather than a message-only stub.
9+
10+
/**
 * Renders a thrown value and its `.cause` chain as one multi-line string.
 *
 * A non-`Error` input is first wrapped in `new Error(String(error))`. Each
 * link is printed as its `stack`, or as `name: message` when the stack is
 * empty or missing; every link after the first is prefixed with
 * `Caused by: `. Links are joined with a single newline.
 *
 * @param error - The thrown value to render; may be anything.
 * @returns The rendered chain, outermost error first.
 */
export function formatCliErrorShim(error: unknown): string {
  const rendered: string[] = []
  const visited = new Set<Error>()
  let link: unknown = error instanceof Error ? error : new Error(String(error))

  // Stop at the first non-Error cause, or when a link repeats (cyclic chain).
  while (link instanceof Error && !visited.has(link)) {
    visited.add(link)
    const body = link.stack || `${link.name}: ${link.message}`
    rendered.push(rendered.length === 0 ? body : `Caused by: ${body}`)
    // The `in` guard keeps this well-typed on lib targets that predate `Error.cause`.
    link = 'cause' in link ? link.cause : undefined
  }

  return rendered.join('\n')
}

src/__tests__/cli/common.test.ts

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ const MOCKED_PATHS = ['../../vectordb/index.js', '../../embedder/index.js'] as c
3434

3535
let createEmbedder: typeof import('../../cli/common.js').createEmbedder
3636
let createVectorStore: typeof import('../../cli/common.js').createVectorStore
37+
let formatCliError: typeof import('../../cli/common.js').formatCliError
3738
type ResolvedGlobalConfig = import('../../cli/options.js').ResolvedGlobalConfig
3839

3940
// ============================================
@@ -58,7 +59,7 @@ describe('cli/common', () => {
5859
vi.resetModules()
5960
vi.doMock('../../vectordb/index.js', vectordbFactory)
6061
vi.doMock('../../embedder/index.js', embedderFactory)
61-
;({ createEmbedder, createVectorStore } = await import('../../cli/common.js'))
62+
;({ createEmbedder, createVectorStore, formatCliError } = await import('../../cli/common.js'))
6263
})
6364

6465
afterAll(() => {
@@ -82,8 +83,56 @@ describe('cli/common', () => {
8283
})
8384
})
8485

86+
describe('formatCliError', () => {
87+
it('renders the full cause chain with stacks for a nested error', () => {
88+
// Build a deterministic 3-link chain: outer → mid → root.
89+
const root = new Error('root disk failure')
90+
const mid = new Error('vector store write failed', { cause: root })
91+
const outer = new Error('Failed to ingest file', { cause: mid })
92+
93+
const rendered = formatCliError(outer)
94+
95+
// Every link's message appears.
96+
expect(rendered).toContain('Failed to ingest file')
97+
expect(rendered).toContain('vector store write failed')
98+
expect(rendered).toContain('root disk failure')
99+
// Deeper links are attributed as causes; the outer link is not.
100+
expect(rendered).toContain('Caused by: ')
101+
expect(rendered.indexOf('Caused by: ')).toBeGreaterThan(
102+
rendered.indexOf('Failed to ingest file')
103+
)
104+
// The chain is ordered outer → cause → cause.
105+
expect(rendered.indexOf('Failed to ingest file')).toBeLessThan(
106+
rendered.indexOf('vector store write failed')
107+
)
108+
expect(rendered.indexOf('vector store write failed')).toBeLessThan(
109+
rendered.indexOf('root disk failure')
110+
)
111+
// Stack frames are included for diagnostics (operator-facing).
112+
expect(rendered).toContain('at ')
113+
})
114+
115+
it('renders message and stack for a single Error without a cause', () => {
116+
const err = new Error('lonely failure')
117+
118+
const rendered = formatCliError(err)
119+
120+
expect(rendered).toContain('lonely failure')
121+
expect(rendered).not.toContain('Caused by: ')
122+
expect(rendered).toContain('at ')
123+
})
124+
125+
it('stringifies a non-Error thrown value', () => {
126+
const rendered = formatCliError('plain string failure')
127+
128+
expect(rendered).toContain('plain string failure')
129+
expect(rendered).not.toContain('Caused by: ')
130+
})
131+
})
132+
85133
describe('createEmbedder', () => {
86134
const originalDevice = process.env['RAG_DEVICE']
135+
const originalDtype = process.env['RAG_DTYPE']
87136

88137
afterEach(() => {
89138
mocks.Embedder.mockReset()
@@ -92,6 +141,11 @@ describe('cli/common', () => {
92141
} else {
93142
process.env['RAG_DEVICE'] = originalDevice
94143
}
144+
if (originalDtype === undefined) {
145+
delete process.env['RAG_DTYPE']
146+
} else {
147+
process.env['RAG_DTYPE'] = originalDtype
148+
}
95149
})
96150

97151
it('defaults device to cpu when RAG_DEVICE is unset', () => {
@@ -115,5 +169,32 @@ describe('cli/common', () => {
115169

116170
expect(mocks.Embedder).toHaveBeenCalledWith(expect.objectContaining({ device: 'webgpu' }))
117171
})
172+
173+
it('omits dtype from the Embedder config when RAG_DTYPE is unset', () => {
174+
delete process.env['RAG_DTYPE']
175+
176+
createEmbedder(makeConfig({ modelName: 'custom/model', cacheDir: '/custom/cache' }))
177+
178+
expect(mocks.Embedder).toHaveBeenCalledOnce()
179+
const passedConfig = mocks.Embedder.mock.calls[0]?.[0]
180+
expect(passedConfig).not.toHaveProperty('dtype')
181+
})
182+
183+
it('passes RAG_DTYPE through to the Embedder when set', () => {
184+
process.env['RAG_DTYPE'] = 'q8'
185+
186+
createEmbedder(makeConfig({ modelName: 'custom/model', cacheDir: '/custom/cache' }))
187+
188+
expect(mocks.Embedder).toHaveBeenCalledWith(expect.objectContaining({ dtype: 'q8' }))
189+
})
190+
191+
it('omits dtype when RAG_DTYPE is whitespace-only', () => {
192+
process.env['RAG_DTYPE'] = ' '
193+
194+
createEmbedder(makeConfig({ modelName: 'custom/model', cacheDir: '/custom/cache' }))
195+
196+
const passedConfig = mocks.Embedder.mock.calls[0]?.[0]
197+
expect(passedConfig).not.toHaveProperty('dtype')
198+
})
118199
})
119200
})

src/__tests__/cli/delete.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ const cliCommonFactory = () => ({
2828
deleteChunks: mocks.deleteChunks,
2929
optimize: mocks.optimize,
3030
})),
31-
// Pure helper used by the catch block; real implementation preserves the
32-
// `Error: <message>` stderr behavior the tests assert.
33-
toErrorMessage: (error: unknown) => (error instanceof Error ? error.message : String(error)),
31+
// Catch-block renderer; faithful shim preserves the `Error: <message>`
32+
// stderr behavior the tests assert.
33+
formatCliError: formatCliErrorShim,
3434
})
3535

3636
const fsPromisesFactory = async (
@@ -47,6 +47,7 @@ const MOCKED_PATHS = ['../../cli/common.js', 'node:fs/promises'] as const
4747

4848
import { mkdir, rm, writeFile } from 'node:fs/promises'
4949
import { resolve } from 'node:path'
50+
import { formatCliErrorShim } from './cli-error-shim.js'
5051

5152
let runDelete: typeof import('../../cli/delete.js').runDelete
5253

src/__tests__/cli/ingest.test.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ const cliCommonFactory = () => ({
103103
resolveCliBaseDirsOrExit: vi
104104
.fn()
105105
.mockImplementation((cliRoots: string[]) => mocks.resolveCliBaseDirs(cliRoots)),
106-
// Pure helper used by the catch block; real implementation preserves the
107-
// per-file `... FAILED: <message>` stderr behavior the tests assert.
108-
toErrorMessage: (error: unknown) => (error instanceof Error ? error.message : String(error)),
106+
// Catch-block renderer; faithful shim preserves the per-file
107+
// `... FAILED: <message>` stderr behavior the tests assert.
108+
formatCliError: formatCliErrorShim,
109109
})
110110

111111
const MOCKED_PATHS = [
@@ -124,6 +124,7 @@ const MOCKED_PATHS = [
124124
// (e.g., ../../cli/common.js) can win the module-registry race and bind
125125
// runIngest's closures to that file's factories instead of this file's.
126126
import { resolve } from 'node:path'
127+
import { formatCliErrorShim } from './cli-error-shim.js'
127128

128129
let runIngest: typeof import('../../cli/ingest.js').runIngest
129130
let parseArgs: typeof import('../../cli/ingest.js').parseArgs
@@ -709,7 +710,10 @@ describe('CLI ingest', () => {
709710
expect(process.exitCode).toBe(1)
710711

711712
const joined = output.join('\n')
712-
expect(joined).toContain('FAILED: Parse error: corrupted file')
713+
// formatCliError now renders the failing file's diagnostic (message + stack)
714+
// on the per-file FAILED line; the original message is still present.
715+
expect(joined).toContain('FAILED:')
716+
expect(joined).toContain('Parse error: corrupted file')
713717
expect(joined).toContain('Succeeded: 2')
714718
expect(joined).toContain('Failed: 1')
715719
})

src/__tests__/cli/list.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,15 @@ const cliCommonFactory = () => ({
4545
resolveCliBaseDirsOrExit: vi
4646
.fn()
4747
.mockImplementation((cliRoots: string[]) => mocks.resolveCliBaseDirs(cliRoots)),
48-
// Pure helper used by the catch block; real implementation preserves the
49-
// `Error: <message>` stderr behavior the tests assert.
50-
toErrorMessage: (error: unknown) => (error instanceof Error ? error.message : String(error)),
48+
// Catch-block renderer; faithful shim preserves the
49+
// `Failed to list files: <message>` stderr behavior the tests assert.
50+
formatCliError: formatCliErrorShim,
5151
})
5252

5353
const MOCKED_PATHS = ['node:fs/promises', '../../cli/common.js'] as const
5454

5555
import { resolve } from 'node:path'
56+
import { formatCliErrorShim } from './cli-error-shim.js'
5657

5758
let parseArgs: typeof import('../../cli/list.js').parseArgs
5859
let runList: typeof import('../../cli/list.js').runList

src/__tests__/cli/options.test.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
66
import {
77
parseGlobalOptions,
88
ROOT_HELP_TEXT,
9+
resolveDtype,
910
resolveGlobalConfig,
1011
validateMaxFileSize,
1112
validateModelName,
@@ -428,3 +429,40 @@ describe('CLI global options', () => {
428429
})
429430
})
430431
})
432+
433+
// ============================================
434+
// resolveDtype
435+
// ============================================
436+
// Unlike resolveDevice (which defaults unset/whitespace to 'cpu'), resolveDtype
437+
// returns `undefined` for unset/whitespace. This divergence is load-bearing: it
438+
// is the only signal that distinguishes "RAG_DTYPE unset" from an explicit
439+
// RAG_DTYPE=fp32, which gates the Phase 2 enrichment.
440+
describe('resolveDtype', () => {
441+
it('returns undefined when value is undefined', () => {
442+
expect(resolveDtype(undefined)).toBeUndefined()
443+
})
444+
445+
it('returns undefined for an empty string', () => {
446+
expect(resolveDtype('')).toBeUndefined()
447+
})
448+
449+
it('returns undefined for a whitespace-only string', () => {
450+
expect(resolveDtype(' ')).toBeUndefined()
451+
})
452+
453+
it('passes an explicit fp32 through unchanged (not coerced to a default)', () => {
454+
expect(resolveDtype('fp32')).toBe('fp32')
455+
})
456+
457+
it('passes fp16 through unchanged', () => {
458+
expect(resolveDtype('fp16')).toBe('fp16')
459+
})
460+
461+
it('passes q8 through unchanged', () => {
462+
expect(resolveDtype('q8')).toBe('q8')
463+
})
464+
465+
it('trims surrounding whitespace from a value', () => {
466+
expect(resolveDtype(' q8 ')).toBe('q8')
467+
})
468+
})

src/__tests__/cli/query.test.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ const cliCommonFactory = () => ({
3232
search: mocks.search,
3333
close: vi.fn(),
3434
})),
35-
// Pure helper used by the catch block; real implementation preserves the
36-
// `Error: <message>` stderr behavior the tests assert.
37-
toErrorMessage: (error: unknown) => (error instanceof Error ? error.message : String(error)),
35+
// Catch-block renderer; faithful shim preserves the `Error: <message>`
36+
// stderr behavior the tests assert.
37+
formatCliError: formatCliErrorShim,
3838
})
3939

4040
// NOTE: the mock factory below mirrors the NEW raw-data-utils contract.
@@ -54,6 +54,8 @@ const rawDataUtilsFactory = () => ({
5454

5555
const MOCKED_PATHS = ['../../cli/common.js', '../../utils/raw-data-utils.js'] as const
5656

57+
import { formatCliErrorShim } from './cli-error-shim.js'
58+
5759
let parseArgs: typeof import('../../cli/query.js').parseArgs
5860
let runQuery: typeof import('../../cli/query.js').runQuery
5961

0 commit comments

Comments
 (0)