Skip to content

Commit 56b3c02

Browse files
committed
feat(wkb): MultiPolygon support in WKB parser
1 parent a27dba7 commit 56b3c02

2 files changed

Lines changed: 273 additions & 0 deletions

File tree

src/io/wkb.test.ts

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
import { describe, test, expect } from "vitest";
2+
import { makeData, Binary, Data, List } from "apache-arrow";
3+
import { parseWkb, WKBType } from "./wkb";
4+
5+
function hexToUint8Array(hex: string): Uint8Array {
6+
const bytes = new Uint8Array(hex.length / 2);
7+
for (let i = 0; i < hex.length; i += 2) {
8+
bytes[i / 2] = parseInt(hex.substring(i, i + 2), 16);
9+
}
10+
return bytes;
11+
}
12+
13+
function makeWkbData(...hexStrings: string[]) {
14+
const buffers = hexStrings.map(hexToUint8Array);
15+
const totalLen = buffers.reduce((s, b) => s + b.length, 0);
16+
const wkb = new Uint8Array(totalLen);
17+
const valueOffsets = new Int32Array(hexStrings.length + 1);
18+
let offset = 0;
19+
for (let i = 0; i < buffers.length; i++) {
20+
wkb.set(buffers[i], offset);
21+
offset += buffers[i].length;
22+
valueOffsets[i + 1] = offset;
23+
}
24+
return makeData({
25+
type: new Binary(),
26+
data: wkb,
27+
valueOffsets,
28+
});
29+
}
30+
31+
/** Extract offset arrays from nested List data for structural assertions */
32+
function getOffsets(data: Data<List>, level: number): Int32Array {
33+
let d: Data = data;
34+
for (let i = 0; i < level; i++) {
35+
d = (d as Data<List>).children[0];
36+
}
37+
return (d as Data<List>).valueOffsets;
38+
}
39+
40+
function getCoords(data: Data<List>, dim: number): number[][] {
41+
let d: Data = data;
42+
// walk: geom -> polygon -> ring -> vertex (FixedSizeList) -> Float64
43+
while (d.children?.length) {
44+
d = d.children[0];
45+
}
46+
const flat = d.values as Float64Array;
47+
const coords: number[][] = [];
48+
for (let i = 0; i < flat.length; i += dim) {
49+
coords.push(Array.from(flat.slice(i, i + dim)));
50+
}
51+
return coords;
52+
}
53+
54+
// Generated via shapely:
55+
// MultiPolygon([box(0,0,1,1), box(2,2,3,3)]).wkb_hex
56+
const TWO_SQUARES_HEX =
57+
"01060000000200000001030000000100000005000000000000000000F03F0000000000000000000000000000F03F000000000000F03F0000000000000000000000000000F03F00000000000000000000000000000000000000000000F03F0000000000000000010300000001000000050000000000000000000840000000000000004000000000000008400000000000000840000000000000004000000000000008400000000000000040000000000000004000000000000008400000000000000040";
58+
59+
// MultiPolygon([box(0,0,1,1)]).wkb_hex
60+
const SINGLE_POLYGON_HEX =
61+
"01060000000100000001030000000100000005000000000000000000F03F0000000000000000000000000000F03F000000000000F03F0000000000000000000000000000F03F00000000000000000000000000000000000000000000F03F0000000000000000";
62+
63+
// MultiPolygon([Polygon(box(0,0,10,10).exterior, [box(2,2,4,4).exterior])]).wkb_hex
64+
const WITH_HOLE_HEX =
65+
"010600000001000000010300000002000000050000000000000000002440000000000000000000000000000024400000000000002440000000000000000000000000000024400000000000000000000000000000000000000000000024400000000000000000050000000000000000001040000000000000004000000000000010400000000000001040000000000000004000000000000010400000000000000040000000000000004000000000000010400000000000000040";
66+
67+
describe("parseWkb MultiPolygon", () => {
68+
test("two squares: structure and coordinates", () => {
69+
const wkbData = makeWkbData(TWO_SQUARES_HEX);
70+
const result = parseWkb(wkbData, WKBType.MultiPolygon, 2) as Data<List>;
71+
expect(result.length).toBe(1);
72+
73+
// geom offsets: 1 multipolygon containing 2 polygons
74+
const geomOffsets = result.valueOffsets;
75+
expect(Array.from(geomOffsets)).toEqual([0, 2]);
76+
77+
// polygon offsets: 2 polygons, each with 1 ring
78+
const polyOffsets = getOffsets(result, 1);
79+
expect(Array.from(polyOffsets)).toEqual([0, 1, 2]);
80+
81+
// ring offsets: 2 rings, each with 5 coords (closed box)
82+
const ringOffsets = getOffsets(result, 2);
83+
expect(Array.from(ringOffsets)).toEqual([0, 5, 10]);
84+
85+
// verify coordinate values
86+
const coords = getCoords(result, 2);
87+
expect(coords.length).toBe(10);
88+
// first polygon starts at (1,0) - shapely box winding
89+
expect(coords[0]).toEqual([1, 0]);
90+
// second polygon starts at (3,2)
91+
expect(coords[5]).toEqual([3, 2]);
92+
});
93+
94+
test("single polygon in multipolygon", () => {
95+
const wkbData = makeWkbData(SINGLE_POLYGON_HEX);
96+
const result = parseWkb(wkbData, WKBType.MultiPolygon, 2) as Data<List>;
97+
expect(result.length).toBe(1);
98+
99+
const geomOffsets = result.valueOffsets;
100+
expect(Array.from(geomOffsets)).toEqual([0, 1]);
101+
102+
const polyOffsets = getOffsets(result, 1);
103+
expect(Array.from(polyOffsets)).toEqual([0, 1]);
104+
105+
const ringOffsets = getOffsets(result, 2);
106+
expect(Array.from(ringOffsets)).toEqual([0, 5]);
107+
108+
expect(getCoords(result, 2).length).toBe(5);
109+
});
110+
111+
test("polygon with hole: 2 rings", () => {
112+
const wkbData = makeWkbData(WITH_HOLE_HEX);
113+
const result = parseWkb(wkbData, WKBType.MultiPolygon, 2) as Data<List>;
114+
expect(result.length).toBe(1);
115+
116+
// 1 multipolygon -> 1 polygon -> 2 rings (outer + hole)
117+
const geomOffsets = result.valueOffsets;
118+
expect(Array.from(geomOffsets)).toEqual([0, 1]);
119+
120+
const polyOffsets = getOffsets(result, 1);
121+
expect(Array.from(polyOffsets)).toEqual([0, 2]);
122+
123+
const ringOffsets = getOffsets(result, 2);
124+
expect(Array.from(ringOffsets)).toEqual([0, 5, 10]);
125+
126+
const coords = getCoords(result, 2);
127+
expect(coords.length).toBe(10);
128+
// outer ring starts at (10,0), hole starts at (4,2)
129+
expect(coords[0]).toEqual([10, 0]);
130+
expect(coords[5]).toEqual([4, 2]);
131+
});
132+
133+
test("batch of multiple multipolygon features", () => {
134+
const wkbData = makeWkbData(TWO_SQUARES_HEX, SINGLE_POLYGON_HEX);
135+
const result = parseWkb(wkbData, WKBType.MultiPolygon, 2) as Data<List>;
136+
expect(result.length).toBe(2);
137+
138+
// feature 0 has 2 polygons, feature 1 has 1 polygon
139+
const geomOffsets = result.valueOffsets;
140+
expect(Array.from(geomOffsets)).toEqual([0, 2, 3]);
141+
142+
// 3 polygons total, each with 1 ring
143+
const polyOffsets = getOffsets(result, 1);
144+
expect(Array.from(polyOffsets)).toEqual([0, 1, 2, 3]);
145+
146+
// 3 rings, each with 5 coords
147+
const ringOffsets = getOffsets(result, 2);
148+
expect(Array.from(ringOffsets)).toEqual([0, 5, 10, 15]);
149+
150+
expect(getCoords(result, 2).length).toBe(15);
151+
});
152+
});

src/io/wkb.ts

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { makeData, Field, FixedSizeList, Float64, List } from "apache-arrow";
22
import {
33
GeoArrowData,
44
LineStringData,
5+
MultiPolygonData,
56
PointData,
67
PolygonData,
78
WKBData,
@@ -57,6 +58,12 @@ export function parseWkb(
5758
case WKBType.Polygon:
5859
return repackPolygons(parsedGeometries as BinaryPolygonGeometry[], dim);
5960

61+
case WKBType.MultiPolygon:
62+
return repackMultiPolygons(
63+
parsedGeometries as BinaryPolygonGeometry[],
64+
dim,
65+
);
66+
6067
default:
6168
assertFalse("Not yet implemented for this geometry type");
6269
}
@@ -258,6 +265,120 @@ function inferPolygonCapacity(geoms: BinaryPolygonGeometry[]): PolygonCapacity {
258265
return capacity;
259266
}
260267

268+
type MultiPolygonCapacity = {
269+
coordCapacity: number;
270+
ringCapacity: number;
271+
polygonCapacity: number;
272+
geomCapacity: number;
273+
};
274+
275+
function repackMultiPolygons(
276+
geoms: BinaryPolygonGeometry[],
277+
dim: number,
278+
): MultiPolygonData {
279+
const capacity = inferMultiPolygonCapacity(geoms);
280+
const coords = new Float64Array(capacity.coordCapacity * dim);
281+
const ringOffsets = new Int32Array(capacity.ringCapacity + 1);
282+
const polygonOffsets = new Int32Array(capacity.polygonCapacity + 1);
283+
const geomOffsets = new Int32Array(capacity.geomCapacity + 1);
284+
285+
let geomIndex = 0;
286+
let polygonIndex = 0;
287+
let ringIndex = 0;
288+
let coordOffset = 0;
289+
290+
for (const geom of geoms) {
291+
assert(geom.positions.value instanceof Float64Array);
292+
const numCoords = geom.positions.value.length / geom.positions.size;
293+
294+
coords.set(geom.positions.value, coordOffset * dim);
295+
296+
// polygonIndices uses coordinate indices (not ring indices) to separate
297+
// component polygons. We scan primitivePolygonIndices to find which rings
298+
// belong to each polygon.
299+
const primIndices = geom.primitivePolygonIndices.value;
300+
const polyCoordIndices =
301+
geom.polygonIndices?.value ?? new Int32Array([0, numCoords]);
302+
const numPolygons = polyCoordIndices.length - 1;
303+
let ringPtr = 0;
304+
305+
for (let p = 0; p < numPolygons; p++) {
306+
const polyCoordEnd = polyCoordIndices[p + 1];
307+
308+
while (
309+
ringPtr < primIndices.length - 1 &&
310+
primIndices[ringPtr] < polyCoordEnd
311+
) {
312+
ringOffsets[ringIndex + 1] =
313+
ringOffsets[ringIndex] +
314+
(primIndices[ringPtr + 1] - primIndices[ringPtr]);
315+
ringIndex += 1;
316+
ringPtr += 1;
317+
}
318+
319+
polygonOffsets[polygonIndex + 1] = ringIndex;
320+
polygonIndex += 1;
321+
}
322+
323+
coordOffset += numCoords;
324+
geomOffsets[geomIndex + 1] = polygonIndex;
325+
geomIndex += 1;
326+
}
327+
328+
const coordsData = makeData({
329+
type: new Float64(),
330+
data: coords,
331+
});
332+
const verticesData = makeData({
333+
type: new FixedSizeList(
334+
dim,
335+
new Field(coordFieldName(dim), coordsData.type, false),
336+
),
337+
child: coordsData,
338+
});
339+
const ringsData = makeData({
340+
type: new List(new Field("vertices", verticesData.type, false)),
341+
valueOffsets: ringOffsets,
342+
child: verticesData,
343+
});
344+
const polygonsData = makeData({
345+
type: new List(new Field("rings", ringsData.type, false)),
346+
valueOffsets: polygonOffsets,
347+
child: ringsData,
348+
});
349+
return makeData({
350+
type: new List(new Field("polygons", polygonsData.type, false)),
351+
valueOffsets: geomOffsets,
352+
child: polygonsData,
353+
});
354+
}
355+
356+
function inferMultiPolygonCapacity(
357+
geoms: BinaryPolygonGeometry[],
358+
): MultiPolygonCapacity {
359+
let capacity: MultiPolygonCapacity = {
360+
coordCapacity: 0,
361+
ringCapacity: 0,
362+
polygonCapacity: 0,
363+
geomCapacity: 0,
364+
};
365+
366+
for (const geom of geoms) {
367+
capacity.geomCapacity += 1;
368+
const polyIndices = geom.polygonIndices?.value;
369+
if (polyIndices) {
370+
capacity.polygonCapacity += polyIndices.length - 1;
371+
} else {
372+
capacity.polygonCapacity += 1;
373+
}
374+
capacity.ringCapacity += geom.primitivePolygonIndices.value.length - 1;
375+
capacity.coordCapacity +=
376+
geom.positions.value.length / geom.positions.size;
377+
}
378+
379+
return capacity;
380+
}
381+
261382
function coordFieldName(dim: number): "xy" | "xyz" {
262383
if (dim === 2) {
263384
return "xy";

0 commit comments

Comments
 (0)