@@ -2,6 +2,7 @@ import crypto from 'node:crypto';
22import { createRequire } from 'node:module' ;
33import { gzipSync } from 'node:zlib' ;
44import { PutObjectCommand , S3Client } from '@aws-sdk/client-s3' ;
5+ import { ZipArchive } from 'archiver' ;
56import { pool } from '../db/client.js' ;
67import {
78 downloadFromS3ForArtifact ,
@@ -176,6 +177,20 @@ function jsonlGzipBuffer(rows: unknown[]): Buffer {
176177 return gzipSync ( Buffer . from ( rows . map ( ( row ) => JSON . stringify ( row ) ) . join ( '\n' ) + '\n' , 'utf8' ) ) ;
177178}
178179
/**
 * Packs in-memory files into a single ZIP archive and returns the archive bytes.
 *
 * Every chunk emitted by the archive stream is collected and concatenated once
 * the stream signals `end`.
 *
 * @param files - Entries to add; each `buffer` is appended under its `name`.
 * @returns A promise resolving to the complete archive as one `Buffer`.
 * @throws Rejects when the archive emits `error` or when `finalize()` rejects.
 */
function createZipArchiveBuffer(files: { name: string; buffer: Buffer }[]): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    const archive = new ZipArchive({ zlib: { level: 9 } });
    const chunks: Buffer[] = [];

    archive.on('data', (chunk: Buffer) => {
      chunks.push(chunk);
    });
    archive.on('end', () => resolve(Buffer.concat(chunks)));
    archive.on('error', (err: Error) => reject(err));

    for (const file of files) {
      archive.append(file.buffer, { name: file.name });
    }

    // finalize() returns its own promise. Forward its rejection so a failure
    // surfaces through this function's promise instead of becoming an
    // unhandled rejection (reject is a no-op if 'error' already settled us).
    archive.finalize().catch(reject);
  });
}
193+
179194async function putResearchObject ( key : string , body : Buffer , contentType : string ) : Promise < void > {
180195 await getResearchLakeClient ( ) . send ( new PutObjectCommand ( {
181196 Bucket : config . RESEARCH_LAKE_BUCKET ,
@@ -1211,6 +1226,7 @@ async function processJob(job: ResearchJobRow): Promise<'exported' | 'rejected'>
12111226 ui_frames : `${ basePath } /ui_frames.jsonl.gz` ,
12121227 ui_skeleton : `${ basePath } /ui_skeleton.jsonl.gz` ,
12131228 quality : `${ basePath } /quality.json` ,
1229+ zip : `${ basePath } .zip` ,
12141230 } ,
12151231 } ;
12161232 const quality = {
@@ -1244,11 +1260,27 @@ async function processJob(job: ResearchJobRow): Promise<'exported' | 'rejected'>
12441260 return 'rejected' ;
12451261 }
12461262
1247- await putResearchObject ( `${ basePath } /manifest.json` , jsonBuffer ( manifest ) , 'application/json' ) ;
1248- await putResearchObject ( `${ basePath } /quality.json` , jsonBuffer ( quality ) , 'application/json' ) ;
1249- await putResearchObject ( `${ basePath } /interactions.jsonl.gz` , jsonlGzipBuffer ( interactions ) , 'application/jsonl+gzip' ) ;
1250- await putResearchObject ( `${ basePath } /ui_frames.jsonl.gz` , jsonlGzipBuffer ( visualRows . frames ) , 'application/jsonl+gzip' ) ;
1251- await putResearchObject ( `${ basePath } /ui_skeleton.jsonl.gz` , jsonlGzipBuffer ( skeleton ) , 'application/jsonl+gzip' ) ;
1263+ const manifestBuf = jsonBuffer ( manifest ) ;
1264+ const qualityBuf = jsonBuffer ( quality ) ;
1265+ const interactionsBuf = jsonlGzipBuffer ( interactions ) ;
1266+ const uiFramesBuf = jsonlGzipBuffer ( visualRows . frames ) ;
1267+ const uiSkeletonBuf = jsonlGzipBuffer ( skeleton ) ;
1268+
1269+ await putResearchObject ( `${ basePath } /manifest.json` , manifestBuf , 'application/json' ) ;
1270+ await putResearchObject ( `${ basePath } /quality.json` , qualityBuf , 'application/json' ) ;
1271+ await putResearchObject ( `${ basePath } /interactions.jsonl.gz` , interactionsBuf , 'application/jsonl+gzip' ) ;
1272+ await putResearchObject ( `${ basePath } /ui_frames.jsonl.gz` , uiFramesBuf , 'application/jsonl+gzip' ) ;
1273+ await putResearchObject ( `${ basePath } /ui_skeleton.jsonl.gz` , uiSkeletonBuf , 'application/jsonl+gzip' ) ;
1274+
1275+ const zipFiles = [
1276+ { name : 'manifest.json' , buffer : manifestBuf } ,
1277+ { name : 'quality.json' , buffer : qualityBuf } ,
1278+ { name : 'interactions.jsonl.gz' , buffer : interactionsBuf } ,
1279+ { name : 'ui_frames.jsonl.gz' , buffer : uiFramesBuf } ,
1280+ { name : 'ui_skeleton.jsonl.gz' , buffer : uiSkeletonBuf } ,
1281+ ] ;
1282+ const zipBuffer = await createZipArchiveBuffer ( zipFiles ) ;
1283+ await putResearchObject ( `${ basePath } .zip` , zipBuffer , 'application/zip' ) ;
12521284
12531285 await completeJob ( job , {
12541286 status : 'exported' ,
@@ -1315,4 +1347,5 @@ export async function runResearchLakeExtractionCycle(): Promise<ResearchLakeCycl
/**
 * Bundle of module-level helpers exported under a single test-internals name.
 * NOTE(review): presumably consumed only by unit tests — confirm against callers.
 */
export const __researchLakeTestInternals = {
  containsIdentifierRisk: containsIdentifierRisk,
  imageFeatureGrid: imageFeatureGrid,
  createZipArchiveBuffer: createZipArchiveBuffer,
};
0 commit comments