Skip to content

Commit 6b291e3

Browse files
authored
Release v0.16.0
Release v0.16.0
2 parents e8a4966 + 2d54d68 commit 6b291e3

11 files changed

Lines changed: 66805 additions & 66520 deletions

File tree

db/seed.sql

Lines changed: 66556 additions & 66497 deletions
Large diffs are not rendered by default.

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
"research:verify-botanical": "tsx scripts/research/verify-botanical.ts",
3131
"enrich:wikipedia": "tsx scripts/enrich/wikipedia-names.ts",
3232
"enrich:translate-names": "tsx scripts/enrich/translate-names.ts",
33+
"enrich:populate-canonical-names": "tsx scripts/enrich/populate-canonical-names.ts",
34+
"enrich:populate-botanical-synonyms": "tsx scripts/enrich/populate-botanical-synonyms.ts",
3335
"orchestrate:status": "NODE_PATH=/home/agent/.local/share/pnpm/global/5/node_modules tsx autodev/scripts/index.ts status",
3436
"orchestrate:run": "NODE_PATH=/home/agent/.local/share/pnpm/global/5/node_modules tsx autodev/scripts/index.ts run",
3537
"orchestrate:retry": "NODE_PATH=/home/agent/.local/share/pnpm/global/5/node_modules tsx autodev/scripts/index.ts retry",
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
-- Crop.canonicalName: the botanical name with author citations stripped,
-- used for external lookups. Nullable; indexed for lookup by name.
ALTER TABLE "Crop" ADD COLUMN "canonicalName" TEXT;
CREATE INDEX "Crop_canonicalName_idx" ON "Crop"("canonicalName");

-- Crop.synonyms: remove the array column. It was wired up for search but
-- never populated; BotanicalSynonym (below) replaces it.
ALTER TABLE "Crop" DROP COLUMN "synonyms";

-- CropEnrichmentAttempt.version: version tracking for enrichment attempts.
-- NOT NULL with DEFAULT 1, so rows that already exist receive version 1.
ALTER TABLE "CropEnrichmentAttempt" ADD COLUMN "version" INTEGER NOT NULL DEFAULT 1;

-- BotanicalSynonym: one row per (crop, alternative scientific name).
CREATE TABLE "BotanicalSynonym" (
    "id" TEXT NOT NULL,
    "cropId" TEXT NOT NULL,
    "name" TEXT NOT NULL,
    "source" TEXT NOT NULL,

    CONSTRAINT "BotanicalSynonym_pkey" PRIMARY KEY ("id")
);

-- A given name may be recorded at most once per crop.
CREATE UNIQUE INDEX "BotanicalSynonym_cropId_name_key" ON "BotanicalSynonym"("cropId", "name");
-- Supports looking a crop up by synonym name.
CREATE INDEX "BotanicalSynonym_name_idx" ON "BotanicalSynonym"("name");

-- Synonyms are removed together with the crop they belong to.
ALTER TABLE "BotanicalSynonym"
    ADD CONSTRAINT "BotanicalSynonym_cropId_fkey"
    FOREIGN KEY ("cropId") REFERENCES "Crop"("id") ON DELETE CASCADE ON UPDATE CASCADE;

prisma/schema.prisma

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ datasource db {
1010
model Crop {
1111
id String @id @default(cuid())
1212
botanicalName String @unique
13+
canonicalName String?
1314
name String
1415
slug String @unique
1516
isNitrogenFixer Boolean @default(false)
1617
isCommonCrop Boolean @default(false)
1718
commonNames String[] @default([])
18-
synonyms String[] @default([])
1919
minTempC Float?
2020
imageUrl String?
2121
sources CropSource[]
@@ -24,12 +24,15 @@ model Crop {
2424
relationshipsB CropRelationship[] @relation("CropB")
2525
translations CropTranslation[]
2626
enrichmentAttempts CropEnrichmentAttempt[]
27+
botanicalSynonyms BotanicalSynonym[]
2728
researchRequestsA ResearchRequest[] @relation("ResearchRequestCropA")
2829
researchRequestsB ResearchRequest[] @relation("ResearchRequestCropB")
2930
researchAttemptsA RelationshipResearchAttempt[] @relation("ResearchAttemptCropA")
3031
researchAttemptsB RelationshipResearchAttempt[] @relation("ResearchAttemptCropB")
3132
createdAt DateTime @default(now())
3233
updatedAt DateTime @updatedAt
34+
35+
@@index([canonicalName])
3336
}
3437

3538
model CropTranslation {
@@ -48,13 +51,25 @@ model CropEnrichmentAttempt {
4851
cropId String
4952
locale String
5053
source String
54+
version Int @default(1)
5155
attemptedAt DateTime @default(now())
5256
crop Crop @relation(fields: [cropId], references: [id], onDelete: Cascade)
5357
5458
@@unique([cropId, locale, source])
5559
@@index([locale, source])
5660
}
5761

62+
/// An alternative scientific name recorded for a crop.
/// Each (cropId, name) pair is unique, and rows are deleted with their crop.
model BotanicalSynonym {
  id     String @id @default(cuid())
  /// The crop this synonym belongs to.
  cropId String
  /// The synonym text itself; indexed so crops can be found by synonym.
  name   String
  /// Where the synonym came from (the USDA population script writes "usda").
  source String
  crop   Crop   @relation(fields: [cropId], references: [id], onDelete: Cascade)

  @@unique([cropId, name])
  @@index([name])
}
72+
5873
model CropSource {
5974
id String @id @default(cuid())
6075
source SourceType
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
2+
* Populate BotanicalSynonym from USDA CropSource data.
3+
*
4+
* USDA rawData has "Synonym Symbol" and "Accepted Symbol" fields.
5+
* When a crop's Symbol != Accepted Symbol, the crop's Scientific Name is
6+
* a synonym and the accepted crop is the canonical one. This script creates
7+
* a BotanicalSynonym row on the accepted crop pointing at the synonym name.
8+
*
9+
* Also mines the "Scientific Name" field to catch cases where the stored
10+
* botanicalName differs from the USDA name (import normalisation artifacts).
11+
*
12+
* Usage:
13+
* pnpm enrich:populate-botanical-synonyms
14+
* pnpm enrich:populate-botanical-synonyms --dry-run
15+
*/
16+
17+
import { PrismaClient } from '@prisma/client'
18+
19+
// Prisma client used for every database call in this script
// (cropSource / crop reads, botanicalSynonym upserts, final $disconnect).
const prisma = new PrismaClient()
20+
21+
interface UsdaRawData {
22+
Symbol?: string
23+
'Synonym Symbol'?: string
24+
'Accepted Symbol'?: string
25+
'Scientific Name'?: string
26+
}
27+
28+
/**
 * Reads one string field from an untyped USDA `rawData` JSON value.
 *
 * Returns `undefined` when `raw` is not a plain object (null, array, scalar),
 * or when the field is missing, not a string, or the empty string — so callers
 * can treat "absent" and "blank" identically.
 */
function usdaField(raw: unknown, key: keyof UsdaRawData): string | undefined {
  if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) return undefined
  const value = (raw as Record<string, unknown>)[key]
  return typeof value === 'string' && value !== '' ? value : undefined
}

/**
 * Populates BotanicalSynonym rows from USDA CropSource data.
 *
 * For each USDA source row with a Scientific Name:
 *  - if its Accepted Symbol differs from its own Symbol, the row is a synonym
 *    and its Scientific Name is attached to the crop owning the accepted symbol
 *    (rows whose accepted symbol is unknown are ignored);
 *  - otherwise the Scientific Name is attached to the row's own crop.
 * In both cases nothing is written when the name equals the target crop's
 * stored botanicalName (counted as "no difference").
 *
 * Pass `--dry-run` on the command line to log what would be written without
 * touching the database. Upserts make the script safe to re-run.
 */
async function main(): Promise<void> {
  const dryRun = process.argv.includes('--dry-run')
  let created = 0
  let skipped = 0

  try {
    const sources = await prisma.cropSource.findMany({
      where: { source: 'USDA' },
      select: { cropId: true, rawData: true },
    })

    // One query for all crop names instead of one findUnique per source row.
    const crops = await prisma.crop.findMany({
      select: { id: true, botanicalName: true },
    })
    const botanicalNameByCropId = new Map<string, string>()
    for (const crop of crops) {
      botanicalNameByCropId.set(crop.id, crop.botanicalName)
    }

    // Build symbol → cropId map from all USDA sources
    const symbolToCropId = new Map<string, string>()
    for (const s of sources) {
      const symbol = usdaField(s.rawData, 'Symbol')
      if (symbol) symbolToCropId.set(symbol, s.cropId)
    }

    for (const s of sources) {
      const scientificName = usdaField(s.rawData, 'Scientific Name')
      if (!scientificName) continue

      const symbol = usdaField(s.rawData, 'Symbol')
      const acceptedSymbol = usdaField(s.rawData, 'Accepted Symbol')

      // A row whose accepted symbol differs from its own symbol IS a synonym:
      // the name belongs on the accepted crop, not on the row's own crop.
      let isSynonymEntry = false
      let targetCropId: string | undefined
      if (acceptedSymbol !== undefined && acceptedSymbol !== symbol) {
        isSynonymEntry = true
        targetCropId = symbolToCropId.get(acceptedSymbol)
      } else {
        targetCropId = s.cropId
      }
      if (!targetCropId) continue

      const storedName = botanicalNameByCropId.get(targetCropId)
      if (storedName === undefined) continue
      // A "synonym" identical to the crop's own name carries no information.
      if (scientificName === storedName) {
        skipped++
        continue
      }

      if (dryRun) {
        if (isSynonymEntry) {
          console.log(`  synonym: "${scientificName}" → accepted symbol ${acceptedSymbol} (cropId ${targetCropId})`)
        } else {
          console.log(`  alt name: crop ${targetCropId}: "${scientificName}" vs stored "${storedName}"`)
        }
      } else {
        // Empty update: an existing (cropId, name) row is left untouched.
        await prisma.botanicalSynonym.upsert({
          where: { cropId_name: { cropId: targetCropId, name: scientificName } },
          create: { cropId: targetCropId, name: scientificName, source: 'usda' },
          update: {},
        })
      }
      created++
    }
  } finally {
    // Release the connection even when a query above throws.
    await prisma.$disconnect()
  }

  if (dryRun) {
    console.log(`\n[dry-run] Would create ${created} synonym rows (${skipped} no difference).`)
  } else {
    console.log(`Done. Created/verified ${created} BotanicalSynonym rows, ${skipped} no difference.`)
  }
}

// Script entry point: report any failure and exit non-zero.
main().catch((err: unknown) => {
  console.error(err)
  process.exit(1)
})
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/**
2+
* Populate Crop.canonicalName by stripping author citations from botanicalName.
3+
*
4+
* "Allium L." → "Allium"
5+
* "Zea mays L." → "Zea mays"
6+
* "Abies balsamea (L.) Mill." → "Abies balsamea"
7+
* "Glycine max (L.) Merr." → "Glycine max"
8+
* "Allium cepa var. aggregatum G.Don" → "Allium cepa var. aggregatum"
9+
*
10+
* Only writes when the stripped name differs from botanicalName.
11+
* Idempotent — safe to re-run.
12+
*
13+
* Usage:
14+
* pnpm enrich:populate-canonical-names
15+
* pnpm enrich:populate-canonical-names --dry-run
16+
*/
17+
18+
import { PrismaClient } from '@prisma/client'
19+
20+
// Prisma client used for every database call in this script
// (crop reads and updates, final $disconnect).
const prisma = new PrismaClient()
21+
22+
function stripAuthor(botanicalName: string): string | null {
23+
// Remove parenthetical author citations: "(L.)", "(DC.)", "(Royle ex D.Don)", etc.
24+
let s = botanicalName.replace(/\s*\([^)]+\)/g, '')
25+
// Strip trailing author tokens: "L.", "DC.", "Mill.", "G.Don", "ex Carrière", etc.
26+
// An author token starts with a capital letter or "ex " followed by capital.
27+
s = s.replace(/(\s+(?:ex\s+)?[A-Z][a-zA-Z'.-]*\.?)+\s*$/, '').trim()
28+
if (!s || s === botanicalName.trim()) return null
29+
return s
30+
}
31+
32+
/**
 * Populates Crop.canonicalName with the author-stripped botanicalName.
 *
 * A crop is skipped when stripAuthor() finds nothing to strip, or when its
 * canonicalName already holds the stripped value, so re-running is a no-op.
 * Pass `--dry-run` on the command line to log the planned changes without
 * writing to the database.
 */
async function main(): Promise<void> {
  const dryRun = process.argv.includes('--dry-run')
  let updated = 0
  let skipped = 0

  try {
    const crops = await prisma.crop.findMany({
      select: { id: true, botanicalName: true, canonicalName: true },
    })

    for (const crop of crops) {
      const canonical = stripAuthor(crop.botanicalName)
      // No author suffix, or already stored: nothing to write.
      if (!canonical || crop.canonicalName === canonical) {
        skipped++
        continue
      }

      if (dryRun) {
        // Separator added so the before/after names are distinguishable.
        console.log(`  ${crop.botanicalName} → ${canonical}`)
      } else {
        await prisma.crop.update({
          where: { id: crop.id },
          data: { canonicalName: canonical },
        })
      }
      updated++
    }
  } finally {
    // Release the connection even when a query above throws.
    await prisma.$disconnect()
  }

  if (dryRun) {
    console.log(`\n[dry-run] Would update ${updated} crops (${skipped} unchanged).`)
  } else {
    console.log(`Done. Updated ${updated} crops, ${skipped} already correct or no author suffix.`)
  }
}

// Script entry point: report any failure and exit non-zero.
main().catch((err: unknown) => {
  console.error(err)
  process.exit(1)
})

0 commit comments

Comments
 (0)