Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit 1b5ca45

Browse files
committed
Use the langchain-postgres package to access PostgreSQL PGVector
1 parent b02f647 commit 1b5ca45

5 files changed

Lines changed: 50 additions & 41 deletions

File tree

app/pgvector_chat_flan_xl.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import boto3
1313

14-
from langchain_community.vectorstores import PGVector
14+
from langchain_postgres import PGVector
1515
from langchain_community.embeddings import SagemakerEndpointEmbeddings
1616
from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler
1717

@@ -142,8 +142,8 @@ def transform_output(self, output: bytes) -> str:
142142

143143
vectorstore = PGVector(
144144
collection_name=collection_name,
145-
connection_string=CONNECTION_STRING,
146-
embedding_function=_create_sagemaker_embeddings(embeddings_model_endpoint, region)
145+
connection=CONNECTION_STRING,
146+
embeddings=_create_sagemaker_embeddings(embeddings_model_endpoint, region)
147147
)
148148
retriever = vectorstore.as_retriever()
149149

app/pgvector_chat_llama2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import boto3
1313

14-
from langchain_community.vectorstores import PGVector
14+
from langchain_postgres import PGVector
1515
from langchain_community.embeddings import SagemakerEndpointEmbeddings
1616
from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler
1717

@@ -177,8 +177,8 @@ def transform_output(self, output: bytes) -> str:
177177

178178
vectorstore = PGVector(
179179
collection_name=collection_name,
180-
connection_string=CONNECTION_STRING,
181-
embedding_function=_create_sagemaker_embeddings(embeddings_model_endpoint, region)
180+
connection=CONNECTION_STRING,
181+
embeddings=_create_sagemaker_embeddings(embeddings_model_endpoint, region)
182182
)
183183
retriever = vectorstore.as_retriever()
184184

data_ingestion_to_vectordb/container/Dockerfile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ FROM python:3.10.13-slim
55
# amount of space in the image, which reduces start up time.
66
RUN pip --no-cache-dir install -U pip
77
RUN pip --no-cache-dir install boto3==1.33.9 \
8-
langchain==0.1.0 \
9-
langchain-community==0.0.20 \
10-
psycopg2-binary==2.9.6 \
11-
pgvector==0.2.0 \
8+
langchain==0.2.5 \
9+
langchain-community==0.2.4 \
10+
langchain-postgres==0.0.7 \
11+
SQLAlchemy==2.0.28 \
12+
psycopg[binary]==3.1.19 \
13+
pgvector==0.2.5 \
1214
beautifulsoup4==4.12.3
1315

1416

data_ingestion_to_vectordb/container/load_data_into_pgvector.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import numpy as np
1919

2020
from langchain_community.document_loaders import ReadTheDocsLoader
21-
from langchain_community.vectorstores import PGVector
21+
from langchain_postgres import PGVector
2222
from langchain.text_splitter import RecursiveCharacterTextSplitter
2323

2424
from credentials import get_credentials
@@ -38,7 +38,7 @@ def process_shard(shard, embeddings_model_endpoint_name, aws_region, collection_
3838
vectordb = PGVector.from_existing_index(
3939
embedding=embeddings,
4040
collection_name=collection_name,
41-
connection_string=connection_string)
41+
connection=connection_string)
4242

4343
vectordb.add_documents(documents=shard)
4444

@@ -77,7 +77,7 @@ def process_shard(shard, embeddings_model_endpoint_name, aws_region, collection_
7777
db_host = secret['host']
7878

7979
CONNECTION_STRING = PGVector.connection_string_from_db_params(
80-
driver = 'psycopg2',
80+
driver = 'psycopg',
8181
user = db_username,
8282
password = db_password,
8383
host = db_host,
@@ -123,8 +123,8 @@ def process_shard(shard, embeddings_model_endpoint_name, aws_region, collection_
123123

124124
embeddings = create_sagemaker_embeddings_from_js_model(args.embeddings_model_endpoint_name, args.aws_region)
125125
_ = PGVector(collection_name=args.pgvector_collection_name,
126-
connection_string=CONNECTION_STRING,
127-
embedding_function=embeddings)
126+
connection=CONNECTION_STRING,
127+
embeddings=embeddings)
128128
else:
129129
logger.info(f"{path} file is not present, "
130130
f"will wait for some other node to create the {args.pgvector_collection_name} collection")

data_ingestion_to_vectordb/data_ingestion_to_pgvector.ipynb

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,21 @@
1313
"Here is a list of packages that are used in this notebook.\n",
1414
"\n",
1515
"```\n",
16-
"!pip list | grep -E -w \"sagemaker|ipython-sql|langchain|psycopg2|pgvector|numpy|sh\"\n",
17-
"----------------------------------------------------------------------------------------\n",
18-
"ipython-sql 0.5.0\n",
19-
"langchain 0.1.0\n",
20-
"langchain-community 0.0.20\n",
21-
"langchain-core 0.1.52\n",
22-
"langchain-text-splitters 0.0.1\n",
23-
"numpy 1.24.3\n",
24-
"pgvector 0.2.0\n",
25-
"psycopg2-binary 2.9.6\n",
26-
"sagemaker 2.155.0\n",
27-
"sagemaker-studio-image-build 0.6.0\n",
28-
"sh 2.0.4\n",
16+
"!pip list | grep -E -w \"sagemaker_studio_image_build|ipython-sql|langchain|psycopg|pgvector|numpy|sh\"\n",
17+
"-----------------------------------------------------------------------------------------------------\n",
18+
"ipython-sql 0.5.0\n",
19+
"langchain 0.2.5\n",
20+
"langchain-community 0.2.4\n",
21+
"langchain-core 0.2.43\n",
22+
"langchain-postgres 0.0.7\n",
23+
"langchain-text-splitters 0.2.4\n",
24+
"numpy 1.26.4\n",
25+
"pgvector 0.2.5\n",
26+
"psycopg 3.1.19\n",
27+
"psycopg-binary 3.1.19\n",
28+
"psycopg-pool 3.2.4\n",
29+
"sagemaker_studio_image_build 0.6.0\n",
30+
"sh 2.0.4\n",
2931
"```"
3032
]
3133
},
@@ -49,10 +51,13 @@
4951
"source": [
5052
"%%capture --no-stderr\n",
5153
"\n",
52-
"!pip install -U langchain==0.1.0\n",
54+
"!pip install -U langchain==0.2.5\n",
55+
"!pip install -U langchain-community==0.2.4\n",
56+
"!pip install -U langchain-postgres==0.0.7\n",
57+
"!pip install -U SQLAlchemy==2.0.28\n",
58+
"!pip install -U pgvector==0.2.5\n",
59+
"!pip install -U psycopg[binary]==3.1.19\n",
5360
"!pip install -U ipython-sql==0.5.0\n",
54-
"!pip install -U psycopg2-binary==2.9.6\n",
55-
"!pip install -U pgvector==0.2.0\n",
5661
"!pip install -U sh==2.0.4\n",
5762
"!pip install -U sagemaker-studio-image-build==0.6.0"
5863
]
@@ -66,7 +71,7 @@
6671
},
6772
"outputs": [],
6873
"source": [
69-
"!pip list | grep -E -w \"sagemaker|ipython-sql|langchain|psycopg2|pgvector|numpy|sh\""
74+
"!pip list | grep -E -w \"sagemaker_studio_image_build|ipython-sql|langchain|psycopg|pgvector|numpy|sh\""
7075
]
7176
},
7277
{
@@ -277,7 +282,7 @@
277282
"db_port = secret['port']\n",
278283
"db_host = secret['host']\n",
279284
"\n",
280-
"driver = 'psycopg2'\n",
285+
"driver = 'psycopg'\n",
281286
"\n",
282287
"connection_string = f\"postgresql+{driver}://{db_username}:{db_password}@{db_host}:{db_port}/\"\n",
283288
"connection_string"
@@ -566,7 +571,9 @@
566571
"metadata": {},
567572
"outputs": [],
568573
"source": [
569-
"!pip install -Uq beautifulsoup4==4.12.3"
574+
"%%capture --no-stderr\n",
575+
"\n",
576+
"!pip install -U beautifulsoup4==4.12.3"
570577
]
571578
},
572579
{
@@ -633,7 +640,7 @@
633640
"outputs": [],
634641
"source": [
635642
"import urllib\n",
636-
"from langchain_community.vectorstores import PGVector\n",
643+
"from langchain_postgres import PGVector\n",
637644
"from container.credentials import get_credentials\n",
638645
"\n",
639646
"\n",
@@ -644,7 +651,7 @@
644651
"db_host = secret['host']\n",
645652
"\n",
646653
"CONNECTION_STRING = PGVector.connection_string_from_db_params(\n",
647-
" driver='psycopg2',\n",
654+
" driver='psycopg',\n",
648655
" user=db_username,\n",
649656
" password=db_password,\n",
650657
" host=db_host,\n",
@@ -670,8 +677,8 @@
670677
"\n",
671678
"vectordb = PGVector(\n",
672679
" collection_name=pgvector_collection_name,\n",
673-
" connection_string=CONNECTION_STRING,\n",
674-
" embedding_function=embeddings\n",
680+
" connection=CONNECTION_STRING,\n",
681+
" embeddings=embeddings\n",
675682
")"
676683
]
677684
},
@@ -711,7 +718,7 @@
711718
"source": [
712719
"import urllib\n",
713720
"\n",
714-
"from langchain.vectorstores import PGVector\n",
721+
"from langchain_postgres import PGVector\n",
715722
"\n",
716723
"from container.credentials import get_credentials\n",
717724
"from container.sm_helper import create_sagemaker_embeddings_from_js_model\n",
@@ -725,7 +732,7 @@
725732
"db_host = secret['host']\n",
726733
"\n",
727734
"connection_string = PGVector.connection_string_from_db_params(\n",
728-
" driver='psycopg2',\n",
735+
" driver='psycopg',\n",
729736
" user=db_username,\n",
730737
" password=db_password,\n",
731738
" host=db_host,\n",
@@ -737,7 +744,7 @@
737744
" embedding=create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name,\n",
738745
" aws_region),\n",
739746
" collection_name=pgvector_collection_name,\n",
740-
" connection_string=connection_string)\n",
747+
" connection=connection_string)\n",
741748
"\n",
742749
"q = \"Which XGBoost versions does SageMaker support?\"\n",
743750
"docs = docsearch.similarity_search(q, k=3)\n",

0 commit comments

Comments
 (0)