Skip to content

Commit

Permalink
Merge branch 'main' of github.com:Leo-XM-Zeng/pg_duckdb into pg_duckd…
Browse files Browse the repository at this point in the history
…b_15
  • Loading branch information
Leo-XM-Zeng committed Sep 30, 2024
2 parents 0130159 + 7433c7a commit a216a4b
Show file tree
Hide file tree
Showing 40 changed files with 1,472 additions and 225 deletions.
22 changes: 11 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: duckdb install-duckdb clean-duckdb lintcheck check-regression-duckdb clean-regression .depend
.PHONY: duckdb install-duckdb clean-duckdb clean-all lintcheck check-regression-duckdb clean-regression

MODULE_big = pg_duckdb
EXTENSION = pg_duckdb
Expand All @@ -22,7 +22,13 @@ SRCS = src/scan/heap_reader.cpp \
src/pgduckdb_planner.cpp \
src/pgduckdb_ruleutils.cpp \
src/pgduckdb_types.cpp \
src/pgduckdb.cpp
src/pgduckdb.cpp \
src/catalog/pgduckdb_storage.cpp \
src/catalog/pgduckdb_schema.cpp \
src/catalog/pgduckdb_table.cpp \
src/catalog/pgduckdb_transaction.cpp \
src/catalog/pgduckdb_transaction_manager.cpp \
src/catalog/pgduckdb_catalog.cpp

OBJS = $(subst .cpp,.o, $(SRCS))

Expand All @@ -47,7 +53,7 @@ endif
override PG_CPPFLAGS += -Iinclude -Ithird_party/duckdb/src/include -Ithird_party/duckdb/third_party/re2
override PG_CXXFLAGS += -std=c++17 -Wno-sign-compare ${DUCKDB_BUILD_CXX_FLAGS}

SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -L$(PG_LIB) -lduckdb -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -lstdc++ -llz4
SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb -lstdc++ -llz4

COMPILE.cc.bc = $(CXX) -Wno-ignored-attributes -Wno-register $(BITCODE_CXXFLAGS) $(CXXFLAGS) $(PG_CPPFLAGS) $(PG_CXXFLAGS) -I$(INCLUDEDIR_SERVER) -emit-llvm -c

Expand All @@ -63,7 +69,7 @@ ifeq ($(UNAME_S),Linux)
DUCKDB_LIB = libduckdb.so
endif

all: duckdb $(OBJS) .depend
all: duckdb $(OBJS)

include Makefile.global

Expand Down Expand Up @@ -108,18 +114,12 @@ clean-duckdb:

install: install-duckdb

clean: clean-regression clean-duckdb
clean-all: clean clean-regression clean-duckdb

lintcheck:
clang-tidy $(SRCS) -- -I$(INCLUDEDIR) -I$(INCLUDEDIR_SERVER) -Iinclude $(CPPFLAGS) -std=c++17
ruff check

.depend:
$(RM) -f .depend
$(foreach SRC,$(SRCS),$(CXX) $(CPPFLAGS) -I$(INCLUDEDIR) -I$(INCLUDEDIR_SERVER) -MM -MT $(SRC:.cpp=.o) $(SRC) >> .depend;)

format:
git clang-format origin/main
ruff format

include .depend
47 changes: 47 additions & 0 deletions Makefile.global
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,50 @@ INCLUDEDIR_SERVER := ${shell $(PG_CONFIG) --includedir-server}

USE_PGXS = 1
include $(PGXS)

# Everything below is vendored from Postgres's Makefile.global. It is
# normally only enabled when --enable-depend is passed to Postgres's
# ./configure script. This enables it even if that is not the case, so that
# running "make clean" is pretty much never necessary anymore to re-trigger
# builds of C/C++ files when their headers are modified.
# Automatic header-dependency tracking. The whole section is skipped when
# $(autodepend) is already "yes".
ifneq ($(autodepend), yes)

# Fallback C compile command, used only if COMPILE.c is not already defined.
ifndef COMPILE.c
COMPILE.c = $(CC) $(CFLAGS) $(CPPFLAGS) -c
endif

# Fallback C++ compile command, used only if COMPILE.cc is not already defined.
ifndef COMPILE.cc
COMPILE.cc = $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c
endif

# Directory that receives the generated .Po dependency files.
DEPDIR = .deps

# The pattern rules below are defined only when $(GCC) is "yes".
ifeq ($(GCC), yes)

# GCC allows us to create object and dependency file in one invocation.
# NOTE(review): $(*F) is only the file-within-directory part of the stem, so
# the .Po files are keyed by basename. Two sources in different directories
# that share a basename would write the same .Po file -- confirm this cannot
# happen with the current SRCS list.
%.o : %.c
@if test ! -d $(DEPDIR); then mkdir -p $(DEPDIR); fi
$(COMPILE.c) -o $@ $< -MMD -MP -MF $(DEPDIR)/$(*F).Po

%.o : %.cpp
@if test ! -d $(DEPDIR); then mkdir -p $(DEPDIR); fi
$(COMPILE.cc) -o $@ $< -MMD -MP -MF $(DEPDIR)/$(*F).Po

endif # GCC

# Include all the dependency files generated for the current
# directory. Note that make would complain if include was called with
# no arguments.
Po_files := $(wildcard $(DEPDIR)/*.Po)
ifneq (,$(Po_files))
include $(Po_files)
endif

# hook for clean-up
# Both "clean" and "distclean" additionally remove the dependency directory.
clean distclean: clean-deps

.PHONY: clean-deps
clean-deps:
@rm -rf $(DEPDIR)

endif # autodepend
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ pg_duckdb is a Postgres extension that embeds DuckDB's columnar-vectorized analy

pg_duckdb was developed in collaboration with our partners, [Hydra](https://hydra.so) and [MotherDuck](https://motherduck.com).

## Goals

* Broad support for Postgres types and functions within an analytical context
* Query external datasets on S3, GCP and Azure (Parquet, Iceberg and Delta Lake)
* Join native Postgres and analytical data
* Direct access to data stored in MotherDuck
* Support installation of DuckDB extensions

## Installation

Pre-built binaries and additional installation options are coming soon.
Expand Down Expand Up @@ -38,7 +46,7 @@ CREATE EXTENSION pg_duckdb;
- `SELECT n FROM read_parquet('s3://bucket/file.parquet') AS (n int)`
- `SELECT n FROM read_csv('s3://bucket/file.csv') AS (n int)`
- You can pass globs and arrays to these functions, just like in DuckDB
- Enable the DuckDB Iceberg extension using `SELECT duckdb.enable_extension('iceberg')` and read Iceberg files with `iceberg_scan`.
- Enable the DuckDB Iceberg extension using `SELECT duckdb.install_extension('iceberg')` and read Iceberg files with `iceberg_scan`.
- Write a query — or an entire table — to parquet in object storage.
- `COPY (SELECT foo, bar FROM baz) TO 's3://...'`
- `COPY table TO 's3://...'`
Expand Down Expand Up @@ -96,7 +104,7 @@ Please see the [project roadmap][roadmap] for upcoming planned tasks and feature

### Connect with MotherDuck

pg_duckdb integration with MotherDuck will enable hybrid execution with Differential Storage.
pg_duckdb integration with MotherDuck will enable dual execution with Differential Storage.

* Zero-copy snapshots and forks
* Time travel
Expand Down
55 changes: 55 additions & 0 deletions include/pgduckdb/catalog/pgduckdb_catalog.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#pragma once

#include "duckdb/storage/storage_extension.hpp"
#include "duckdb/catalog/catalog.hpp"
#include "pgduckdb/catalog/pgduckdb_schema.hpp"

extern "C" {
#include "postgres.h"
#include "miscadmin.h"
#include "utils/snapshot.h"
}

namespace duckdb {

// DuckDB Catalog subclass for pg_duckdb. This header only declares the
// overrides of the Catalog API (schema lookup and scanning, planning hooks for
// CREATE TABLE AS / INSERT / DELETE / UPDATE / CREATE INDEX, and size/path
// queries); the definitions live outside this header.
class PostgresCatalog : public Catalog {
public:
// Constructs the catalog for the attached database `db`.
// NOTE(review): `connection_string` presumably initializes `path` -- confirm in the .cpp.
PostgresCatalog(AttachedDatabase &db, const string &connection_string, AccessMode access_mode);

public:
// Static factory that returns a newly created Catalog.
// NOTE(review): the signature looks like the attach callback expected by
// DuckDB's StorageExtension -- confirm where it is registered.
static unique_ptr<Catalog> Attach(StorageExtensionInfo *storage_info, ClientContext &context, AttachedDatabase &db,
const string &name, AttachInfo &info, AccessMode access_mode);

public:
// Public state of the catalog; how these are populated is not visible in this header.
string path;
AccessMode access_mode;

public:
// -- Catalog API --
// All members below override virtual functions of duckdb::Catalog.
void Initialize(bool load_builtin) override;
string GetCatalogType() override;
optional_ptr<CatalogEntry> CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) override;
optional_ptr<SchemaCatalogEntry> GetSchema(CatalogTransaction transaction, const string &schema_name,
OnEntryNotFound if_not_found,
QueryErrorContext error_context = QueryErrorContext()) override;
void ScanSchemas(ClientContext &context, std::function<void(SchemaCatalogEntry &)> callback) override;
unique_ptr<PhysicalOperator> PlanCreateTableAs(ClientContext &context, LogicalCreateTable &op,
unique_ptr<PhysicalOperator> plan) override;
unique_ptr<PhysicalOperator> PlanInsert(ClientContext &context, LogicalInsert &op,
unique_ptr<PhysicalOperator> plan) override;
unique_ptr<PhysicalOperator> PlanDelete(ClientContext &context, LogicalDelete &op,
unique_ptr<PhysicalOperator> plan) override;
unique_ptr<PhysicalOperator> PlanUpdate(ClientContext &context, LogicalUpdate &op,
unique_ptr<PhysicalOperator> plan) override;
unique_ptr<LogicalOperator> BindCreateIndex(Binder &binder, CreateStatement &stmt, TableCatalogEntry &table,
unique_ptr<LogicalOperator> plan) override;
DatabaseSize GetDatabaseSize(ClientContext &context) override;
bool InMemory() override;
string GetDBPath() override;
void DropSchema(ClientContext &context, DropInfo &info) override;

private:
// PostgresSchema objects owned by this catalog, held in a case-insensitive
// map (presumably keyed by schema name -- confirm).
case_insensitive_map_t<unique_ptr<PostgresSchema>> schemas;
};

} // namespace duckdb
46 changes: 46 additions & 0 deletions include/pgduckdb/catalog/pgduckdb_schema.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#pragma once

#include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
#include "pgduckdb/catalog/pgduckdb_table.hpp"

extern "C" {
#include "postgres.h"
#include "miscadmin.h"
#include "utils/snapshot.h"
#include "nodes/pathnodes.h"
}

namespace duckdb {

// DuckDB SchemaCatalogEntry subclass for pg_duckdb. It declares overrides for
// the schema API (scanning, the Create* entry points, entry lookup, drop and
// alter) and carries a Postgres Snapshot; definitions live outside this header.
class PostgresSchema : public SchemaCatalogEntry {
public:
// Constructs the schema entry inside `catalog` from `info`, with the given Postgres snapshot.
PostgresSchema(Catalog &catalog, CreateSchemaInfo &info, Snapshot snapshot);

public:
// -- Schema API --
// All members below override virtual functions of duckdb::SchemaCatalogEntry.
void Scan(ClientContext &context, CatalogType type, const std::function<void(CatalogEntry &)> &callback) override;
void Scan(CatalogType type, const std::function<void(CatalogEntry &)> &callback) override;
optional_ptr<CatalogEntry> CreateIndex(CatalogTransaction transaction, CreateIndexInfo &info,
TableCatalogEntry &table) override;
optional_ptr<CatalogEntry> CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) override;
optional_ptr<CatalogEntry> CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) override;
optional_ptr<CatalogEntry> CreateView(CatalogTransaction transaction, CreateViewInfo &info) override;
optional_ptr<CatalogEntry> CreateSequence(CatalogTransaction transaction, CreateSequenceInfo &info) override;
optional_ptr<CatalogEntry> CreateTableFunction(CatalogTransaction transaction,
CreateTableFunctionInfo &info) override;
optional_ptr<CatalogEntry> CreateCopyFunction(CatalogTransaction transaction,
CreateCopyFunctionInfo &info) override;
optional_ptr<CatalogEntry> CreatePragmaFunction(CatalogTransaction transaction,
CreatePragmaFunctionInfo &info) override;
optional_ptr<CatalogEntry> CreateCollation(CatalogTransaction transaction, CreateCollationInfo &info) override;
optional_ptr<CatalogEntry> CreateType(CatalogTransaction transaction, CreateTypeInfo &info) override;
optional_ptr<CatalogEntry> GetEntry(CatalogTransaction transaction, CatalogType type, const string &name) override;
void DropEntry(ClientContext &context, DropInfo &info) override;
void Alter(CatalogTransaction transaction, AlterInfo &info) override;

public:
// Postgres snapshot associated with this schema entry.
Snapshot snapshot;
// Reference to the owning catalog.
// NOTE(review): SchemaCatalogEntry may already expose a `catalog` reference
// through its base class -- confirm whether this member shadows it.
Catalog &catalog;
};

} // namespace duckdb
27 changes: 27 additions & 0 deletions include/pgduckdb/catalog/pgduckdb_storage.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#pragma once

#include "duckdb/storage/storage_extension.hpp"
extern "C" {
#include "postgres.h"
#include "miscadmin.h"
#include "utils/snapshot.h"
#include "nodes/pathnodes.h"
}

namespace duckdb {

// StorageExtensionInfo subclass that carries the Postgres snapshot passed to
// it at construction time.
class PostgresStorageExtensionInfo : public StorageExtensionInfo {
public:
	// Stores `snapshot` as-is. Marked `explicit` so that a raw Snapshot cannot
	// be implicitly converted into a PostgresStorageExtensionInfo.
	explicit PostgresStorageExtensionInfo(Snapshot snapshot) : snapshot(snapshot) {
	}

public:
	// Snapshot supplied to the constructor.
	Snapshot snapshot;
};

// StorageExtension subclass constructed from a Postgres snapshot. The
// constructor is defined outside this header.
class PostgresStorageExtension : public StorageExtension {
public:
	// Marked `explicit` so that a raw Snapshot cannot be implicitly converted
	// into a PostgresStorageExtension.
	explicit PostgresStorageExtension(Snapshot snapshot);
};

} // namespace duckdb
71 changes: 71 additions & 0 deletions include/pgduckdb/catalog/pgduckdb_table.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#pragma once

#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
#include "duckdb/storage/table_storage_info.hpp"

extern "C" {
#include "postgres.h"
#include "utils/snapshot.h"
#include "postgres.h"
#include "catalog/namespace.h"
#include "catalog/pg_class.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "utils/builtins.h"
#include "utils/regproc.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "access/htup_details.h"
}

namespace duckdb {

// Common base of the Postgres-backed table catalog entries declared in this
// header. It holds the cardinality and snapshot passed to its constructor,
// which is protected so only subclasses can construct it.
class PostgresTable : public TableCatalogEntry {
public:
	virtual ~PostgresTable() = default;

	// Static helper operating on `info` for the relation `relid` under `snapshot`.
	// NOTE(review): presumably fills `info` with the relation's columns and
	// returns whether that succeeded -- confirm in the .cpp.
	static bool PopulateColumns(CreateTableInfo &info, Oid relid, Snapshot snapshot);

protected:
	PostgresTable(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info, Cardinality cardinality,
	              Snapshot snapshot);

	// State shared with subclasses.
	Cardinality cardinality;
	Snapshot snapshot;
};

// PostgresTable subclass that additionally stores an Oid and overrides the
// statistics, scan-function and storage-info hooks of the table API.
class PostgresHeapTable : public PostgresTable {
public:
// Takes the same arguments as the PostgresTable constructor plus `oid`.
PostgresHeapTable(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info, Cardinality cardinality,
Snapshot snapshot, Oid oid);

public:
// -- Table API --
unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id) override;
TableFunction GetScanFunction(ClientContext &context, unique_ptr<FunctionData> &bind_data) override;
TableStorageInfo GetStorageInfo(ClientContext &context) override;

private:
// NOTE(review): presumably the Postgres relation Oid of the heap table -- confirm.
Oid oid;
};

// PostgresTable subclass that additionally stores a Postgres planner Path and
// PlannerInfo, and overrides the statistics, scan-function and storage-info
// hooks of the table API.
class PostgresIndexTable : public PostgresTable {
public:
// Takes the same arguments as the PostgresTable constructor plus `path` and `planner_info`.
PostgresIndexTable(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info, Cardinality cardinality,
Snapshot snapshot, Path *path, PlannerInfo *planner_info);

public:
// -- Table API --
unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id) override;
TableFunction GetScanFunction(ClientContext &context, unique_ptr<FunctionData> &bind_data) override;
TableStorageInfo GetStorageInfo(ClientContext &context) override;

private:
// Raw pointers into Postgres planner structures; ownership and lifetime are
// not visible from this header -- TODO confirm they outlive this object.
Path *path;
PlannerInfo *planner_info;
};

} // namespace duckdb
43 changes: 43 additions & 0 deletions include/pgduckdb/catalog/pgduckdb_transaction.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once

#include "duckdb/transaction/transaction.hpp"
#include "pgduckdb/catalog/pgduckdb_table.hpp"
#include "pgduckdb/catalog/pgduckdb_schema.hpp"

namespace duckdb {

class PostgresCatalog;

// Bundles one owned PostgresSchema with its name and a case-insensitive map of
// owned PostgresTable objects.
class SchemaItems {
public:
	// Copies `name` and takes ownership of `schema`.
	SchemaItems(unique_ptr<PostgresSchema> &&schema, const string &name)
	    : name(name),
	      schema(std::move(schema)) {
	}

	// Returns the catalog entry for the table called `name`; defined outside this header.
	// NOTE(review): presumably consults/populates `tables` -- confirm in the .cpp.
	optional_ptr<CatalogEntry> GetTable(const string &name, PlannerInfo *planner_info);

	string name;
	unique_ptr<PostgresSchema> schema;
	case_insensitive_map_t<unique_ptr<PostgresTable>> tables;
};

// DuckDB Transaction subclass for pg_duckdb. It holds a reference to the
// PostgresCatalog, a Postgres Snapshot, and a case-insensitive map of
// SchemaItems; member functions are defined outside this header.
class PostgresTransaction : public Transaction {
public:
PostgresTransaction(TransactionManager &manager, ClientContext &context, PostgresCatalog &catalog,
Snapshot snapshot);
~PostgresTransaction() override;

public:
// Looks up a catalog entry by type, schema name and entry name.
// NOTE(review): the optional_ptr result is presumably null when nothing is found -- confirm.
optional_ptr<CatalogEntry> GetCatalogEntry(CatalogType type, const string &schema, const string &name);

private:
// Schema lookup helper used internally; returns the entry for schema `name`.
optional_ptr<CatalogEntry> GetSchema(const string &name);

private:
// SchemaItems held by this transaction (presumably keyed by schema name -- confirm).
case_insensitive_map_t<SchemaItems> schemas;
PostgresCatalog &catalog;
Snapshot snapshot;
};

} // namespace duckdb
Loading

0 comments on commit a216a4b

Please sign in to comment.