From e78a86124ec8e4d7afbdacd0338e459fa4822e5b Mon Sep 17 00:00:00 2001 From: Andy Atkinson Date: Tue, 11 Jun 2024 10:05:56 -0500 Subject: [PATCH] Talk: SaaS on Rails on PostgreSQL This PR has code samples and examples from the Posette Conf talk "SaaS on Rails on PostgreSQL" https://www.citusdata.com/posette/speakers/andrew-atkinson/ Some of the tech examples: - Row Level Security - Composite primary keys - Active Record Horizontal Sharding - Citus, row-based and schema-based sharding --- .gitignore | 2 + docs/posette/README.md | 45 ++++++++ ...ive_record_horizontal_sharding_example.yml | 11 ++ docs/posette/citus_examples.sql | 105 ++++++++++++++++++ docs/posette/configure_citus_docker.sh | 25 +++++ docs/posette/configure_citus_docker_two.sh | 27 +++++ docs/posette/cpk_example.sql | 35 ++++++ docs/posette/rls_config.sql | 58 ++++++++++ docs/posette/schema_examples.sql | 9 ++ 9 files changed, 317 insertions(+) create mode 100644 docs/posette/README.md create mode 100644 docs/posette/active_record_horizontal_sharding_example.yml create mode 100644 docs/posette/citus_examples.sql create mode 100755 docs/posette/configure_citus_docker.sh create mode 100755 docs/posette/configure_citus_docker_two.sh create mode 100644 docs/posette/cpk_example.sql create mode 100644 docs/posette/rls_config.sql create mode 100644 docs/posette/schema_examples.sql diff --git a/.gitignore b/.gitignore index 65872ad..6bb67be 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,5 @@ docker/.pgpass output.log .pgpass + +docs/posette/docker-pg-data-dir/ diff --git a/docs/posette/README.md b/docs/posette/README.md new file mode 100644 index 0000000..58783b1 --- /dev/null +++ b/docs/posette/README.md @@ -0,0 +1,45 @@ +# README + +## Citus + +## Sharding types +As of 12.1, we have two types: + +- Row-based sharding +- Schema-based sharding + +## General Terminology and Concepts +- Coordinator node (the primary node) +- Shard nodes are Postgres instances that we don't want to directly modify + +## 
Workflow +- Reference tables or distributed tables +- For row-based, call `create_distributed_table()` to create a distributed table + +## Ruby gems +- [activerecord-multi-tenant](https://github.com/citusdata/activerecord-multi-tenant) adds helpers + + +## Active Record Horizontal Sharding +- Shard per tenant +- ShardRecord base class + +```rb +class ShardRecord < ApplicationRecord + self.abstract_class = true + + connects_to shards: { + company_one: { writing: :primary_company_one, reading: :primary_company_one_replica } + } +end +``` + +## Citus Schema Sharding + +```sql +SELECT * FROM citus_schemas; +``` + +## citus_stat_tenants + +- [citus_stat_tenants](https://www.citusdata.com/blog/2023/05/12/tenant-monitoring-in-citus-and-postgres-with-citus-stat-tenants/) diff --git a/docs/posette/active_record_horizontal_sharding_example.yml b/docs/posette/active_record_horizontal_sharding_example.yml new file mode 100644 index 0000000..9dd755f --- /dev/null +++ b/docs/posette/active_record_horizontal_sharding_example.yml @@ -0,0 +1,11 @@ +production: + primary: + database: my_primary_database + primary_replica: + database: my_primary_database + replica: true + primary_company_one: + database: company_one_db + primary_company_one_replica: + database: company_one_db + replica: true diff --git a/docs/posette/citus_examples.sql b/docs/posette/citus_examples.sql new file mode 100644 index 0000000..83486bb --- /dev/null +++ b/docs/posette/citus_examples.sql @@ -0,0 +1,105 @@ +-- Generic example +-- Multi-tenant applications +--- +-- https://docs.citusdata.com/en/v12.1/get_started/tutorial_multi_tenant.html + +-- Add tables: companies, campaigns, ads +-- "Distribution column" is the company ID +-- This is either a PK or a FK +-- companies.id +-- campaigns.company_id +-- ads.company_id + +-- Then distribute the tables: +-- SELECT create_distributed_table('companies', 'id'); +-- SELECT create_distributed_table('campaigns', 'company_id'); +-- SELECT create_distributed_table('ads', 
'company_id'); + +-- Tables are "colocated" + +-- Bulk load data using COPY command +-- \copy companies from 'companies.csv' with csv +-- \copy campaigns from 'campaigns.csv' with csv +-- \copy ads from 'ads.csv' with csv + +--docker exec -it citus psql -U owner -d rideshare_development + +-- Rideshare examples + +CREATE TABLE companies ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + name text NOT NULL, + image_url text, + created_at timestamp without time zone NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +-- `rideshare.users` could be a reference table +-- `rideshare.locations` could be a reference table +-- Needed to drop FKs referring to the table I wanted to make a reference table +-- alter table trip_requests drop constraint fk_rails_3fdebbfaca; +-- alter table trip_requests drop constraint fk_rails_fa2679b626; +-- alter table trip_requests drop constraint fk_rails_c17a139554; +-- alter table trips drop constraint fk_rails_e7560abc33; +SELECT create_reference_table('users'); +SELECT create_reference_table('locations'); + +INSERT INTO companies (name, image_url) +VALUES ('MSP Rides', 'https://media.istockphoto.com/id/151520574/photo/white-formal-gloved-uniformed-hand-opening-car-door.jpg?s=1024x1024&w=is&k=20&c=Sg--aZrhlv4LOt6sOHEFhg548_Y2wYESnSAl-8RQGGQ='); + +ALTER TABLE trips ADD COLUMN IF NOT EXISTS company_id BIGINT; +ALTER TABLE trip_requests ADD COLUMN IF NOT EXISTS company_id BIGINT; + +ALTER TABLE trip_requests ADD CONSTRAINT fk_trip_requests_company_id + FOREIGN KEY (company_id) REFERENCES companies(id); + +ALTER TABLE trips ADD CONSTRAINT fk_trips_company_id + FOREIGN KEY (company_id) REFERENCES companies(id); + +-- Let's associate trips with a company (no WHERE on purpose: every existing row is backfilled) +UPDATE trips +SET company_id = (SELECT id FROM companies ORDER BY id LIMIT 1); + +UPDATE trip_requests +SET company_id = (SELECT id FROM companies ORDER BY id LIMIT 1); + +\q + +-- Run as superuser (shell command, not SQL: start a new psql session with it) +-- docker exec -it citus psql -U postgres + +-- 
Include rideshare search path +-- Should be SET ROLE postgres +-- Connected to postgres DB +SET search_path = "$user", public, rideshare; + +-- Row distribution +-- ERROR: connection to the remote node owner@localhost:5432 failed with the following error: FATAL: too many connections for role "owner" +-- Was 10 +-- Increase connections for owner +SELECT create_distributed_table('companies', 'id'); + +-- alter table trip_requests drop constraint trip_requests_pkey CASCADE; +SELECT create_distributed_table('trip_requests', 'company_id'); + +-- drop constraint from trip_positions, for demo: fk_rails_9688ac8706 +-- Although trip_positions would be great to be distributed +-- Drop single column PK +SELECT create_distributed_table('trips', 'company_id'); + +-- Log all statements on citus Postgres: +-- ALTER DATABASE rideshare_development SET log_statement = 'all'; + + +-- "Reference" tables concept +-- These are distributed to all workers + + +-- Schema-based sharding +-- https://docs.citusdata.com/en/stable/develop/reference_ddl.html +-- Schema distribution +SELECT citus_schema_distribute('user_service'); + +-- DDL propagation +-- https://docs.citusdata.com/en/stable/develop/api_guc.html +-- citus.enable_ddl_propagation (boolean) diff --git a/docs/posette/configure_citus_docker.sh b/docs/posette/configure_citus_docker.sh new file mode 100755 index 0000000..c90140c --- /dev/null +++ b/docs/posette/configure_citus_docker.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Single node citus: +# https://docs.citusdata.com/en/v12.1/installation/single_node_docker.html +# +# export DOCKER_CLI_HINTS=false +# +docker run \ + --name citus \ + --publish 15001:5432 \ + --volume "${PWD}/docker-pg-data-dir:/var/lib/postgresql/data" \ + --env POSTGRES_USER=postgres \ + --env POSTGRES_PASSWORD=postgres \ + --detach \ + citusdata/citus:12.1.3 + +# Wait (up to ~30s) until Postgres accepts TCP connections on the published port +for _ in $(seq 1 30); do pg_isready -h localhost -p 15001 -q && break; sleep 1; done + +# verify it's running, and that Citus is installed: +PGPASSWORD=postgres psql -U postgres \ + -h localhost \ + -p 15001 \ + -d 
postgres \ + -c "SELECT * FROM citus_version();" diff --git a/docs/posette/configure_citus_docker_two.sh b/docs/posette/configure_citus_docker_two.sh new file mode 100755 index 0000000..0da0d0b --- /dev/null +++ b/docs/posette/configure_citus_docker_two.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# Single node citus: +# https://docs.citusdata.com/en/v12.1/installation/single_node_docker.html +# +# export DOCKER_CLI_HINTS=false +# + +# Configure Rideshare: +cd rideshare || exit 1 +export DOCKER_CLI_HINTS=false +export DB_URL="postgres://postgres:postgres@localhost:15001/postgres" +export RIDESHARE_DB_PASSWORD="HSnDDgFtyW9fyFI" + +# sh db/setup.sh 2>&1 | tee -a citus.log +# psql:db/create_database.sql:3: NOTICE: Citus partially supports CREATE DATABASE for distributed databases +# DETAIL: Citus does not propagate CREATE DATABASE command to workers +# HINT: You can manually create a database and its extensions on workers. +# CREATE DATABASE + +docker exec -it citus psql -U postgres +# Then, inside the psql session, run: +# \l +# \c rideshare_development +# \dx +# +# SELECT * FROM citus_version(); diff --git a/docs/posette/cpk_example.sql b/docs/posette/cpk_example.sql new file mode 100644 index 0000000..0157fe0 --- /dev/null +++ b/docs/posette/cpk_example.sql @@ -0,0 +1,35 @@ +CREATE TABLE accounts ( + id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + name text UNIQUE NOT NULL +); + +-- single column primary key +CREATE TABLE orders ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + account_id INT NOT NULL, + CONSTRAINT fk_account_id + FOREIGN KEY (account_id) + REFERENCES accounts(id) +); + +-- Alternative for FK (inline REFERENCES; use instead of the definition above, not in addition) +CREATE TABLE orders ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + account_id INT NOT NULL REFERENCES accounts(id) +); + +-- Drop single column PK +ALTER TABLE orders DROP CONSTRAINT orders_pkey; + +-- Add CPK +ALTER TABLE orders +ADD CONSTRAINT orders_pkey_cpk +PRIMARY KEY (id, account_id); + +-- Create with CPK (alternative to the CREATE + ALTER steps above; run on its own) +CREATE TABLE orders ( + id BIGINT GENERATED ALWAYS AS IDENTITY NOT NULL, + account_id 
INT NOT NULL, + CONSTRAINT orders_pkey_cpk + PRIMARY KEY (id, account_id) +); diff --git a/docs/posette/rls_config.sql b/docs/posette/rls_config.sql new file mode 100644 index 0000000..34c897e --- /dev/null +++ b/docs/posette/rls_config.sql @@ -0,0 +1,58 @@ +-- NOTE: superusers, roles with BYPASSRLS, and (unless FORCE ROW LEVEL SECURITY is set) the table owner bypass RLS +-- Create less privileged users to test with +CREATE USER bob; +CREATE USER jane; +CREATE SCHEMA my_schema; +GRANT USAGE ON SCHEMA my_schema TO bob; +GRANT USAGE ON SCHEMA my_schema TO jane; +ALTER DEFAULT PRIVILEGES IN SCHEMA my_schema +GRANT INSERT, SELECT, UPDATE, DELETE ON TABLES TO bob; +ALTER DEFAULT PRIVILEGES IN SCHEMA my_schema +GRANT INSERT, SELECT, UPDATE, DELETE ON TABLES TO jane; + +SET search_path = 'my_schema'; + +CREATE TABLE users ( + user_id serial PRIMARY KEY, + username text UNIQUE NOT NULL +); +INSERT INTO users (username) +VALUES ('bob'), ('jane'); + +CREATE TABLE user_data (data TEXT, user_id INTEGER); + +-- Returns users.user_id for CURRENT_USER, or NULL when no row matches. +-- SELECT ... INTO without STRICT never raises NO_DATA_FOUND; it just leaves the target NULL. +CREATE OR REPLACE FUNCTION current_user_id() RETURNS int AS $$ +DECLARE + found_user_id int; +BEGIN + SELECT user_id INTO found_user_id FROM users WHERE username = CURRENT_USER; + RETURN found_user_id; +END; +$$ LANGUAGE plpgsql STABLE; + +SET ROLE bob; +INSERT INTO user_data (data, user_id) +VALUES ('bob data', current_user_id()); + +SET ROLE jane; +INSERT INTO user_data (data, user_id) +VALUES ('jane data', current_user_id()); + +RESET ROLE; +-- Must be owner of table (RESET ROLE returns to the session user that created it) +-- Enable for user_data +ALTER TABLE user_data ENABLE ROW LEVEL SECURITY; + +-- Policy for user_data +CREATE POLICY select_policy ON user_data +FOR SELECT + USING (user_id = current_user_id()); + +-- Make sure it's set to ON +SET row_security TO ON; + +SELECT polname, polcmd, pg_get_expr(polqual, polrelid), pg_get_expr(polwithcheck, polrelid) +FROM pg_policy +WHERE polrelid = 'user_data'::regclass; diff --git a/docs/posette/schema_examples.sql 
b/docs/posette/schema_examples.sql new file mode 100644 index 0000000..b928d1d --- /dev/null +++ b/docs/posette/schema_examples.sql @@ -0,0 +1,9 @@ +-- Bookstore app companies (one schema per tenant; IF NOT EXISTS keeps the script re-runnable) +CREATE SCHEMA IF NOT EXISTS amazon; +CREATE SCHEMA IF NOT EXISTS bn; +CREATE SCHEMA IF NOT EXISTS bookshop; + + +-- Rideshare company schemas +CREATE SCHEMA IF NOT EXISTS mn_msp_rides; +CREATE SCHEMA IF NOT EXISTS nyc_rides;