Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Snowflake/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Snowflake
Scripts for loading BWARM data into Snowflake from a Snowflake stage (external or internal).

We use an S3 folder connected to a Snowflake stage (called `@IMPORTSTAGE`);
see the [Snowflake user guide](https://docs.snowflake.com/en/user-guide/data-load-s3-create-stage).

All .tsv files are stored gzipped in the S3 folder.
Plain .tsv files should also work, but you have to change the file names in the scripts.

_Michael Ettl - Sonoton Music_
176 changes: 176 additions & 0 deletions Snowflake/create_database.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
-- Author: Michael Ettl - Sonoton Music
--
-- Creates the BWARM table structure in Snowflake.
--
-- WARNING: every object below is created with CREATE OR REPLACE, so running
-- this script drops all data that is already loaded. If you want to keep
-- existing snapshots, do not run it once the tables exist.
--
-- Select your schema.
USE DB.SCHEMA;

-- Import format settings for the tab-separated (TSV) files.
CREATE OR REPLACE FILE FORMAT TSV_FILE_FORMAT
    TYPE = 'CSV'
    COMPRESSION = 'AUTO'
    FIELD_DELIMITER = '\t'
    RECORD_DELIMITER = '\n'
    SKIP_HEADER = 0
    FIELD_OPTIONALLY_ENCLOSED_BY = 'NONE'
    TRIM_SPACE = FALSE
    ERROR_ON_COLUMN_COUNT_MISMATCH = TRUE
    ESCAPE = 'NONE'
    ESCAPE_UNENCLOSED_FIELD = '\134'    -- octal 134 is the backslash character
    DATE_FORMAT = 'AUTO'
    TIMESTAMP_FORMAT = 'AUTO'
    NULL_IF = ('\\N');                  -- a field containing \N is loaded as NULL

-- Snapshot registry. snapshotid is generated automatically on insert; the
-- refresh scripts read MAX(snapshotid) from this table to tag loaded rows.
CREATE OR REPLACE TABLE MLC_SNAPSHOTS (
    snapshotid      INT NOT NULL AUTOINCREMENT,
    created_date    DATETIME,
    PRIMARY KEY (snapshotid)
);

-- Works. The first eleven columns come from works.tsv.gz in file order;
-- snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_WORKS (
    FeedProvidersWorkId                     VARCHAR(3000),
    ISWC                                    VARCHAR(11),
    WorkTitle                               VARCHAR,
    OpusNumber                              VARCHAR(3000),
    ComposerCatalogNumber                   VARCHAR(3000),
    NominalDuration                         VARCHAR(3000),
    HasRightsInDispute                      BOOLEAN,
    TerritoryOfPublicDomain                 VARCHAR,
    IsArrangementOfTraditionalWork          BOOLEAN,
    AlternativeWorkForUsStatutoryReversion  VARCHAR(3000),
    UsStatutoryReversionDate                VARCHAR(100),
    snapshotid                              INT,
    PRIMARY KEY (FeedProvidersWorkId)
);

-- Alternative titles of works. Five columns come from
-- workalternativetitles.tsv.gz; snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_ALTERNATIVE_WORK_TITLES (
    FeedProvidersWorkAlternativeTitleId     VARCHAR(3000),
    FeedProvidersWorkId                     VARCHAR(3000),
    AlternativeTitle                        VARCHAR,
    LanguageAndScriptCode                   VARCHAR(3000),
    TitleType                               VARCHAR(3000),
    snapshotid                              INT,
    PRIMARY KEY (FeedProvidersWorkAlternativeTitleId)
);

-- Proprietary identifiers attached to works, each with the id of the party
-- that allocated it, plus the snapshotid column used by the loaders.
CREATE OR REPLACE TABLE MLC_WORK_IDENTIFIERS (
    FeedProvidersWorkProprietaryIdentifierId    VARCHAR(3000),
    FeedProvidersWorkId                         VARCHAR(3000),
    Identifier                                  VARCHAR(3000),
    FeedProvidersAllocatingPartyId              VARCHAR(3000),
    snapshotid                                  INT,
    PRIMARY KEY (FeedProvidersWorkProprietaryIdentifierId)
);

-- Parties. Nine columns come from parties.tsv.gz in file order; snapshotid
-- is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_PARTIES (
    FeedProvidersPartyId    VARCHAR(3000),
    -- An ISNI is 16 characters long, and the DisplayArtistISNI columns in this
    -- schema already use VARCHAR(16). With VARCHAR(15) a full-length value is
    -- rejected as too long, and because the loader runs with
    -- ON_ERROR = CONTINUE the affected rows would be skipped silently.
    ISNI                    VARCHAR(16),
    IpiNameNumber           INTEGER,
    CisacSocietyId          VARCHAR(3),
    DPID                    VARCHAR(100),
    FullName                VARCHAR,
    NamesBeforeKeyName      VARCHAR,
    KeyName                 VARCHAR,
    NamesAfterKeyName       VARCHAR,
    snapshotid              INT,
    PRIMARY KEY (FeedProvidersPartyId)
);

-- Right shares linking a work to a party. Twelve columns come from
-- workrightshares.tsv.gz; snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_WORK_RIGHT_SHARES (
    FeedProvidersWorkRightShareId           VARCHAR(3000),
    FeedProvidersWorkId                     VARCHAR(3000),
    FeedProvidersPartyId                    VARCHAR(3000),
    PartyRole                               VARCHAR(100),
    RightSharePercentage                    FLOAT,
    RightShareType                          VARCHAR(100),
    RightsType                              VARCHAR(100),
    ValidityStartDate                       DATE,
    ValidityEndDate                         DATE,
    FeedProvidersParentWorkRightShareId     VARCHAR(3000),
    TerritoryCode                           VARCHAR(3000),
    UseType                                 VARCHAR(3000),
    snapshotid                              INT,
    PRIMARY KEY (FeedProvidersWorkRightShareId)
);

-- Recordings. Fourteen columns come from recordings.tsv.gz in file order;
-- snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_RECORDINGS (
    FeedProvidersRecordingId    VARCHAR(3000),
    ISRC                        VARCHAR(12),
    RecordingTitle              VARCHAR,
    RecordingSubTitle           VARCHAR,
    DisplayArtistName           VARCHAR,
    DisplayArtistISNI           VARCHAR(16),
    PLine                       VARCHAR(3000),
    Duration                    VARCHAR(100),
    FeedProvidersReleaseId      VARCHAR(3000),
    StudioProducerName          VARCHAR,
    StudioProducerId            VARCHAR(3000),
    OriginalDataProviderName    VARCHAR(3000),
    OriginalDataProviderDPID    VARCHAR(3000),
    IsDataProvidedAsReceived    BOOLEAN,
    snapshotid                  INT,
    PRIMARY KEY (FeedProvidersRecordingId)
);

-- Alternative titles of recordings. Five columns come from
-- recordingalternativetitles.tsv.gz; snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_ALTERNATIVE_RECORDING_TITLES (
    FeedProvidersRecordingAlternativeTitleId    VARCHAR(3000),
    FeedProvidersRecordingId                    VARCHAR(3000),
    AlternativeTitle                            VARCHAR,
    LanguageAndScriptCode                       VARCHAR(3000),
    TitleType                                   VARCHAR(3000),
    snapshotid                                  INT,
    PRIMARY KEY (FeedProvidersRecordingAlternativeTitleId)
);

-- Proprietary identifiers attached to recordings. Four columns come from
-- recordingidentifiers.tsv.gz; snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_RECORDING_IDENTIFIERS (
    FeedProvidersRecordingProprietaryIdentifierId   VARCHAR(3000),
    FeedProvidersRecordingId                        VARCHAR(3000),
    Identifier                                      VARCHAR(3000),
    FeedProvidersAllocatingPartyId                  VARCHAR(3000),
    snapshotid                                      INT,
    PRIMARY KEY (FeedProvidersRecordingProprietaryIdentifierId)
);

-- Releases. Eleven columns come from releases.tsv.gz in file order;
-- snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_RELEASES (
    FeedProvidersReleaseId      VARCHAR(3000),
    ICPN                        VARCHAR(15),
    ReleaseTitle                VARCHAR,
    ReleaseSubTitle             VARCHAR,
    DisplayArtistName           VARCHAR,
    DisplayArtistISNI           VARCHAR(16),
    LabelName                   VARCHAR(3000),
    ReleaseDate                 DATE,
    OriginalDataProviderName    VARCHAR(3000),
    OriginalDataProviderDPID    VARCHAR(3000),
    IsDataProvidedAsReceived    BOOLEAN,
    snapshotid                  INT,
    PRIMARY KEY (FeedProvidersReleaseId)
);

-- Proprietary identifiers attached to releases. Four columns come from
-- releaseidentifiers.tsv.gz; snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_RELEASE_IDENTIFIERS (
    FeedProvidersReleaseProprietaryIdentifierId     VARCHAR(3000),
    FeedProvidersReleaseId                          VARCHAR(3000),
    Identifier                                      VARCHAR(3000),
    FeedProvidersAllocatingPartyId                  VARCHAR(3000),
    snapshotid                                      INT,
    PRIMARY KEY (FeedProvidersReleaseProprietaryIdentifierId)
);

-- Link table between works and recordings. Three columns come from
-- worksrecordings.tsv.gz; snapshotid is appended by the refresh script.
CREATE OR REPLACE TABLE MLC_WORK_RECORDINGS (
    FeedProvidersLinkId         VARCHAR(3000),
    FeedProvidersWorkId         VARCHAR(3000),
    FeedProvidersRecordingId    VARCHAR(3000),
    snapshotid                  INT,
    PRIMARY KEY (FeedProvidersLinkId)
);

-- Unclaimed work right shares. Thirteen columns come from
-- unclaimedworkrightshares.tsv.gz; snapshotid is appended by the refresh script.
-- Unlike the other tables, this one declares no primary key.
CREATE OR REPLACE TABLE MLC_UNCLAIMED_WORKS (
    FeedProvidersRightShareId       VARCHAR(3000),
    FeedProvidersRecordingId        VARCHAR(3000),
    FeedProvidersWorkId             VARCHAR(3000),
    ISRC                            VARCHAR(12),
    DspRecordingId                  VARCHAR(3000),
    RecordingTitle                  VARCHAR,
    RecordingSubTitle               VARCHAR,
    AlternativeRecordingTitle       VARCHAR,
    DisplayArtistName               VARCHAR,
    DisplayArtistISNI               VARCHAR(16),
    Duration                        VARCHAR(100),
    UnclaimedPercentage             FLOAT,
    PercentileForPrioritisation     FLOAT,
    snapshotid                      INT
);
13 changes: 13 additions & 0 deletions Snowflake/refresh_parties.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

-- Full refresh of MLC_PARTIES from the staged parties file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Empty the table first so the load replaces the previous contents instead of
-- appending to them. Every other refresh script in this directory does the
-- same; without it a re-run with a new parties file duplicates every party.
TRUNCATE TABLE MLC_PARTIES;

-- $1..$9 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_PARTIES
    FROM (
        SELECT $1, $2, $3, $4, $5, $6, $7, $8, $9, $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/parties.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;

14 changes: 14 additions & 0 deletions Snowflake/refresh_recording_alt_titles.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_ALTERNATIVE_RECORDING_TITLES from the staged file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_ALTERNATIVE_RECORDING_TITLES;

-- $1..$5 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_ALTERNATIVE_RECORDING_TITLES
    FROM (
        SELECT $1, $2, $3, $4, $5, $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/recordingalternativetitles.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
12 changes: 12 additions & 0 deletions Snowflake/refresh_recording_ids.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

-- Full refresh of MLC_RECORDING_IDENTIFIERS from the staged file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_RECORDING_IDENTIFIERS;

-- $1..$4 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_RECORDING_IDENTIFIERS
    FROM (
        SELECT $1, $2, $3, $4, $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/recordingidentifiers.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    -- Same error policy as the other refresh scripts. Without it, one bad row
    -- aborts the whole COPY and leaves the just-truncated table empty.
    -- NOTE(review): confirm that aborting was not intended for this table.
    ON_ERROR = CONTINUE
;
14 changes: 14 additions & 0 deletions Snowflake/refresh_recordings.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_RECORDINGS from the staged recordings file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_RECORDINGS;

-- $1..$14 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_RECORDINGS
    FROM (
        SELECT
            $1, $2, $3, $4, $5, $6, $7,
            $8, $9, $10, $11, $12, $13, $14,
            $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/recordings.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
14 changes: 14 additions & 0 deletions Snowflake/refresh_release_ids.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_RELEASE_IDENTIFIERS from the staged file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_RELEASE_IDENTIFIERS;

-- $1..$4 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_RELEASE_IDENTIFIERS
    FROM (
        SELECT $1, $2, $3, $4, $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/releaseidentifiers.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
15 changes: 15 additions & 0 deletions Snowflake/refresh_releases.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

-- Full refresh of MLC_RELEASES from the staged releases file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_RELEASES;

-- $1..$11 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_RELEASES
    FROM (
        SELECT
            $1, $2, $3, $4, $5, $6,
            $7, $8, $9, $10, $11,
            $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/releases.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;

14 changes: 14 additions & 0 deletions Snowflake/refresh_unclaimed.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_UNCLAIMED_WORKS from the staged unclaimed right shares file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_UNCLAIMED_WORKS;

-- $1..$13 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_UNCLAIMED_WORKS
    FROM (
        SELECT
            $1, $2, $3, $4, $5, $6, $7,
            $8, $9, $10, $11, $12, $13,
            $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/unclaimedworkrightshares.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
14 changes: 14 additions & 0 deletions Snowflake/refresh_work_alt_titles.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_ALTERNATIVE_WORK_TITLES from the staged file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_ALTERNATIVE_WORK_TITLES;

-- $1..$5 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_ALTERNATIVE_WORK_TITLES
    FROM (
        SELECT $1, $2, $3, $4, $5, $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/workalternativetitles.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
14 changes: 14 additions & 0 deletions Snowflake/refresh_work_ids.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_WORK_IDENTIFIERS from the staged work identifiers file.
--
-- This script previously truncated and loaded MLC_WORKS from works.tsv.gz
-- (an exact copy of refresh_works.sql), so MLC_WORK_IDENTIFIERS was never
-- loaded. It now targets the table its file name refers to, following the
-- same pattern as refresh_recording_ids.sql and refresh_release_ids.sql.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_WORK_IDENTIFIERS;

-- MLC_WORK_IDENTIFIERS has four data columns plus snapshotid, so $1..$4 are
-- the file columns in order and the snapshot id becomes the last column.
COPY INTO MLC_WORK_IDENTIFIERS
    FROM (
        SELECT $1, $2, $3, $4, $snapshotid
        FROM @IMPORTSTAGE
    )
    -- File name follows the naming of the recording/release identifier files.
    -- NOTE(review): confirm it matches the object name in your stage.
    FILES = ('/mlc/workidentifiers.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
14 changes: 14 additions & 0 deletions Snowflake/refresh_work_recordings.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_WORK_RECORDINGS from the staged works/recordings link file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_WORK_RECORDINGS;

-- $1..$3 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_WORK_RECORDINGS
    FROM (
        SELECT $1, $2, $3, $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/worksrecordings.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
14 changes: 14 additions & 0 deletions Snowflake/refresh_work_shares.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_WORK_RIGHT_SHARES from the staged right shares file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_WORK_RIGHT_SHARES;

-- $1..$12 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_WORK_RIGHT_SHARES
    FROM (
        SELECT
            $1, $2, $3, $4, $5, $6,
            $7, $8, $9, $10, $11, $12,
            $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/workrightshares.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
14 changes: 14 additions & 0 deletions Snowflake/refresh_works.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

-- Full refresh of MLC_WORKS from the staged works file.

-- Tag every loaded row with the newest snapshot id (0 when MLC_SNAPSHOTS is empty).
SET snapshotid = IFNULL((SELECT MAX(snapshotid) FROM MLC_SNAPSHOTS), 0);

-- Clear the previous contents before loading.
TRUNCATE TABLE MLC_WORKS;

-- $1..$11 are the file columns in order; the snapshot id becomes the last column.
COPY INTO MLC_WORKS
    FROM (
        SELECT
            $1, $2, $3, $4, $5, $6,
            $7, $8, $9, $10, $11,
            $snapshotid
        FROM @IMPORTSTAGE
    )
    FILES = ('/mlc/works.tsv.gz')
    FILE_FORMAT = TSV_FILE_FORMAT
    ON_ERROR = CONTINUE     -- skip rows that fail to load instead of aborting
;
10 changes: 10 additions & 0 deletions Snowflake/update_snapshot.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Author: Michael Ettl - Sonoton Music
--
-- Uses Snowflake to load BWARM data from a Snowflake stage into Snowflake tables.
-- We use an S3 folder connected to a Snowflake stage; see
-- https://docs.snowflake.com/en/user-guide/data-load-s3-create-stage
-- All .tsv files are stored gzipped in the S3 folder. Plain .tsv files should
-- also work, but do not forget to change the file names.
--

-- Register a new snapshot: snapshotid is generated automatically and
-- created_date records the current timestamp.
INSERT INTO MLC_SNAPSHOTS (created_date)
VALUES (CURRENT_TIMESTAMP);