Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/run-processing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,15 @@ jobs:
RCLONE_S3_BUCKET_ACL: private
RCLONE_S3_CHUNK_SIZE: 20M
RCLONE_S3_UPLOAD_CONCURRENCY: 2
# TODO obviously the secrets/vars need to be sorted out
# Uploads the internal-only exports: syncs src/data_export_internal/ to the
# R2 bucket named by R2_INTERNAL_DATA_MARTS_BUCKET, skipping the .gitkeep
# placeholder file.
# NOTE(review): `rclone sync` makes the destination match the source, so
# objects already in the bucket that are no longer exported would be removed
# — confirm that is the intended behavior for this bucket.
- name: Upload internal dumps
run: rclone sync --exclude .gitkeep src/data_export_internal/ :s3:$R2_INTERNAL_DATA_MARTS_BUCKET
env:
R2_INTERNAL_DATA_MARTS_BUCKET: ${{ vars.R2_INTERNAL_DATA_MARTS_BUCKET }}
# rclone reads its S3 backend settings from the RCLONE_S3_* variables below.
RCLONE_S3_PROVIDER: Cloudflare
RCLONE_S3_ENDPOINT: https://${{ vars.R2_ACCOUNT_ID }}.r2.cloudflarestorage.com
# NOTE(review): the access key id is read from `vars` while the secret key is
# read from `secrets` — confirm this split is intended (see TODO above).
RCLONE_S3_ACCESS_KEY_ID: ${{ vars.R2_ACCESS_KEY_ID }}
RCLONE_S3_SECRET_ACCESS_KEY: ${{ secrets.R2_ACCESS_KEY_SECRET }}
RCLONE_S3_BUCKET_ACL: private
RCLONE_S3_CHUNK_SIZE: 20M
RCLONE_S3_UPLOAD_CONCURRENCY: 2
Empty file added data_export_internal/.gitkeep
Empty file.
Binary file not shown.
1 change: 1 addition & 0 deletions models/intermediate/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,4 @@ sources:
- name: teiserver_game_rating_logs
- name: teiserver_user_stats
- name: teiserver_users
- name: teiserver_benchmark_events
8 changes: 8 additions & 0 deletions models/marts/benchmark_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{{ config(location='data_export_internal/benchmark_events.parquet') }}

-- Internal mart: passes the benchmark event dump through unchanged, exposing
-- exactly the four documented columns. Written to the internal export
-- directory configured above rather than the public one.
WITH source_events AS (
    SELECT *
    FROM {{ source('pgdumps', 'teiserver_benchmark_events') }}
)

SELECT
    source_events.id,
    source_events.timestamp,
    source_events.value,
    source_events.is_anon
FROM source_events
26 changes: 26 additions & 0 deletions models/marts/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,29 @@ models:
- name: small_team_skill_un
data_type: float
description: Latest small team uncertainty (sigma)

# Schema for the internal benchmark events mart
# (models/marts/benchmark_events.sql).
- name: benchmark_events
description: |
Benchmark telemetry events from teiserver, unioned across registered
and anonymous clients. One row per event.
columns:
# `id` alone is not a key; uniqueness of (id, is_anon) is asserted by the
# singular test in tests/benchmark_events_unique_id.sql.
- name: id
data_type: bigint
description: |
Event id. Unique only within `is_anon` partition (each source
table has its own PK sequence) — not globally unique.
data_tests:
- not_null
- name: timestamp
data_type: timestamp
description: When the event was recorded
data_tests:
- not_null
# NOTE(review): no not_null test is declared for `value`, so a missing
# payload is tolerated — confirm that is intended.
- name: value
data_type: string
description: Event payload (JSON-encoded)
- name: is_anon
data_type: bool
description: True if the event came from an anonymous client
data_tests:
- not_null
21 changes: 21 additions & 0 deletions scripts/build_dev_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,26 @@ INNER JOIN teiserver_users AS tu
ON tus.user_id = tu.id
ORDER BY user_id;

-- Dev sample of benchmark events: the 50 lowest-id rows from each of the
-- registered (is_anon = false) and anonymous (is_anon = true) partitions of
-- the prod dump, so both kinds of event are represented in dev data.
-- SELECT * is deliberate: the dev file should carry the same columns as the
-- prod file it is sampled from.
CREATE TEMP TABLE teiserver_benchmark_events AS
WITH registered_sample AS (
    SELECT *
    FROM 'data_source/prod/teiserver_benchmark_events.parquet'
    WHERE is_anon = false
    ORDER BY id
    LIMIT 50
),

anon_sample AS (
    SELECT *
    FROM 'data_source/prod/teiserver_benchmark_events.parquet'
    WHERE is_anon = true
    ORDER BY id
    LIMIT 50
)

SELECT * FROM registered_sample
UNION ALL
SELECT * FROM anon_sample
-- id is only unique within an is_anon partition, so ordering by id alone
-- leaves ties between the two samples in arbitrary order. Adding is_anon
-- makes the order total and the exported dev file reproducible.
ORDER BY id, is_anon;

CREATE TEMP TABLE replay_demos AS
SELECT rd.*
FROM 'data_source/prod/replay_demos.parquet' AS rd
Expand Down Expand Up @@ -128,6 +148,7 @@ copy teiserver_battle_match_memberships to 'data_source/dev/teiserver_battle_mat
copy teiserver_game_rating_logs to 'data_source/dev/teiserver_game_rating_logs.parquet' (format parquet, codec zstd);
copy teiserver_users to 'data_source/dev/teiserver_users.parquet' (format parquet, codec zstd);
copy teiserver_user_stats to 'data_source/dev/teiserver_user_stats.parquet' (format parquet, codec zstd);
copy teiserver_benchmark_events to 'data_source/dev/teiserver_benchmark_events.parquet' (format parquet, codec zstd);
copy replay_demos to 'data_source/dev/replay_demos.parquet' (format parquet, codec zstd);
copy replay_ally_teams to 'data_source/dev/replay_ally_teams.parquet' (format parquet, codec zstd);
copy replay_maps to 'data_source/dev/replay_maps.parquet' (format parquet, codec zstd);
Expand Down
14 changes: 14 additions & 0 deletions scripts/export_prod_data_source.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@ COPY teiserver.public.teiserver_game_rating_logs TO 'data_export/teiserver_game_
COPY (SELECT * EXCLUDE (password) FROM teiserver.public.account_users) TO 'data_export/teiserver_users.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);
COPY teiserver.public.teiserver_account_user_stats TO 'data_export/teiserver_user_stats.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);

-- Export benchmark telemetry ('system:benchmark' events) from both the
-- registered-client and anonymous event tables into a single parquet file.
-- is_anon records which table a row came from; ids are taken as-is from each
-- table, so they can collide across the two halves of the union.
COPY (
    WITH benchmark_event_types AS (
        SELECT event_types.id
        FROM teiserver.public.telemetry_complex_client_event_types AS event_types
        WHERE event_types.name = 'system:benchmark'
    )

    SELECT
        client_events.id,
        client_events.timestamp,
        client_events.value,
        false AS is_anon
    FROM teiserver.public.telemetry_complex_client_events AS client_events
    INNER JOIN benchmark_event_types
        ON client_events.event_type_id = benchmark_event_types.id

    UNION ALL

    SELECT
        anon_events.id,
        anon_events.timestamp,
        anon_events.value,
        true AS is_anon
    FROM teiserver.public.telemetry_complex_anon_events AS anon_events
    INNER JOIN benchmark_event_types
        ON anon_events.event_type_id = benchmark_event_types.id
) TO 'data_export/teiserver_benchmark_events.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);

ATTACH 'dbname=bar' AS replay (TYPE POSTGRES, READ_ONLY);

COPY replay.public.Demos TO 'data_export/replay_demos.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 9);
Expand Down
7 changes: 7 additions & 0 deletions tests/benchmark_events_unique_id.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Singular test: (id, is_anon) must identify at most one benchmark event.
-- Returns one row per duplicated key together with its occurrence count;
-- an empty result means the test passes.
WITH key_counts AS (
    SELECT
        id,
        is_anon,
        count(*) AS n
    FROM {{ ref('benchmark_events') }}
    GROUP BY
        id,
        is_anon
)

SELECT
    id,
    is_anon,
    n
FROM key_counts
WHERE n > 1
Loading