From e976b55a5c2a83f4f2ab1221ea228c9b8737db4f Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:32:09 -0500 Subject: [PATCH 01/13] delete ip logs... WARNING UNTESTED! --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index d81e9a77b..001291867 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,6 +33,7 @@ unzip rawdata.zip # 6. Repeat the above step for user logs and event logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:55:44 -0500 Subject: [PATCH 02/13] initialize README file with company name --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 001291867..f2bfd22bd 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -11,6 +11,7 @@ set -x mkdir analysis output touch README.md +echo "# Project Name: DSI Consulting Inc." > README.md touch analysis/main.py # download client data From 127b1ff6f98353ab0fdbbb4abdba1bb5d7cff7bd Mon Sep 17 00:00:00 2001 From: D Segreti Date: Tue, 3 Mar 2026 20:19:32 -0500 Subject: [PATCH 03/13] Project setup is complete --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4b48cec8b..de5e5873b 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -28,7 +28,7 @@ unzip -q rawdata.zip # Complete assignment here # 1. Create a directory named data - +mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) # 3. List the contents of the ./data/raw directory From 2e596564c920fd7fa5d87999d14588653ba59d48 Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Tue, 3 Mar 2026 20:53:15 -0500 Subject: [PATCH 04/13] done --- 02_activities/assignments/assignment.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index de5e5873b..6da3a486d 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -30,19 +30,32 @@ unzip -q rawdata.zip # 1. Create a directory named data mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) +mv rawdata data/raw # 3. List the contents of the ./data/raw directory +ls rawdata # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs +mkdir ./data/processed +cd ./data/processed +mkdir server_logs +mkdir user_logs +mkdir event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +cp "server_logs" data/processed/server_logs # 6. Repeat the above step for user logs and event logs +cp "user_logs" data/processed/server_logs +cp "event_logs" data/processed/server_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs - +rm -ipaddr- # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed +touch data/inventory.txt + + ########################################### From 54f7158b1207b12e763217c9388e08f5dc8bd81e Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Tue, 3 Mar 2026 21:02:57 -0500 Subject: [PATCH 05/13] final --- 02_activities/assignments/assignment.sh | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 6da3a486d..c2b4309bb 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,7 +33,7 @@ mkdir data mv rawdata data/raw # 3. List the contents of the ./data/raw directory -ls rawdata +ls data/raw # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs @@ -44,16 +44,15 @@ mkdir user_logs mkdir event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cp "server_logs" data/processed/server_logs +cp "data/raw" data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp "user_logs" data/processed/server_logs -cp "event_logs" data/processed/server_logs +cp "data/raw" data/processed/server_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -rm -ipaddr- +rm ipaddr-* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -touch data/inventory.txt +touch datainventory.txt From dc6a2b564c664a3050c36795149a520ba59bce90 Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Tue, 3 Mar 2026 21:35:30 -0500 Subject: [PATCH 06/13] finala --- 02_activities/assignments/assignment.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index c2b4309bb..1901a08ac 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -44,10 +44,11 @@ mkdir user_logs mkdir event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cp "data/raw" data/processed/server_logs +cp "server*.log" data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp "data/raw" data/processed/server_logs +cp "user*.log" data/processed/server_logs +cp "event*.log" data/processed/server_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs rm ipaddr-* From 862fcf783ac1cc1fadee6a9325869903b048cbff Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Fri, 6 Mar 2026 10:07:07 -0500 Subject: [PATCH 07/13] finaba --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 1901a08ac..ebafb04d6 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -44,6 +44,7 @@ mkdir user_logs mkdir event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +cd data/raw cp "server*.log" data/processed/server_logs # 6. Repeat the above step for user logs and event logs From 11b7d0b90a95ad962f9946b3e330ac17b3da3568 Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Fri, 6 Mar 2026 10:09:48 -0500 Subject: [PATCH 08/13] finabla --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index ebafb04d6..4c06019a7 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -44,7 +44,7 @@ mkdir user_logs mkdir event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd data/raw +cd .. cp "server*.log" data/processed/server_logs # 6. Repeat the above step for user logs and event logs From bd3944171880e216efbf7a6415a1c77be0000bf0 Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Sat, 7 Mar 2026 17:24:06 -0500 Subject: [PATCH 09/13] done --- 02_activities/assignments/assignment.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4c06019a7..b76221f5c 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -38,23 +38,24 @@ ls data/raw # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs mkdir ./data/processed -cd ./data/processed -mkdir server_logs -mkdir user_logs -mkdir event_logs +mkdir ./data/processed/server_logs +mkdir ./data/processed/user_logs +mkdir ./data/processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd .. -cp "server*.log" data/processed/server_logs +cp data/raw/server*.log data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp "user*.log" data/processed/server_logs -cp "event*.log" data/processed/server_logs +cp data/raw/user*.log data/processed/server_logs +cp data/raw/event*.log data/processed/server_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -rm ipaddr-* +rm data/raw/*ipaddr* +rm data/processed/user_logs/*ipaddr* + # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -touch datainventory.txt +touch data/inventory.txt +ls -R /data/processed >> data/inventory.txt From 28d3094e349f72b8660040b651f2751114901b5d Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Sat, 7 Mar 2026 17:27:50 -0500 Subject: [PATCH 10/13] done2 --- 02_activities/assignments/assignment.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index b76221f5c..1da4ce83d 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -43,11 +43,11 @@ mkdir ./data/processed/user_logs mkdir ./data/processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cp data/raw/server*.log data/processed/server_logs +cp data/raw/*server*.log data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp data/raw/user*.log data/processed/server_logs -cp data/raw/event*.log data/processed/server_logs +cp data/raw/*user*.log data/processed/user_logs +cp data/raw/*event*.log data/processed/event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs rm data/raw/*ipaddr* From fbd15266a6c73024a15c1b68a52d75cd5f7dc7fd Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Sat, 7 Mar 2026 17:30:43 -0500 Subject: [PATCH 11/13] done3 --- 02_activities/assignments/assignment.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 1da4ce83d..eea38f7e1 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -54,8 +54,7 @@ rm data/raw/*ipaddr* rm data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -touch data/inventory.txt -ls -R /data/processed >> data/inventory.txt +ls -R /data/processed > data/inventory.txt From 7e2962cf5ab5190aeafff9fa15517c95a33be88e Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Sat, 7 Mar 2026 17:34:49 -0500 Subject: [PATCH 12/13] done4 --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index eea38f7e1..476bee209 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -54,7 +54,7 @@ rm data/raw/*ipaddr* rm data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -ls -R /data/processed > data/inventory.txt +find /data/processed -type f > data/inventory.txt From 9cb26143e2234d2a4848d0b1c3e43c497bc7d4ed Mon Sep 17 00:00:00 2001 From: Dellannia Segreti Date: Sat, 7 Mar 2026 17:40:10 -0500 Subject: [PATCH 13/13] done5 --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 476bee209..666ace2a2 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -54,7 +54,7 @@ rm data/raw/*ipaddr* rm data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -find /data/processed -type f > data/inventory.txt +find data/processed -type f > data/inventory.txt