From e976b55a5c2a83f4f2ab1221ea228c9b8737db4f Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:32:09 -0500 Subject: [PATCH 01/21] delete ip logs... WARNING UNTESTED! --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index d81e9a77b..001291867 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,6 +33,7 @@ unzip rawdata.zip # 6. Repeat the above step for user logs and event logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:55:44 -0500 Subject: [PATCH 02/21] initialize README file with company name --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 001291867..f2bfd22bd 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -11,6 +11,7 @@ set -x mkdir analysis output touch README.md +echo "# Project Name: DSI Consulting Inc." > README.md touch analysis/main.py # download client data From 3a625e09e68e013b2fa4190a1b40f3e0c073fbc1 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 15:52:40 -0500 Subject: [PATCH 03/21] assignment done! --- 02_activities/assignments/assignment.sh | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4b48cec8b..7c7fed746 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -28,22 +28,42 @@ unzip -q rawdata.zip # Complete assignment here # 1. Create a directory named data +mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) +mv ./rawdata ./data +cd data +mv rawdata raw # 3. List the contents of the ./data/raw directory +cd raw +ls raw # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs +cd ~/shell/02_activities/assignments/newproject/data +mkdir processed +cd processed +mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +mv ~/shell/02_activities/assignments/newproject/data/raw/server* ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs +mv ~/shell/02_activities/assignments/newproject/data/raw/event* ~/shell/02_activities/assignments/newproject/data/processed/event_logs +mv ~/shell/02_activities/assignments/newproject/data/raw/user* ~/shell/02_activities/assignments/newproject/data/processed/user_logs + # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +cd user_logs +rm *ipaddr* +cd ~/shell/02_activities/assignments/newproject/data/raw +rm *ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed - +cd ~/shell/02_activities/assignments/newproject/data +touch inventory.txt +echo "event_logs/ server_logs/ user_logs/" > inventory.txt ########################################### From 631ccbc20540ea55f651d3d38fbb5c3f48659813 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 16:51:37 -0500 Subject: [PATCH 04/21] revise --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 7c7fed746..b337fbd57 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -37,7 +37,7 @@ mv rawdata raw # 3. List the contents of the ./data/raw directory cd raw -ls raw +ls # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs From 49e5c49b1aa44b0d675264a5fb4793f8aa25dad2 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:04:40 -0500 Subject: [PATCH 05/21] revise --- 02_activities/assignments/assignment.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index b337fbd57..b4afc1d5b 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -42,9 +42,8 @@ ls # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs cd ~/shell/02_activities/assignments/newproject/data -mkdir processed -cd processed -mkdir server_logs user_logs event_logs +mkdir -p processed/server_logs processed/user_logs processed/event_logs + # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs mv ~/shell/02_activities/assignments/newproject/data/raw/server* ~/shell/02_activities/assignments/newproject/data/processed/server_logs From c8fa42b07e484d7b8ac2c33f92be3088e08e4694 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:12:57 -0500 Subject: [PATCH 06/21] revise --- 02_activities/assignments/assignment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index b4afc1d5b..53fc28f71 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -36,12 +36,12 @@ cd data mv rawdata raw # 3. List the contents of the ./data/raw directory -cd raw -ls +ls raw/ # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs cd ~/shell/02_activities/assignments/newproject/data +mkdir processed mkdir -p processed/server_logs processed/user_logs processed/event_logs From 13a28c77ac216d93e76380257db81ff301dcd6fd Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:28:03 -0500 Subject: [PATCH 07/21] revise --- 02_activities/assignments/assignment.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 53fc28f71..90b1fb72c 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -36,7 +36,7 @@ cd data mv rawdata raw # 3. List the contents of the ./data/raw directory -ls raw/ +ls ~/shell/02_activities/assignments/newproject/data/raw # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs @@ -46,6 +46,7 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +cd ~/shell/02_activities/assignments/newproject/data/raw mv ~/shell/02_activities/assignments/newproject/data/raw/server* ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs From ebaccdb01555a18d98d7241c774cee207d762b81 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:35:05 -0500 Subject: [PATCH 08/21] revise --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 90b1fb72c..0ed38ed6a 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,7 +46,7 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd ~/shell/02_activities/assignments/newproject/data/raw +cd raw mv ~/shell/02_activities/assignments/newproject/data/raw/server* ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs From d7b2525d125a4aad0120e28df925fa9a954b1e42 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:38:30 -0500 Subject: [PATCH 09/21] revise --- 02_activities/assignments/assignment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 0ed38ed6a..3de7dfb2d 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,7 +46,7 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd raw +cd ~/shell/02_activities/assignments/newproject/data/processed mv ~/shell/02_activities/assignments/newproject/data/raw/server* ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs @@ -55,7 +55,7 @@ mv ~/shell/02_activities/assignments/newproject/data/raw/user* ~/shell/02_activi # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -cd user_logs +cd ~/shell/02_activities/assignments/newproject/data/processed/user_logs rm *ipaddr* cd ~/shell/02_activities/assignments/newproject/data/raw rm *ipaddr* From 32dd68e312eaa30f5e349a8548cd6b8410147ddb Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:43:32 -0500 Subject: [PATCH 10/21] revise --- 02_activities/assignments/assignment.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 3de7dfb2d..afcbd2b38 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -47,18 +47,20 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ~/shell/02_activities/assignments/newproject/data/processed -mv ~/shell/02_activities/assignments/newproject/data/raw/server* ~/shell/02_activities/assignments/newproject/data/processed/server_logs +mv ~/shell/02_activities/assignments/newproject/data/raw/server_* ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs -mv ~/shell/02_activities/assignments/newproject/data/raw/event* ~/shell/02_activities/assignments/newproject/data/processed/event_logs -mv ~/shell/02_activities/assignments/newproject/data/raw/user* ~/shell/02_activities/assignments/newproject/data/processed/user_logs +mv ~/shell/02_activities/assignments/newproject/data/raw/event_* ~/shell/02_activities/assignments/newproject/data/processed/event_logs +mv ~/shell/02_activities/assignments/newproject/data/raw/user_* ~/shell/02_activities/assignments/newproject/data/processed/user_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs cd ~/shell/02_activities/assignments/newproject/data/processed/user_logs rm *ipaddr* +rm ipaddr* cd ~/shell/02_activities/assignments/newproject/data/raw rm *ipaddr* +rm ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed cd ~/shell/02_activities/assignments/newproject/data From 2ef3e2f2ccb58fd33cd7a6a5f1ccba57cb244d58 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:48:43 -0500 Subject: [PATCH 11/21] revise --- 02_activities/assignments/assignment.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index afcbd2b38..d02dfaca8 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -47,11 +47,11 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ~/shell/02_activities/assignments/newproject/data/processed -mv ~/shell/02_activities/assignments/newproject/data/raw/server_* ~/shell/02_activities/assignments/newproject/data/processed/server_logs +cp ~/shell/02_activities/assignments/newproject/data/raw/"server_*" ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs -mv ~/shell/02_activities/assignments/newproject/data/raw/event_* ~/shell/02_activities/assignments/newproject/data/processed/event_logs -mv ~/shell/02_activities/assignments/newproject/data/raw/user_* ~/shell/02_activities/assignments/newproject/data/processed/user_logs +cp ~/shell/02_activities/assignments/newproject/data/raw/"event_*" ~/shell/02_activities/assignments/newproject/data/processed/event_logs +cp ~/shell/02_activities/assignments/newproject/data/raw/"user_*" ~/shell/02_activities/assignments/newproject/data/processed/user_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs From d2fb0c32d61c9b5549fb5ff395b2b3798f6d6604 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:50:37 -0500 Subject: [PATCH 12/21] revise --- 02_activities/assignments/assignment.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index d02dfaca8..65594d71a 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -47,11 +47,11 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ~/shell/02_activities/assignments/newproject/data/processed -cp ~/shell/02_activities/assignments/newproject/data/raw/"server_*" ~/shell/02_activities/assignments/newproject/data/processed/server_logs +cp ~/shell/02_activities/assignments/newproject/data/raw/"server*.log" ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp ~/shell/02_activities/assignments/newproject/data/raw/"event_*" ~/shell/02_activities/assignments/newproject/data/processed/event_logs -cp ~/shell/02_activities/assignments/newproject/data/raw/"user_*" ~/shell/02_activities/assignments/newproject/data/processed/user_logs +cp ~/shell/02_activities/assignments/newproject/data/raw/"event*.log" ~/shell/02_activities/assignments/newproject/data/processed/event_logs +cp ~/shell/02_activities/assignments/newproject/data/raw/"user*.log" ~/shell/02_activities/assignments/newproject/data/processed/user_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs From 65b88855dd0fd1ae4393a9044c15e4464928b37d Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 17:55:30 -0500 Subject: [PATCH 13/21] revise --- 02_activities/assignments/assignment.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 65594d71a..bdc8e3ef3 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,12 +46,12 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd ~/shell/02_activities/assignments/newproject/data/processed -cp ~/shell/02_activities/assignments/newproject/data/raw/"server*.log" ~/shell/02_activities/assignments/newproject/data/processed/server_logs +cd ~/shell/02_activities/assignments/newproject/data/raw +cp ./"server*.log" ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp ~/shell/02_activities/assignments/newproject/data/raw/"event*.log" ~/shell/02_activities/assignments/newproject/data/processed/event_logs -cp ~/shell/02_activities/assignments/newproject/data/raw/"user*.log" ~/shell/02_activities/assignments/newproject/data/processed/user_logs +cp ./"event*.log" ~/shell/02_activities/assignments/newproject/data/processed/event_logs +cp ./"user*.log" ~/shell/02_activities/assignments/newproject/data/processed/user_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs From ee26d4e0a6a01d563fd3786b36b2fef9fec0e07c Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 18:04:54 -0500 Subject: [PATCH 14/21] revise --- 02_activities/assignments/assignment.sh | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index bdc8e3ef3..2da7ee927 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -47,20 +47,18 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ~/shell/02_activities/assignments/newproject/data/raw -cp ./"server*.log" ~/shell/02_activities/assignments/newproject/data/processed/server_logs +cp ./server* ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp ./"event*.log" ~/shell/02_activities/assignments/newproject/data/processed/event_logs -cp ./"user*.log" ~/shell/02_activities/assignments/newproject/data/processed/user_logs +cp ./event* ~/shell/02_activities/assignments/newproject/data/processed/event_logs +cp ./user* ~/shell/02_activities/assignments/newproject/data/processed/user_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs cd ~/shell/02_activities/assignments/newproject/data/processed/user_logs -rm *ipaddr* -rm ipaddr* +rm ./*ipaddr* cd ~/shell/02_activities/assignments/newproject/data/raw -rm *ipaddr* -rm ipaddr* +rm ./*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed cd ~/shell/02_activities/assignments/newproject/data From e837cf1cf4db4c0809efff5710ba4da932e44dbd Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 18:10:06 -0500 Subject: [PATCH 15/21] revise --- 02_activities/assignments/assignment.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 2da7ee927..23718f662 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -47,11 +47,11 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ~/shell/02_activities/assignments/newproject/data/raw -cp ./server* ~/shell/02_activities/assignments/newproject/data/processed/server_logs +cp ./*server*.log ~/shell/02_activities/assignments/newproject/data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp ./event* ~/shell/02_activities/assignments/newproject/data/processed/event_logs -cp ./user* ~/shell/02_activities/assignments/newproject/data/processed/user_logs +cp ./*event*.log ~/shell/02_activities/assignments/newproject/data/processed/event_logs +cp ./*user*.log ~/shell/02_activities/assignments/newproject/data/processed/user_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs From 08793b6c335c83282ddf7ad1839fda2beb8539e2 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 18:12:32 -0500 Subject: [PATCH 16/21] revise --- 02_activities/assignments/assignment.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 23718f662..88de40cc5 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -45,6 +45,14 @@ mkdir processed mkdir -p processed/server_logs processed/user_logs processed/event_logs +# 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +# cd ~/shell/02_activities/assignments/newproject/data/raw +# cp ./*server*.log ~/shell/02_activities/assignments/newproject/data/processed/server_logs + +# 6. Repeat the above step for user logs and event logs +# cp ./*event*.log ~/shell/02_activities/assignments/newproject/data/processed/event_logs +# cp ./*user*.log ~/shell/02_activities/assignments/newproject/data/processed/user_logs + # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ~/shell/02_activities/assignments/newproject/data/raw cp ./*server*.log ~/shell/02_activities/assignments/newproject/data/processed/server_logs @@ -53,7 +61,6 @@ cp ./*server*.log ~/shell/02_activities/assignments/newproject/data/processed/se cp ./*event*.log ~/shell/02_activities/assignments/newproject/data/processed/event_logs cp ./*user*.log ~/shell/02_activities/assignments/newproject/data/processed/user_logs - # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs cd ~/shell/02_activities/assignments/newproject/data/processed/user_logs rm ./*ipaddr* From 6b5efd49b226ed848570e870b00b7af7340d6c69 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 18:23:18 -0500 Subject: [PATCH 17/21] revise --- 02_activities/assignments/assignment.sh | 29 ++++++++++--------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 88de40cc5..a04954880 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -31,18 +31,17 @@ unzip -q rawdata.zip mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) -mv ./rawdata ./data -cd data -mv rawdata raw +mv rawdata data/raw + # 3. List the contents of the ./data/raw directory -ls ~/shell/02_activities/assignments/newproject/data/raw +ls ./data/raw # 4. Create the directory ./data/processed, # then create the following sub-directories within it: server_logs, user_logs, and event_logs -cd ~/shell/02_activities/assignments/newproject/data -mkdir processed -mkdir -p processed/server_logs processed/user_logs processed/event_logs +mkdir -p ./data/processed/server_logs +mkdir -p ./data/processed/user_logs +mkdir -p ./data/processed/event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs @@ -54,22 +53,18 @@ mkdir -p processed/server_logs processed/user_logs processed/event_logs # cp ./*user*.log ~/shell/02_activities/assignments/newproject/data/processed/user_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd ~/shell/02_activities/assignments/newproject/data/raw -cp ./*server*.log ~/shell/02_activities/assignments/newproject/data/processed/server_logs +cp ./data/raw/*server*.log ./data/processed/server_logs/ # 6. Repeat the above step for user logs and event logs -cp ./*event*.log ~/shell/02_activities/assignments/newproject/data/processed/event_logs -cp ./*user*.log ~/shell/02_activities/assignments/newproject/data/processed/user_logs +cp ./data/raw/*user*.log ./data/processed/user_logs/ +cp ./data/raw/*event*.log ./data/processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -cd ~/shell/02_activities/assignments/newproject/data/processed/user_logs -rm ./*ipaddr* -cd ~/shell/02_activities/assignments/newproject/data/raw -rm ./*ipaddr* +rm ./data/raw/*ipaddr* +rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -cd ~/shell/02_activities/assignments/newproject/data -touch inventory.txt +touch ./data/inventory.txt echo "event_logs/ server_logs/ user_logs/" > inventory.txt ########################################### From 4103cc0bfcc4739046226a68c64477ae1e2c77ab Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 18:45:44 -0500 Subject: [PATCH 18/21] merge --- 02_activities/assignments/assignment.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 46b453946..f12090ac9 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -61,12 +61,9 @@ cp ./data/raw/*user*.log ./data/processed/user_logs/ cp ./data/raw/*event*.log ./data/processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -<<<<<<< HEAD + rm ./data/raw/*ipaddr* rm ./data/processed/user_logs/*ipaddr* -======= -rf -rf ./data ->>>>>>> ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed touch ./data/inventory.txt From ea703e2907d446bd962da6dc149566f14ce25b9b Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 18:51:47 -0500 Subject: [PATCH 19/21] revise --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index f12090ac9..51b260a3f 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -67,7 +67,7 @@ rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed touch ./data/inventory.txt -echo "event_logs/ server_logs/ user_logs/" > inventory.txt +echo "event_logs/ server_logs/ user_logs/" > ./data/inventory.txt ########################################### From 008d42784e58a43f53831bbbcd54ce3374adc224 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 18:58:25 -0500 Subject: [PATCH 20/21] revise --- 02_activities/assignments/assignment.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 51b260a3f..fc2c19208 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -66,8 +66,7 @@ rm ./data/raw/*ipaddr* rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -touch ./data/inventory.txt -echo "event_logs/ server_logs/ user_logs/" > ./data/inventory.txt +find ./data/processed -type f > ./data/inventory.txt ########################################### From 711342fd48192c98d92bfbac4aba98a68bcb79b9 Mon Sep 17 00:00:00 2001 From: Li Gong Date: Sat, 7 Mar 2026 19:04:10 -0500 Subject: [PATCH 21/21] revise --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index fc2c19208..64b7eb6e6 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -66,7 +66,7 @@ rm ./data/raw/*ipaddr* rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -find ./data/processed -type f > ./data/inventory.txt +find ./data/processed -type f | sed 's#^\./data/processed/##' | sort > ./data/inventory.txt ###########################################