From e976b55a5c2a83f4f2ab1221ea228c9b8737db4f Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:32:09 -0500 Subject: [PATCH 01/11] delete ip logs... WARNING UNTESTED! --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index d81e9a77b..001291867 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,6 +33,7 @@ unzip rawdata.zip # 6. Repeat the above step for user logs and event logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:55:44 -0500 Subject: [PATCH 02/11] initialize README file with company name --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 001291867..f2bfd22bd 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -11,6 +11,7 @@ set -x mkdir analysis output touch README.md +echo "# Project Name: DSI Consulting Inc." > README.md touch analysis/main.py # download client data From 88555e4a2b30909567d887a94850e0aa1db64864 Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Mon, 2 Mar 2026 18:52:48 -0300 Subject: [PATCH 03/11] added finished assignment --- 02_activities/assignments/assignment.sh | 26 +++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4b48cec8b..abaa0d92b 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -28,22 +28,44 @@ unzip -q rawdata.zip # Complete assignment here # 1. Create a directory named data +mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) +mv rawdata data/ls +mv rawdata raw # 3. List the contents of the ./data/raw directory +ls raw -# 4. Create the directory ./data/processed, +# 4. Create the directory ./data/processed, +cd data +mkdir processed # then create the following sub-directories within it: server_logs, user_logs, and event_logs +cd processed +mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +cd .. +cp /Users/nicosarian/desktop/DSI/assignments/shell/02_activities/assignments/data/raw/server* processed/server_logs/ # 6. Repeat the above step for user logs and event logs +cp /Users/nicosarian/desktop/DSI/assignments/shell/02_activities/assignments/data/raw/user* processed/user_logs/ +cp /Users/nicosarian/desktop/DSI/assignments/shell/02_activities/assignments/data/raw/event* processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +cd raw +rm ipaddr* +rm user_ipaddr* +cd .. +cd processed/user_logs +rm ipaddr* +rm user_ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed - +cd .. +cd .. +touch inventory.txt +ls processed/user_logs processed/server_logs processed/event_logs > inventory.txt ########################################### From 712c459a699246cb24e3604133d527d5904f8c90 Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Mon, 2 Mar 2026 19:24:29 -0300 Subject: [PATCH 04/11] managed to git push --- 02_activities/inventory.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 02_activities/inventory.txt diff --git a/02_activities/inventory.txt b/02_activities/inventory.txt new file mode 100644 index 000000000..e69de29bb From 3f8c89653bb33d262b3b60073ee3c5ee76848fb5 Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Mon, 2 Mar 2026 20:13:18 -0300 Subject: [PATCH 05/11] root folder fixed --- 02_activities/assignments/assignment.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index abaa0d92b..5cb8150cd 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,11 +46,11 @@ mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd .. -cp /Users/nicosarian/desktop/DSI/assignments/shell/02_activities/assignments/data/raw/server* processed/server_logs/ +cp data/raw/server* processed/server_logs/ # 6. Repeat the above step for user logs and event logs -cp /Users/nicosarian/desktop/DSI/assignments/shell/02_activities/assignments/data/raw/user* processed/user_logs/ -cp /Users/nicosarian/desktop/DSI/assignments/shell/02_activities/assignments/data/raw/event* processed/event_logs/ +cp data/raw/user* processed/user_logs/ +cp data/raw/event* processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs cd raw From 9092ad3ec49629bd6a76cdc76b220cd21f89a12c Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Mon, 2 Mar 2026 20:20:57 -0300 Subject: [PATCH 06/11] fixed ls typo --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 5cb8150cd..64852bfb7 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -31,7 +31,7 @@ unzip -q rawdata.zip mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) -mv rawdata data/ls +mv rawdata data/ mv rawdata raw # 3. List the contents of the ./data/raw directory From 8f409bc8ec427a5d64e826d7fb69e3efbb97553b Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Mon, 2 Mar 2026 21:06:19 -0300 Subject: [PATCH 07/11] last edit --- 02_activities/assignments/assignment.sh | 33 +++++++++---------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 64852bfb7..fdb16b2a0 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -28,44 +28,33 @@ unzip -q rawdata.zip # Complete assignment here # 1. Create a directory named data +cd newproject mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) -mv rawdata data/ -mv rawdata raw +mv rawdata data/raw # 3. List the contents of the ./data/raw directory -ls raw +ls data/raw # 4. Create the directory ./data/processed, -cd data -mkdir processed +mkdir data/processed # then create the following sub-directories within it: server_logs, user_logs, and event_logs -cd processed -mkdir server_logs user_logs event_logs +mkdir data/processed/{server_logs,user_logs,event_logs} # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd .. -cp data/raw/server* processed/server_logs/ +cp data/raw/*server*.log data/processed/server_logs/ # 6. Repeat the above step for user logs and event logs -cp data/raw/user* processed/user_logs/ -cp data/raw/event* processed/event_logs/ +cp data/raw/*user*.log data/processed/user_logs/ +cp data/raw/*event*.log data/processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -cd raw -rm ipaddr* -rm user_ipaddr* -cd .. -cd processed/user_logs -rm ipaddr* -rm user_ipaddr* +rm -f data/raw/*ipaddr* +rm -f data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -cd .. -cd .. -touch inventory.txt -ls processed/user_logs processed/server_logs processed/event_logs > inventory.txt +find data/processed -type f | sort > data/inventory.txt ########################################### From a23c2e15472ebe44162502afad3844dbc2717a53 Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Mon, 2 Mar 2026 21:07:36 -0300 Subject: [PATCH 08/11] last edit --- 02_activities/inventory.txt => inventory.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 02_activities/inventory.txt => inventory.txt (100%) diff --git a/02_activities/inventory.txt b/inventory.txt similarity index 100% rename from 02_activities/inventory.txt rename to inventory.txt From 2c3ec7321b8b8fb41c9cae39693a856077c971ed Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Mon, 2 Mar 2026 21:22:28 -0300 Subject: [PATCH 09/11] final edit --- 02_activities/assignments/assignment.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 6465c245f..5d708e16e 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -51,6 +51,8 @@ cp data/raw/*user*.log data/processed/user_logs/ cp data/raw/*event*.log data/processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rm -f data/raw/*ipaddr* +rm -f data/processed/user_logs/*ipaddr* rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From b3d234379d274e22fa708f56bbe1763a3e64e5ae Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Fri, 6 Mar 2026 14:15:27 -0300 Subject: [PATCH 10/11] last edit --- 02_activities/assignments/assignment.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 5d708e16e..9811614ea 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -29,7 +29,6 @@ unzip -q rawdata.zip # Complete assignment here # 1. Create a directory named data -cd newproject mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) @@ -50,7 +49,7 @@ cp data/raw/*server*.log data/processed/server_logs/ cp data/raw/*user*.log data/processed/user_logs/ cp data/raw/*event*.log data/processed/event_logs/ -# 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +# 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logsgit rm -f data/raw/*ipaddr* rm -f data/processed/user_logs/*ipaddr* rf -rf ./data From bd75865c7e913dfd79d584b0245314427f99a2eb Mon Sep 17 00:00:00 2001 From: Nico Sarian Date: Fri, 6 Mar 2026 14:17:40 -0300 Subject: [PATCH 11/11] final revie edits --- 02_activities/assignments/assignment.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 9811614ea..631bfb107 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -52,7 +52,6 @@ cp data/raw/*event*.log data/processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logsgit rm -f data/raw/*ipaddr* rm -f data/processed/user_logs/*ipaddr* -rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed find data/processed -type f | sort > data/inventory.txt