From e976b55a5c2a83f4f2ab1221ea228c9b8737db4f Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:32:09 -0500 Subject: [PATCH 1/4] delete ip logs... WARNING UNTESTED! --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index d81e9a77b..001291867 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,6 +33,7 @@ unzip rawdata.zip # 6. Repeat the above step for user logs and event logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:55:44 -0500 Subject: [PATCH 2/4] initialize README file with company name --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 001291867..f2bfd22bd 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -11,6 +11,7 @@ set -x mkdir analysis output touch README.md +echo "# Project Name: DSI Consulting Inc." > README.md touch analysis/main.py # download client data From 4d13d9f86113413f030ee13e8aa1950df3f876bd Mon Sep 17 00:00:00 2001 From: Lynn Kwon Date: Fri, 10 Apr 2026 14:54:24 -0400 Subject: [PATCH 3/4] Assignment step 1 complete --- newproject/assignment.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 newproject/assignment.sh diff --git a/newproject/assignment.sh b/newproject/assignment.sh new file mode 100644 index 000000000..775a16fab --- /dev/null +++ b/newproject/assignment.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# 1. Create the data directory +mkdir -p data + +# 2. Move rawdata into data/raw +mv rawdata data/raw + +# 3. List contents to verify +ls data/raw + +# 4. Create the sub-directories +mkdir -p data/processed/server_logs data/processed/user_logs data/processed/event_logs + +# 5 & 6. Copy the specific logs +cp data/raw/*server*.log data/processed/server_logs/ +cp data/raw/*user*.log data/processed/user_logs/ +cp data/raw/*event*.log data/processed/event_logs/ + +# 7. Privacy: Force-remove IP address files +rm -f data/raw/*ipaddr* +rm -f data/processed/user_logs/*ipaddr* + +# 8. Create the inventory list +ls -R data/processed > data/inventory.txt + +echo "Assignment steps completed." \ No newline at end of file From d43a22ae34a024e588506318b7811fb270a938a7 Mon Sep 17 00:00:00 2001 From: Lynn Kwon Date: Fri, 10 Apr 2026 16:01:12 -0400 Subject: [PATCH 4/4] Complete Part 1 assignment script --- 02_activities/assignments/assignment.sh | 56 ++++++++++++++++++++----- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4b48cec8b..b83d4c384 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -26,25 +26,59 @@ unzip -q rawdata.zip ########################################### # Complete assignment here +#!/bin/bash +set -x -# 1. Create a directory named data +############################################ +# DSI CONSULTING INC. Project setup script # +############################################ +# This script creates standard analysis and output directories +# for a new project. It also creates a README file with the +# project name and a brief description of the project. +# Then it unzips the raw data provided by the client. -# 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) +if [ -d newproject ]; then + echo "Recreating the newproject directory" + rm -rf newproject +fi +mkdir newproject +cd newproject -# 3. List the contents of the ./data/raw directory +mkdir analysis output +touch README.md +touch analysis/main.py -# 4. Create the directory ./data/processed, -# then create the following sub-directories within it: server_logs, user_logs, and event_logs +# download client data +curl -Lo rawdata.zip https://github.com/UofT-DSI/shell/raw/refs/heads/main/02_activities/assignments/rawdata.zip +unzip -q rawdata.zip -# 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +########################################### +# Complete assignment here -# 6. Repeat the above step for user logs and event logs +# 1. +mkdir data -# 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +# 2. +mv rawdata data/raw -# 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed +# 3. +ls data/raw +# 4. +mkdir -p data/processed/server_logs data/processed/user_logs data/processed/event_logs -########################################### +# 5. +cp data/raw/*server*.log data/processed/server_logs/ + +# 6. +cp data/raw/*user*.log data/processed/user_logs/ +cp data/raw/*event*.log data/processed/event_logs/ + +# 7. +rm -f data/raw/*ipaddr* +rm -f data/processed/user_logs/*ipaddr* + +# 8. +find data/processed -type f | sort > data/inventory.txt -echo "Project setup is complete!" +echo "Project steps complete"