Skip to content

Latest commit

 

History

History
600 lines (495 loc) · 28.3 KB

starting_with_questions.md

File metadata and controls

600 lines (495 loc) · 28.3 KB

Answer the following questions and provide the SQL queries used to find the answer.

Question 1: Which cities and countries have the highest level of transaction revenues on the site?

SQL Queries:

/* The total transaction revenues were filtered, summed up, and grouped according to the city. */
SELECT city, SUM(total_transaction_revenue::REAL / 1000000) AS total_transaction_revenue
FROM all_sessions
WHERE city != '(not set)' AND city!= 'not available in demo dataset' AND total_transaction_revenue IS NOT NULL AND transaction_id IS NOT NULL
GROUP BY city
ORDER BY total_transaction_revenue DESC

/* The total transaction revenues were filtered, summed up, and grouped according to the country. */
SELECT country, SUM(total_transaction_revenue::REAL / 1000000) AS total_transaction_revenue
FROM all_sessions
WHERE country != '(not set)' AND total_transaction_revenue IS NOT NULL AND transaction_id IS NOT NULL
GROUP BY country
ORDER BY total_transaction_revenue DESC

Answer:

These are the cities with the highest level of transaction revenues on the site:

city total_transaction_revenue
Sunnyvale 849.24
Sydney 358
Austin 35.78
Mountain View 8.98

These are the countries with the highest level of transaction revenues on the site:

country total_transaction_revenue
United States 3208.950032
Australia 358

Question 2: What is the average number of products ordered from visitors in each city and country?

SQL Queries:

/* The units sold is presumed to have the amount of products sold in one order linked to the visit id, which in turn, is joined with the product information and city information to calculate the average for each city. */

WITH visitor_and_units_sold AS(
	SELECT DISTINCT visit_id, units_sold
	FROM analytics
	WHERE units_sold IS NOT NULL
)
SELECT als.city, CAST(AVG(units_sold) AS INTEGER) AS avg_num_of_products
FROM all_sessions als
JOIN visitor_and_units_sold vus
	ON als.visit_id = vus.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE als.city != '(not set)' AND city != 'not available in demo dataset'
GROUP BY als.city
ORDER BY als.city

/* The units sold is presumed to have the amount of products sold in one order linked to the visit id, which in turn, is joined with the product information and country information to calculate the average for each city. */

WITH visitor_and_units_sold AS(
	SELECT DISTINCT visit_id, units_sold
	FROM analytics
	WHERE units_sold IS NOT NULL
)
SELECT als.country, CAST(AVG(units_sold) AS INTEGER) AS avg_num_of_products
FROM all_sessions als
JOIN visitor_and_units_sold vus
	ON als.visit_id = vus.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE als.country != '(not set)'
GROUP BY als.country
ORDER BY als.country

Answer:

These are the following cities with the average number of products as follows:

city avg_num_of_products
Ann Arbor 1
Atlanta 1
Austin 1
Bangkok 1
Berlin 1
Bogota 1
Chicago 6
Dallas 1
Detroit 1
Dublin 1
Hong Kong 1
Houston 3
Hyderabad 2
Kirkland 1
London 1
Mountain View 1
Munich 1
New York 2
Palo Alto 1
Paris 1
Pittsburgh 4
San Francisco 2
San Jose 1
Santiago 1
Seattle 1
Singapore 1
Sunnyvale 2
Tel Aviv-Yafo 1
Toronto 3
Washington 1
Zurich 1

These are the following countires with the average number of products as follows:

country avg_num_of_products
Australia 1
Austria 1
Belgium 1
Bulgaria 2
Canada 3
Chile 1
Colombia 1
Denmark 1
Egypt 1
France 1
Germany 1
Hong Kong 1
India 1
Indonesia 1
Ireland 1
Israel 1
Japan 2
Maldives 1
Mexico 2
Netherlands 1
Romania 1
Singapore 1
South Korea 1
Sweden 1
Switzerland 1
Taiwan 1
Thailand 1
United Kingdom 1
United States 2
Vietnam 1

Question 3: Is there any pattern in the types (product categories) of products ordered from visitors in each city and country?

SQL Queries:

/* The product categories were ranked and partitioned by each city to see product categories by highest number of orders.  Once estabished the SQL code, it was nested with another CTE function to filter out the top product category for each city. */

WITH top_product_categories_by_city AS (

WITH city_sum_of_orders AS (
	SELECT DISTINCT visit_id, SUM(units_sold) AS sum_of_orders
	FROM analytics
	WHERE units_sold IS NOT NULL
	GROUP BY visit_id
)
SELECT 
	als.city, 
	als.v2_product_category,
	RANK () OVER (PARTITION BY als.city ORDER BY cso.sum_of_orders DESC) AS rank
FROM all_sessions als
JOIN city_sum_of_orders cso
	ON als.visit_id = cso.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE als.city != '(not set)' AND als.city != 'not available in demo dataset' AND als.v2_product_category != '(not set)' AND als.v2_product_category != '${escCatTitle}'
GROUP BY als.city, als.v2_product_category, cso.sum_of_orders
)

SELECT ts.city, ts.v2_product_category AS top_product_category
FROM top_product_categories_by_city ts
WHERE rank = 1

/* The product categories were ranked and partitioned by each city to see product categories by highest number of orders.  Once estabished the SQL code, it was nested with another CTE function to filter out the top product category for each country. */

WITH top_product_categories_by_country AS (

WITH country_sum_of_orders AS (
	SELECT DISTINCT visit_id, SUM(units_sold) AS sum_of_orders
	FROM analytics
	WHERE units_sold IS NOT NULL
	GROUP BY visit_id
)
SELECT 
	als.country, 
	als.v2_product_category,
	cso.sum_of_orders,
	RANK () OVER (PARTITION BY als.country ORDER BY cso.sum_of_orders DESC) AS rank
FROM all_sessions als
JOIN country_sum_of_orders cso
	ON als.visit_id = cso.visit_id
WHERE als.country != '(not set)' AND als.v2_product_category != '(not set)' AND als.v2_product_category != '${escCatTitle}'
GROUP BY als.country, als.v2_product_category, cso.sum_of_orders
)

SELECT ts.country, ts.v2_product_category AS top_product_category
FROM top_product_categories_by_country ts
WHERE rank = 1

Answer:

The top product category for each city is as follows:

city top_product_category
Ann Arbor Home/Apparel/Men's/Men's-T-Shirts/
Atlanta Home/Apparel/Men's/Men's-T-Shirts/
Atlanta Home/Bags/
Austin Home/Apparel/Men's/Men's-Performance Wear/
Austin Home/Office/Notebooks & Journals/
Bangkok Home/Drinkware/
Berlin Home/Office/
Bogota Home/Apparel/Men's/Men's-T-Shirts/
Chicago Home/Shop by Brand/
Dallas Home/Shop by Brand/YouTube/
Detroit Drinkware
Dublin Home/Bags/
Dublin Home/Shop by Brand/YouTube/
Hong Kong Home/Apparel/Men's/Men's-T-Shirts/
Hyderabad Home/Accessories/Stickers/
Kirkland Home/Office/
London Home/Office/Notebooks & Journals/
Mountain View Home/Apparel/Men's/Men's-Outerwear/
Munich Home/Apparel/Headgear/
New York Home/Bags/Backpacks/
Palo Alto Nest-USA
Paris Home/Shop by Brand/Android/
Pittsburgh Home/Office/Notebooks & Journals/
San Francisco Home/Apparel/Women's/Women's-T-Shirts/
San Jose Home/Apparel/Men's/Men's-T-Shirts/
Santiago Home/Bags/
Seattle Home/Nest/Nest-USA/
Sunnyvale Housewares
Tel Aviv-Yafo Home/Shop by Brand/YouTube/
Toronto Home/Apparel/Headgear/
Washington Home/Apparel/Kid's/Kid's-Infant/
Zurich Home/Apparel/Men's/Men's-T-Shirts/

The top category for each country is as follows:

country top_product_category
Australia Home/Accessories/Stickers/
Austria Home/Accessories/Housewares/
Belgium Home/Drinkware/Water Bottles and Tumblers/
Bulgaria Home/Apparel/Men's/Men's-T-Shirts/
Canada Home/Apparel/Headgear/
Chile Home/Bags/
Colombia Home/Apparel/Men's/Men's-T-Shirts/
Denmark Home/Drinkware/
Egypt Home/Shop by Brand/Android/
France Home/Shop by Brand/Android/
France Headgear
Germany Home/Apparel/Headgear/
Germany Home/Office/
Hong Kong Home/Nest/Nest-USA/
India Home/Accessories/Stickers/
Indonesia Home/Bags/Backpacks/
Ireland Home/Shop by Brand/YouTube/
Ireland Home/Bags/
Israel Home/Shop by Brand/YouTube/
Japan Home/Accessories/
Maldives Home/Apparel/Men's/Men's-Outerwear/
Mexico Home/Nest/Nest-USA/
Netherlands Home/Apparel/Men's/Men's-T-Shirts/
South Korea Home/Electronics/Electronics Accessories/
Sweden Home/Electronics/
Switzerland Home/Apparel/Men's/Men's-T-Shirts/
Taiwan Home/Apparel/Men's/Men's-Performance Wear/
Thailand Home/Drinkware/
United Kingdom Home/Office/Notebooks & Journals/
United States Housewares
Vietnam Home/Shop by Brand/YouTube/

I found that it was difficult to find a pattern in the product categories because there was still missing information from cities, countries, or both. I would think that the product category could change between cities and countries should the missing information be received.

Question 4: What is the top-selling product from each city/country? Can we find any pattern worthy of noting in the products sold?

SQL Queries:

/* The top selling product from each city was determined by joining the product name and city based on the number of orders.  The product with the most number of order was ranked and filtered out by cities. */

WITH top_selling_products_by_city AS (

WITH number_of_orders AS (
	SELECT DISTINCT visit_id, COUNT(visit_id) AS top_sellers
	FROM analytics
	WHERE units_sold IS NOT NULL
	GROUP BY visit_id
)
SELECT
	als.city, 
	als.v2_product_name, 
	noo.top_sellers,
	RANK () OVER(PARTITION BY als.city ORDER BY noo.top_sellers DESC) AS rank
FROM all_sessions als
JOIN number_of_orders noo
	ON als.visit_id = noo.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE city != '(not set)' AND city != 'not available in demo dataset' AND als.v2_product_category != '${escCatTitle}'
GROUP BY als.city, als.v2_product_name, noo.top_sellers
ORDER BY als.city
)
SELECT ts.city, ts.v2_product_name AS top_selling_product
FROM top_selling_products_by_city ts
WHERE rank = 1


/* The top selling product from each country was determined by joining the product name and country based on the number of orders.  The product with the most number of order was ranked and filtered out by countries. */

WITH top_selling_products_by_country AS (

WITH number_of_orders AS (
	SELECT DISTINCT visit_id, COUNT(visit_id) AS top_sellers
	FROM analytics
	WHERE units_sold IS NOT NULL
	GROUP BY visit_id
)
SELECT
	als.country, 
	als.v2_product_name, 
	noo.top_sellers,
	RANK () OVER(PARTITION BY als.country ORDER BY noo.top_sellers DESC) AS rank
FROM all_sessions als
JOIN number_of_orders noo
	ON als.visit_id = noo.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE als.country != '(not set)' AND als.v2_product_category != '${escCatTitle}'
GROUP BY als.country, als.v2_product_name, noo.top_sellers
ORDER BY als.country
)
SELECT ts.country, ts.v2_product_name AS top_selling_product
FROM top_selling_products_by_country ts
WHERE rank = 1

Answer:

These are the top-selling products for each city:

city top_selling_product
Ann Arbor Google Men's Vintage Badge Tee Black
Atlanta Android Men's Vintage Henley
Atlanta Google Alpine Style Backpack
Austin Google Men's Airflow 1/4 Zip Pullover Black
Austin YouTube RFID Journal
Bangkok 26 oz Double Wall Insulated Bottle
Berlin Google Doodle Decal
Bogota YouTube Men's 3/4 Sleeve Henley
Chicago Google Womens 3/4 Sleeve Baseball Raglan Heather/Black
Dallas YouTube Leatherette Notebook Combo
Detroit Google 22 oz Water Bottle
Dublin Google Laptop Backpack
Dublin YouTube RFID Journal
Hong Kong Android Men's Long Sleeve Badge Crew Tee Heather
Hyderabad Google Bongo Cupholder Bluetooth Speaker
Hyderabad Keyboard DOT Sticker
Kirkland Android Sticker Sheet Ultra Removable
London Android Hard Cover Journal
Mountain View Google Men's Airflow 1/4 Zip Pullover Black
Munich Google Twill Cap
New York Google Alpine Style Backpack
Palo Alto Nest® Learning Thermostat 3rd Gen-USA - White
Paris Android Lunch Kit
Pittsburgh YouTube Hard Cover Journal
San Francisco Google Women's Scoop Neck Tee White
San Jose YouTube Men's Vintage Tank
Santiago Sport Bag
Seattle Nest® Cam Indoor Security Camera - USA
Singapore Google Men's Short Sleeve Performance Badge Tee Navy
Sunnyvale SPF-15 Slim & Slender Lip Balm
Tel Aviv-Yafo YouTube Hard Cover Journal
Toronto Sport Bag
Washington Google Bib White
Zurich YouTube Men's 3/4 Sleeve Henley

These are the top-selling products for each country:

country top_selling_product
Australia Android Sticker Sheet Ultra Removable
Austria Google Car Clip Phone Holder
Belgium Google 17 oz Double Wall Stainless Steel Insulated Bottle
Bulgaria YouTube Men's Vintage Tank
Canada Sport Bag
Canada Google Men's Short Sleeve Performance Badge Tee Pewter
Chile Sport Bag
Colombia YouTube Men's 3/4 Sleeve Henley
Denmark 26 oz Double Wall Insulated Bottle
Egypt Android RFID Journal
France Android Lunch Kit
France Android Wool Heather Cap Heather/Black
Germany Google Twill Cap
Germany Google Doodle Decal
Hong Kong Nest® Learning Thermostat 3rd Gen-USA - White
India YouTube Custom Decals
India Keyboard DOT Sticker
India Android Stretch Fit Hat Black
India Android Men's Engineer Short Sleeve Tee Charcoal
India Google Bongo Cupholder Bluetooth Speaker
India YouTube Men's Vintage Henley
Indonesia Google Rucksack
Ireland YouTube RFID Journal
Ireland Google Laptop Backpack
Israel YouTube Hard Cover Journal
Japan Google Lunch Bag
Maldives Google Men's Performance Full Zip Jacket Black
Mexico Nest® Cam Indoor Security Camera - USA
Mexico YouTube Men's Vintage Henley
Netherlands Android Men's Vintage Tank
Romania 8 pc Android Sticker Sheet
Singapore Google Men's Short Sleeve Performance Badge Tee Navy
South Korea Galaxy Screen Cleaning Cloth
Sweden Google Women's Short Sleeve Hero Tee Sky Blue
Switzerland YouTube Men's 3/4 Sleeve Henley
Taiwan Google Men's Performance 1/4 Zip Pullover Heather/Black
Taiwan Google Women's Short Sleeve Hero Tee Black
Thailand 26 oz Double Wall Insulated Bottle
United Kingdom Android Hard Cover Journal
United States SPF-15 Slim & Slender Lip Balm
Vietnam YouTube RFID Journal

Similar to Question 3 in the 'starting_with_questions.md' file, I would think that the missing information could alter the top-selling products between cities and countries.

Question 5: Can we summarize the impact of revenue generated from each city/country?

SQL Queries:

/* Revenue was linked with the visit ID, for which it was joined to verify purchases to calculate the revenue by city. */

WITH revenue AS (
	SELECT DISTINCT visit_id, revenue / 1000000 AS revenue
	FROM analytics
	WHERE units_sold IS NOT NULL AND revenue IS NOT NULL
)
SELECT als.city, SUM(r.revenue) AS sum_revenue
FROM all_sessions als
JOIN revenue r
	ON als.visit_id = r.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE als.city != '(not set)' AND als.city != 'not available in demo dataset'
GROUP BY als.city
ORDER BY sum_revenue DESC

/* Revenue was linked with the visit ID, for which it was joined to verify purchases to calculate the revenue by country. */

WITH revenue AS (
	SELECT DISTINCT visit_id, revenue / 1000000 AS revenue
	FROM analytics
	WHERE units_sold IS NOT NULL AND revenue IS NOT NULL
)
SELECT als.country, SUM(r.revenue) AS sum_revenue
FROM all_sessions als
JOIN revenue r
	ON als.visit_id = r.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE als.country != '(not set)'
GROUP BY als.country
ORDER BY sum_revenue DESC

Answer:

Here is the summary of the revenue impact generated from each city:

city sum_revenue
New York 1103
Sunnyvale 608
Mountain View 481
Seattle 357
Chicago 306
San Francisco 276
San Jose 153
Palo Alto 151
Tel Aviv-Yafo 32
Zurich 16

Here is the summary of the revenue impact generated from each country:

country sum_revenue
United States 4629
Israel 32
Switzerland 16

While it was easier to draw the 'revenue' data from the 'analytics' table, there was information available on the 'unit_sold' and 'unit_price' to calculate the revenue. To see the variance:

/* Comparision between 'revenue' column versus calculated revenue by 'units_sold' * 'unit_price' by city. */
WITH revenue AS (
	SELECT DISTINCT visit_id, revenue / 1000000 AS revenue, CAST((units_sold * unit_price) / 1000000 AS INTEGER) AS calc_revenue
	FROM analytics
	WHERE units_sold IS NOT NULL AND revenue IS NOT NULL
)
SELECT als.city, SUM(r.revenue) AS sum_revenue, SUM(r.calc_revenue) AS sum_calc_revenue, (SUM(r.calc_revenue) - SUM(r.revenue)) / SUM(r.revenue) * 100 AS revenue_diff_pct
FROM all_sessions als
JOIN revenue r
	ON als.visit_id = r.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE als.city != '(not set)' AND als.city != 'not available in demo dataset'
GROUP BY als.city
ORDER BY sum_revenue DESC

/* Comparision between 'revenue' column versus calculated revenue by 'units_sold' * 'unit_price' by country. */

WITH revenue AS (
	SELECT DISTINCT visit_id, revenue / 1000000 AS revenue, CAST((units_sold * unit_price) / 1000000 AS INTEGER) AS calc_revenue
	FROM analytics
	WHERE units_sold IS NOT NULL AND revenue IS NOT NULL
)
SELECT als.country, SUM(r.revenue) AS sum_revenue, SUM(r.calc_revenue) AS sum_calc_revenue, (SUM(r.calc_revenue) - SUM(r.revenue)) / SUM(r.revenue) * 100 AS revenue_diff_pct
FROM all_sessions als
JOIN revenue r
	ON als.visit_id = r.visit_id
JOIN products p
	ON als.product_sku = p.sku
WHERE als.country != '(not set)'
GROUP BY als.country
ORDER BY sum_revenue DESC

Here is the revenue compared to the calculated revenue by (units sold * unit price) for each city:

city sum_revenue sum_calc_revenue revenue_diff_pct
New York 1103 1101 -0.18132366273798730700
Sunnyvale 608 602 -0.98684210526315789500
Mountain View 481 467 -2.91060291060291060300
Seattle 357 357 0.00000000000000000000
Chicago 306 298 -2.61437908496732026100
San Francisco 276 261 -5.43478260869565217400
San Jose 153 149 -2.61437908496732026100
Palo Alto 151 149 -1.32450331125827814600
Tel Aviv-Yafo 32 24 -25.00000000000000000000
Zurich 16 14 -12.50000000000000000000

Here is the revenue compared to the calculated revenue by (units sold * unit price) for each country:

country sum_revenue sum_calc_revenue revenue_diff_pct
United States 4629 4558 -1.53380859796932382800
Israel 32 24 -25.00000000000000000000
Switzerland 16 14 -12.50000000000000000000