From 584cff02052b8688fc672fd0cdc11e71b5d384f8 Mon Sep 17 00:00:00 2001 From: Daniel Sparing Date: Wed, 1 Mar 2023 19:49:35 +0100 Subject: [PATCH] typo --- 01_intro.py | 4 ++-- 02_load_data.py | 4 ++-- 03_prep_data.py | 4 ++-- 04_markov_chains.py | 10 +++++----- 05_spend_optimization.py | 10 +++++----- LICENSE | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/01_intro.py b/01_intro.py index e3c2bb7..cb77543 100644 --- a/01_intro.py +++ b/01_intro.py @@ -18,7 +18,7 @@ # MAGIC # MAGIC * Broadly speaking, heuristic methods are rule-based and consist of both `single-touch` and `multi-touch` approaches. Single-touch methods, such as `first-touch` and `last-touch`, assign credit to the first channel, or the last channel, associated with a conversion. Multi-touch methods, such as `linear` and `time-decay`, assign credit to multiple channels associated with a conversion. In the case of linear, credit is assigned uniformly across all channels, whereas for time-decay, an increasing amount of credit is assigned to the channels that appear closer in time to the conversion event. # MAGIC -# MAGIC * In contrast to heuristic methods, data-driven methods determine assignment using probabilites and statistics. Examples of data-driven methods include `Markov Chains` and `SHAP`. In this series of notebooks, we cover the use of Markov Chains and include a comparison to a few heuristic methods. +# MAGIC * In contrast to heuristic methods, data-driven methods determine assignment using probabilities and statistics. Examples of data-driven methods include `Markov Chains` and `SHAP`. In this series of notebooks, we cover the use of Markov Chains and include a comparison to a few heuristic methods. # COMMAND ---------- @@ -78,7 +78,7 @@ # MAGIC # MAGIC * In the following sections, you will generate this synthetic dataset and then process it using Structured Streaming. You will then apply additional transformations so that it is suitable to use with Markov Chains. # MAGIC -# MAGIC * **Note:** Default settings are used to generate this data set. Aftering working through this series of notebooks for the first time, you may want to customize these settings for additional exploration. Please note that if you do so, commentary in the notebooks may not line up with the newly generated data. +# MAGIC * **Note:** Default settings are used to generate this data set. After working through this series of notebooks for the first time, you may want to customize these settings for additional exploration. Please note that if you do so, commentary in the notebooks may not line up with the newly generated data. # COMMAND ---------- diff --git a/02_load_data.py b/02_load_data.py index 4d4b460..d110eab 100644 --- a/02_load_data.py +++ b/02_load_data.py @@ -17,7 +17,7 @@ # MAGIC ### In this notebook you: # MAGIC * Use `Databricks Autoloader` to import the ad impression and conversion data generated in the notebook `01_intro`. # MAGIC * Write the data out in `Delta` format. -# MAGIC * Create a database and table for easy access and querability. +# MAGIC * Create a database and table for easy access and queryability. # COMMAND ---------- @@ -135,7 +135,7 @@ # MAGIC %md # MAGIC ## Step 3: Write Data to Delta Lake # MAGIC -# MAGIC In this section of the solution accelerator, we write our data out to [Delta Lake](https://delta.io/) and then create a table (and database) for easy access and querability. 
+# MAGIC In this section of the solution accelerator, we write our data out to [Delta Lake](https://delta.io/) and then create a table (and database) for easy access and queryability. # MAGIC # MAGIC * Delta Lake is an open-source project that enables building a **Lakehouse architecture** on top of existing storage systems such as S3, ADLS, GCS, and HDFS. # MAGIC * Information on the **Lakehouse Architecture** can be found in this [paper](http://cidrdb.org/cidr2021/papers/cidr2021_paper17.pdf) that was presented at [CIDR 2021](http://cidrdb.org/cidr2021/index.html) and in this [video](https://www.youtube.com/watch?v=RU2dXoVU8hY) diff --git a/03_prep_data.py b/03_prep_data.py index 67250a8..1b8028d 100644 --- a/03_prep_data.py +++ b/03_prep_data.py @@ -182,7 +182,7 @@ # MAGIC # MAGIC * In practice, Z-ordering is most suitable for high-cardinality columns that you frequently want to filter on. # MAGIC -# MAGIC * Please note that the data set we are using here is relatively small and Z-ordering is likely unncessary. It has been included, however, for illustration purposes. +# MAGIC * Please note that the data set we are using here is relatively small and Z-ordering is likely unnecessary. It has been included, however, for illustration purposes. # COMMAND ---------- @@ -293,7 +293,7 @@ # COMMAND ---------- # MAGIC %md -# MAGIC ##### Example 2: Propogate updates made to the gold_user_journey table to the gold_attribution table +# MAGIC ##### Example 2: Propagate updates made to the gold_user_journey table to the gold_attribution table # COMMAND ---------- diff --git a/04_markov_chains.py b/04_markov_chains.py index 1205eb1..41d5c3e 100644 --- a/04_markov_chains.py +++ b/04_markov_chains.py @@ -33,7 +33,7 @@ # MAGIC * Heuristic-based attribution methods like first-touch, last-touch, and linear are relatively easy to implement but are less accurate than data-driven methods. With marketing dollars at stake, data-driven methods are highly recommended. # MAGIC # MAGIC * There are three steps to take when using Markov Chains to calculate attribution: -# MAGIC * Step 1: Construct a transition probablity matrix +# MAGIC * Step 1: Construct a transition probability matrix # MAGIC * Step 2: Calculate the total conversion probability # MAGIC * Step 3: Use the removal effect to calculate attribution # MAGIC @@ -41,7 +41,7 @@ # MAGIC # MAGIC **An Example** # MAGIC -# MAGIC In the image below, we have a transition probability graph that shows the probabilty of going from one state to another state. In the context of a customer journey, states can be non-terminal (viewing an impression on a given channel) or terminal (conversion, no conversion). +# MAGIC In the image below, we have a transition probability graph that shows the probability of going from one state to another state. In the context of a customer journey, states can be non-terminal (viewing an impression on a given channel) or terminal (conversion, no conversion). # MAGIC # MAGIC
# MAGIC @@ -61,7 +61,7 @@ # MAGIC # MAGIC ```P(Conversion) = (0.2 X 0.8) + (0.2 X 0.2 X 0.1) + (0.8 X 0.6) + (0.8 X 0.4 X 0.8) + (0.8 X 0.4 X 0.2 X 0.1) = 0.90``` # MAGIC -# MAGIC Now, let's calculate the removal effect for one of our channels: Facebook/Social. For this, we will set the conversion for Facebook/Social to 0% and then recalculate the total conversion probabilty. Now we have `0.48`. +# MAGIC Now, let's calculate the removal effect for one of our channels: Facebook/Social. For this, we will set the conversion for Facebook/Social to 0% and then recalculate the total conversion probability. Now we have `0.48`. # MAGIC # MAGIC ```P(Conversion) = (0.2 X 0.0) + (0.2 X 0.0 X 0.1) + (0.8 X 0.6) + (0.8 X 0.4 X 0) +(0.8 X 0.4 X 0.0 X 0.1) = 0.48``` # MAGIC @@ -85,7 +85,7 @@ # MAGIC # MAGIC In this step, we will: # MAGIC 1. Import libraries -# MAGIC 2. Run the utils notebook to gain acces to the get_params function +# MAGIC 2. Run the utils notebook to gain access to the get_params function # MAGIC 3. get_params and store the relevant values in variables # MAGIC 4. Set the current database so that it doesn't need to be manually specified each time it's used @@ -141,7 +141,7 @@ # MAGIC %md # MAGIC ## Step 2: Construct the Transition Probability Matrix # MAGIC -# MAGIC As discussed above, the transition probability matrix contains the probablities associated with moving from one state to another state. This is calculated using the data from all customer journeys. +# MAGIC As discussed above, the transition probability matrix contains the probabilities associated with moving from one state to another state. This is calculated using the data from all customer journeys. # MAGIC # MAGIC In this step, we will: # MAGIC 1. Define a user-defined function (UDF), `get_transition_array`, that takes a customer journey and enumerates each of the corresponding channel transitions diff --git a/05_spend_optimization.py b/05_spend_optimization.py index ca6eb9f..f878a1e 100644 --- a/05_spend_optimization.py +++ b/05_spend_optimization.py @@ -196,11 +196,11 @@ # COMMAND ---------- -base_converion_rate_pd = spark.table("base_conversion_rate").toPandas() +base_conversion_rate_pd = spark.table("base_conversion_rate").toPandas() pie, ax = plt.subplots(figsize=[20,9]) -labels = base_converion_rate_pd['interaction_type'] -plt.pie(x=base_converion_rate_pd['count'], autopct="%.1f%%", explode=[0.05]*2, labels=labels, pctdistance=0.5) +labels = base_conversion_rate_pd['interaction_type'] +plt.pie(x=base_conversion_rate_pd['count'], autopct="%.1f%%", explode=[0.05]*2, labels=labels, pctdistance=0.5) plt.title("Base Conversion Rate"); # COMMAND ---------- @@ -292,14 +292,14 @@ cpa_summary_pd = spark.table("cpa_summary").toPandas() pt = sns.catplot(x='channel', y='CPA_in_Dollars',hue='attribution_model',data=cpa_summary_pd, kind='bar', aspect=4, ci=None) -plt.title("Cost of Aquisition by Channel") +plt.title("Cost of Acquisition by Channel") pt.fig.set_figwidth(20) pt.fig.set_figheight(9) plt.tick_params(labelsize=15) plt.ylabel("CPA in $") plt.xlabel("Channels") -plt.title("Channel Cost per Aquisition"); +plt.title("Channel Cost per Acquisition"); # COMMAND ---------- diff --git a/LICENSE b/LICENSE index 0f9eba2..8bf56c3 100644 --- a/LICENSE +++ b/LICENSE @@ -7,7 +7,7 @@ to an Agreement (defined below) between Licensee (defined below) and Databricks, Software shall be deemed part of the Downloadable Services under the Agreement, or if the Agreement does not define Downloadable Services, Subscription Services, or if 
neither are defined then the term in such Agreement that refers to the applicable Databricks Platform Services (as defined below) shall be substituted herein for “Downloadable Services.” Licensee's use of the Software must comply at -all times with any restrictions applicable to the Downlodable Services and Subscription Services, generally, and must be used in +all times with any restrictions applicable to the Downloadable Services and Subscription Services, generally, and must be used in accordance with any applicable documentation. For the avoidance of doubt, the Software constitutes Databricks Confidential Information under the Agreement.
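
For readers skimming the touched notebooks, the removal-effect calculation whose spelling this patch corrects throughout `04_markov_chains.py` can be sketched with an absorbing Markov chain in a few lines of NumPy. This is a minimal illustration, not code from the notebooks themselves: the states, the `Channel A`/`Channel B` names, the transition probabilities, and the helper names `conversion_prob` and `removal_effect` are all hypothetical. It also uses the standard removal formulation (rerouting all of a removed channel's traffic to the null state) rather than the zeroed-conversion shortcut in the notebook's worked example.

```python
import numpy as np

# States: 0 = Start, 1 = Channel A, 2 = Channel B,
#         3 = Conversion (absorbing), 4 = Null (absorbing).
# Hypothetical transition probabilities; each row sums to 1.
P = np.array([
    [0.0, 0.3, 0.7, 0.0, 0.0],  # Start
    [0.0, 0.0, 0.2, 0.5, 0.3],  # Channel A
    [0.0, 0.1, 0.0, 0.6, 0.3],  # Channel B
    [0.0, 0.0, 0.0, 1.0, 0.0],  # Conversion
    [0.0, 0.0, 0.0, 0.0, 1.0],  # Null
])

CONV, NULL = 3, 4

def conversion_prob(P, start=0, conv=CONV):
    """P(eventually absorbed in `conv` | chain starts in `start`)."""
    n = P.shape[0]
    transient = [s for s in range(n) if P[s, s] < 1.0]  # non-absorbing states
    Q = P[np.ix_(transient, transient)]   # transient -> transient block
    R = P[np.ix_(transient, [conv])]      # transient -> conversion column
    # Absorption probabilities B solve (I - Q) B = R.
    B = np.linalg.solve(np.eye(len(transient)) - Q, R)
    return B[transient.index(start), 0]

base = conversion_prob(P)  # total conversion probability

def removal_effect(P, channel, base):
    """Remove `channel` by rerouting its traffic to Null, then recompute."""
    P_removed = P.copy()
    P_removed[channel] = 0.0
    P_removed[channel, NULL] = 1.0
    return 1.0 - conversion_prob(P_removed) / base

effects = {ch: removal_effect(P, ch, base) for ch in (1, 2)}
total = sum(effects.values())
attribution = {ch: round(e / total, 3) for ch, e in effects.items()}

print(f"base conversion probability: {base:.3f}")
print(f"attribution (normalized removal effects): {attribution}")
```

Applied to the worked example quoted in the diff, the same arithmetic gives a removal effect of `1 - 0.48/0.90 ≈ 0.47` for Facebook/Social; normalizing each channel's removal effect by the sum across all channels then yields the attribution weights.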