typo
Daniel Sparing committed Mar 1, 2023
1 parent 4ce4def commit 584cff0
Showing 6 changed files with 17 additions and 17 deletions.
4 changes: 2 additions & 2 deletions 01_intro.py
@@ -18,7 +18,7 @@
# MAGIC
# MAGIC * Broadly speaking, heuristic methods are rule-based and consist of both `single-touch` and `multi-touch` approaches. Single-touch methods, such as `first-touch` and `last-touch`, assign credit to the first channel, or the last channel, associated with a conversion. Multi-touch methods, such as `linear` and `time-decay`, assign credit to multiple channels associated with a conversion. In the case of linear, credit is assigned uniformly across all channels, whereas for time-decay, an increasing amount of credit is assigned to the channels that appear closer in time to the conversion event.
# MAGIC
-# MAGIC * In contrast to heuristic methods, data-driven methods determine assignment using probabilites and statistics. Examples of data-driven methods include `Markov Chains` and `SHAP`. In this series of notebooks, we cover the use of Markov Chains and include a comparison to a few heuristic methods.
+# MAGIC * In contrast to heuristic methods, data-driven methods determine assignment using probabilities and statistics. Examples of data-driven methods include `Markov Chains` and `SHAP`. In this series of notebooks, we cover the use of Markov Chains and include a comparison to a few heuristic methods.

# COMMAND ----------
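For readers who want to see the heuristic methods described in this cell in concrete form, here is a minimal self-contained sketch. It is not from the accelerator; the journey data, channel names, and journey format are invented for illustration.

```python
# Hypothetical converting journeys (channel names invented for illustration).
journeys = [
    ["Social", "Search", "Email"],
    ["Search", "Display"],
    ["Email", "Social", "Search", "Display"],
]

def first_touch(journey):
    # Single-touch: all credit to the first channel in the journey.
    return {journey[0]: 1.0}

def last_touch(journey):
    # Single-touch: all credit to the last channel before conversion.
    return {journey[-1]: 1.0}

def linear(journey):
    # Multi-touch: credit split uniformly across the channels in the journey.
    share = 1.0 / len(journey)
    credit = {}
    for channel in journey:
        credit[channel] = credit.get(channel, 0.0) + share
    return credit

def attribute(journeys, method):
    # Sum per-journey credit into channel-level attribution.
    totals = {}
    for journey in journeys:
        for channel, credit in method(journey).items():
            totals[channel] = totals.get(channel, 0.0) + credit
    return totals

print(attribute(journeys, first_touch))  # {'Social': 1.0, 'Search': 1.0, 'Email': 1.0}
print(attribute(journeys, linear))
```

Time-decay follows the same pattern, with the uniform share replaced by weights that increase toward the conversion event.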

@@ -78,7 +78,7 @@
# MAGIC
# MAGIC * In the following sections, you will generate this synthetic dataset and then process it using Structured Streaming. You will then apply additional transformations so that it is suitable to use with Markov Chains.
# MAGIC
-# MAGIC * **Note:** Default settings are used to generate this data set. Aftering working through this series of notebooks for the first time, you may want to customize these settings for additional exploration. Please note that if you do so, commentary in the notebooks may not line up with the newly generated data.
+# MAGIC * **Note:** Default settings are used to generate this data set. After working through this series of notebooks for the first time, you may want to customize these settings for additional exploration. Please note that if you do so, commentary in the notebooks may not line up with the newly generated data.

# COMMAND ----------

4 changes: 2 additions & 2 deletions 02_load_data.py
@@ -17,7 +17,7 @@
# MAGIC ### In this notebook you:
# MAGIC * Use `Databricks Autoloader` to import the ad impression and conversion data generated in the notebook `01_intro`.
# MAGIC * Write the data out in `Delta` format.
-# MAGIC * Create a database and table for easy access and querability.
+# MAGIC * Create a database and table for easy access and queryability.

# COMMAND ----------
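As a rough sketch of what an Auto Loader ingest looks like — the paths, schema, and options here are assumptions for illustration, not the notebook's actual code; `spark` is the ambient Databricks session:

```python
# Illustrative path and schema only; the notebook's actual values may differ.
raw_data_path = "/tmp/multi-touch-attribution/raw"

bronze_df = (
    spark.readStream.format("cloudFiles")   # Databricks Auto Loader
    .option("cloudFiles.format", "csv")     # incrementally pick up newly arriving files
    .option("header", "true")
    .schema("uid STRING, time STRING, interaction STRING, channel STRING")
    .load(raw_data_path)
)
```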

@@ -135,7 +135,7 @@
# MAGIC %md
# MAGIC ## Step 3: Write Data to Delta Lake
# MAGIC
-# MAGIC In this section of the solution accelerator, we write our data out to [Delta Lake](https://delta.io/) and then create a table (and database) for easy access and querability.
+# MAGIC In this section of the solution accelerator, we write our data out to [Delta Lake](https://delta.io/) and then create a table (and database) for easy access and queryability.
# MAGIC
# MAGIC * Delta Lake is an open-source project that enables building a **Lakehouse architecture** on top of existing storage systems such as S3, ADLS, GCS, and HDFS.
# MAGIC * Information on the **Lakehouse Architecture** can be found in this [paper](http://cidrdb.org/cidr2021/papers/cidr2021_paper17.pdf) that was presented at [CIDR 2021](http://cidrdb.org/cidr2021/index.html) and in this [video](https://www.youtube.com/watch?v=RU2dXoVU8hY)
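A hedged sketch of the write-and-register pattern this step describes, with placeholder paths and table names (continuing the `bronze_df` stream from the sketch above):

```python
# Illustrative paths and names; the checkpoint makes the stream restartable.
(bronze_df.writeStream
    .format("delta")
    .option("checkpointLocation", "/tmp/multi-touch-attribution/checkpoints/bronze")
    .trigger(once=True)   # process the files currently available, then stop
    .start("/tmp/multi-touch-attribution/bronze"))

spark.sql("CREATE DATABASE IF NOT EXISTS attribution")
spark.sql("""
  CREATE TABLE IF NOT EXISTS attribution.bronze
  USING DELTA
  LOCATION '/tmp/multi-touch-attribution/bronze'
""")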
4 changes: 2 additions & 2 deletions 03_prep_data.py
@@ -182,7 +182,7 @@
# MAGIC
# MAGIC * In practice, Z-ordering is most suitable for high-cardinality columns that you frequently want to filter on.
# MAGIC
-# MAGIC * Please note that the data set we are using here is relatively small and Z-ordering is likely unncessary. It has been included, however, for illustration purposes.
+# MAGIC * Please note that the data set we are using here is relatively small and Z-ordering is likely unnecessary. It has been included, however, for illustration purposes.

# COMMAND ----------
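Z-ordering itself is a single command on a Delta table; for example (table and column names assumed, not the notebook's):

```python
# Compact files and co-locate rows by a high-cardinality filter column.
spark.sql("OPTIMIZE attribution.bronze ZORDER BY (uid)")
```

Queries that filter on the Z-ordered column can then skip many more files than they otherwise would.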

@@ -293,7 +293,7 @@
# COMMAND ----------

# MAGIC %md
-# MAGIC ##### Example 2: Propogate updates made to the gold_user_journey table to the gold_attribution table
+# MAGIC ##### Example 2: Propagate updates made to the gold_user_journey table to the gold_attribution table

# COMMAND ----------
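Propagating changes between Delta tables is typically expressed as a MERGE; a schematic sketch, with an assumed source table and assumed column names (only `gold_attribution` and `gold_user_journey` are named in these notebooks):

```python
# Schematic only: the source table and join columns are assumptions.
spark.sql("""
  MERGE INTO gold_attribution AS t
  USING updated_attribution AS s
  ON t.channel = s.channel AND t.attribution_model = s.attribution_model
  WHEN MATCHED THEN UPDATE SET *
  WHEN NOT MATCHED THEN INSERT *
""")
```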

10 changes: 5 additions & 5 deletions 04_markov_chains.py
@@ -33,15 +33,15 @@
# MAGIC * Heuristic-based attribution methods like first-touch, last-touch, and linear are relatively easy to implement but are less accurate than data-driven methods. With marketing dollars at stake, data-driven methods are highly recommended.
# MAGIC
# MAGIC * There are three steps to take when using Markov Chains to calculate attribution:
-# MAGIC   * Step 1: Construct a transition probablity matrix
+# MAGIC   * Step 1: Construct a transition probability matrix
# MAGIC * Step 2: Calculate the total conversion probability
# MAGIC * Step 3: Use the removal effect to calculate attribution
# MAGIC
# MAGIC * As the name suggests, a transition probability matrix is a matrix that contains the probabilities associated with moving from one state to another state. This is calculated using the data from all available customer journeys. With this matrix in place, we can then easily calculate the total conversion probability, which represents, on average, the likelihood that a given user will experience a conversion event. Lastly, we use the total conversion probability as an input for calculating the removal effect for each channel. The way that the removal effect is calculated is best illustrated with an example.
# MAGIC
# MAGIC **An Example**
# MAGIC
-# MAGIC In the image below, we have a transition probability graph that shows the probabilty of going from one state to another state. In the context of a customer journey, states can be non-terminal (viewing an impression on a given channel) or terminal (conversion, no conversion).
+# MAGIC In the image below, we have a transition probability graph that shows the probability of going from one state to another state. In the context of a customer journey, states can be non-terminal (viewing an impression on a given channel) or terminal (conversion, no conversion).
# MAGIC
# MAGIC <div style="text-align: left">
# MAGIC <img src="https://cme-solution-accelerators-images.s3.us-west-2.amazonaws.com/multi-touch-attribution/mta-dag-1.png"; width="60%">
@@ -61,7 +61,7 @@
# MAGIC
# MAGIC ```P(Conversion) = (0.2 X 0.8) + (0.2 X 0.2 X 0.1) + (0.8 X 0.6) + (0.8 X 0.4 X 0.8) + (0.8 X 0.4 X 0.2 X 0.1) = 0.90```
# MAGIC
-# MAGIC Now, let's calculate the removal effect for one of our channels: Facebook/Social. For this, we will set the conversion for Facebook/Social to 0% and then recalculate the total conversion probabilty. Now we have `0.48`.
+# MAGIC Now, let's calculate the removal effect for one of our channels: Facebook/Social. For this, we will set the conversion for Facebook/Social to 0% and then recalculate the total conversion probability. Now we have `0.48`.
# MAGIC
# MAGIC ```P(Conversion) = (0.2 X 0.0) + (0.2 X 0.0 X 0.1) + (0.8 X 0.6) + (0.8 X 0.4 X 0) +(0.8 X 0.4 X 0.0 X 0.1) = 0.48```
# MAGIC
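The arithmetic in the two formulas above is easy to verify, and the removal effect follows directly. A small sketch reproducing the numbers — the final normalization is one common formulation of the removal effect and may differ from how the notebook computes it:

```python
# Terms copied from the formulas above; each product is one converting path
# through the transition graph.
p_conversion = (0.2*0.8) + (0.2*0.2*0.1) + (0.8*0.6) + (0.8*0.4*0.8) + (0.8*0.4*0.2*0.1)
print(p_conversion)  # 0.9064, quoted above as 0.90

# Removal effect for Facebook/Social: zero the transitions through that channel
# and recompute the total conversion probability.
p_removed = (0.2*0.0) + (0.2*0.0*0.1) + (0.8*0.6) + (0.8*0.4*0.0) + (0.8*0.4*0.0*0.1)
print(p_removed)  # 0.48

# Share of conversion probability lost when the channel is removed.
removal_effect = (p_conversion - p_removed) / p_conversion
print(round(removal_effect, 2))  # ~0.47
```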
@@ -85,7 +85,7 @@
# MAGIC
# MAGIC In this step, we will:
# MAGIC 1. Import libraries
-# MAGIC 2. Run the utils notebook to gain acces to the get_params function
+# MAGIC 2. Run the utils notebook to gain access to the get_params function
# MAGIC 3. get_params and store the relevant values in variables
# MAGIC 4. Set the current database so that it doesn't need to be manually specified each time it's used

@@ -141,7 +141,7 @@
# MAGIC %md
# MAGIC ## Step 2: Construct the Transition Probability Matrix
# MAGIC
-# MAGIC As discussed above, the transition probability matrix contains the probablities associated with moving from one state to another state. This is calculated using the data from all customer journeys.
+# MAGIC As discussed above, the transition probability matrix contains the probabilities associated with moving from one state to another state. This is calculated using the data from all customer journeys.
# MAGIC
# MAGIC In this step, we will:
# MAGIC 1. Define a user-defined function (UDF), `get_transition_array`, that takes a customer journey and enumerates each of the corresponding channel transitions
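A condensed sketch of the idea behind `get_transition_array` and the normalization that follows — the journey string format and the `path` column are assumptions; only the `gold_user_journey` table name comes from these notebooks:

```python
from pyspark.sql import functions as F
from pyspark.sql.types import ArrayType, StringType

@F.udf(returnType=ArrayType(StringType()))
def get_transition_array(path):
    # "Start > Facebook > Conversion" -> ["Start > Facebook", "Facebook > Conversion"]
    states = [s.strip() for s in path.split(">")]
    return [f"{a} > {b}" for a, b in zip(states, states[1:])]

# Enumerate and count every channel-to-channel transition across all journeys.
transitions = (
    spark.table("gold_user_journey")
      .withColumn("transition", F.explode(get_transition_array("path")))
      .groupBy("transition").count()
      .withColumn("start_state", F.trim(F.split("transition", ">").getItem(0)))
)

# Divide each transition count by the total leaving its start state
# to obtain transition probabilities.
totals = transitions.groupBy("start_state").agg(F.sum("count").alias("total"))
transition_prob = (
    transitions.join(totals, "start_state")
      .withColumn("probability", F.col("count") / F.col("total"))
)
```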
10 changes: 5 additions & 5 deletions 05_spend_optimization.py
@@ -196,11 +196,11 @@

# COMMAND ----------

-base_converion_rate_pd = spark.table("base_conversion_rate").toPandas()
+base_conversion_rate_pd = spark.table("base_conversion_rate").toPandas()

pie, ax = plt.subplots(figsize=[20,9])
-labels = base_converion_rate_pd['interaction_type']
-plt.pie(x=base_converion_rate_pd['count'], autopct="%.1f%%", explode=[0.05]*2, labels=labels, pctdistance=0.5)
+labels = base_conversion_rate_pd['interaction_type']
+plt.pie(x=base_conversion_rate_pd['count'], autopct="%.1f%%", explode=[0.05]*2, labels=labels, pctdistance=0.5)
plt.title("Base Conversion Rate");

# COMMAND ----------
@@ -292,14 +292,14 @@
cpa_summary_pd = spark.table("cpa_summary").toPandas()

pt = sns.catplot(x='channel', y='CPA_in_Dollars',hue='attribution_model',data=cpa_summary_pd, kind='bar', aspect=4, ci=None)
plt.title("Cost of Aquisition by Channel")
plt.title("Cost of Acquisition by Channel")
pt.fig.set_figwidth(20)
pt.fig.set_figheight(9)

plt.tick_params(labelsize=15)
plt.ylabel("CPA in $")
plt.xlabel("Channels")
plt.title("Channel Cost per Aquisition");
plt.title("Channel Cost per Acquisition");

# COMMAND ----------

2 changes: 1 addition & 1 deletion LICENSE
@@ -7,7 +7,7 @@ to an Agreement (defined below) between Licensee (defined below) and Databricks,
Software shall be deemed part of the Downloadable Services under the Agreement, or if the Agreement does not define Downloadable Services,
Subscription Services, or if neither are defined then the term in such Agreement that refers to the applicable Databricks Platform
Services (as defined below) shall be substituted herein for “Downloadable Services.” Licensee's use of the Software must comply at
-all times with any restrictions applicable to the Downlodable Services and Subscription Services, generally, and must be used in
+all times with any restrictions applicable to the Downloadable Services and Subscription Services, generally, and must be used in
accordance with any applicable documentation. For the avoidance of doubt, the Software constitutes Databricks Confidential Information
under the Agreement.

