Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mock CRM data #19

Merged
merged 1 commit into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*venv*
*.ipynb_checkpoints*
.vscode
28 changes: 28 additions & 0 deletions crm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# CRM sample data

This sample data is designed to work like Mathesar's internal CRM.

## Usage

## Loading data

The CRM sample data is in `generated_data.sql`.

Load it into your local Mathesar development environment like so:

```
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < generated_data.sql
```

## Modifying and re-generating the data

1. Make adjustments as necessary in `source_data`. The data in there was generated with a mix of manual grunt work, LLM wizardry, and text-based massaging.

1. Run the script:

```
./generate.py
```

This regenerates the `generated_data.sql` file from the source data.

126 changes: 126 additions & 0 deletions crm/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env python3

import json
import yaml
import os
import csv
import io

# Locations of the script's inputs and output. All of these are relative
# paths, so they resolve against the current working directory when opened.

# Directory that holds every input file.
SOURCE_DATA_PATH = "source_data"
# JSON file parsed into the list of contact records.
CONTACTS_PATH = os.path.join(SOURCE_DATA_PATH, "contacts.json")
# Directory scanned for one ".md" file per interaction.
INTERACTIONS_PATH = os.path.join(SOURCE_DATA_PATH, "interactions")
# SQL template whose "__*_TSV_DATA__" placeholders get substituted.
TEMPLATE_PATH = os.path.join(SOURCE_DATA_PATH, "template.sql")
# File the finished SQL is written to.
OUTPUT_PATH = "generated_data.sql"


def read_file(file):
    """Return the complete text content of the file at path ``file``.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(file, mode="r") as handle:
        text = handle.read()
    return text


def parse_markdown(content):
    """Split a markdown document into its YAML front matter and body.

    Args:
        content: Full text of the document.

    Returns:
        A dict with two keys: ``"frontmatter"`` (the value parsed from the
        YAML between the leading ``---`` and the next ``---``, or ``{}`` when
        there is none) and ``"content"`` (the remaining text). The body is
        whitespace-stripped only when front matter was found; otherwise the
        input is returned untouched.
    """
    if not content.startswith("---"):
        return {"frontmatter": {}, "content": content}

    parts = content.split("---", 2)
    if len(parts) < 3:
        # An opening delimiter with no closing one: there is no front matter
        # block to parse, so treat the whole text as body instead of failing
        # on the three-way unpack below.
        return {"frontmatter": {}, "content": content}

    _, raw_frontmatter, body = parts
    frontmatter = yaml.safe_load(raw_frontmatter)
    if frontmatter is None:
        # safe_load yields None for an empty YAML document; normalise it so
        # callers can always index the result like a mapping.
        frontmatter = {}
    return {"frontmatter": frontmatter, "content": body.strip()}


# Characters that would be read as row/column structure (or as the start of
# an escape sequence) mapped to the backslash sequences that represent them.
# Built once so every cell is escaped in a single pass.
_CELL_ESCAPES = str.maketrans(
    {
        "\\": "\\\\",
        "\t": "\\t",
        "\n": "\\n",
        "\r": "\\r",
    }
)


def clean_cell(cell):
    """Convert one value into its tab-separated text representation.

    NOTE(review): the ``\\N`` null marker and backslash escapes match
    PostgreSQL's ``COPY`` text format; the template that consumes this output
    is not visible here, so confirm it loads the data that way.

    Args:
        cell: The raw value; ``None``, a string, or any other object.

    Returns:
        ``"\\N"`` for ``None``; for strings, the text with backslash, tab,
        newline and carriage return replaced by ``\\\\``, ``\\t``, ``\\n``
        and ``\\r``; any other value unchanged.
    """
    if cell is None:
        return r"\N"
    if isinstance(cell, str):
        return cell.translate(_CELL_ESCAPES)
    return cell


def tsv(rows):
    """Serialise rows as tab-separated lines joined by newlines.

    Fields are never quoted: every cell goes through ``clean_cell`` (which
    escapes the delimiter characters) and is then written verbatim, so text
    containing double quotes is emitted unchanged. Empty leading or trailing
    cells are preserved, keeping the column count of every row intact.

    Args:
        rows: Iterable of rows, each an iterable of cell values.

    Returns:
        The serialised text with no trailing newline; ``""`` for no rows.
    """
    lines = ("\t".join(str(clean_cell(cell)) for cell in row) for row in rows)
    return "\n".join(lines)


# Every contact record, loaded once; the row generators below all read it.
contacts = json.loads(read_file(CONTACTS_PATH))

# Distinct website types and tags across all contacts, each mapped to a
# 1-based id. Set iteration order can differ from one run to the next, so the
# values are sorted before numbering to keep the generated ids (and therefore
# the output file) reproducible. Assumes the values are mutually comparable,
# e.g. all strings.
website_types = {site["type"] for contact in contacts for site in contact["websites"]}
website_ids = {
    name: number for number, name in enumerate(sorted(website_types), start=1)
}
tags = {tag for contact in contacts for tag in contact["tags"]}
tag_ids = {name: number for number, name in enumerate(sorted(tags), start=1)}


def get_contact_rows():
    """Yield one ``[id, full_name, informal_name, notes]`` row per contact."""
    columns = ("id", "full_name", "informal_name", "notes")
    for person in contacts:
        yield [person[column] for column in columns]


def get_email_rows():
    """Yield ``[id, contact_id, address, is_primary, source]`` per email.

    Ids are a running 1-based counter across all contacts. Within each
    contact, only the first listed email is flagged as primary.
    """
    email_id = 0
    for person in contacts:
        for position, entry in enumerate(person["emails"]):
            email_id += 1
            yield [
                email_id,
                person["id"],
                entry["address"],
                position == 0,
                entry["source"],
            ]


def get_website_type_rows():
    """Yield an ``[id, type]`` row for every entry in ``website_ids``."""
    yield from (
        [type_id, type_name] for type_name, type_id in website_ids.items()
    )


def get_website_rows():
    """Yield ``[id, contact_id, url, type_id]`` for every contact website.

    Ids are a running 1-based counter across all contacts; ``type_id`` is
    looked up in ``website_ids`` by the website's type.
    """
    owned_sites = (
        (person, site) for person in contacts for site in person["websites"]
    )
    for website_id, (person, site) in enumerate(owned_sites, start=1):
        type_id = website_ids[site["type"]]
        yield [website_id, person["id"], site["url"], type_id]


def get_interaction_rows():
    """Yield ``[id, contact, date, subject, content]`` per interaction file.

    Each ``.md`` file in ``INTERACTIONS_PATH`` is parsed with
    ``parse_markdown``; ``contact``, ``date`` and ``subject`` come from its
    front matter and ``content`` is the body text.

    Raises:
        KeyError: If a file's front matter lacks one of the required keys.
    """
    # os.listdir returns entries in arbitrary order, so sort for ids that are
    # stable between runs; filter before numbering so that non-markdown files
    # do not leave gaps in the id sequence.
    filenames = sorted(
        name for name in os.listdir(INTERACTIONS_PATH) if name.endswith(".md")
    )
    for interaction_id, filename in enumerate(filenames, start=1):
        path = os.path.join(INTERACTIONS_PATH, filename)
        interaction = parse_markdown(read_file(path))
        frontmatter = interaction["frontmatter"]
        yield [
            interaction_id,
            frontmatter["contact"],
            frontmatter["date"],
            frontmatter["subject"],
            interaction["content"],
        ]


def get_tag_rows():
    """Yield an ``[id, tag]`` row for every entry in ``tag_ids``."""
    for label in tag_ids:
        yield [tag_ids[label], label]


def get_contact_tag_rows():
    """Yield ``[id, contact_id, tag_id]`` for every tag attached to a contact.

    Ids are a running 1-based counter across all contacts; ``tag_id`` is
    looked up in ``tag_ids``.
    """
    links = (
        (person["id"], tag_ids[label])
        for person in contacts
        for label in person["tags"]
    )
    for link_id, (contact_id, tag_id) in enumerate(links, start=1):
        yield [link_id, contact_id, tag_id]


# Start from the template and substitute each placeholder, in this order,
# with the tab-separated rendering of the matching row generator.
sql = read_file(TEMPLATE_PATH)
for _placeholder, _row_source in (
    ("__CONTACT_TSV_DATA__", get_contact_rows),
    ("__EMAIL_TSV_DATA__", get_email_rows),
    ("__WEBSITE_TYPE_TSV_DATA__", get_website_type_rows),
    ("__WEBSITE_TSV_DATA__", get_website_rows),
    ("__INTERACTION_TSV_DATA__", get_interaction_rows),
    ("__TAG_TSV_DATA__", get_tag_rows),
    ("__CONTACT_TAG_TSV_DATA__", get_contact_tag_rows),
):
    sql = sql.replace(_placeholder, tsv(_row_source()))


# Persist the assembled SQL, replacing any previous output file.
with open(OUTPUT_PATH, mode="w") as output_file:
    output_file.write(sql)
279 changes: 279 additions & 0 deletions crm/generated_data.sql

Large diffs are not rendered by default.

Loading
Loading