diff --git a/ee/clickhouse/models/test/__snapshots__/test_cohort.ambr b/ee/clickhouse/models/test/__snapshots__/test_cohort.ambr index 19b73603a76ba..1b8dac823a0a1 100644 --- a/ee/clickhouse/models/test/__snapshots__/test_cohort.ambr +++ b/ee/clickhouse/models/test/__snapshots__/test_cohort.ambr @@ -59,24 +59,24 @@ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons WHERE ifNull(equals(persons.`properties___$some_prop`, 'something'), 0) ORDER BY persons.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) INTERSECT ( - (SELECT persons.id AS id - FROM - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$another_prop'), ''), 'null'), '^"|"$', ''), person.version) AS `properties___$another_prop`, - person.id AS id - FROM person - WHERE and(equals(person.team_id, 99999), in(id, - (SELECT where_optimization.id AS id - FROM person AS where_optimization - WHERE and(equals(where_optimization.team_id, 99999), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, '$another_prop'), ''), 'null'), '^"|"$', ''), 'something'), 0))))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons - WHERE ifNull(equals(persons.`properties___$another_prop`, 'something'), 0) - ORDER BY persons.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')))) as person SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto')) INTERSECT ( + (SELECT persons.id AS id + FROM + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$another_prop'), ''), 'null'), '^"|"$', ''), person.version) AS `properties___$another_prop`, + person.id AS id + FROM person + WHERE and(equals(person.team_id, 99999), in(id, + (SELECT where_optimization.id AS id + FROM person AS where_optimization + WHERE and(equals(where_optimization.team_id, 99999), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, '$another_prop'), ''), 'null'), '^"|"$', ''), 'something'), 0))))) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons + WHERE ifNull(equals(persons.`properties___$another_prop`, 'something'), 0) + ORDER BY persons.id ASC + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto')))) as person SETTINGS optimize_aggregation_in_order = 1, + join_algorithm = 'auto' ''' # --- # name: TestCohort.test_cohortpeople_with_not_in_cohort_operator @@ -137,9 +137,9 @@ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons WHERE ifNull(equals(persons.`properties___$some_prop`, 'something1'), 0) ORDER BY persons.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) as person SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto')) as person SETTINGS optimize_aggregation_in_order = 1, + join_algorithm = 'auto' ''' # --- # name: TestCohort.test_cohortpeople_with_not_in_cohort_operator.2 @@ -236,8 +236,8 @@ WHERE and(equals(e.team_id, 99999), greaterOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp.000000', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp.999999', 6, 'UTC')), equals(e.event, '$pageview'))) GROUP BY actor_id) AS source ORDER BY source.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto')) EXCEPT ((( (SELECT persons.id AS id FROM @@ -252,9 +252,9 @@ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons WHERE ifNull(equals(persons.`properties___$some_prop`, 'something1'), 0) ORDER BY persons.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto'))))) as person SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto'))))) as person SETTINGS optimize_aggregation_in_order = 1, + join_algorithm = 'auto' ''' # --- # name: TestCohort.test_cohortpeople_with_not_in_cohort_operator_and_no_precalculation @@ -393,9 +393,9 @@ or isNotNull(fromUnixTimestamp(0))))) GROUP BY actor_id) AS source ORDER BY source.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) as person SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto')) as person SETTINGS optimize_aggregation_in_order = 1, + join_algorithm = 'auto' ''' # --- # name: TestCohort.test_cohortpeople_with_not_in_cohort_operator_for_behavioural_cohorts.2 @@ -478,8 +478,8 @@ WHERE and(equals(e.team_id, 99999), greaterOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp.000000', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp.999999', 6, 'UTC')), equals(e.event, '$pageview'))) GROUP BY actor_id) AS source ORDER BY source.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto')) EXCEPT ((( (SELECT source.id AS id FROM @@ -508,9 +508,9 @@ or isNotNull(fromUnixTimestamp(0))))) GROUP BY actor_id) AS source ORDER BY source.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto'))))) as person SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto'))))) as person SETTINGS optimize_aggregation_in_order = 1, + join_algorithm = 'auto' ''' # --- # name: TestCohort.test_static_cohort_precalculated diff --git a/ee/clickhouse/models/test/test_cohort.py b/ee/clickhouse/models/test/test_cohort.py index 81bc271f57f7a..5a16d253a2e3d 100644 --- a/ee/clickhouse/models/test/test_cohort.py +++ b/ee/clickhouse/models/test/test_cohort.py @@ -1,10 +1,12 @@ from datetime import datetime, timedelta +import re from typing import Optional from django.utils import timezone from freezegun import freeze_time from posthog.client import sync_execute +from posthog.hogql.constants import MAX_SELECT_COHORT_CALCULATION_LIMIT from posthog.hogql.hogql import HogQLContext from posthog.models.action import Action from posthog.models.cohort import Cohort, get_and_update_pending_version @@ -68,22 +70,29 @@ def get_person_ids_by_cohort_id( return [str(row[0]) for row in results] -def calculate_cohort_hogql_test_harness(cohort: Cohort, pending_version: int): - version = pending_version * 2 + 2 - cohort.calculate_people_ch(version) - query = f""" - SELECT count() FROM - (SELECT person_id FROM cohortpeople as cp WHERE cp.version = {version} and cp.cohort_id = {cohort.pk}) as cp1 - FULL OUTER JOIN (SELECT person_id FROM cohortpeople as cp WHERE cp.version = {version - 1} and cp.cohort_id = {cohort.pk}) as cp2 - ON cp1.person_id = cp2.person_id - WHERE empty(cp1.person_id) or empty(cp2.person_id) - """ - result = sync_execute(query) - assert 0 == result[0][0] - return version - - class TestCohort(ClickhouseTestMixin, BaseTest): + def calculate_cohort_hogql_test_harness(self, cohort: Cohort, pending_version: int): + version = pending_version * 2 + 2 + + with self.capture_queries_startswith(("INSERT", "insert")) as queries: + cohort.calculate_people_ch(version) + + query = f""" + SELECT count() FROM + (SELECT person_id FROM cohortpeople as cp WHERE cp.version = {version} and cp.cohort_id = {cohort.pk}) as cp1 + FULL OUTER JOIN (SELECT person_id FROM cohortpeople as cp WHERE cp.version = {version - 1} and cp.cohort_id = {cohort.pk}) as cp2 + ON cp1.person_id = cp2.person_id + WHERE empty(cp1.person_id) or empty(cp2.person_id) + """ + result = sync_execute(query) + assert 0 == result[0][0] + for query in queries: + if "LIMIT" in query: + assert all( + limit == str(MAX_SELECT_COHORT_CALCULATION_LIMIT) for limit in re.findall(r"LIMIT (\d+)", query) + ) + return version + def _get_cohortpeople(self, cohort: Cohort, *, team_id: Optional[int] = None): team_id = team_id or cohort.team_id return sync_execute( @@ -532,7 +541,7 @@ def test_insert_by_distinct_id_or_email(self): self.assertEqual(results, 3) #  If we accidentally call calculate_people it shouldn't erase people - calculate_cohort_hogql_test_harness(cohort, 0) + self.calculate_cohort_hogql_test_harness(cohort, 0) results = get_person_ids_by_cohort_id(self.team.pk, cohort.id) self.assertEqual(len(results), 3) @@ -571,7 +580,7 @@ def test_cohortpeople_basic(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) results = self._get_cohortpeople(cohort1) self.assertEqual(len(results), 2) @@ -607,13 +616,13 @@ def test_cohortpeople_action_basic(self): ) cohort1 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk, "days": 1}], name="cohort1") - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) results = self._get_cohortpeople(cohort1) self.assertEqual(len(results), 2) cohort2 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk, "days": 1}], name="cohort2") - calculate_cohort_hogql_test_harness(cohort2, 0) + self.calculate_cohort_hogql_test_harness(cohort2, 0) results = self._get_cohortpeople(cohort2) self.assertEqual(len(results), 2) @@ -699,7 +708,7 @@ def test_cohortpeople_action_count(self): groups=[{"action_id": action.pk, "days": 3, "count": 2, "count_operator": "gte"}], name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) results = self._get_cohortpeople(cohort1) self.assertEqual(len(results), 2) @@ -709,7 +718,7 @@ def test_cohortpeople_action_count(self): groups=[{"action_id": action.pk, "days": 3, "count": 1, "count_operator": "lte"}], name="cohort2", ) - calculate_cohort_hogql_test_harness(cohort2, 0) + self.calculate_cohort_hogql_test_harness(cohort2, 0) results = self._get_cohortpeople(cohort2) self.assertEqual(len(results), 1) @@ -719,7 +728,7 @@ def test_cohortpeople_action_count(self): groups=[{"action_id": action.pk, "days": 3, "count": 1, "count_operator": "eq"}], name="cohort3", ) - calculate_cohort_hogql_test_harness(cohort3, 0) + self.calculate_cohort_hogql_test_harness(cohort3, 0) results = self._get_cohortpeople(cohort3) self.assertEqual(len(results), 1) @@ -753,9 +762,9 @@ def test_cohortpeople_deleted_person(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) p2.delete() - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) def test_cohortpeople_prop_changed(self): with freeze_time((datetime.now() - timedelta(days=3)).strftime("%Y-%m-%d")): @@ -791,14 +800,14 @@ def test_cohortpeople_prop_changed(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) with freeze_time((datetime.now() - timedelta(days=2)).strftime("%Y-%m-%d")): p2.version = 1 p2.properties = {"$some_prop": "another", "$another_prop": "another"} p2.save() - calculate_cohort_hogql_test_harness(cohort1, 1) + self.calculate_cohort_hogql_test_harness(cohort1, 1) results = self._get_cohortpeople(cohort1) @@ -833,7 +842,7 @@ def test_cohort_change(self): ], name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) results = self._get_cohortpeople(cohort1) self.assertEqual(len(results), 1) @@ -849,7 +858,7 @@ def test_cohort_change(self): ] cohort1.save() - calculate_cohort_hogql_test_harness(cohort1, 1) + self.calculate_cohort_hogql_test_harness(cohort1, 1) results = self._get_cohortpeople(cohort1) self.assertEqual(len(results), 1) @@ -866,7 +875,7 @@ def test_static_cohort_precalculated(self): cohort = Cohort.objects.create(team=self.team, groups=[], is_static=True, last_calculation=timezone.now()) cohort.insert_users_by_list(["1", "123"]) - calculate_cohort_hogql_test_harness(cohort, 0) + self.calculate_cohort_hogql_test_harness(cohort, 0) with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): sql, _ = format_filter_query(cohort, 0, HogQLContext(team_id=self.team.pk)) @@ -881,7 +890,7 @@ def test_cohortpeople_with_valid_other_cohort_filter(self): groups=[{"properties": [{"key": "foo", "value": "bar", "type": "person"}]}], name="cohort0", ) - calculate_cohort_hogql_test_harness(cohort0, 0) + self.calculate_cohort_hogql_test_harness(cohort0, 0) cohort1: Cohort = Cohort.objects.create( team=self.team, @@ -889,7 +898,7 @@ def test_cohortpeople_with_valid_other_cohort_filter(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) res = self._get_cohortpeople(cohort1) self.assertEqual(len(res), 1) @@ -929,7 +938,7 @@ def test_cohortpeople_with_not_in_cohort_operator(self): groups=[{"properties": [{"key": "$some_prop", "value": "something1", "type": "person"}]}], name="cohort0", ) - calculate_cohort_hogql_test_harness(cohort0, 0) + self.calculate_cohort_hogql_test_harness(cohort0, 0) cohort1 = Cohort.objects.create( team=self.team, @@ -958,7 +967,7 @@ def test_cohortpeople_with_not_in_cohort_operator(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): filter = Filter( @@ -1125,7 +1134,7 @@ def test_cohortpeople_with_not_in_cohort_operator_for_behavioural_cohorts(self): ], name="cohort0", ) - calculate_cohort_hogql_test_harness(cohort0, 0) + self.calculate_cohort_hogql_test_harness(cohort0, 0) cohort1 = Cohort.objects.create( team=self.team, @@ -1154,7 +1163,7 @@ def test_cohortpeople_with_not_in_cohort_operator_for_behavioural_cohorts(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): filter = Filter( @@ -1186,7 +1195,7 @@ def test_cohortpeople_with_nonexistent_other_cohort_filter(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) res = self._get_cohortpeople(cohort1) self.assertEqual(len(res), 0) @@ -1198,7 +1207,7 @@ def test_clickhouse_empty_query(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort2, 0) + self.calculate_cohort_hogql_test_harness(cohort2, 0) self.assertFalse(Cohort.objects.get().is_calculating) def test_query_with_multiple_new_style_cohorts(self): @@ -1337,7 +1346,7 @@ def test_query_with_multiple_new_style_cohorts(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) result = self._get_cohortpeople(cohort1) self.assertCountEqual([p1.uuid, p3.uuid], [r[0] for r in result]) @@ -1365,7 +1374,7 @@ def test_update_cohort(self): name="cohort1", ) - calculate_cohort_hogql_test_harness(cohort1, 0) + self.calculate_cohort_hogql_test_harness(cohort1, 0) # Should only have p1 in this cohort results = self._get_cohortpeople(cohort1) @@ -1373,7 +1382,7 @@ def test_update_cohort(self): cohort1.groups = [{"properties": [{"key": "$another_prop", "value": "something", "type": "person"}]}] cohort1.save() - calculate_cohort_hogql_test_harness(cohort1, 1) + self.calculate_cohort_hogql_test_harness(cohort1, 1) # Should only have p2, p3 in this cohort results = self._get_cohortpeople(cohort1) @@ -1381,7 +1390,7 @@ def test_update_cohort(self): cohort1.groups = [{"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}] cohort1.save() - calculate_cohort_hogql_test_harness(cohort1, 2) + self.calculate_cohort_hogql_test_harness(cohort1, 2) # Should only have p1 again in this cohort results = self._get_cohortpeople(cohort1) @@ -1411,7 +1420,7 @@ def test_cohort_versioning(self): name="cohort1", ) - version = calculate_cohort_hogql_test_harness(cohort1, 5) + version = self.calculate_cohort_hogql_test_harness(cohort1, 5) cohort1.pending_version = version cohort1.version = version @@ -1454,7 +1463,7 @@ def test_calculate_people_ch_in_multiteam_project(self): name="shared cohort", ) # Calculate cohort - calculate_cohort_hogql_test_harness(shared_cohort, 0) + self.calculate_cohort_hogql_test_harness(shared_cohort, 0) # Verify shared_cohort is now calculated for both teams results_team1 = self._get_cohortpeople(shared_cohort, team_id=self.team.pk) diff --git a/ee/clickhouse/queries/test/test_cohort_query.py b/ee/clickhouse/queries/test/test_cohort_query.py index 4d8ab911f1b38..228424c11195c 100644 --- a/ee/clickhouse/queries/test/test_cohort_query.py +++ b/ee/clickhouse/queries/test/test_cohort_query.py @@ -61,6 +61,7 @@ def execute(filter: Filter, team: Team): q, params = cohort_query.get_query() res = sync_execute(q, {**params, **filter.hogql_context.values}) unittest.TestCase().assertCountEqual(res, cohort_query.hogql_result.results) + assert ["id"] == cohort_query.hogql_result.columns return res, q, params @@ -3417,3 +3418,85 @@ def test_type_misalignment(self): res, q, params = execute(filter, self.team) assert 1 == len(res) + + def test_project_properties(self): + PropertyDefinition.objects.create( + team=self.team, + name="bool_key", + property_type="Boolean", + type=PropertyDefinition.Type.EVENT, + ) + + other_team = Team.objects.create(organization=self.organization, project=self.project) + + action = Action.objects.create( + team=self.team, + name="action", + steps_json=[ + { + "event": "$pageview", + "properties": [ + { + "key": "bool_key", + "operator": "exact", + "value": ["true"], + "type": "event", + } + ], + } + ], + ) + + cohort = Cohort.objects.create( + team=self.team, + name="cohort", + is_static=False, + filters={ + "properties": { + "type": "OR", + "values": [ + { + "type": "OR", + "values": [ + { + "key": action.pk, + "type": "behavioral", + "value": "performed_event", + "negation": False, + "event_type": "actions", + "time_value": "30", + "time_interval": "day", + } + ], + } + ], + } + }, + ) + + filter_data = { + "properties": { + "type": "OR", + "values": [{"key": "id", "value": cohort.pk, "type": "cohort"}], + } + } + + cohort_query1 = CohortQuery( + filter=Filter( + data=filter_data, + team=self.team, + ), + team=self.team, + ) + cohort_query2 = CohortQuery( + filter=Filter( + data=filter_data, + team=other_team, + ), + team=other_team, + ) + + assert ( + cohort_query1.clickhouse_query.replace(f"team_id, {self.team.pk}", f"team_id, {str(other_team.pk)}") + == cohort_query2.clickhouse_query + ) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f84c970c7f153..c0c5a5919b7af 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -63,7 +63,7 @@ importers: version: 2.29.0(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0) eslint-plugin-jest: specifier: ^28.6.0 - version: 28.6.0(@typescript-eslint/eslint-plugin@7.1.1(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(jest@29.7.0)(typescript@4.9.5) + version: 28.6.0(@typescript-eslint/eslint-plugin@7.1.1(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(jest@29.7.0(@types/node@18.18.4)(ts-node@10.9.1(@swc/core@1.10.14(@swc/helpers@0.5.15))(@types/node@18.18.4)(typescript@4.9.5)))(typescript@4.9.5) eslint-plugin-posthog: specifier: workspace:* version: link:common/eslint_rules @@ -96,7 +96,7 @@ importers: version: 4.3.0(stylelint@15.11.0(typescript@4.9.5)) stylelint-config-standard-scss: specifier: ^11.1.0 - version: 11.1.0(postcss@8.5.2)(stylelint@15.11.0(typescript@4.9.5)) + version: 11.1.0(postcss@8.4.31)(stylelint@15.11.0(typescript@4.9.5)) stylelint-order: specifier: ^6.0.3 version: 6.0.3(stylelint@15.11.0(typescript@4.9.5)) @@ -979,7 +979,7 @@ importers: version: 2.29.0(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0) eslint-plugin-jest: specifier: ^28.6.0 - version: 28.6.0(@typescript-eslint/eslint-plugin@7.1.1(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(jest@29.7.0)(typescript@4.9.5) + version: 28.6.0(@typescript-eslint/eslint-plugin@7.1.1(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(jest@29.7.0(@types/node@18.18.4)(ts-node@10.9.1(@swc/core@1.10.14(@swc/helpers@0.5.15))(@types/node@18.18.4)(typescript@4.9.5)))(typescript@4.9.5) eslint-plugin-posthog: specifier: workspace:* version: link:../common/eslint_rules @@ -22489,7 +22489,7 @@ snapshots: loader-utils: 2.0.4 make-dir: 3.1.0 schema-utils: 2.7.1 - webpack: 5.88.2(@swc/core@1.10.14(@swc/helpers@0.5.15))(esbuild@0.18.20)(webpack-cli@5.1.4) + webpack: 5.88.2 babel-loader@9.1.3(@babel/core@7.26.0)(webpack@5.88.2): dependencies: @@ -23544,7 +23544,7 @@ snapshots: postcss-value-parser: 4.2.0 schema-utils: 2.7.1 semver: 6.3.1 - webpack: 5.88.2(@swc/core@1.10.14(@swc/helpers@0.5.15))(esbuild@0.18.20)(webpack-cli@5.1.4) + webpack: 5.88.2 css-loader@6.8.1(webpack@5.88.2): dependencies: @@ -24757,7 +24757,7 @@ snapshots: eslint-import-resolver-node@0.3.9: dependencies: - debug: 3.2.7(supports-color@8.1.1) + debug: 3.2.7(supports-color@5.5.0) is-core-module: 2.13.1 resolve: 1.22.8 transitivePeerDependencies: @@ -24765,7 +24765,7 @@ snapshots: eslint-module-utils@2.8.0(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint-import-resolver-node@0.3.9)(eslint@8.57.0): dependencies: - debug: 3.2.7(supports-color@8.1.1) + debug: 3.2.7(supports-color@5.5.0) optionalDependencies: '@typescript-eslint/parser': 7.1.1(eslint@8.57.0)(typescript@4.9.5) eslint: 8.57.0 @@ -24807,7 +24807,7 @@ snapshots: array.prototype.findlastindex: 1.2.3 array.prototype.flat: 1.3.2 array.prototype.flatmap: 1.3.2 - debug: 3.2.7(supports-color@8.1.1) + debug: 3.2.7(supports-color@5.5.0) doctrine: 2.1.0 eslint: 8.57.0 eslint-import-resolver-node: 0.3.9 @@ -24828,7 +24828,7 @@ snapshots: - eslint-import-resolver-webpack - supports-color - eslint-plugin-jest@28.6.0(@typescript-eslint/eslint-plugin@7.1.1(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(jest@29.7.0)(typescript@4.9.5): + eslint-plugin-jest@28.6.0(@typescript-eslint/eslint-plugin@7.1.1(@typescript-eslint/parser@7.1.1(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(typescript@4.9.5))(eslint@8.57.0)(jest@29.7.0(@types/node@18.18.4)(ts-node@10.9.1(@swc/core@1.10.14(@swc/helpers@0.5.15))(@types/node@18.18.4)(typescript@4.9.5)))(typescript@4.9.5): dependencies: '@typescript-eslint/utils': 7.1.1(eslint@8.57.0)(typescript@4.9.5) eslint: 8.57.0 @@ -25272,7 +25272,7 @@ snapshots: dependencies: loader-utils: 2.0.4 schema-utils: 3.3.0 - webpack: 5.88.2(@swc/core@1.10.14(@swc/helpers@0.5.15))(esbuild@0.18.20)(webpack-cli@5.1.4) + webpack: 5.88.2 file-system-cache@2.3.0: dependencies: @@ -29597,7 +29597,7 @@ snapshots: postcss: 8.5.2 schema-utils: 3.3.0 semver: 7.7.0 - webpack: 5.88.2(@swc/core@1.10.14(@swc/helpers@0.5.15))(esbuild@0.18.20)(webpack-cli@5.1.4) + webpack: 5.88.2 postcss-logical@8.0.0(postcss@8.5.2): dependencies: @@ -29861,6 +29861,10 @@ snapshots: dependencies: postcss: 8.4.31 + postcss-scss@4.0.9(postcss@8.4.31): + dependencies: + postcss: 8.4.31 + postcss-scss@4.0.9(postcss@8.5.2): dependencies: postcss: 8.5.2 @@ -31110,7 +31114,7 @@ snapshots: neo-async: 2.6.2 schema-utils: 3.3.0 semver: 7.7.0 - webpack: 5.88.2(@swc/core@1.10.14(@swc/helpers@0.5.15))(esbuild@0.18.20)(webpack-cli@5.1.4) + webpack: 5.88.2 optionalDependencies: sass: 1.56.0 @@ -31733,7 +31737,7 @@ snapshots: dependencies: loader-utils: 2.0.4 schema-utils: 3.3.0 - webpack: 5.88.2(@swc/core@1.10.14(@swc/helpers@0.5.15))(esbuild@0.18.20)(webpack-cli@5.1.4) + webpack: 5.88.2 style-loader@3.3.3(webpack@5.88.2): dependencies: @@ -31752,6 +31756,15 @@ snapshots: stylelint: 15.11.0(typescript@4.9.5) stylelint-order: 6.0.3(stylelint@15.11.0(typescript@4.9.5)) + stylelint-config-recommended-scss@13.1.0(postcss@8.4.31)(stylelint@15.11.0(typescript@4.9.5)): + dependencies: + postcss-scss: 4.0.9(postcss@8.4.31) + stylelint: 15.11.0(typescript@4.9.5) + stylelint-config-recommended: 13.0.0(stylelint@15.11.0(typescript@4.9.5)) + stylelint-scss: 5.3.1(stylelint@15.11.0(typescript@4.9.5)) + optionalDependencies: + postcss: 8.4.31 + stylelint-config-recommended-scss@13.1.0(postcss@8.5.2)(stylelint@15.11.0(typescript@4.9.5)): dependencies: postcss-scss: 4.0.9(postcss@8.5.2) @@ -31765,6 +31778,14 @@ snapshots: dependencies: stylelint: 15.11.0(typescript@4.9.5) + stylelint-config-standard-scss@11.1.0(postcss@8.4.31)(stylelint@15.11.0(typescript@4.9.5)): + dependencies: + stylelint: 15.11.0(typescript@4.9.5) + stylelint-config-recommended-scss: 13.1.0(postcss@8.4.31)(stylelint@15.11.0(typescript@4.9.5)) + stylelint-config-standard: 34.0.0(stylelint@15.11.0(typescript@4.9.5)) + optionalDependencies: + postcss: 8.4.31 + stylelint-config-standard-scss@11.1.0(postcss@8.5.2)(stylelint@15.11.0(typescript@4.9.5)): dependencies: stylelint: 15.11.0(typescript@4.9.5) diff --git a/posthog/api/test/__snapshots__/test_cohort.ambr b/posthog/api/test/__snapshots__/test_cohort.ambr index 4032bd1884f6a..7793e8529b382 100644 --- a/posthog/api/test/__snapshots__/test_cohort.ambr +++ b/posthog/api/test/__snapshots__/test_cohort.ambr @@ -152,8 +152,8 @@ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons WHERE ifNull(equals(persons.`properties___$some_prop`, 'something'), 0) ORDER BY persons.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto')) UNION DISTINCT ( (SELECT source.id AS id FROM @@ -177,9 +177,9 @@ WHERE and(equals(e.team_id, 99999), greaterOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp.000000', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp.999999', 6, 'UTC')), equals(e.event, '$pageview'))) GROUP BY actor_id) AS source ORDER BY source.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto'))) as person SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto'))) as person SETTINGS optimize_aggregation_in_order = 1, + join_algorithm = 'auto' ''' # --- # name: TestCohort.test_async_deletion_of_cohort.5 @@ -273,8 +273,8 @@ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons WHERE ifNull(equals(persons.`properties___$some_prop`, 'something'), 0) ORDER BY persons.id ASC - LIMIT 100 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) as person SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto')) as person SETTINGS optimize_aggregation_in_order = 1, + join_algorithm = 'auto' ''' # --- diff --git a/posthog/hogql/autocomplete.py b/posthog/hogql/autocomplete.py index 0b2586c242324..304488d764426 100644 --- a/posthog/hogql/autocomplete.py +++ b/posthog/hogql/autocomplete.py @@ -2,6 +2,9 @@ from copy import deepcopy from typing import Optional, cast from collections.abc import Callable + +from django.db.models.functions.comparison import Coalesce + from posthog.hogql.context import HogQLContext from posthog.hogql.database.database import HOGQL_CHARACTERS_TO_BE_WRAPPED, Database, create_hogql_database from posthog.hogql.database.models import ( @@ -41,6 +44,7 @@ ) from common.hogvm.python.stl import STL from common.hogvm.python.stl.bytecode import BYTECODE_STL +from django.db import models ALL_HOG_FUNCTIONS = sorted(list(STL.keys()) + list(BYTECODE_STL.keys())) MATCH_ANY_CHARACTER = "$$_POSTHOG_ANY_$$" @@ -564,9 +568,13 @@ def get_hogql_autocomplete( match_term = "" with timings.measure("property_filter"): - property_query = PropertyDefinition.objects.filter( + property_query = PropertyDefinition.objects.alias( + effective_project_id=Coalesce( + "project_id", "team_id", output_field=models.BigIntegerField() + ) + ).filter( + effective_project_id=context.team.project_id, # type: ignore name__contains=match_term, - team_id=team.pk, type=property_type, ) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 63e498c12aa31..ae49d1a71ef88 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -1,5 +1,6 @@ from typing import Literal, Optional, cast +from django.db.models.functions.comparison import Coalesce from pydantic import BaseModel from posthog.constants import ( @@ -48,6 +49,8 @@ from posthog.warehouse.models import DataWarehouseJoin from posthog.utils import get_from_dict_or_attr from django.db.models import Q +from django.db import models + from posthog.warehouse.models.util import get_view_or_table_by_name @@ -85,14 +88,18 @@ def _handle_bool_values(value: ValueT, expr: ast.Expr, property: Property, team: if value != "true" and value != "false": return value if property.type == "person": - property_types = PropertyDefinition.objects.filter( - team=team, + property_types = PropertyDefinition.objects.alias( + effective_project_id=Coalesce("project_id", "team_id", output_field=models.BigIntegerField()) + ).filter( + effective_project_id=team.project_id, # type: ignore name=property.key, type=PropertyDefinition.Type.PERSON, ) elif property.type == "group": - property_types = PropertyDefinition.objects.filter( - team=team, + property_types = PropertyDefinition.objects.alias( + effective_project_id=Coalesce("project_id", "team_id", output_field=models.BigIntegerField()) + ).filter( + effective_project_id=team.project_id, # type: ignore name=property.key, type=PropertyDefinition.Type.GROUP, group_type_index=property.group_type_index, @@ -132,8 +139,10 @@ def _handle_bool_values(value: ValueT, expr: ast.Expr, property: Property, team: return value else: - property_types = PropertyDefinition.objects.filter( - team=team, + property_types = PropertyDefinition.objects.alias( + effective_project_id=Coalesce("project_id", "team_id", output_field=models.BigIntegerField()) + ).filter( + effective_project_id=team.project_id, # type: ignore name=property.key, type=PropertyDefinition.Type.EVENT, ) diff --git a/posthog/hogql/query.py b/posthog/hogql/query.py index 70b9b5ef88bcc..011b844da2446 100644 --- a/posthog/hogql/query.py +++ b/posthog/hogql/query.py @@ -285,258 +285,3 @@ def execute(self) -> HogQLQueryResponse: def execute_hogql_query(*args, **kwargs) -> HogQLQueryResponse: return HogQLQueryExecutor(*args, **kwargs).execute() - - -""" -import dataclasses -from typing import Optional, Union, cast - -from posthog.clickhouse.client.connection import Workload -from posthog.errors import ExposedCHQueryError -from posthog.hogql import ast -from posthog.hogql.constants import HogQLGlobalSettings, LimitContext, get_default_limit_for_context -from posthog.hogql.errors import ExposedHogQLError -from posthog.hogql.hogql import HogQLContext -from posthog.hogql.modifiers import create_default_modifiers_for_team -from posthog.hogql.parser import parse_select -from posthog.hogql.placeholders import replace_placeholders, find_placeholders -from posthog.hogql.printer import ( - prepare_ast_for_printing, - print_ast, - print_prepared_ast, -) -from posthog.hogql.filters import replace_filters -from posthog.hogql.timings import HogQLTimings -from posthog.hogql.variables import replace_variables -from posthog.hogql.visitor import clone_expr -from posthog.hogql.resolver_utils import extract_select_queries -from posthog.models.team import Team -from posthog.clickhouse.query_tagging import tag_queries -from posthog.hogql.database.database import create_hogql_database -from posthog.client import sync_execute -from posthog.schema import ( - HogQLQueryResponse, - HogQLFilters, - HogQLQueryModifiers, - HogQLMetadata, - HogQLMetadataResponse, - HogLanguage, - HogQLVariable, -) -from posthog.settings import HOGQL_INCREASED_MAX_EXECUTION_TIME - - -def execute_hogql_query( - query: Union[str, ast.SelectQuery, ast.SelectSetQuery], - team: Team, - *, - query_type: str = "hogql_query", - filters: Optional[HogQLFilters] = None, - placeholders: Optional[dict[str, ast.Expr]] = None, - variables: Optional[dict[str, HogQLVariable]] = None, - workload: Workload = Workload.DEFAULT, - settings: Optional[HogQLGlobalSettings] = None, - modifiers: Optional[HogQLQueryModifiers] = None, - limit_context: Optional[LimitContext] = LimitContext.QUERY, - timings: Optional[HogQLTimings] = None, - pretty: Optional[bool] = True, - context: Optional[HogQLContext] = None, -) -> HogQLQueryResponse: - if timings is None: - timings = HogQLTimings() - - if context is None: - context = HogQLContext(team_id=team.pk, timings=timings) - with timings.measure("create_hogql_database"): - context.database = create_hogql_database(team.pk, modifiers, team_arg=team, timings=timings) - - query_modifiers = create_default_modifiers_for_team(team, modifiers) - debug = modifiers is not None and modifiers.debug - error: Optional[str] = None - explain: Optional[list[str]] = None - results = None - types = None - metadata: Optional[HogQLMetadataResponse] = None - - with timings.measure("query"): - if isinstance(query, ast.SelectQuery) or isinstance(query, ast.SelectSetQuery): - select_query = query - query = None - else: - select_query = parse_select(str(query), timings=timings) - - with timings.measure("variables"): - if variables and len(variables.keys()) > 0: - select_query = replace_variables(node=select_query, variables=list(variables.values()), team=team) - - with timings.measure("replace_placeholders"): - placeholders_in_query = find_placeholders(select_query) - placeholders = placeholders or {} - - if "filters" in placeholders and filters is not None: - raise ValueError( - f"Query contains 'filters' placeholder, yet filters are also provided as a standalone query parameter." - ) - if "filters" in placeholders_in_query or any( - placeholder and placeholder.startswith("filters.") for placeholder in placeholders_in_query - ): - select_query = replace_filters(select_query, filters, team) - - leftover_placeholders: list[str] = [] - for placeholder in placeholders_in_query: - if placeholder is None: - raise ValueError("Placeholder expressions are not yet supported") - if placeholder != "filters" and not placeholder.startswith("filters."): - leftover_placeholders.append(placeholder) - - placeholders_in_query = leftover_placeholders - - if len(placeholders_in_query) > 0: - if len(placeholders) == 0: - raise ValueError( - f"Query contains placeholders, but none were provided. Placeholders in query: {', '.join(s for s in placeholders_in_query if s is not None)}" - ) - select_query = replace_placeholders(select_query, placeholders) - - with timings.measure("max_limit"): - for one_query in extract_select_queries(select_query): - if one_query.limit is None: - one_query.limit = ast.Constant(value=get_default_limit_for_context(limit_context)) - - # Get printed HogQL query, and returned columns. Using a cloned query. - with timings.measure("hogql"): - with timings.measure("prepare_ast"): - hogql_query_context = dataclasses.replace( - context, - # set the team.pk here so someone can't pass a context for a different team 🤷‍️ - team_id=team.pk, - team=team, - enable_select_queries=True, - timings=timings, - modifiers=query_modifiers, - ) - - with timings.measure("clone"): - cloned_query = clone_expr(select_query, True) - select_query_hogql = cast( - ast.SelectQuery, - prepare_ast_for_printing(node=cloned_query, context=hogql_query_context, dialect="hogql"), - ) - - with timings.measure("print_ast"): - hogql = print_prepared_ast( - select_query_hogql, hogql_query_context, "hogql", pretty=pretty if pretty is not None else True - ) - print_columns = [] - columns_query = ( - next(extract_select_queries(select_query_hogql)) - if isinstance(select_query_hogql, ast.SelectSetQuery) - else select_query_hogql - ) - for node in columns_query.select: - if isinstance(node, ast.Alias): - print_columns.append(node.alias) - else: - print_columns.append( - print_prepared_ast( - node=node, - context=hogql_query_context, - dialect="hogql", - stack=[select_query_hogql], - ) - ) - - settings = settings or HogQLGlobalSettings() - if limit_context in (LimitContext.EXPORT, LimitContext.COHORT_CALCULATION, LimitContext.QUERY_ASYNC): - settings.max_execution_time = HOGQL_INCREASED_MAX_EXECUTION_TIME - - # Print the ClickHouse SQL query - with timings.measure("print_ast"): - try: - clickhouse_context = dataclasses.replace( - context, - # set the team.pk here so someone can't pass a context for a different team 🤷‍️ - team_id=team.pk, - team=team, - enable_select_queries=True, - timings=timings, - modifiers=query_modifiers, - ) - clickhouse_sql = print_ast( - select_query, - context=clickhouse_context, - dialect="clickhouse", - settings=settings, - pretty=pretty if pretty is not None else True, - ) - except Exception as e: - if debug: - clickhouse_sql = None - if isinstance(e, ExposedCHQueryError | ExposedHogQLError): - error = str(e) - else: - error = "Unknown error" - else: - raise - - if clickhouse_sql is not None: - timings_dict = timings.to_dict() - with timings.measure("clickhouse_execute"): - tag_queries( - team_id=team.pk, - query_type=query_type, - has_joins="JOIN" in clickhouse_sql, - has_json_operations="JSONExtract" in clickhouse_sql or "JSONHas" in clickhouse_sql, - timings=timings_dict, - modifiers={k: v for k, v in modifiers.model_dump().items() if v is not None} if modifiers else {}, - ) - - try: - results, types = sync_execute( - clickhouse_sql, - clickhouse_context.values, - with_column_types=True, - workload=workload, - team_id=team.pk, - readonly=True, - ) - except Exception as e: - if debug: - results = [] - if isinstance(e, ExposedCHQueryError | ExposedHogQLError): - error = str(e) - else: - error = "Unknown error" - else: - raise - - if debug and error is None: # If the query errored, explain will fail as well. - with timings.measure("explain"): - explain_results = sync_execute( - f"EXPLAIN {clickhouse_sql}", - clickhouse_context.values, - with_column_types=True, - workload=workload, - team_id=team.pk, - readonly=True, - ) - explain = [str(r[0]) for r in explain_results[0]] - with timings.measure("metadata"): - from posthog.hogql.metadata import get_hogql_metadata - - metadata = get_hogql_metadata(HogQLMetadata(language=HogLanguage.HOG_QL, query=hogql, debug=True), team) - - return HogQLQueryResponse( - query=query, - hogql=hogql, - clickhouse=clickhouse_sql, - error=error, - timings=timings.to_list(), - results=results, - columns=print_columns, - types=types, - modifiers=query_modifiers, - explain=explain, - metadata=metadata, - ) -""" diff --git a/posthog/hogql/transforms/property_types.py b/posthog/hogql/transforms/property_types.py index e561607629f1f..ec952690994af 100644 --- a/posthog/hogql/transforms/property_types.py +++ b/posthog/hogql/transforms/property_types.py @@ -1,5 +1,7 @@ from typing import Literal, cast +from django.db.models.functions.comparison import Coalesce + from posthog.clickhouse.materialized_columns import ( MaterializedColumn, TablesWithMaterializedColumns, @@ -13,9 +15,11 @@ ) from posthog.hogql.escape_sql import escape_hogql_identifier from posthog.hogql.visitor import CloningVisitor, TraversingVisitor +from posthog.models import Team from posthog.models.property import PropertyName, TableColumn from posthog.schema import PersonsOnEventsMode from posthog.hogql.database.s3_table import S3Table +from django.db import models def build_property_swapper(node: ast.AST, context: HogQLContext) -> None: @@ -24,28 +28,41 @@ def build_property_swapper(node: ast.AST, context: HogQLContext) -> None: if not context or not context.team_id: return + if not context.team: + context.team = Team.objects.get(id=context.team_id) + + if not context.team: + return + # find all properties property_finder = PropertyFinder(context) property_finder.visit(node) - # fetch them event_property_values = ( - PropertyDefinition.objects.filter( + PropertyDefinition.objects.alias( + effective_project_id=Coalesce("project_id", "team_id", output_field=models.BigIntegerField()) + ) + .filter( + effective_project_id=context.team.project_id, # type: ignore name__in=property_finder.event_properties, - team_id=context.team_id, type__in=[None, PropertyDefinition.Type.EVENT], - ).values_list("name", "property_type") + ) + .values_list("name", "property_type") if property_finder.event_properties else [] ) event_properties = {name: property_type for name, property_type in event_property_values if property_type} person_property_values = ( - PropertyDefinition.objects.filter( + PropertyDefinition.objects.alias( + effective_project_id=Coalesce("project_id", "team_id", output_field=models.BigIntegerField()) + ) + .filter( + effective_project_id=context.team.project_id, # type: ignore name__in=property_finder.person_properties, - team_id=context.team_id, type=PropertyDefinition.Type.PERSON, - ).values_list("name", "property_type") + ) + .values_list("name", "property_type") if property_finder.person_properties else [] ) @@ -55,12 +72,18 @@ def build_property_swapper(node: ast.AST, context: HogQLContext) -> None: for group_id, properties in property_finder.group_properties.items(): if not properties: continue - group_property_values = PropertyDefinition.objects.filter( - name__in=properties, - team_id=context.team_id, - type=PropertyDefinition.Type.GROUP, - group_type_index=group_id, - ).values_list("name", "property_type") + group_property_values = ( + PropertyDefinition.objects.alias( + effective_project_id=Coalesce("project_id", "team_id", output_field=models.BigIntegerField()) + ) + .filter( + effective_project_id=context.team.project_id, # type: ignore + name__in=properties, + type=PropertyDefinition.Type.GROUP, + group_type_index=group_id, + ) + .values_list("name", "property_type") + ) group_properties.update( {f"{group_id}_{name}": property_type for name, property_type in group_property_values if property_type} ) diff --git a/posthog/hogql_queries/hogql_cohort_query.py b/posthog/hogql_queries/hogql_cohort_query.py index 4ec32c737a4f0..dea989827aded 100644 --- a/posthog/hogql_queries/hogql_cohort_query.py +++ b/posthog/hogql_queries/hogql_cohort_query.py @@ -12,6 +12,7 @@ from posthog.hogql.property import get_property_type from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner +from posthog.hogql_queries.events_query_runner import EventsQueryRunner from posthog.models import Filter, Cohort, Team, Property from posthog.models.property import PropertyGroup from posthog.queries.foss_cohort_query import ( @@ -22,6 +23,7 @@ ) from posthog.schema import ( ActorsQuery, + EventsQuery, InsightActorsQuery, TrendsQuery, DateRange, @@ -51,7 +53,7 @@ class TestWrapperCohortQuery(CohortQuery): def __init__(self, filter: Filter, team: Team): cohort_query = CohortQuery(filter=filter, team=team) hogql_cohort_query = HogQLCohortQuery(cohort_query=cohort_query) - # hogql_query = hogql_cohort_query.query_str("hogql") + self.clickhouse_query = hogql_cohort_query.query_str("clickhouse") self.hogql_result = execute_hogql_query(hogql_cohort_query.get_query(), team) super().__init__(filter=filter, team=team) @@ -156,30 +158,32 @@ def get_performed_event_condition(self, prop: Property, first_time: bool = False def get_performed_event_multiple(self, prop: Property) -> ast.SelectQuery: count = parse_and_validate_positive_integer(prop.operator_value, "operator_value") - # either an action or an event - series: list[Union[EventsNode, ActionsNode]] + + if prop.explicit_datetime: + date_from = prop.explicit_datetime + else: + date_value = parse_and_validate_positive_integer(prop.time_value, "time_value") + date_interval = validate_interval(prop.time_interval) + date_from = f"-{date_value}{date_interval[:1]}" + + events_query = EventsQuery(after=date_from, select=["person_id", "count()"]) if prop.event_type == "events": - series = [EventsNode(event=prop.key)] * (count + 1) + events_query.event = prop.key elif prop.event_type == "actions": - series = [ActionsNode(id=int(prop.key))] * (count + 1) + events_query.actionId = int(prop.key) else: raise ValueError(f"Event type must be 'events' or 'actions'") - funnelStep: Optional[int] = None - - funnelCustomSteps: Optional[list[int]] = None - if prop.operator == "gte": - funnelStep = count + events_query.where = [f"count() >= {count}"] elif prop.operator == "lte": - funnelCustomSteps = list(range(1, count + 1)) + events_query.where = [f"count() <= {count}"] elif prop.operator == "gt": - funnelStep = count + 1 + events_query.where = [f"count() > {count}"] elif prop.operator == "lt": - funnelCustomSteps = list(range(1, count)) + events_query.where = [f"count() < {count}"] elif prop.operator == "eq" or prop.operator == "exact" or prop.operator is None: # type: ignore[comparison-overlap] - # People who dropped out at count + 1 - funnelStep = -(count + 1) + events_query.where = [f"count() = {count}"] else: raise ValidationError("count_operator must be gt(e), lt(e), exact, or None") @@ -190,25 +194,12 @@ def get_performed_event_multiple(self, prop: Property) -> ast.SelectQuery: if isinstance(property, PropertyGroup): raise ValidationError("Property groups are not supported in this behavioral cohort type") typed_properties.append(property_to_typed_property(property)) - for serie in series: - serie.properties = typed_properties + events_query.properties = typed_properties - if prop.explicit_datetime: - date_from = prop.explicit_datetime - else: - date_value = parse_and_validate_positive_integer(prop.time_value, "time_value") - date_interval = validate_interval(prop.time_interval) - date_from = f"-{date_value}{date_interval[:1]}" - - funnel_query = FunnelsQuery( - series=series, - dateRange=DateRange(date_from=date_from), - funnelsFilter=FunnelsFilter( - funnelWindowInterval=12 * 50, funnelWindowIntervalUnit=FunnelConversionWindowTimeUnit.MONTH - ), - ) - return self._actors_query_from_source( - FunnelsActorsQuery(source=funnel_query, funnelStep=funnelStep, funnelCustomSteps=funnelCustomSteps) + events_query_runner = EventsQueryRunner(team=self.team, query=events_query) + return cast( + ast.SelectQuery, + parse_select("select person_id as id from {event_query}", {"event_query": events_query_runner.to_query()}), ) def get_performed_event_sequence(self, prop: Property) -> ast.SelectQuery: @@ -376,7 +367,7 @@ def get_static_cohort_condition(self, prop: Property) -> ast.SelectQuery: return cast( ast.SelectQuery, parse_select( - f"SELECT person_id FROM static_cohort_people WHERE cohort_id = {cohort.pk} AND team_id = {self.team.pk}", + f"SELECT person_id as id FROM static_cohort_people WHERE cohort_id = {cohort.pk} AND team_id = {self.team.pk}", ), ) @@ -459,9 +450,9 @@ def build_conditions( subsequent_select_queries=[ SelectSetNode( select_query=query, - set_operator="UNION DISTINCT" - if all_negated - else ("EXCEPT" if negation else "INTERSECT"), + set_operator=( + "UNION DISTINCT" if all_negated else ("EXCEPT" if negation else "INTERSECT") + ), ) for (query, negation) in queries[1:] ], diff --git a/posthog/models/cohort/cohort.py b/posthog/models/cohort/cohort.py index 418b1f90ad634..642d091aa96f7 100644 --- a/posthog/models/cohort/cohort.py +++ b/posthog/models/cohort/cohort.py @@ -2,6 +2,7 @@ from datetime import datetime from typing import Any, Literal, Optional, Union, cast +import posthoganalytics import structlog from django.conf import settings from django.db import connection, models @@ -261,19 +262,22 @@ def fn(): fn() return - # Jan 29 2025 - Temporarily commented out because of celery load issues - return - - # try: - # fn() - # except Exception: - # logger.exception( - # "cohort_hogql_calculation_failed", - # id=self.pk, - # current_version=self.version, - # new_version=pending_version, - # exc_info=True, - # ) + if posthoganalytics.feature_enabled( + "enable_hogql_cohort_calculation", + str(self.team.organization_id), + groups={"organization": str(self.team.organization_id)}, + group_properties={"organization": {"id": str(self.team.organization_id)}}, + ): + try: + fn() + except Exception: + logger.exception( + "cohort_hogql_calculation_failed", + id=self.pk, + current_version=self.version, + new_version=pending_version, + exc_info=True, + ) def insert_users_by_list(self, items: list[str], *, team_id: Optional[int] = None) -> None: """ diff --git a/posthog/models/cohort/util.py b/posthog/models/cohort/util.py index 074668b2942aa..292bd847627e7 100644 --- a/posthog/models/cohort/util.py +++ b/posthog/models/cohort/util.py @@ -178,9 +178,9 @@ def get_entity_query( action=action, prepend="_{}_action".format(group_idx), hogql_context=hogql_context, - person_properties_mode=person_properties_mode - if person_properties_mode - else PersonPropertiesMode.USING_SUBQUERY, + person_properties_mode=( + person_properties_mode if person_properties_mode else PersonPropertiesMode.USING_SUBQUERY + ), ) return action_filter_query, action_params else: @@ -396,7 +396,7 @@ def _recalculate_cohortpeople_for_team_hogql( query=query, modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_JOINED), team=team, - limit_context=LimitContext.QUERY_ASYNC, + limit_context=LimitContext.COHORT_CALCULATION, ).generate_clickhouse_sql() cohort_params = hogql_context.values