From b02f1dd9540df0e5a8c407efb9af37406ee4123e Mon Sep 17 00:00:00 2001
From: OksanaGolovina <85133534+OksanaGolovina@users.noreply.github.com>
Date: Sat, 25 Sep 2021 22:01:02 +0300
Subject: [PATCH] Add files via upload
---
Yapiki_publ/Public_solution_KI_YP.ipynb | 3330 +++++++++++++++++++++++
Yapiki_publ/Read me.txt | 39 +
Yapiki_publ/city_population.rar | Bin 0 -> 5238874 bytes
Yapiki_publ/zarplaty.xlsx | Bin 0 -> 13684 bytes
4 files changed, 3369 insertions(+)
create mode 100644 Yapiki_publ/Public_solution_KI_YP.ipynb
create mode 100644 Yapiki_publ/Read me.txt
create mode 100644 Yapiki_publ/city_population.rar
create mode 100644 Yapiki_publ/zarplaty.xlsx
diff --git a/Yapiki_publ/Public_solution_KI_YP.ipynb b/Yapiki_publ/Public_solution_KI_YP.ipynb
new file mode 100644
index 0000000..196a42e
--- /dev/null
+++ b/Yapiki_publ/Public_solution_KI_YP.ipynb
@@ -0,0 +1,3330 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "id": "78b262fa"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import typing\n",
+ "import torch\n",
+ "\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML\n",
+ "from lightautoml.tasks import Task\n",
+ "\n",
+ "import phik\n",
+ "from phik.report import plot_correlation_matrix\n",
+ "from phik import report"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "id": "5114ddf7"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((279792, 77), (2974, 76))"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_data = pd.read_csv('data/train.csv')\n",
+ "test_data = pd.read_csv('data/test.csv')\n",
+ "train_data.shape, test_data.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## EDA analysis"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Анализ: отчёт pandas_profiling (внимание: файл большого размера) — https://drive.google.com/file/d/1xQl3LvpX9J0G6gJoaBjzRcBFKZi6QZXz/view?usp=sharing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for col in train_data.select_dtypes(include=np.number).columns:\n",
+ " train_data[col] = pd.to_numeric(train_data[col], downcast = 'unsigned')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " lat | \n",
+ " lng | \n",
+ " osm_amenity_points_in_0.001 | \n",
+ " osm_amenity_points_in_0.005 | \n",
+ " osm_amenity_points_in_0.0075 | \n",
+ " osm_amenity_points_in_0.01 | \n",
+ " osm_building_points_in_0.001 | \n",
+ " osm_building_points_in_0.005 | \n",
+ " osm_building_points_in_0.0075 | \n",
+ " osm_building_points_in_0.01 | \n",
+ " ... | \n",
+ " reform_count_of_houses_500 | \n",
+ " reform_house_population_1000 | \n",
+ " reform_house_population_500 | \n",
+ " reform_mean_floor_count_1000 | \n",
+ " reform_mean_floor_count_500 | \n",
+ " reform_mean_year_building_1000 | \n",
+ " reform_mean_year_building_500 | \n",
+ " total_square | \n",
+ " realty_type | \n",
+ " price_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " ... | \n",
+ " 279792.000000 | \n",
+ " 265196.000000 | \n",
+ " 252558.000000 | \n",
+ " 263084.000000 | \n",
+ " 249624.000000 | \n",
+ " 263553.000000 | \n",
+ " 250155.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ " 279792.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 54.364078 | \n",
+ " 47.763540 | \n",
+ " 2.709084 | \n",
+ " 40.605146 | \n",
+ " 81.596171 | \n",
+ " 133.285458 | \n",
+ " 0.037442 | \n",
+ " 0.885701 | \n",
+ " 2.046467 | \n",
+ " 3.748163 | \n",
+ " ... | \n",
+ " 30.110661 | \n",
+ " 2042.541716 | \n",
+ " 644.610557 | \n",
+ " 7.051233 | \n",
+ " 7.360464 | \n",
+ " 1967.532599 | \n",
+ " 1967.988580 | \n",
+ " 507.833604 | \n",
+ " 54.974088 | \n",
+ " 0.016058 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 4.245713 | \n",
+ " 17.044625 | \n",
+ " 4.202451 | \n",
+ " 53.293388 | \n",
+ " 105.193169 | \n",
+ " 172.290136 | \n",
+ " 0.391014 | \n",
+ " 6.858338 | \n",
+ " 14.801566 | \n",
+ " 25.679859 | \n",
+ " ... | \n",
+ " 27.686234 | \n",
+ " 1359.884747 | \n",
+ " 445.699329 | \n",
+ " 3.542084 | \n",
+ " 4.231369 | \n",
+ " 45.807699 | \n",
+ " 54.110015 | \n",
+ " 1704.251771 | \n",
+ " 47.856417 | \n",
+ " 0.125700 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 42.651897 | \n",
+ " 19.892178 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 5.100000 | \n",
+ " 10.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 53.226600 | \n",
+ " 37.582988 | \n",
+ " 0.000000 | \n",
+ " 7.000000 | \n",
+ " 16.000000 | \n",
+ " 28.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 10.000000 | \n",
+ " 932.000000 | \n",
+ " 290.000000 | \n",
+ " 4.591837 | \n",
+ " 4.619959 | \n",
+ " 1960.070000 | \n",
+ " 1959.890097 | \n",
+ " 65.900000 | \n",
+ " 10.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 55.679090 | \n",
+ " 39.702435 | \n",
+ " 1.000000 | \n",
+ " 22.000000 | \n",
+ " 46.000000 | \n",
+ " 77.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 25.000000 | \n",
+ " 1949.000000 | \n",
+ " 602.000000 | \n",
+ " 6.368932 | \n",
+ " 6.395349 | \n",
+ " 1970.890411 | \n",
+ " 1971.647059 | \n",
+ " 128.737034 | \n",
+ " 10.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 56.306976 | \n",
+ " 55.957523 | \n",
+ " 4.000000 | \n",
+ " 51.000000 | \n",
+ " 101.000000 | \n",
+ " 164.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 2.000000 | \n",
+ " ... | \n",
+ " 43.000000 | \n",
+ " 2978.000000 | \n",
+ " 936.000000 | \n",
+ " 8.698925 | \n",
+ " 9.100000 | \n",
+ " 1983.701754 | \n",
+ " 1986.950000 | \n",
+ " 336.000000 | \n",
+ " 110.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 69.500740 | \n",
+ " 151.777000 | \n",
+ " 46.000000 | \n",
+ " 468.000000 | \n",
+ " 851.000000 | \n",
+ " 1392.000000 | \n",
+ " 30.000000 | \n",
+ " 586.000000 | \n",
+ " 949.000000 | \n",
+ " 1162.000000 | \n",
+ " ... | \n",
+ " 289.000000 | \n",
+ " 18392.000000 | \n",
+ " 6105.000000 | \n",
+ " 53.717949 | \n",
+ " 221.666667 | \n",
+ " 2019.000000 | \n",
+ " 2020.000000 | \n",
+ " 40000.000000 | \n",
+ " 110.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8 rows × 70 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lat lng osm_amenity_points_in_0.001 \\\n",
+ "count 279792.000000 279792.000000 279792.000000 \n",
+ "mean 54.364078 47.763540 2.709084 \n",
+ "std 4.245713 17.044625 4.202451 \n",
+ "min 42.651897 19.892178 0.000000 \n",
+ "25% 53.226600 37.582988 0.000000 \n",
+ "50% 55.679090 39.702435 1.000000 \n",
+ "75% 56.306976 55.957523 4.000000 \n",
+ "max 69.500740 151.777000 46.000000 \n",
+ "\n",
+ " osm_amenity_points_in_0.005 osm_amenity_points_in_0.0075 \\\n",
+ "count 279792.000000 279792.000000 \n",
+ "mean 40.605146 81.596171 \n",
+ "std 53.293388 105.193169 \n",
+ "min 0.000000 0.000000 \n",
+ "25% 7.000000 16.000000 \n",
+ "50% 22.000000 46.000000 \n",
+ "75% 51.000000 101.000000 \n",
+ "max 468.000000 851.000000 \n",
+ "\n",
+ " osm_amenity_points_in_0.01 osm_building_points_in_0.001 \\\n",
+ "count 279792.000000 279792.000000 \n",
+ "mean 133.285458 0.037442 \n",
+ "std 172.290136 0.391014 \n",
+ "min 0.000000 0.000000 \n",
+ "25% 28.000000 0.000000 \n",
+ "50% 77.000000 0.000000 \n",
+ "75% 164.000000 0.000000 \n",
+ "max 1392.000000 30.000000 \n",
+ "\n",
+ " osm_building_points_in_0.005 osm_building_points_in_0.0075 \\\n",
+ "count 279792.000000 279792.000000 \n",
+ "mean 0.885701 2.046467 \n",
+ "std 6.858338 14.801566 \n",
+ "min 0.000000 0.000000 \n",
+ "25% 0.000000 0.000000 \n",
+ "50% 0.000000 0.000000 \n",
+ "75% 0.000000 1.000000 \n",
+ "max 586.000000 949.000000 \n",
+ "\n",
+ " osm_building_points_in_0.01 ... reform_count_of_houses_500 \\\n",
+ "count 279792.000000 ... 279792.000000 \n",
+ "mean 3.748163 ... 30.110661 \n",
+ "std 25.679859 ... 27.686234 \n",
+ "min 0.000000 ... 0.000000 \n",
+ "25% 0.000000 ... 10.000000 \n",
+ "50% 0.000000 ... 25.000000 \n",
+ "75% 2.000000 ... 43.000000 \n",
+ "max 1162.000000 ... 289.000000 \n",
+ "\n",
+ " reform_house_population_1000 reform_house_population_500 \\\n",
+ "count 265196.000000 252558.000000 \n",
+ "mean 2042.541716 644.610557 \n",
+ "std 1359.884747 445.699329 \n",
+ "min 0.000000 0.000000 \n",
+ "25% 932.000000 290.000000 \n",
+ "50% 1949.000000 602.000000 \n",
+ "75% 2978.000000 936.000000 \n",
+ "max 18392.000000 6105.000000 \n",
+ "\n",
+ " reform_mean_floor_count_1000 reform_mean_floor_count_500 \\\n",
+ "count 263084.000000 249624.000000 \n",
+ "mean 7.051233 7.360464 \n",
+ "std 3.542084 4.231369 \n",
+ "min 0.000000 0.000000 \n",
+ "25% 4.591837 4.619959 \n",
+ "50% 6.368932 6.395349 \n",
+ "75% 8.698925 9.100000 \n",
+ "max 53.717949 221.666667 \n",
+ "\n",
+ " reform_mean_year_building_1000 reform_mean_year_building_500 \\\n",
+ "count 263553.000000 250155.000000 \n",
+ "mean 1967.532599 1967.988580 \n",
+ "std 45.807699 54.110015 \n",
+ "min 1.000000 1.000000 \n",
+ "25% 1960.070000 1959.890097 \n",
+ "50% 1970.890411 1971.647059 \n",
+ "75% 1983.701754 1986.950000 \n",
+ "max 2019.000000 2020.000000 \n",
+ "\n",
+ " total_square realty_type price_type \n",
+ "count 279792.000000 279792.000000 279792.000000 \n",
+ "mean 507.833604 54.974088 0.016058 \n",
+ "std 1704.251771 47.856417 0.125700 \n",
+ "min 5.100000 10.000000 0.000000 \n",
+ "25% 65.900000 10.000000 0.000000 \n",
+ "50% 128.737034 10.000000 0.000000 \n",
+ "75% 336.000000 110.000000 0.000000 \n",
+ "max 40000.000000 110.000000 1.000000 \n",
+ "\n",
+ "[8 rows x 70 columns]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_data.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def check_features(df):\n",
+ " return pd.DataFrame({'unique_values': df.nunique(),'type': df.dtypes,'pct_missing': df.isna().sum()/len(df) * 100}).sort_values(by = 'pct_missing', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " floor | \n",
+ " reform_mean_floor_count_500 | \n",
+ " reform_mean_year_building_500 | \n",
+ " reform_house_population_500 | \n",
+ " reform_mean_floor_count_1000 | \n",
+ " reform_mean_year_building_1000 | \n",
+ " reform_house_population_1000 | \n",
+ " street | \n",
+ " osm_city_nearest_population | \n",
+ " region | \n",
+ " ... | \n",
+ " osm_finance_points_in_0.005 | \n",
+ " osm_finance_points_in_0.001 | \n",
+ " osm_culture_points_in_0.01 | \n",
+ " osm_culture_points_in_0.0075 | \n",
+ " osm_culture_points_in_0.005 | \n",
+ " osm_culture_points_in_0.001 | \n",
+ " osm_crossing_points_in_0.01 | \n",
+ " osm_crossing_points_in_0.0075 | \n",
+ " osm_crossing_points_in_0.005 | \n",
+ " price_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " unique_values | \n",
+ " 206 | \n",
+ " 17121 | \n",
+ " 43143 | \n",
+ " 2366 | \n",
+ " 49017 | \n",
+ " 76044 | \n",
+ " 6206 | \n",
+ " 28841 | \n",
+ " 169 | \n",
+ " 49 | \n",
+ " ... | \n",
+ " 29 | \n",
+ " 7 | \n",
+ " 216 | \n",
+ " 159 | \n",
+ " 111 | \n",
+ " 16 | \n",
+ " 268 | \n",
+ " 191 | \n",
+ " 108 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " type | \n",
+ " object | \n",
+ " float64 | \n",
+ " float64 | \n",
+ " float64 | \n",
+ " float64 | \n",
+ " float64 | \n",
+ " float64 | \n",
+ " object | \n",
+ " float64 | \n",
+ " object | \n",
+ " ... | \n",
+ " uint8 | \n",
+ " uint8 | \n",
+ " uint16 | \n",
+ " uint16 | \n",
+ " uint8 | \n",
+ " uint8 | \n",
+ " uint16 | \n",
+ " uint8 | \n",
+ " uint8 | \n",
+ " uint8 | \n",
+ "
\n",
+ " \n",
+ " pct_missing | \n",
+ " 62.9886 | \n",
+ " 10.7823 | \n",
+ " 10.5925 | \n",
+ " 9.73366 | \n",
+ " 5.97158 | \n",
+ " 5.80395 | \n",
+ " 5.21673 | \n",
+ " 0.573998 | \n",
+ " 0.0196575 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 77 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " floor reform_mean_floor_count_500 \\\n",
+ "unique_values 206 17121 \n",
+ "type object float64 \n",
+ "pct_missing 62.9886 10.7823 \n",
+ "\n",
+ " reform_mean_year_building_500 reform_house_population_500 \\\n",
+ "unique_values 43143 2366 \n",
+ "type float64 float64 \n",
+ "pct_missing 10.5925 9.73366 \n",
+ "\n",
+ " reform_mean_floor_count_1000 reform_mean_year_building_1000 \\\n",
+ "unique_values 49017 76044 \n",
+ "type float64 float64 \n",
+ "pct_missing 5.97158 5.80395 \n",
+ "\n",
+ " reform_house_population_1000 street \\\n",
+ "unique_values 6206 28841 \n",
+ "type float64 object \n",
+ "pct_missing 5.21673 0.573998 \n",
+ "\n",
+ " osm_city_nearest_population region ... \\\n",
+ "unique_values 169 49 ... \n",
+ "type float64 object ... \n",
+ "pct_missing 0.0196575 0 ... \n",
+ "\n",
+ " osm_finance_points_in_0.005 osm_finance_points_in_0.001 \\\n",
+ "unique_values 29 7 \n",
+ "type uint8 uint8 \n",
+ "pct_missing 0 0 \n",
+ "\n",
+ " osm_culture_points_in_0.01 osm_culture_points_in_0.0075 \\\n",
+ "unique_values 216 159 \n",
+ "type uint16 uint16 \n",
+ "pct_missing 0 0 \n",
+ "\n",
+ " osm_culture_points_in_0.005 osm_culture_points_in_0.001 \\\n",
+ "unique_values 111 16 \n",
+ "type uint8 uint8 \n",
+ "pct_missing 0 0 \n",
+ "\n",
+ " osm_crossing_points_in_0.01 osm_crossing_points_in_0.0075 \\\n",
+ "unique_values 268 191 \n",
+ "type uint16 uint8 \n",
+ "pct_missing 0 0 \n",
+ "\n",
+ " osm_crossing_points_in_0.005 price_type \n",
+ "unique_values 108 2 \n",
+ "type uint8 uint8 \n",
+ "pct_missing 0 0 \n",
+ "\n",
+ "[3 rows x 77 columns]"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "check_features(train_data).T"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([nan, 3.0, 4.0, -1.0, 1.0, 14.0, 2.0, 8.0, -2.0, 6.0, 10.0, 5.0,\n",
+ " 16.0, 19.0, 7.0, 9.0, 58.0, 24.0, 18.0, 26.0, 17.0, 48.0, 11.0,\n",
+ " -3.0, 15.0, 22.0, 60.0, 12.0, 21.0, 35.0, 28.0, 38.0, 39.0, 13.0,\n",
+ " 81.0, 44.0, 82.0, 25.0, 45.0, 47.0, 23.0, 37.0, 29.0, 113.0, 78.0,\n",
+ " 42.0, 69.0, 27.0, 46.0, 53.0, 80.0, 70.0, 76.0, 64.0, 30.0, 73.0,\n",
+ " 77.0, 52.0, 67.0, 65.0, 20.0, 40.0, 49.0, 75.0, 93.0, 94.0, 91.0,\n",
+ " 72.0, 79.0, 84.0, 92.0, 33.0, 66.0, 90.0, 31.0, 36.0, 61.0, 71.0,\n",
+ " 68.0, 51.0, 97.0, 43.0, 95.0, 85.0, 50.0, 0.0, 62.0, 54.0, 74.0,\n",
+ " 57.0, 41.0, 34.0, 59.0, 56.0, 123.0, 55.0, 83.0, '27.0', '1.0',\n",
+ " '5.0', '-1.0', '67.0', '2.0', '0.0', '4.0', '6.0', '3.0', '15.0',\n",
+ " '10.0', '11.0', '30.0', '12.0', '-2.0', '14.0', '36.0', '8.0',\n",
+ " '50.0', '17.0', '19.0', '37.0', '68.0', '7.0', '42.0', '9.0',\n",
+ " '16.0', '20.0', '53.0', '91.0', '84.0', '38.0', '21.0', '48.0',\n",
+ " '22.0', '23.0', '1', '18.0', 'подвал, 1', '2', 'подвал',\n",
+ " 'цоколь, 1', '1,2,антресоль', 'цоколь', '4', '5', 'тех.этаж (6)',\n",
+ " '3', 'Подвал', 'Цоколь', '10', 'фактически на уровне 1 этажа', '6',\n",
+ " '1,2,3', '1, подвал', '1,2,3,4', '1,2', '1,2,3,4,5', '5, мансарда',\n",
+ " '1-й, подвал', '12', '15', '13', '1, подвал, антресоль', 'мезонин',\n",
+ " 'подвал, 1-3', '8', '7', '1 (Цокольный этаж)',\n",
+ " '3, Мансарда (4 эт)', 'подвал,1', '1, антресоль', '1-3',\n",
+ " 'мансарда (4эт)', '1, 2.', '9', 'подвал , 1 ', '1, 2',\n",
+ " 'подвал, 1,2,3', '1 + подвал (без отделки)', 'мансарда', '2,3',\n",
+ " '4, 5', '1-й, 2-й', '18', '1 этаж, подвал', '1, цоколь',\n",
+ " 'подвал, 1-7, техэтаж', '3 (антресоль)', '1, 2, 3',\n",
+ " 'Цоколь, 1,2(мансарда)', 'подвал, 3. 4 этаж', 'подвал, 1-4 этаж',\n",
+ " 'подва, 1.2 этаж', '2, 3', '-1', '1.2', '11', '36', '7,8',\n",
+ " '1 этаж', '1-й', '3 этаж', '4 этаж', '5 этаж', 'подвал,1,2,3,4,5',\n",
+ " '29', 'подвал, цоколь, 1 этаж', '3, мансарда'], dtype=object)"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_data['floor'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
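+ "# Manually process the floor column: map the strings '1' and '1.0' to 1 (replace runs over the whole DataFrame), then binarize floor to 1 where it equals 1 and 0 otherwise\n",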
+ "for df in [train_data, test_data]:\n",
+ " df.replace('1', 1, inplace=True)\n",
+ " df.replace('1.0', 1, inplace=True)\n",
+ "\n",
+ "train_data['floor'] = train_data.apply(lambda row: 1 if row['floor'] == 1 else 0, axis=1)\n",
+ "test_data['floor'] = test_data.apply(lambda row: 1 if row['floor'] == 1 else 0, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Manually dropped the duplicated features computed at different radii, keeping only the \"in 0.01\" variant for osm and \"1000\" for reform, so that the correlation matrix fits on screen\n",
+ "columns_for_pearson = [\n",
+ " 'per_square_meter_price', \n",
+ " 'city', 'floor',\n",
+ " 'osm_amenity_points_in_0.01', \n",
+ " 'osm_building_points_in_0.01', \n",
+ " 'osm_catering_points_in_0.01', \n",
+ " 'osm_city_closest_dist', \n",
+ " 'osm_city_nearest_name', \n",
+ " 'osm_city_nearest_population',\n",
+ " 'osm_crossing_closest_dist', \n",
+ " 'osm_crossing_points_in_0.01', \n",
+ " 'osm_culture_points_in_0.01',\n",
+ " 'osm_healthcare_points_in_0.01', \n",
+ " 'osm_historic_points_in_0.01', \n",
+ " 'osm_hotels_points_in_0.01',\n",
+ " 'osm_leisure_points_in_0.01', \n",
+ " 'osm_offices_points_in_0.01', \n",
+ " 'osm_shops_points_in_0.01', \n",
+ " 'osm_subway_closest_dist',\n",
+ " 'osm_train_stop_closest_dist', \n",
+ " 'osm_train_stop_points_in_0.01', \n",
+ " 'osm_transport_stop_closest_dist',\n",
+ " 'osm_transport_stop_points_in_0.01', \n",
+ " 'reform_count_of_houses_1000', \n",
+ " 'reform_house_population_1000',\n",
+ " 'reform_mean_floor_count_1000', \n",
+ " 'reform_mean_year_building_1000', \n",
+ " 'region', \n",
+ " 'total_square', \n",
+ " 'street', \n",
+ " 'date', \n",
+ " 'realty_type', \n",
+ " 'price_type']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " pearson | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " per_square_meter_price | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " osm_city_nearest_population | \n",
+ " 0.55 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " osm_amenity_points_in_0.01 | \n",
+ " 0.48 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " osm_healthcare_points_in_0.01 | \n",
+ " 0.46 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " osm_catering_points_in_0.01 | \n",
+ " 0.46 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " osm_leisure_points_in_0.01 | \n",
+ " 0.46 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " osm_shops_points_in_0.01 | \n",
+ " 0.44 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " osm_transport_stop_points_in_0.01 | \n",
+ " 0.43 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " osm_crossing_points_in_0.01 | \n",
+ " 0.43 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " osm_offices_points_in_0.01 | \n",
+ " 0.42 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature pearson\n",
+ "0 per_square_meter_price 1.00\n",
+ "6 osm_city_nearest_population 0.55\n",
+ "2 osm_amenity_points_in_0.01 0.48\n",
+ "10 osm_healthcare_points_in_0.01 0.46\n",
+ "4 osm_catering_points_in_0.01 0.46\n",
+ "13 osm_leisure_points_in_0.01 0.46\n",
+ "15 osm_shops_points_in_0.01 0.44\n",
+ "20 osm_transport_stop_points_in_0.01 0.43\n",
+ "8 osm_crossing_points_in_0.01 0.43\n",
+ "14 osm_offices_points_in_0.01 0.42"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Find the features with the strongest Pearson correlation with the target variable.\n",
+ "# The cutoff is an absolute correlation value of 0.4\n",
+ "pearson = train_data[columns_for_pearson].corr().round(2)\n",
+ "pearson_max_corr = (\n",
+ " pearson['per_square_meter_price'].to_frame().reset_index()\n",
+ " .rename(columns={'per_square_meter_price':'pearson', 'index':'feature'})\n",
+ " .sort_values(by='pearson', ascending=False)\n",
+ " .query('pearson > 0.4 or pearson < -0.4')\n",
+ " )\n",
+ "pearson_max_corr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# fig, ax = plt.subplots(figsize=(20, 20))\n",
+ "# sns.heatmap(train_data[columns_for_pearson].corr().round(2), annot=True, square=True, cmap='mako')\n",
+ "# ax.set_title(label = 'МАТРИЦА КОРРЕЛЯЦИИ ПРИЗНАКОВ $r$', fontdict={'fontsize': 15, 'fontweight': 'bold'})\n",
+ "# plt.show();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Высокая взаимная корреляция (> 0.8) у следующих переменных:\n",
+ "- amenity и catering, healthcare, office, shop\n",
+ "- catering и shop\n",
+ "- office и shop, catering\n",
+ "- healthcare и catering, office, shop\n",
+ "- transport и crossing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dropped city and street (street is encoded)\n",
+ "\n",
+ "columns_for_phik = [\n",
+ " 'per_square_meter_price',\n",
+ " 'floor',\n",
+ " 'osm_amenity_points_in_0.01',\n",
+ " 'osm_building_points_in_0.01',\n",
+ " 'osm_catering_points_in_0.01',\n",
+ " 'osm_city_closest_dist',\n",
+ " 'osm_city_nearest_name',\n",
+ " 'osm_city_nearest_population',\n",
+ " 'osm_crossing_closest_dist',\n",
+ " 'osm_crossing_points_in_0.01',\n",
+ " 'osm_culture_points_in_0.01',\n",
+ " 'osm_healthcare_points_in_0.01',\n",
+ " 'osm_historic_points_in_0.01',\n",
+ " 'osm_hotels_points_in_0.01',\n",
+ " 'osm_leisure_points_in_0.01',\n",
+ " 'osm_offices_points_in_0.01',\n",
+ " 'osm_shops_points_in_0.01',\n",
+ " 'osm_subway_closest_dist',\n",
+ " 'osm_train_stop_closest_dist',\n",
+ " 'osm_train_stop_points_in_0.01',\n",
+ " 'osm_transport_stop_closest_dist',\n",
+ " 'osm_transport_stop_points_in_0.01',\n",
+ " 'reform_count_of_houses_1000',\n",
+ " 'reform_house_population_1000',\n",
+ " 'reform_mean_floor_count_1000',\n",
+ " 'reform_mean_year_building_1000',\n",
+ " 'region',\n",
+ " 'total_square',\n",
+ " 'date',\n",
+ " 'realty_type',\n",
+ " 'price_type']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " per_square_meter_price | \n",
+ " floor | \n",
+ " osm_amenity_points_in_0.01 | \n",
+ " osm_building_points_in_0.01 | \n",
+ " osm_catering_points_in_0.01 | \n",
+ " osm_city_closest_dist | \n",
+ " osm_city_nearest_name | \n",
+ " osm_city_nearest_population | \n",
+ " osm_crossing_closest_dist | \n",
+ " osm_crossing_points_in_0.01 | \n",
+ " ... | \n",
+ " osm_transport_stop_points_in_0.01 | \n",
+ " reform_count_of_houses_1000 | \n",
+ " reform_house_population_1000 | \n",
+ " reform_mean_floor_count_1000 | \n",
+ " reform_mean_year_building_1000 | \n",
+ " region | \n",
+ " total_square | \n",
+ " date | \n",
+ " realty_type | \n",
+ " price_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " per_square_meter_price | \n",
+ " 1.00 | \n",
+ " 0.18 | \n",
+ " 0.48 | \n",
+ " 0.02 | \n",
+ " 0.48 | \n",
+ " 0.05 | \n",
+ " 0.57 | \n",
+ " 0.55 | \n",
+ " 0.00 | \n",
+ " 0.57 | \n",
+ " ... | \n",
+ " 0.46 | \n",
+ " 0.16 | \n",
+ " 0.19 | \n",
+ " 0.23 | \n",
+ " 0.03 | \n",
+ " 0.52 | \n",
+ " 0.02 | \n",
+ " 0.11 | \n",
+ " 0.18 | \n",
+ " 0.05 | \n",
+ "
\n",
+ " \n",
+ " floor | \n",
+ " 0.18 | \n",
+ " 1.00 | \n",
+ " 0.07 | \n",
+ " 0.01 | \n",
+ " 0.06 | \n",
+ " 0.08 | \n",
+ " 0.28 | \n",
+ " 0.15 | \n",
+ " 0.01 | \n",
+ " 0.14 | \n",
+ " ... | \n",
+ " 0.12 | \n",
+ " 0.03 | \n",
+ " 0.10 | \n",
+ " 0.15 | \n",
+ " 0.00 | \n",
+ " 0.23 | \n",
+ " 0.01 | \n",
+ " 0.22 | \n",
+ " 0.04 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " osm_amenity_points_in_0.01 | \n",
+ " 0.48 | \n",
+ " 0.07 | \n",
+ " 1.00 | \n",
+ " 0.02 | \n",
+ " 0.96 | \n",
+ " 0.07 | \n",
+ " 0.56 | \n",
+ " 0.49 | \n",
+ " 0.00 | \n",
+ " 0.82 | \n",
+ " ... | \n",
+ " 0.69 | \n",
+ " 0.63 | \n",
+ " 0.62 | \n",
+ " 0.25 | \n",
+ " 0.04 | \n",
+ " 0.45 | \n",
+ " 0.05 | \n",
+ " 0.07 | \n",
+ " 0.18 | \n",
+ " 0.22 | \n",
+ "
\n",
+ " \n",
+ " osm_building_points_in_0.01 | \n",
+ " 0.02 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ " 1.00 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ " 0.28 | \n",
+ " 0.17 | \n",
+ " 0.00 | \n",
+ " 0.11 | \n",
+ " ... | \n",
+ " 0.04 | \n",
+ " 0.03 | \n",
+ " 0.03 | \n",
+ " 0.03 | \n",
+ " 0.00 | \n",
+ " 0.22 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.03 | \n",
+ " 0.01 | \n",
+ "
\n",
+ " \n",
+ " osm_catering_points_in_0.01 | \n",
+ " 0.48 | \n",
+ " 0.06 | \n",
+ " 0.96 | \n",
+ " 0.02 | \n",
+ " 1.00 | \n",
+ " 0.05 | \n",
+ " 0.51 | \n",
+ " 0.47 | \n",
+ " 0.00 | \n",
+ " 0.80 | \n",
+ " ... | \n",
+ " 0.63 | \n",
+ " 0.57 | \n",
+ " 0.60 | \n",
+ " 0.23 | \n",
+ " 0.03 | \n",
+ " 0.40 | \n",
+ " 0.05 | \n",
+ " 0.07 | \n",
+ " 0.19 | \n",
+ " 0.20 | \n",
+ "
\n",
+ " \n",
+ " osm_city_closest_dist | \n",
+ " 0.05 | \n",
+ " 0.08 | \n",
+ " 0.07 | \n",
+ " 0.00 | \n",
+ " 0.05 | \n",
+ " 1.00 | \n",
+ " 0.80 | \n",
+ " 0.11 | \n",
+ " 0.73 | \n",
+ " 0.22 | \n",
+ " ... | \n",
+ " 0.15 | \n",
+ " 0.09 | \n",
+ " 0.11 | \n",
+ " 0.13 | \n",
+ " 0.00 | \n",
+ " 0.40 | \n",
+ " 0.00 | \n",
+ " 0.02 | \n",
+ " 0.06 | \n",
+ " 0.01 | \n",
+ "
\n",
+ " \n",
+ " osm_city_nearest_name | \n",
+ " 0.57 | \n",
+ " 0.28 | \n",
+ " 0.56 | \n",
+ " 0.28 | \n",
+ " 0.51 | \n",
+ " 0.80 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 0.42 | \n",
+ " 0.64 | \n",
+ " ... | \n",
+ " 0.62 | \n",
+ " 0.55 | \n",
+ " 0.48 | \n",
+ " 0.59 | \n",
+ " 0.44 | \n",
+ " 1.00 | \n",
+ " 0.12 | \n",
+ " 0.22 | \n",
+ " 0.37 | \n",
+ " 0.30 | \n",
+ "
\n",
+ " \n",
+ " osm_city_nearest_population | \n",
+ " 0.55 | \n",
+ " 0.15 | \n",
+ " 0.49 | \n",
+ " 0.17 | \n",
+ " 0.47 | \n",
+ " 0.11 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 0.60 | \n",
+ " ... | \n",
+ " 0.49 | \n",
+ " 0.20 | \n",
+ " 0.24 | \n",
+ " 0.31 | \n",
+ " 0.05 | \n",
+ " 0.97 | \n",
+ " 0.07 | \n",
+ " 0.14 | \n",
+ " 0.14 | \n",
+ " 0.07 | \n",
+ "
\n",
+ " \n",
+ " osm_crossing_closest_dist | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.73 | \n",
+ " 0.42 | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " ... | \n",
+ " 0.02 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.07 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " osm_crossing_points_in_0.01 | \n",
+ " 0.57 | \n",
+ " 0.14 | \n",
+ " 0.82 | \n",
+ " 0.11 | \n",
+ " 0.80 | \n",
+ " 0.22 | \n",
+ " 0.64 | \n",
+ " 0.60 | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " ... | \n",
+ " 0.77 | \n",
+ " 0.67 | \n",
+ " 0.59 | \n",
+ " 0.41 | \n",
+ " 0.17 | \n",
+ " 0.55 | \n",
+ " 0.18 | \n",
+ " 0.17 | \n",
+ " 0.31 | \n",
+ " 0.09 | \n",
+ "
\n",
+ " \n",
+ " osm_culture_points_in_0.01 | \n",
+ " 0.31 | \n",
+ " 0.04 | \n",
+ " 0.61 | \n",
+ " 0.00 | \n",
+ " 0.60 | \n",
+ " 0.01 | \n",
+ " 0.46 | \n",
+ " 0.22 | \n",
+ " 0.00 | \n",
+ " 0.62 | \n",
+ " ... | \n",
+ " 0.34 | \n",
+ " 0.37 | \n",
+ " 0.15 | \n",
+ " 0.12 | \n",
+ " 0.00 | \n",
+ " 0.39 | \n",
+ " 0.04 | \n",
+ " 0.03 | \n",
+ " 0.07 | \n",
+ " 0.04 | \n",
+ "
\n",
+ " \n",
+ " osm_healthcare_points_in_0.01 | \n",
+ " 0.46 | \n",
+ " 0.10 | \n",
+ " 0.85 | \n",
+ " 0.02 | \n",
+ " 0.80 | \n",
+ " 0.07 | \n",
+ " 0.56 | \n",
+ " 0.49 | \n",
+ " 0.00 | \n",
+ " 0.81 | \n",
+ " ... | \n",
+ " 0.68 | \n",
+ " 0.53 | \n",
+ " 0.42 | \n",
+ " 0.23 | \n",
+ " 0.05 | \n",
+ " 0.46 | \n",
+ " 0.06 | \n",
+ " 0.07 | \n",
+ " 0.13 | \n",
+ " 0.04 | \n",
+ "
\n",
+ " \n",
+ " osm_historic_points_in_0.01 | \n",
+ " 0.38 | \n",
+ " 0.05 | \n",
+ " 0.75 | \n",
+ " 0.01 | \n",
+ " 0.73 | \n",
+ " 0.04 | \n",
+ " 0.45 | \n",
+ " 0.35 | \n",
+ " 0.00 | \n",
+ " 0.76 | \n",
+ " ... | \n",
+ " 0.52 | \n",
+ " 0.52 | \n",
+ " 0.19 | \n",
+ " 0.19 | \n",
+ " 0.04 | \n",
+ " 0.33 | \n",
+ " 0.08 | \n",
+ " 0.05 | \n",
+ " 0.14 | \n",
+ " 0.05 | \n",
+ "
\n",
+ " \n",
+ " osm_hotels_points_in_0.01 | \n",
+ " 0.37 | \n",
+ " 0.06 | \n",
+ " 0.88 | \n",
+ " 0.00 | \n",
+ " 0.88 | \n",
+ " 0.03 | \n",
+ " 0.44 | \n",
+ " 0.42 | \n",
+ " 0.00 | \n",
+ " 0.71 | \n",
+ " ... | \n",
+ " 0.50 | \n",
+ " 0.52 | \n",
+ " 0.56 | \n",
+ " 0.17 | \n",
+ " 0.01 | \n",
+ " 0.36 | \n",
+ " 0.05 | \n",
+ " 0.05 | \n",
+ " 0.08 | \n",
+ " 0.21 | \n",
+ "
\n",
+ " \n",
+ " osm_leisure_points_in_0.01 | \n",
+ " 0.47 | \n",
+ " 0.10 | \n",
+ " 0.72 | \n",
+ " 0.03 | \n",
+ " 0.68 | \n",
+ " 0.07 | \n",
+ " 0.57 | \n",
+ " 0.50 | \n",
+ " 0.00 | \n",
+ " 0.71 | \n",
+ " ... | \n",
+ " 0.58 | \n",
+ " 0.40 | \n",
+ " 0.39 | \n",
+ " 0.27 | \n",
+ " 0.05 | \n",
+ " 0.48 | \n",
+ " 0.06 | \n",
+ " 0.08 | \n",
+ " 0.14 | \n",
+ " 0.09 | \n",
+ "
\n",
+ " \n",
+ " osm_offices_points_in_0.01 | \n",
+ " 0.47 | \n",
+ " 0.08 | \n",
+ " 0.89 | \n",
+ " 0.02 | \n",
+ " 0.87 | \n",
+ " 0.05 | \n",
+ " 0.57 | \n",
+ " 0.52 | \n",
+ " 0.00 | \n",
+ " 0.76 | \n",
+ " ... | \n",
+ " 0.61 | \n",
+ " 0.56 | \n",
+ " 0.59 | \n",
+ " 0.27 | \n",
+ " 0.04 | \n",
+ " 0.47 | \n",
+ " 0.05 | \n",
+ " 0.07 | \n",
+ " 0.19 | \n",
+ " 0.20 | \n",
+ "
\n",
+ " \n",
+ " osm_shops_points_in_0.01 | \n",
+ " 0.47 | \n",
+ " 0.07 | \n",
+ " 0.96 | \n",
+ " 0.02 | \n",
+ " 0.92 | \n",
+ " 0.07 | \n",
+ " 0.57 | \n",
+ " 0.49 | \n",
+ " 0.00 | \n",
+ " 0.79 | \n",
+ " ... | \n",
+ " 0.67 | \n",
+ " 0.64 | \n",
+ " 0.65 | \n",
+ " 0.25 | \n",
+ " 0.06 | \n",
+ " 0.47 | \n",
+ " 0.03 | \n",
+ " 0.07 | \n",
+ " 0.17 | \n",
+ " 0.20 | \n",
+ "
\n",
+ " \n",
+ " osm_subway_closest_dist | \n",
+ " 0.18 | \n",
+ " 0.09 | \n",
+ " 0.14 | \n",
+ " 0.12 | \n",
+ " 0.12 | \n",
+ " 0.41 | \n",
+ " 0.99 | \n",
+ " 0.31 | \n",
+ " 0.32 | \n",
+ " 0.20 | \n",
+ " ... | \n",
+ " 0.18 | \n",
+ " 0.12 | \n",
+ " 0.09 | \n",
+ " 0.15 | \n",
+ " 0.04 | \n",
+ " 0.91 | \n",
+ " 0.03 | \n",
+ " 0.06 | \n",
+ " 0.09 | \n",
+ " 0.04 | \n",
+ "
\n",
+ " \n",
+ " osm_train_stop_closest_dist | \n",
+ " 0.02 | \n",
+ " 0.02 | \n",
+ " 0.03 | \n",
+ " 0.00 | \n",
+ " 0.02 | \n",
+ " 0.73 | \n",
+ " 0.88 | \n",
+ " 0.03 | \n",
+ " 0.79 | \n",
+ " 0.09 | \n",
+ " ... | \n",
+ " 0.06 | \n",
+ " 0.03 | \n",
+ " 0.05 | \n",
+ " 0.07 | \n",
+ " 0.00 | \n",
+ " 0.43 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " osm_train_stop_points_in_0.01 | \n",
+ " 0.27 | \n",
+ " 0.02 | \n",
+ " 0.33 | \n",
+ " 0.00 | \n",
+ " 0.34 | \n",
+ " 0.01 | \n",
+ " 0.32 | \n",
+ " 0.29 | \n",
+ " 0.00 | \n",
+ " 0.46 | \n",
+ " ... | \n",
+ " 0.25 | \n",
+ " 0.12 | \n",
+ " 0.13 | \n",
+ " 0.12 | \n",
+ " 0.05 | \n",
+ " 0.26 | \n",
+ " 0.04 | \n",
+ " 0.04 | \n",
+ " 0.11 | \n",
+ " 0.03 | \n",
+ "
\n",
+ " \n",
+ " osm_transport_stop_closest_dist | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.73 | \n",
+ " 0.25 | \n",
+ " 0.01 | \n",
+ " 0.88 | \n",
+ " 0.00 | \n",
+ " ... | \n",
+ " 0.02 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ " 0.14 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " osm_transport_stop_points_in_0.01 | \n",
+ " 0.46 | \n",
+ " 0.12 | \n",
+ " 0.69 | \n",
+ " 0.04 | \n",
+ " 0.63 | \n",
+ " 0.15 | \n",
+ " 0.62 | \n",
+ " 0.49 | \n",
+ " 0.02 | \n",
+ " 0.77 | \n",
+ " ... | \n",
+ " 1.00 | \n",
+ " 0.52 | \n",
+ " 0.45 | \n",
+ " 0.33 | \n",
+ " 0.05 | \n",
+ " 0.52 | \n",
+ " 0.05 | \n",
+ " 0.08 | \n",
+ " 0.20 | \n",
+ " 0.07 | \n",
+ "
\n",
+ " \n",
+ " reform_count_of_houses_1000 | \n",
+ " 0.16 | \n",
+ " 0.03 | \n",
+ " 0.63 | \n",
+ " 0.03 | \n",
+ " 0.57 | \n",
+ " 0.09 | \n",
+ " 0.55 | \n",
+ " 0.20 | \n",
+ " 0.01 | \n",
+ " 0.67 | \n",
+ " ... | \n",
+ " 0.52 | \n",
+ " 1.00 | \n",
+ " 0.55 | \n",
+ " 0.38 | \n",
+ " 0.06 | \n",
+ " 0.43 | \n",
+ " 0.05 | \n",
+ " 0.03 | \n",
+ " 0.13 | \n",
+ " 0.13 | \n",
+ "
\n",
+ " \n",
+ " reform_house_population_1000 | \n",
+ " 0.19 | \n",
+ " 0.10 | \n",
+ " 0.62 | \n",
+ " 0.03 | \n",
+ " 0.60 | \n",
+ " 0.11 | \n",
+ " 0.48 | \n",
+ " 0.24 | \n",
+ " 0.00 | \n",
+ " 0.59 | \n",
+ " ... | \n",
+ " 0.45 | \n",
+ " 0.55 | \n",
+ " 1.00 | \n",
+ " 0.31 | \n",
+ " 0.04 | \n",
+ " 0.36 | \n",
+ " 0.02 | \n",
+ " 0.04 | \n",
+ " 0.20 | \n",
+ " 0.16 | \n",
+ "
\n",
+ " \n",
+ " reform_mean_floor_count_1000 | \n",
+ " 0.23 | \n",
+ " 0.15 | \n",
+ " 0.25 | \n",
+ " 0.03 | \n",
+ " 0.23 | \n",
+ " 0.13 | \n",
+ " 0.59 | \n",
+ " 0.31 | \n",
+ " 0.01 | \n",
+ " 0.41 | \n",
+ " ... | \n",
+ " 0.33 | \n",
+ " 0.38 | \n",
+ " 0.31 | \n",
+ " 1.00 | \n",
+ " 0.08 | \n",
+ " 0.47 | \n",
+ " 0.02 | \n",
+ " 0.06 | \n",
+ " 0.14 | \n",
+ " 0.05 | \n",
+ "
\n",
+ " \n",
+ " reform_mean_year_building_1000 | \n",
+ " 0.03 | \n",
+ " 0.00 | \n",
+ " 0.04 | \n",
+ " 0.00 | \n",
+ " 0.03 | \n",
+ " 0.00 | \n",
+ " 0.44 | \n",
+ " 0.05 | \n",
+ " 0.00 | \n",
+ " 0.17 | \n",
+ " ... | \n",
+ " 0.05 | \n",
+ " 0.06 | \n",
+ " 0.04 | \n",
+ " 0.08 | \n",
+ " 1.00 | \n",
+ " 0.37 | \n",
+ " 0.00 | \n",
+ " 0.02 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " region | \n",
+ " 0.52 | \n",
+ " 0.23 | \n",
+ " 0.45 | \n",
+ " 0.22 | \n",
+ " 0.40 | \n",
+ " 0.40 | \n",
+ " 1.00 | \n",
+ " 0.97 | \n",
+ " 0.07 | \n",
+ " 0.55 | \n",
+ " ... | \n",
+ " 0.52 | \n",
+ " 0.43 | \n",
+ " 0.36 | \n",
+ " 0.47 | \n",
+ " 0.37 | \n",
+ " 1.00 | \n",
+ " 0.08 | \n",
+ " 0.17 | \n",
+ " 0.28 | \n",
+ " 0.27 | \n",
+ "
\n",
+ " \n",
+ " total_square | \n",
+ " 0.02 | \n",
+ " 0.01 | \n",
+ " 0.05 | \n",
+ " 0.00 | \n",
+ " 0.05 | \n",
+ " 0.00 | \n",
+ " 0.12 | \n",
+ " 0.07 | \n",
+ " 0.00 | \n",
+ " 0.18 | \n",
+ " ... | \n",
+ " 0.05 | \n",
+ " 0.05 | \n",
+ " 0.02 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ " 0.08 | \n",
+ " 1.00 | \n",
+ " 0.02 | \n",
+ " 0.04 | \n",
+ " 0.01 | \n",
+ "
\n",
+ " \n",
+ " date | \n",
+ " 0.11 | \n",
+ " 0.22 | \n",
+ " 0.07 | \n",
+ " 0.01 | \n",
+ " 0.07 | \n",
+ " 0.02 | \n",
+ " 0.22 | \n",
+ " 0.14 | \n",
+ " 0.00 | \n",
+ " 0.17 | \n",
+ " ... | \n",
+ " 0.08 | \n",
+ " 0.03 | \n",
+ " 0.04 | \n",
+ " 0.06 | \n",
+ " 0.02 | \n",
+ " 0.17 | \n",
+ " 0.02 | \n",
+ " 1.00 | \n",
+ " 0.10 | \n",
+ " 0.02 | \n",
+ "
\n",
+ " \n",
+ " realty_type | \n",
+ " 0.18 | \n",
+ " 0.04 | \n",
+ " 0.18 | \n",
+ " 0.03 | \n",
+ " 0.19 | \n",
+ " 0.06 | \n",
+ " 0.37 | \n",
+ " 0.14 | \n",
+ " 0.01 | \n",
+ " 0.31 | \n",
+ " ... | \n",
+ " 0.20 | \n",
+ " 0.13 | \n",
+ " 0.20 | \n",
+ " 0.14 | \n",
+ " 0.02 | \n",
+ " 0.28 | \n",
+ " 0.04 | \n",
+ " 0.10 | \n",
+ " 1.00 | \n",
+ " 0.02 | \n",
+ "
\n",
+ " \n",
+ " price_type | \n",
+ " 0.05 | \n",
+ " 0.00 | \n",
+ " 0.22 | \n",
+ " 0.01 | \n",
+ " 0.20 | \n",
+ " 0.01 | \n",
+ " 0.30 | \n",
+ " 0.07 | \n",
+ " 0.00 | \n",
+ " 0.09 | \n",
+ " ... | \n",
+ " 0.07 | \n",
+ " 0.13 | \n",
+ " 0.16 | \n",
+ " 0.05 | \n",
+ " 0.00 | \n",
+ " 0.27 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ " 0.02 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
31 rows × 31 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " per_square_meter_price floor \\\n",
+ "per_square_meter_price 1.00 0.18 \n",
+ "floor 0.18 1.00 \n",
+ "osm_amenity_points_in_0.01 0.48 0.07 \n",
+ "osm_building_points_in_0.01 0.02 0.01 \n",
+ "osm_catering_points_in_0.01 0.48 0.06 \n",
+ "osm_city_closest_dist 0.05 0.08 \n",
+ "osm_city_nearest_name 0.57 0.28 \n",
+ "osm_city_nearest_population 0.55 0.15 \n",
+ "osm_crossing_closest_dist 0.00 0.01 \n",
+ "osm_crossing_points_in_0.01 0.57 0.14 \n",
+ "osm_culture_points_in_0.01 0.31 0.04 \n",
+ "osm_healthcare_points_in_0.01 0.46 0.10 \n",
+ "osm_historic_points_in_0.01 0.38 0.05 \n",
+ "osm_hotels_points_in_0.01 0.37 0.06 \n",
+ "osm_leisure_points_in_0.01 0.47 0.10 \n",
+ "osm_offices_points_in_0.01 0.47 0.08 \n",
+ "osm_shops_points_in_0.01 0.47 0.07 \n",
+ "osm_subway_closest_dist 0.18 0.09 \n",
+ "osm_train_stop_closest_dist 0.02 0.02 \n",
+ "osm_train_stop_points_in_0.01 0.27 0.02 \n",
+ "osm_transport_stop_closest_dist 0.00 0.01 \n",
+ "osm_transport_stop_points_in_0.01 0.46 0.12 \n",
+ "reform_count_of_houses_1000 0.16 0.03 \n",
+ "reform_house_population_1000 0.19 0.10 \n",
+ "reform_mean_floor_count_1000 0.23 0.15 \n",
+ "reform_mean_year_building_1000 0.03 0.00 \n",
+ "region 0.52 0.23 \n",
+ "total_square 0.02 0.01 \n",
+ "date 0.11 0.22 \n",
+ "realty_type 0.18 0.04 \n",
+ "price_type 0.05 0.00 \n",
+ "\n",
+ " osm_amenity_points_in_0.01 \\\n",
+ "per_square_meter_price 0.48 \n",
+ "floor 0.07 \n",
+ "osm_amenity_points_in_0.01 1.00 \n",
+ "osm_building_points_in_0.01 0.02 \n",
+ "osm_catering_points_in_0.01 0.96 \n",
+ "osm_city_closest_dist 0.07 \n",
+ "osm_city_nearest_name 0.56 \n",
+ "osm_city_nearest_population 0.49 \n",
+ "osm_crossing_closest_dist 0.00 \n",
+ "osm_crossing_points_in_0.01 0.82 \n",
+ "osm_culture_points_in_0.01 0.61 \n",
+ "osm_healthcare_points_in_0.01 0.85 \n",
+ "osm_historic_points_in_0.01 0.75 \n",
+ "osm_hotels_points_in_0.01 0.88 \n",
+ "osm_leisure_points_in_0.01 0.72 \n",
+ "osm_offices_points_in_0.01 0.89 \n",
+ "osm_shops_points_in_0.01 0.96 \n",
+ "osm_subway_closest_dist 0.14 \n",
+ "osm_train_stop_closest_dist 0.03 \n",
+ "osm_train_stop_points_in_0.01 0.33 \n",
+ "osm_transport_stop_closest_dist 0.00 \n",
+ "osm_transport_stop_points_in_0.01 0.69 \n",
+ "reform_count_of_houses_1000 0.63 \n",
+ "reform_house_population_1000 0.62 \n",
+ "reform_mean_floor_count_1000 0.25 \n",
+ "reform_mean_year_building_1000 0.04 \n",
+ "region 0.45 \n",
+ "total_square 0.05 \n",
+ "date 0.07 \n",
+ "realty_type 0.18 \n",
+ "price_type 0.22 \n",
+ "\n",
+ " osm_building_points_in_0.01 \\\n",
+ "per_square_meter_price 0.02 \n",
+ "floor 0.01 \n",
+ "osm_amenity_points_in_0.01 0.02 \n",
+ "osm_building_points_in_0.01 1.00 \n",
+ "osm_catering_points_in_0.01 0.02 \n",
+ "osm_city_closest_dist 0.00 \n",
+ "osm_city_nearest_name 0.28 \n",
+ "osm_city_nearest_population 0.17 \n",
+ "osm_crossing_closest_dist 0.00 \n",
+ "osm_crossing_points_in_0.01 0.11 \n",
+ "osm_culture_points_in_0.01 0.00 \n",
+ "osm_healthcare_points_in_0.01 0.02 \n",
+ "osm_historic_points_in_0.01 0.01 \n",
+ "osm_hotels_points_in_0.01 0.00 \n",
+ "osm_leisure_points_in_0.01 0.03 \n",
+ "osm_offices_points_in_0.01 0.02 \n",
+ "osm_shops_points_in_0.01 0.02 \n",
+ "osm_subway_closest_dist 0.12 \n",
+ "osm_train_stop_closest_dist 0.00 \n",
+ "osm_train_stop_points_in_0.01 0.00 \n",
+ "osm_transport_stop_closest_dist 0.00 \n",
+ "osm_transport_stop_points_in_0.01 0.04 \n",
+ "reform_count_of_houses_1000 0.03 \n",
+ "reform_house_population_1000 0.03 \n",
+ "reform_mean_floor_count_1000 0.03 \n",
+ "reform_mean_year_building_1000 0.00 \n",
+ "region 0.22 \n",
+ "total_square 0.00 \n",
+ "date 0.01 \n",
+ "realty_type 0.03 \n",
+ "price_type 0.01 \n",
+ "\n",
+ " osm_catering_points_in_0.01 \\\n",
+ "per_square_meter_price 0.48 \n",
+ "floor 0.06 \n",
+ "osm_amenity_points_in_0.01 0.96 \n",
+ "osm_building_points_in_0.01 0.02 \n",
+ "osm_catering_points_in_0.01 1.00 \n",
+ "osm_city_closest_dist 0.05 \n",
+ "osm_city_nearest_name 0.51 \n",
+ "osm_city_nearest_population 0.47 \n",
+ "osm_crossing_closest_dist 0.00 \n",
+ "osm_crossing_points_in_0.01 0.80 \n",
+ "osm_culture_points_in_0.01 0.60 \n",
+ "osm_healthcare_points_in_0.01 0.80 \n",
+ "osm_historic_points_in_0.01 0.73 \n",
+ "osm_hotels_points_in_0.01 0.88 \n",
+ "osm_leisure_points_in_0.01 0.68 \n",
+ "osm_offices_points_in_0.01 0.87 \n",
+ "osm_shops_points_in_0.01 0.92 \n",
+ "osm_subway_closest_dist 0.12 \n",
+ "osm_train_stop_closest_dist 0.02 \n",
+ "osm_train_stop_points_in_0.01 0.34 \n",
+ "osm_transport_stop_closest_dist 0.00 \n",
+ "osm_transport_stop_points_in_0.01 0.63 \n",
+ "reform_count_of_houses_1000 0.57 \n",
+ "reform_house_population_1000 0.60 \n",
+ "reform_mean_floor_count_1000 0.23 \n",
+ "reform_mean_year_building_1000 0.03 \n",
+ "region 0.40 \n",
+ "total_square 0.05 \n",
+ "date 0.07 \n",
+ "realty_type 0.19 \n",
+ "price_type 0.20 \n",
+ "\n",
+ " osm_city_closest_dist \\\n",
+ "per_square_meter_price 0.05 \n",
+ "floor 0.08 \n",
+ "osm_amenity_points_in_0.01 0.07 \n",
+ "osm_building_points_in_0.01 0.00 \n",
+ "osm_catering_points_in_0.01 0.05 \n",
+ "osm_city_closest_dist 1.00 \n",
+ "osm_city_nearest_name 0.80 \n",
+ "osm_city_nearest_population 0.11 \n",
+ "osm_crossing_closest_dist 0.73 \n",
+ "osm_crossing_points_in_0.01 0.22 \n",
+ "osm_culture_points_in_0.01 0.01 \n",
+ "osm_healthcare_points_in_0.01 0.07 \n",
+ "osm_historic_points_in_0.01 0.04 \n",
+ "osm_hotels_points_in_0.01 0.03 \n",
+ "osm_leisure_points_in_0.01 0.07 \n",
+ "osm_offices_points_in_0.01 0.05 \n",
+ "osm_shops_points_in_0.01 0.07 \n",
+ "osm_subway_closest_dist 0.41 \n",
+ "osm_train_stop_closest_dist 0.73 \n",
+ "osm_train_stop_points_in_0.01 0.01 \n",
+ "osm_transport_stop_closest_dist 0.73 \n",
+ "osm_transport_stop_points_in_0.01 0.15 \n",
+ "reform_count_of_houses_1000 0.09 \n",
+ "reform_house_population_1000 0.11 \n",
+ "reform_mean_floor_count_1000 0.13 \n",
+ "reform_mean_year_building_1000 0.00 \n",
+ "region 0.40 \n",
+ "total_square 0.00 \n",
+ "date 0.02 \n",
+ "realty_type 0.06 \n",
+ "price_type 0.01 \n",
+ "\n",
+ " osm_city_nearest_name \\\n",
+ "per_square_meter_price 0.57 \n",
+ "floor 0.28 \n",
+ "osm_amenity_points_in_0.01 0.56 \n",
+ "osm_building_points_in_0.01 0.28 \n",
+ "osm_catering_points_in_0.01 0.51 \n",
+ "osm_city_closest_dist 0.80 \n",
+ "osm_city_nearest_name 1.00 \n",
+ "osm_city_nearest_population 1.00 \n",
+ "osm_crossing_closest_dist 0.42 \n",
+ "osm_crossing_points_in_0.01 0.64 \n",
+ "osm_culture_points_in_0.01 0.46 \n",
+ "osm_healthcare_points_in_0.01 0.56 \n",
+ "osm_historic_points_in_0.01 0.45 \n",
+ "osm_hotels_points_in_0.01 0.44 \n",
+ "osm_leisure_points_in_0.01 0.57 \n",
+ "osm_offices_points_in_0.01 0.57 \n",
+ "osm_shops_points_in_0.01 0.57 \n",
+ "osm_subway_closest_dist 0.99 \n",
+ "osm_train_stop_closest_dist 0.88 \n",
+ "osm_train_stop_points_in_0.01 0.32 \n",
+ "osm_transport_stop_closest_dist 0.25 \n",
+ "osm_transport_stop_points_in_0.01 0.62 \n",
+ "reform_count_of_houses_1000 0.55 \n",
+ "reform_house_population_1000 0.48 \n",
+ "reform_mean_floor_count_1000 0.59 \n",
+ "reform_mean_year_building_1000 0.44 \n",
+ "region 1.00 \n",
+ "total_square 0.12 \n",
+ "date 0.22 \n",
+ "realty_type 0.37 \n",
+ "price_type 0.30 \n",
+ "\n",
+ " osm_city_nearest_population \\\n",
+ "per_square_meter_price 0.55 \n",
+ "floor 0.15 \n",
+ "osm_amenity_points_in_0.01 0.49 \n",
+ "osm_building_points_in_0.01 0.17 \n",
+ "osm_catering_points_in_0.01 0.47 \n",
+ "osm_city_closest_dist 0.11 \n",
+ "osm_city_nearest_name 1.00 \n",
+ "osm_city_nearest_population 1.00 \n",
+ "osm_crossing_closest_dist 0.00 \n",
+ "osm_crossing_points_in_0.01 0.60 \n",
+ "osm_culture_points_in_0.01 0.22 \n",
+ "osm_healthcare_points_in_0.01 0.49 \n",
+ "osm_historic_points_in_0.01 0.35 \n",
+ "osm_hotels_points_in_0.01 0.42 \n",
+ "osm_leisure_points_in_0.01 0.50 \n",
+ "osm_offices_points_in_0.01 0.52 \n",
+ "osm_shops_points_in_0.01 0.49 \n",
+ "osm_subway_closest_dist 0.31 \n",
+ "osm_train_stop_closest_dist 0.03 \n",
+ "osm_train_stop_points_in_0.01 0.29 \n",
+ "osm_transport_stop_closest_dist 0.01 \n",
+ "osm_transport_stop_points_in_0.01 0.49 \n",
+ "reform_count_of_houses_1000 0.20 \n",
+ "reform_house_population_1000 0.24 \n",
+ "reform_mean_floor_count_1000 0.31 \n",
+ "reform_mean_year_building_1000 0.05 \n",
+ "region 0.97 \n",
+ "total_square 0.07 \n",
+ "date 0.14 \n",
+ "realty_type 0.14 \n",
+ "price_type 0.07 \n",
+ "\n",
+ " osm_crossing_closest_dist \\\n",
+ "per_square_meter_price 0.00 \n",
+ "floor 0.01 \n",
+ "osm_amenity_points_in_0.01 0.00 \n",
+ "osm_building_points_in_0.01 0.00 \n",
+ "osm_catering_points_in_0.01 0.00 \n",
+ "osm_city_closest_dist 0.73 \n",
+ "osm_city_nearest_name 0.42 \n",
+ "osm_city_nearest_population 0.00 \n",
+ "osm_crossing_closest_dist 1.00 \n",
+ "osm_crossing_points_in_0.01 0.00 \n",
+ "osm_culture_points_in_0.01 0.00 \n",
+ "osm_healthcare_points_in_0.01 0.00 \n",
+ "osm_historic_points_in_0.01 0.00 \n",
+ "osm_hotels_points_in_0.01 0.00 \n",
+ "osm_leisure_points_in_0.01 0.00 \n",
+ "osm_offices_points_in_0.01 0.00 \n",
+ "osm_shops_points_in_0.01 0.00 \n",
+ "osm_subway_closest_dist 0.32 \n",
+ "osm_train_stop_closest_dist 0.79 \n",
+ "osm_train_stop_points_in_0.01 0.00 \n",
+ "osm_transport_stop_closest_dist 0.88 \n",
+ "osm_transport_stop_points_in_0.01 0.02 \n",
+ "reform_count_of_houses_1000 0.01 \n",
+ "reform_house_population_1000 0.00 \n",
+ "reform_mean_floor_count_1000 0.01 \n",
+ "reform_mean_year_building_1000 0.00 \n",
+ "region 0.07 \n",
+ "total_square 0.00 \n",
+ "date 0.00 \n",
+ "realty_type 0.01 \n",
+ "price_type 0.00 \n",
+ "\n",
+ " osm_crossing_points_in_0.01 ... \\\n",
+ "per_square_meter_price 0.57 ... \n",
+ "floor 0.14 ... \n",
+ "osm_amenity_points_in_0.01 0.82 ... \n",
+ "osm_building_points_in_0.01 0.11 ... \n",
+ "osm_catering_points_in_0.01 0.80 ... \n",
+ "osm_city_closest_dist 0.22 ... \n",
+ "osm_city_nearest_name 0.64 ... \n",
+ "osm_city_nearest_population 0.60 ... \n",
+ "osm_crossing_closest_dist 0.00 ... \n",
+ "osm_crossing_points_in_0.01 1.00 ... \n",
+ "osm_culture_points_in_0.01 0.62 ... \n",
+ "osm_healthcare_points_in_0.01 0.81 ... \n",
+ "osm_historic_points_in_0.01 0.76 ... \n",
+ "osm_hotels_points_in_0.01 0.71 ... \n",
+ "osm_leisure_points_in_0.01 0.71 ... \n",
+ "osm_offices_points_in_0.01 0.76 ... \n",
+ "osm_shops_points_in_0.01 0.79 ... \n",
+ "osm_subway_closest_dist 0.20 ... \n",
+ "osm_train_stop_closest_dist 0.09 ... \n",
+ "osm_train_stop_points_in_0.01 0.46 ... \n",
+ "osm_transport_stop_closest_dist 0.00 ... \n",
+ "osm_transport_stop_points_in_0.01 0.77 ... \n",
+ "reform_count_of_houses_1000 0.67 ... \n",
+ "reform_house_population_1000 0.59 ... \n",
+ "reform_mean_floor_count_1000 0.41 ... \n",
+ "reform_mean_year_building_1000 0.17 ... \n",
+ "region 0.55 ... \n",
+ "total_square 0.18 ... \n",
+ "date 0.17 ... \n",
+ "realty_type 0.31 ... \n",
+ "price_type 0.09 ... \n",
+ "\n",
+ " osm_transport_stop_points_in_0.01 \\\n",
+ "per_square_meter_price 0.46 \n",
+ "floor 0.12 \n",
+ "osm_amenity_points_in_0.01 0.69 \n",
+ "osm_building_points_in_0.01 0.04 \n",
+ "osm_catering_points_in_0.01 0.63 \n",
+ "osm_city_closest_dist 0.15 \n",
+ "osm_city_nearest_name 0.62 \n",
+ "osm_city_nearest_population 0.49 \n",
+ "osm_crossing_closest_dist 0.02 \n",
+ "osm_crossing_points_in_0.01 0.77 \n",
+ "osm_culture_points_in_0.01 0.34 \n",
+ "osm_healthcare_points_in_0.01 0.68 \n",
+ "osm_historic_points_in_0.01 0.52 \n",
+ "osm_hotels_points_in_0.01 0.50 \n",
+ "osm_leisure_points_in_0.01 0.58 \n",
+ "osm_offices_points_in_0.01 0.61 \n",
+ "osm_shops_points_in_0.01 0.67 \n",
+ "osm_subway_closest_dist 0.18 \n",
+ "osm_train_stop_closest_dist 0.06 \n",
+ "osm_train_stop_points_in_0.01 0.25 \n",
+ "osm_transport_stop_closest_dist 0.02 \n",
+ "osm_transport_stop_points_in_0.01 1.00 \n",
+ "reform_count_of_houses_1000 0.52 \n",
+ "reform_house_population_1000 0.45 \n",
+ "reform_mean_floor_count_1000 0.33 \n",
+ "reform_mean_year_building_1000 0.05 \n",
+ "region 0.52 \n",
+ "total_square 0.05 \n",
+ "date 0.08 \n",
+ "realty_type 0.20 \n",
+ "price_type 0.07 \n",
+ "\n",
+ " reform_count_of_houses_1000 \\\n",
+ "per_square_meter_price 0.16 \n",
+ "floor 0.03 \n",
+ "osm_amenity_points_in_0.01 0.63 \n",
+ "osm_building_points_in_0.01 0.03 \n",
+ "osm_catering_points_in_0.01 0.57 \n",
+ "osm_city_closest_dist 0.09 \n",
+ "osm_city_nearest_name 0.55 \n",
+ "osm_city_nearest_population 0.20 \n",
+ "osm_crossing_closest_dist 0.01 \n",
+ "osm_crossing_points_in_0.01 0.67 \n",
+ "osm_culture_points_in_0.01 0.37 \n",
+ "osm_healthcare_points_in_0.01 0.53 \n",
+ "osm_historic_points_in_0.01 0.52 \n",
+ "osm_hotels_points_in_0.01 0.52 \n",
+ "osm_leisure_points_in_0.01 0.40 \n",
+ "osm_offices_points_in_0.01 0.56 \n",
+ "osm_shops_points_in_0.01 0.64 \n",
+ "osm_subway_closest_dist 0.12 \n",
+ "osm_train_stop_closest_dist 0.03 \n",
+ "osm_train_stop_points_in_0.01 0.12 \n",
+ "osm_transport_stop_closest_dist 0.01 \n",
+ "osm_transport_stop_points_in_0.01 0.52 \n",
+ "reform_count_of_houses_1000 1.00 \n",
+ "reform_house_population_1000 0.55 \n",
+ "reform_mean_floor_count_1000 0.38 \n",
+ "reform_mean_year_building_1000 0.06 \n",
+ "region 0.43 \n",
+ "total_square 0.05 \n",
+ "date 0.03 \n",
+ "realty_type 0.13 \n",
+ "price_type 0.13 \n",
+ "\n",
+ " reform_house_population_1000 \\\n",
+ "per_square_meter_price 0.19 \n",
+ "floor 0.10 \n",
+ "osm_amenity_points_in_0.01 0.62 \n",
+ "osm_building_points_in_0.01 0.03 \n",
+ "osm_catering_points_in_0.01 0.60 \n",
+ "osm_city_closest_dist 0.11 \n",
+ "osm_city_nearest_name 0.48 \n",
+ "osm_city_nearest_population 0.24 \n",
+ "osm_crossing_closest_dist 0.00 \n",
+ "osm_crossing_points_in_0.01 0.59 \n",
+ "osm_culture_points_in_0.01 0.15 \n",
+ "osm_healthcare_points_in_0.01 0.42 \n",
+ "osm_historic_points_in_0.01 0.19 \n",
+ "osm_hotels_points_in_0.01 0.56 \n",
+ "osm_leisure_points_in_0.01 0.39 \n",
+ "osm_offices_points_in_0.01 0.59 \n",
+ "osm_shops_points_in_0.01 0.65 \n",
+ "osm_subway_closest_dist 0.09 \n",
+ "osm_train_stop_closest_dist 0.05 \n",
+ "osm_train_stop_points_in_0.01 0.13 \n",
+ "osm_transport_stop_closest_dist 0.01 \n",
+ "osm_transport_stop_points_in_0.01 0.45 \n",
+ "reform_count_of_houses_1000 0.55 \n",
+ "reform_house_population_1000 1.00 \n",
+ "reform_mean_floor_count_1000 0.31 \n",
+ "reform_mean_year_building_1000 0.04 \n",
+ "region 0.36 \n",
+ "total_square 0.02 \n",
+ "date 0.04 \n",
+ "realty_type 0.20 \n",
+ "price_type 0.16 \n",
+ "\n",
+ " reform_mean_floor_count_1000 \\\n",
+ "per_square_meter_price 0.23 \n",
+ "floor 0.15 \n",
+ "osm_amenity_points_in_0.01 0.25 \n",
+ "osm_building_points_in_0.01 0.03 \n",
+ "osm_catering_points_in_0.01 0.23 \n",
+ "osm_city_closest_dist 0.13 \n",
+ "osm_city_nearest_name 0.59 \n",
+ "osm_city_nearest_population 0.31 \n",
+ "osm_crossing_closest_dist 0.01 \n",
+ "osm_crossing_points_in_0.01 0.41 \n",
+ "osm_culture_points_in_0.01 0.12 \n",
+ "osm_healthcare_points_in_0.01 0.23 \n",
+ "osm_historic_points_in_0.01 0.19 \n",
+ "osm_hotels_points_in_0.01 0.17 \n",
+ "osm_leisure_points_in_0.01 0.27 \n",
+ "osm_offices_points_in_0.01 0.27 \n",
+ "osm_shops_points_in_0.01 0.25 \n",
+ "osm_subway_closest_dist 0.15 \n",
+ "osm_train_stop_closest_dist 0.07 \n",
+ "osm_train_stop_points_in_0.01 0.12 \n",
+ "osm_transport_stop_closest_dist 0.02 \n",
+ "osm_transport_stop_points_in_0.01 0.33 \n",
+ "reform_count_of_houses_1000 0.38 \n",
+ "reform_house_population_1000 0.31 \n",
+ "reform_mean_floor_count_1000 1.00 \n",
+ "reform_mean_year_building_1000 0.08 \n",
+ "region 0.47 \n",
+ "total_square 0.02 \n",
+ "date 0.06 \n",
+ "realty_type 0.14 \n",
+ "price_type 0.05 \n",
+ "\n",
+ " reform_mean_year_building_1000 region \\\n",
+ "per_square_meter_price 0.03 0.52 \n",
+ "floor 0.00 0.23 \n",
+ "osm_amenity_points_in_0.01 0.04 0.45 \n",
+ "osm_building_points_in_0.01 0.00 0.22 \n",
+ "osm_catering_points_in_0.01 0.03 0.40 \n",
+ "osm_city_closest_dist 0.00 0.40 \n",
+ "osm_city_nearest_name 0.44 1.00 \n",
+ "osm_city_nearest_population 0.05 0.97 \n",
+ "osm_crossing_closest_dist 0.00 0.07 \n",
+ "osm_crossing_points_in_0.01 0.17 0.55 \n",
+ "osm_culture_points_in_0.01 0.00 0.39 \n",
+ "osm_healthcare_points_in_0.01 0.05 0.46 \n",
+ "osm_historic_points_in_0.01 0.04 0.33 \n",
+ "osm_hotels_points_in_0.01 0.01 0.36 \n",
+ "osm_leisure_points_in_0.01 0.05 0.48 \n",
+ "osm_offices_points_in_0.01 0.04 0.47 \n",
+ "osm_shops_points_in_0.01 0.06 0.47 \n",
+ "osm_subway_closest_dist 0.04 0.91 \n",
+ "osm_train_stop_closest_dist 0.00 0.43 \n",
+ "osm_train_stop_points_in_0.01 0.05 0.26 \n",
+ "osm_transport_stop_closest_dist 0.00 0.14 \n",
+ "osm_transport_stop_points_in_0.01 0.05 0.52 \n",
+ "reform_count_of_houses_1000 0.06 0.43 \n",
+ "reform_house_population_1000 0.04 0.36 \n",
+ "reform_mean_floor_count_1000 0.08 0.47 \n",
+ "reform_mean_year_building_1000 1.00 0.37 \n",
+ "region 0.37 1.00 \n",
+ "total_square 0.00 0.08 \n",
+ "date 0.02 0.17 \n",
+ "realty_type 0.02 0.28 \n",
+ "price_type 0.00 0.27 \n",
+ "\n",
+ " total_square date realty_type price_type \n",
+ "per_square_meter_price 0.02 0.11 0.18 0.05 \n",
+ "floor 0.01 0.22 0.04 0.00 \n",
+ "osm_amenity_points_in_0.01 0.05 0.07 0.18 0.22 \n",
+ "osm_building_points_in_0.01 0.00 0.01 0.03 0.01 \n",
+ "osm_catering_points_in_0.01 0.05 0.07 0.19 0.20 \n",
+ "osm_city_closest_dist 0.00 0.02 0.06 0.01 \n",
+ "osm_city_nearest_name 0.12 0.22 0.37 0.30 \n",
+ "osm_city_nearest_population 0.07 0.14 0.14 0.07 \n",
+ "osm_crossing_closest_dist 0.00 0.00 0.01 0.00 \n",
+ "osm_crossing_points_in_0.01 0.18 0.17 0.31 0.09 \n",
+ "osm_culture_points_in_0.01 0.04 0.03 0.07 0.04 \n",
+ "osm_healthcare_points_in_0.01 0.06 0.07 0.13 0.04 \n",
+ "osm_historic_points_in_0.01 0.08 0.05 0.14 0.05 \n",
+ "osm_hotels_points_in_0.01 0.05 0.05 0.08 0.21 \n",
+ "osm_leisure_points_in_0.01 0.06 0.08 0.14 0.09 \n",
+ "osm_offices_points_in_0.01 0.05 0.07 0.19 0.20 \n",
+ "osm_shops_points_in_0.01 0.03 0.07 0.17 0.20 \n",
+ "osm_subway_closest_dist 0.03 0.06 0.09 0.04 \n",
+ "osm_train_stop_closest_dist 0.00 0.01 0.02 0.00 \n",
+ "osm_train_stop_points_in_0.01 0.04 0.04 0.11 0.03 \n",
+ "osm_transport_stop_closest_dist 0.00 0.00 0.02 0.00 \n",
+ "osm_transport_stop_points_in_0.01 0.05 0.08 0.20 0.07 \n",
+ "reform_count_of_houses_1000 0.05 0.03 0.13 0.13 \n",
+ "reform_house_population_1000 0.02 0.04 0.20 0.16 \n",
+ "reform_mean_floor_count_1000 0.02 0.06 0.14 0.05 \n",
+ "reform_mean_year_building_1000 0.00 0.02 0.02 0.00 \n",
+ "region 0.08 0.17 0.28 0.27 \n",
+ "total_square 1.00 0.02 0.04 0.01 \n",
+ "date 0.02 1.00 0.10 0.02 \n",
+ "realty_type 0.04 0.10 1.00 0.02 \n",
+ "price_type 0.01 0.02 0.02 1.00 \n",
+ "\n",
+ "[31 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Compute the phik correlation matrix: unlike Pearson, it also captures\n",
+ "# associations that involve categorical variables.\n",
+ "# phik carries no sign, only a magnitude: the closer to 1, the stronger the association.\n",
+ "\n",
+ "# Columns that phik should bin as interval (numeric) variables; columns that are\n",
+ "# not listed here are treated as categorical.\n",
+ "interval_cols = [\n",
+ "    'osm_amenity_points_in_0.01',\n",
+ "    'osm_building_points_in_0.01',\n",
+ "    'osm_catering_points_in_0.01',\n",
+ "    'osm_city_closest_dist',\n",
+ "    'osm_city_nearest_population',\n",
+ "    'osm_crossing_closest_dist',\n",
+ "    # The 0.01 radius is the crossing-count column that appears in the matrix;\n",
+ "    # naming any other radius here leaves that column treated as categorical.\n",
+ "    'osm_crossing_points_in_0.01',\n",
+ "    'osm_culture_points_in_0.01',\n",
+ "    'osm_healthcare_points_in_0.01',\n",
+ "    'osm_historic_points_in_0.01',\n",
+ "    'osm_hotels_points_in_0.01',\n",
+ "    'osm_leisure_points_in_0.01',\n",
+ "    'osm_offices_points_in_0.01',\n",
+ "    'osm_shops_points_in_0.01',\n",
+ "    'osm_subway_closest_dist',\n",
+ "    'osm_train_stop_closest_dist',\n",
+ "    'osm_train_stop_points_in_0.01',\n",
+ "    'osm_transport_stop_closest_dist',\n",
+ "    'osm_transport_stop_points_in_0.01',\n",
+ "    'per_square_meter_price',\n",
+ "    'reform_count_of_houses_1000',\n",
+ "    'reform_house_population_1000',\n",
+ "    'reform_mean_floor_count_1000',\n",
+ "    'reform_mean_year_building_1000',\n",
+ "    'total_square',\n",
+ "    'realty_type',\n",
+ "    'price_type',\n",
+ "    'many_floors',\n",
+ "    'city',\n",
+ "    'street',\n",
+ "]\n",
+ "\n",
+ "# Build the correlation matrix.\n",
+ "phik_overview = train_data[columns_for_phik].phik_matrix(interval_cols=interval_cols)\n",
+ "\n",
+ "phik_overview.round(2)\n",
+ "\n",
+ "# Optional heat-map visualisation (needs matplotlib.pyplot as plt and seaborn as sns).\n",
+ "# fig, ax = plt.subplots(figsize=(20, 20))\n",
+ "# sns.heatmap(phik_overview.round(2), annot=True, square=True, cmap='mako')\n",
+ "# ax.set_title(label = 'МАТРИЦА КОРРЕЛЯЦИИ ПРИЗНАКОВ $\\phi_K$', fontdict={'fontsize': 15, 'fontweight': 'bold'})\n",
+ "\n",
+ "# plt.tight_layout()\n",
+ "# plt.show();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " phik | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " per_square_meter_price | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " osm_crossing_points_in_0.01 | \n",
+ " 0.57 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " osm_city_nearest_name | \n",
+ " 0.57 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " osm_city_nearest_population | \n",
+ " 0.55 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " region | \n",
+ " 0.52 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " osm_amenity_points_in_0.01 | \n",
+ " 0.48 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " osm_catering_points_in_0.01 | \n",
+ " 0.48 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " osm_leisure_points_in_0.01 | \n",
+ " 0.47 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " osm_offices_points_in_0.01 | \n",
+ " 0.47 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " osm_shops_points_in_0.01 | \n",
+ " 0.47 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " osm_healthcare_points_in_0.01 | \n",
+ " 0.46 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " osm_transport_stop_points_in_0.01 | \n",
+ " 0.46 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature phik\n",
+ "0 per_square_meter_price 1.00\n",
+ "9 osm_crossing_points_in_0.01 0.57\n",
+ "6 osm_city_nearest_name 0.57\n",
+ "7 osm_city_nearest_population 0.55\n",
+ "26 region 0.52\n",
+ "2 osm_amenity_points_in_0.01 0.48\n",
+ "4 osm_catering_points_in_0.01 0.48\n",
+ "14 osm_leisure_points_in_0.01 0.47\n",
+ "15 osm_offices_points_in_0.01 0.47\n",
+ "16 osm_shops_points_in_0.01 0.47\n",
+ "11 osm_healthcare_points_in_0.01 0.46\n",
+ "21 osm_transport_stop_points_in_0.01 0.46"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rank the features by their phik correlation with the target and keep those above 0.4.\n",
+ "phik_max_corr = phik_overview['per_square_meter_price'].to_frame().reset_index()\n",
+ "phik_max_corr = phik_max_corr.rename(columns={'per_square_meter_price': 'phik', 'index': 'feature'})\n",
+ "phik_max_corr = phik_max_corr.sort_values(by='phik', ascending=False)\n",
+ "phik_max_corr = phik_max_corr.query('phik > 0.4').round(2)\n",
+ "phik_max_corr\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Матрица корреляции phik показала максимальную взаимосвязь целевой переменной с переменными:\n",
+ "- количество пешеходных переходов в радиусе 1 км\n",
+ "- название ближайшего города\n",
+ "- население ближайшего города\n",
+ "- регион\n",
+ "- количество в радиусе 1 км точек кейтеринга, досуга, офисов, магазинов, медучреждений, остановок общественного транспорта и объектов, связанных с удобством\n",
+ "\n",
+ "Также обнаружены новые взаимосвязи переменных между собой, которые не были видны на матрице корреляции Пирсона:\n",
+ "- этаж и price_type\n",
+ "- название близлежащего города и население ближайшего города, расстояние до ближайшего метро, остановки общественного транспорта, регион\n",
+ "- регион и расстояние до ближайшего метро"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Modelling"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Feature engineering: load the external lookup tables and define the helpers.\n",
+ "city_population = pd.read_csv('city_population.csv')\n",
+ "zarplaty = pd.read_excel('zarplaty.xlsx')\n",
+ "\n",
+ "\n",
+ "def city_type(row):\n",
+ "    \"\"\"Map a city population to a size bucket.\n",
+ "\n",
+ "    Returns '1Million' for a population of at least 1,000,000, 'Medium' for a\n",
+ "    population above 200,000 and 'Small' otherwise. A missing population (NaN)\n",
+ "    fails every comparison, so None is returned for it.\n",
+ "    \"\"\"\n",
+ "    if row >= 1000000:\n",
+ "        return '1Million'\n",
+ "    if row > 200000:\n",
+ "        return 'Medium'\n",
+ "    if row <= 200000:\n",
+ "        return 'Small'\n",
+ "    return None\n",
+ "\n",
+ "\n",
+ "def floor_type(row):\n",
+ "    \"\"\"Return 1 when the floor value contains the character '1' and is not -1, else 0.\n",
+ "\n",
+ "    NOTE(review): the substring test also matches floors such as 10, 11 or 21;\n",
+ "    presumably a first-floor flag was intended - confirm before changing it.\n",
+ "    \"\"\"\n",
+ "    return 1 if '1' in str(row) and row != -1 else 0\n",
+ "\n",
+ "\n",
+ "def add_features(df, reference_year=2021):\n",
+ "    \"\"\"Return a copy of df enriched with engineered features.\n",
+ "\n",
+ "    Added columns: 'age', 'city_population', 'city_type', 'zarplata' and\n",
+ "    'floor_type'. The 'city' column is lower-cased so that it can be joined with\n",
+ "    the population table. The module-level city_population and zarplaty frames\n",
+ "    are used as lookup tables.\n",
+ "\n",
+ "    Args:\n",
+ "        df: frame with at least the 'reform_mean_year_building_500', 'city',\n",
+ "            'region' and 'floor' columns.\n",
+ "        reference_year: year that building ages are measured against.\n",
+ "\n",
+ "    Returns:\n",
+ "        A new DataFrame with the same number of rows as df.\n",
+ "    \"\"\"\n",
+ "    # Work on a copy so that the caller's frame is not modified in place.\n",
+ "    df = df.copy()\n",
+ "    df['age'] = (reference_year - df['reform_mean_year_building_500']).round()\n",
+ "    # .str.lower() keeps missing city names as NaN instead of raising.\n",
+ "    df['city'] = df['city'].str.lower()\n",
+ "\n",
+ "    # Lower-case the settlement names BEFORE aggregating: names that differ only\n",
+ "    # in letter case would otherwise become duplicate join keys and multiply the\n",
+ "    # rows of df in the left merge.\n",
+ "    population_by_city = (\n",
+ "        city_population\n",
+ "        .assign(city=city_population['settlement'].str.lower())\n",
+ "        .groupby('city', as_index=False)['population'].sum()\n",
+ "        .rename(columns={'population': 'city_population'})\n",
+ "    )\n",
+ "    df = df.merge(population_by_city, on='city', how='left')\n",
+ "\n",
+ "    # Shrink numeric columns to the smallest unsigned dtype that can hold them;\n",
+ "    # columns that cannot be downcast are left unchanged by pandas.\n",
+ "    for col in df.select_dtypes(include=np.number).columns:\n",
+ "        df[col] = pd.to_numeric(df[col], downcast='unsigned')\n",
+ "\n",
+ "    df['city_type'] = df['city_population'].apply(city_type)\n",
+ "    # City names are lower-case at this point, so the literals must be lower-case too.\n",
+ "    df.loc[df['city'].isin(['москва', 'санкт-петербург']), 'city_type'] = 'Capital'\n",
+ "\n",
+ "    # Keep a single salary row per region so that the left merge cannot duplicate rows.\n",
+ "    salary_by_region = zarplaty.drop_duplicates(subset='region')\n",
+ "    df = df.merge(salary_by_region, on='region', how='left')\n",
+ "    df['zarplata'] = pd.to_numeric(df['zarplata'], downcast='unsigned')\n",
+ "    df['floor_type'] = df['floor'].apply(floor_type)\n",
+ "\n",
+ "    return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run the same feature engineering on both the train and the test split.\n",
+ "train_data = add_features(df=train_data)\n",
+ "test_data = add_features(df=test_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((279967, 82), (2974, 81))"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_data.shape, test_data.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " city | \n",
+ " floor | \n",
+ " id | \n",
+ " lat | \n",
+ " lng | \n",
+ " osm_amenity_points_in_0.001 | \n",
+ " osm_amenity_points_in_0.005 | \n",
+ " osm_amenity_points_in_0.0075 | \n",
+ " osm_amenity_points_in_0.01 | \n",
+ " osm_building_points_in_0.001 | \n",
+ " ... | \n",
+ " total_square | \n",
+ " street | \n",
+ " date | \n",
+ " realty_type | \n",
+ " price_type | \n",
+ " age | \n",
+ " city_population | \n",
+ " city_type | \n",
+ " zarplata | \n",
+ " floor_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " пермь | \n",
+ " 0 | \n",
+ " COL_0 | \n",
+ " 57.998207 | \n",
+ " 56.292797 | \n",
+ " 4 | \n",
+ " 19 | \n",
+ " 35 | \n",
+ " 52 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 32.0 | \n",
+ " S27289 | \n",
+ " 2020-01-05 | \n",
+ " 10 | \n",
+ " 0 | \n",
+ " 60.0 | \n",
+ " 1048011.0 | \n",
+ " 1Million | \n",
+ " 41958.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " шатура | \n",
+ " 0 | \n",
+ " COL_1 | \n",
+ " 55.574284 | \n",
+ " 39.543835 | \n",
+ " 3 | \n",
+ " 24 | \n",
+ " 37 | \n",
+ " 59 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 280.0 | \n",
+ " S17052 | \n",
+ " 2020-01-05 | \n",
+ " 10 | \n",
+ " 0 | \n",
+ " 64.0 | \n",
+ " 32885.0 | \n",
+ " Small | \n",
+ " 58066.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ярославль | \n",
+ " 0 | \n",
+ " COL_2 | \n",
+ " 57.619140 | \n",
+ " 39.850525 | \n",
+ " 1 | \n",
+ " 30 | \n",
+ " 67 | \n",
+ " 128 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 297.4 | \n",
+ " S16913 | \n",
+ " 2020-01-05 | \n",
+ " 110 | \n",
+ " 0 | \n",
+ " 48.0 | \n",
+ " 604128.0 | \n",
+ " Medium | \n",
+ " NaN | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " новокузнецк | \n",
+ " 0 | \n",
+ " COL_3 | \n",
+ " 53.897083 | \n",
+ " 87.108604 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 21 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 190.0 | \n",
+ " S10148 | \n",
+ " 2020-01-05 | \n",
+ " 110 | \n",
+ " 0 | \n",
+ " 7.0 | \n",
+ " 551919.0 | \n",
+ " Medium | \n",
+ " 43429.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " москва | \n",
+ " 0 | \n",
+ " COL_4 | \n",
+ " 55.802590 | \n",
+ " 37.487110 | \n",
+ " 1 | \n",
+ " 23 | \n",
+ " 64 | \n",
+ " 153 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 60.2 | \n",
+ " S1338 | \n",
+ " 2020-01-05 | \n",
+ " 10 | \n",
+ " 0 | \n",
+ " 60.0 | \n",
+ " 12380691.0 | \n",
+ " Capital | \n",
+ " 100070.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 82 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " city floor id lat lng \\\n",
+ "0 пермь 0 COL_0 57.998207 56.292797 \n",
+ "1 шатура 0 COL_1 55.574284 39.543835 \n",
+ "2 ярославль 0 COL_2 57.619140 39.850525 \n",
+ "3 новокузнецк 0 COL_3 53.897083 87.108604 \n",
+ "4 москва 0 COL_4 55.802590 37.487110 \n",
+ "\n",
+ " osm_amenity_points_in_0.001 osm_amenity_points_in_0.005 \\\n",
+ "0 4 19 \n",
+ "1 3 24 \n",
+ "2 1 30 \n",
+ "3 0 0 \n",
+ "4 1 23 \n",
+ "\n",
+ " osm_amenity_points_in_0.0075 osm_amenity_points_in_0.01 \\\n",
+ "0 35 52 \n",
+ "1 37 59 \n",
+ "2 67 128 \n",
+ "3 5 21 \n",
+ "4 64 153 \n",
+ "\n",
+ " osm_building_points_in_0.001 ... total_square street date \\\n",
+ "0 0 ... 32.0 S27289 2020-01-05 \n",
+ "1 0 ... 280.0 S17052 2020-01-05 \n",
+ "2 0 ... 297.4 S16913 2020-01-05 \n",
+ "3 0 ... 190.0 S10148 2020-01-05 \n",
+ "4 0 ... 60.2 S1338 2020-01-05 \n",
+ "\n",
+ " realty_type price_type age city_population city_type zarplata \\\n",
+ "0 10 0 60.0 1048011.0 1Million 41958.0 \n",
+ "1 10 0 64.0 32885.0 Small 58066.0 \n",
+ "2 110 0 48.0 604128.0 Medium NaN \n",
+ "3 110 0 7.0 551919.0 Medium 43429.0 \n",
+ "4 10 0 60.0 12380691.0 Capital 100070.0 \n",
+ "\n",
+ " floor_type \n",
+ "0 0 \n",
+ "1 0 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "4 0 \n",
+ "\n",
+ "[5 rows x 82 columns]"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_data = train_data.query('price_type == 1')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_data.replace('1', 1, inplace=True)\n",
+ "train_data.replace('1.0', 1, inplace=True)\n",
+ "test_data.replace('1', 1, inplace=True)\n",
+ "test_data.replace('1.0', 1, inplace=True)\n",
+ "\n",
+ "train_data['floor'] = train_data.apply(lambda row: 1 if row['floor'] == 1 else 0, axis=1)\n",
+ "test_data['floor'] = test_data.apply(lambda row: 1 if row['floor'] == 1 else 0, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {
+ "id": "811f6b6d"
+ },
+ "outputs": [],
+ "source": [
+ "N_THREADS = 4 # threads cnt for lgbm and linear models\n",
+ "N_FOLDS = 5 # folds cnt for AutoML\n",
+ "RANDOM_STATE = 42 # fixed random state for various reasons\n",
+ "#TEST_SIZE = 0.1 # Test size for metric check\n",
+ "TIMEOUT = 100 # Time in seconds for automl run USE TIMEOUT = 1700 for perfect score\n",
+ "\n",
+ "np.random.seed(RANDOM_STATE)\n",
+ "torch.set_num_threads(N_THREADS)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {
+ "id": "e6d51e72"
+ },
+ "outputs": [],
+ "source": [
+ "# train_df, test_df = train_test_split(train_data, \n",
+ "# test_size=TEST_SIZE, \n",
+ "# random_state=RANDOM_STATE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "THRESHOLD = 0.15\n",
+ "NEGATIVE_WEIGHT = 1.1\n",
+ "\n",
+ "def deviation_metric_one_sample(y_true: typing.Union[float, int], y_pred: typing.Union[float, int]) -> float:\n",
+ " \"\"\"\n",
+ " Реализация кастомной метрики для хакатона.\n",
+ "\n",
+ " :param y_true: float, реальная цена\n",
+ " :param y_pred: float, предсказанная цена\n",
+ " :return: float, значение метрики\n",
+ " \"\"\"\n",
+ " deviation = (y_pred - y_true) / np.maximum(1e-8, y_true)\n",
+ " if np.abs(deviation) <= THRESHOLD:\n",
+ " return 0\n",
+ " elif deviation <= - 4 * THRESHOLD:\n",
+ " return 9 * NEGATIVE_WEIGHT\n",
+ " elif deviation < -THRESHOLD:\n",
+ " return NEGATIVE_WEIGHT * ((deviation / THRESHOLD) + 1) ** 2\n",
+ " elif deviation < 4 * THRESHOLD:\n",
+ " return ((deviation / THRESHOLD) - 1) ** 2\n",
+ " else:\n",
+ " return 9\n",
+ "\n",
+ "\n",
+ "def deviation_metric(y_true: np.array, y_pred: np.array) -> float:\n",
+ " return np.array([deviation_metric_one_sample(y_true[n], y_pred[n]) for n in range(len(y_true))]).mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {
+ "id": "11901a60"
+ },
+ "outputs": [],
+ "source": [
+ "task = Task('reg', loss = 'rmsle', metric = deviation_metric)\n",
+ "\n",
+ "roles = {\n",
+ " 'target': 'per_square_meter_price',\n",
+ " 'drop': 'id'\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 800
+ },
+ "id": "c77216e8",
+ "outputId": "ae6a7952-b341-40ed-f000-00f38639be74"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current random state: {'reader_params': {'random_state': 42}, 'general_params': {'return_all_predictions': False}}\n",
+ "Found reader_params in kwargs, need to combine\n",
+ "Merged variant for reader_params = {'n_jobs': 4, 'cv': 5, 'random_state': 42}\n",
+ "Start automl preset with listed constraints:\n",
+ "- time: 100.0 seconds\n",
+ "- cpus: 4 cores\n",
+ "- memory: 16 gb\n",
+ "\n",
+ "Train data shape: (4493, 82)\n",
+ "Feats was rejected during automatic roles guess: []\n",
+ "\n",
+ "\n",
+ "Layer 1 ...\n",
+ "Train process start. Time left 89.13144850730896 secs\n",
+ "Start fitting Lvl_0_Pipe_0_Mod_0_LinearL2 ...\n",
+ "\n",
+ "===== Start working with fold 0 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n",
+ "\n",
+ "Linear model: C = 1e-05 score = [-3.7152774]\n",
+ "Linear model: C = 5e-05 score = [-3.7152774]\n",
+ "Linear model: C = 0.0001 score = [-3.7152774]\n",
+ "\n",
+ "===== Start working with fold 1 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n",
+ "\n",
+ "Linear model: C = 1e-05 score = [-3.3165922]\n",
+ "Linear model: C = 5e-05 score = [-3.3165922]\n",
+ "Linear model: C = 0.0001 score = [-3.3165922]\n",
+ "\n",
+ "===== Start working with fold 2 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n",
+ "\n",
+ "Linear model: C = 1e-05 score = [-3.6910217]\n",
+ "Linear model: C = 5e-05 score = [-3.6910217]\n",
+ "Linear model: C = 0.0001 score = [-3.6910217]\n",
+ "\n",
+ "===== Start working with fold 3 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n",
+ "\n",
+ "Linear model: C = 1e-05 score = [-3.6205108]\n",
+ "Linear model: C = 5e-05 score = [-3.6205108]\n",
+ "Linear model: C = 0.0001 score = [-3.6205108]\n",
+ "\n",
+ "===== Start working with fold 4 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n",
+ "\n",
+ "Linear model: C = 1e-05 score = [-3.2158222]\n",
+ "Linear model: C = 5e-05 score = [-3.2158222]\n",
+ "Linear model: C = 0.0001 score = [-3.2158222]\n",
+ "Lvl_0_Pipe_0_Mod_0_LinearL2 fitting and predicting completed\n",
+ "Time left 86.83387517929077\n",
+ "Start fitting Lvl_0_Pipe_1_Mod_0_LightGBM ...\n",
+ "\n",
+ "===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_0_LightGBM =====\n",
+ "\n",
+ "Training until validation scores don't improve for 200 rounds\n",
+ "[100]\tvalid's l2: 0.175464\tvalid's Opt metric: 2.38008\n",
+ "[200]\tvalid's l2: 0.137539\tvalid's Opt metric: 2.00419\n",
+ "[300]\tvalid's l2: 0.126562\tvalid's Opt metric: 1.85852\n",
+ "[400]\tvalid's l2: 0.122176\tvalid's Opt metric: 1.77832\n",
+ "[500]\tvalid's l2: 0.120404\tvalid's Opt metric: 1.74085\n",
+ "[600]\tvalid's l2: 0.119403\tvalid's Opt metric: 1.71643\n",
+ "[700]\tvalid's l2: 0.118939\tvalid's Opt metric: 1.7027\n",
+ "[800]\tvalid's l2: 0.1189\tvalid's Opt metric: 1.69712\n",
+ "[900]\tvalid's l2: 0.118543\tvalid's Opt metric: 1.68706\n",
+ "[1000]\tvalid's l2: 0.11832\tvalid's Opt metric: 1.68094\n",
+ "[1100]\tvalid's l2: 0.118228\tvalid's Opt metric: 1.67885\n",
+ "[1200]\tvalid's l2: 0.118142\tvalid's Opt metric: 1.67488\n",
+ "[1300]\tvalid's l2: 0.117963\tvalid's Opt metric: 1.66822\n",
+ "[1400]\tvalid's l2: 0.117946\tvalid's Opt metric: 1.66472\n",
+ "[1500]\tvalid's l2: 0.118028\tvalid's Opt metric: 1.66498\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Time limit exceeded after calculating fold 0\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Early stopping, best iteration is:\n",
+ "[1336]\tvalid's l2: 0.117866\tvalid's Opt metric: 1.6654\n",
+ "Lvl_0_Pipe_1_Mod_0_LightGBM fitting and predicting completed\n",
+ "Start fitting Lvl_0_Pipe_1_Mod_2_CatBoost ...\n",
+ "\n",
+ "===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_2_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5690497\ttest: 0.5902307\tbest: 0.5902307 (0)\ttotal: 9.89ms\tremaining: 19.8s\n",
+ "100:\tlearn: 0.3220134\ttest: 0.3603304\tbest: 0.3603304 (100)\ttotal: 241ms\tremaining: 4.53s\n",
+ "200:\tlearn: 0.2853510\ttest: 0.3517094\tbest: 0.3517094 (200)\ttotal: 498ms\tremaining: 4.46s\n",
+ "300:\tlearn: 0.2576051\ttest: 0.3490464\tbest: 0.3490393 (299)\ttotal: 737ms\tremaining: 4.16s\n",
+ "400:\tlearn: 0.2379688\ttest: 0.3474727\tbest: 0.3474727 (400)\ttotal: 1s\tremaining: 4s\n",
+ "500:\tlearn: 0.2222526\ttest: 0.3449390\tbest: 0.3449390 (500)\ttotal: 1.23s\tremaining: 3.67s\n",
+ "600:\tlearn: 0.2078858\ttest: 0.3447732\tbest: 0.3443742 (549)\ttotal: 1.49s\tremaining: 3.46s\n",
+ "700:\tlearn: 0.1960459\ttest: 0.3442549\tbest: 0.3441099 (652)\ttotal: 1.75s\tremaining: 3.24s\n",
+ "800:\tlearn: 0.1857116\ttest: 0.3438032\tbest: 0.3436641 (740)\ttotal: 2s\tremaining: 2.99s\n",
+ "900:\tlearn: 0.1759209\ttest: 0.3433300\tbest: 0.3432126 (844)\ttotal: 2.22s\tremaining: 2.71s\n",
+ "1000:\tlearn: 0.1668783\ttest: 0.3428658\tbest: 0.3428565 (990)\ttotal: 2.47s\tremaining: 2.46s\n",
+ "1100:\tlearn: 0.1586743\ttest: 0.3419130\tbest: 0.3418887 (1090)\ttotal: 2.69s\tremaining: 2.2s\n",
+ "1200:\tlearn: 0.1510258\ttest: 0.3414564\tbest: 0.3414564 (1200)\ttotal: 2.94s\tremaining: 1.96s\n",
+ "1300:\tlearn: 0.1441223\ttest: 0.3414049\tbest: 0.3409616 (1261)\ttotal: 3.18s\tremaining: 1.71s\n",
+ "1400:\tlearn: 0.1377195\ttest: 0.3414637\tbest: 0.3409616 (1261)\ttotal: 3.44s\tremaining: 1.47s\n",
+ "1500:\tlearn: 0.1316497\ttest: 0.3414028\tbest: 0.3409616 (1261)\ttotal: 3.67s\tremaining: 1.22s\n",
+ "Stopped by overfitting detector (300 iterations wait)\n",
+ "\n",
+ "bestTest = 0.3409616327\n",
+ "bestIteration = 1261\n",
+ "\n",
+ "Shrink model to first 1262 iterations.\n",
+ "\n",
+ "===== Start working with fold 1 for Lvl_0_Pipe_1_Mod_2_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5768234\ttest: 0.5574083\tbest: 0.5574083 (0)\ttotal: 3.21ms\tremaining: 6.42s\n",
+ "100:\tlearn: 0.3247511\ttest: 0.3336189\tbest: 0.3336189 (100)\ttotal: 246ms\tremaining: 4.62s\n",
+ "200:\tlearn: 0.2892813\ttest: 0.3252905\tbest: 0.3252353 (193)\ttotal: 536ms\tremaining: 4.79s\n",
+ "300:\tlearn: 0.2614113\ttest: 0.3229165\tbest: 0.3227958 (299)\ttotal: 767ms\tremaining: 4.33s\n",
+ "400:\tlearn: 0.2413630\ttest: 0.3221904\tbest: 0.3219651 (324)\ttotal: 1.02s\tremaining: 4.08s\n",
+ "500:\tlearn: 0.2248640\ttest: 0.3214037\tbest: 0.3211377 (457)\ttotal: 1.27s\tremaining: 3.81s\n",
+ "600:\tlearn: 0.2108310\ttest: 0.3208134\tbest: 0.3207517 (597)\ttotal: 1.56s\tremaining: 3.62s\n",
+ "700:\tlearn: 0.1988404\ttest: 0.3209918\tbest: 0.3207120 (654)\ttotal: 1.8s\tremaining: 3.33s\n",
+ "800:\tlearn: 0.1880008\ttest: 0.3214166\tbest: 0.3207120 (654)\ttotal: 2.05s\tremaining: 3.06s\n",
+ "900:\tlearn: 0.1778877\ttest: 0.3214287\tbest: 0.3207120 (654)\ttotal: 2.35s\tremaining: 2.86s\n",
+ "Stopped by overfitting detector (300 iterations wait)\n",
+ "\n",
+ "bestTest = 0.320711959\n",
+ "bestIteration = 654\n",
+ "\n",
+ "Shrink model to first 655 iterations.\n",
+ "\n",
+ "===== Start working with fold 2 for Lvl_0_Pipe_1_Mod_2_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5724626\ttest: 0.5753481\tbest: 0.5753481 (0)\ttotal: 3.17ms\tremaining: 6.33s\n",
+ "100:\tlearn: 0.3239383\ttest: 0.3399819\tbest: 0.3399819 (100)\ttotal: 266ms\tremaining: 5s\n",
+ "200:\tlearn: 0.2867294\ttest: 0.3285307\tbest: 0.3285307 (200)\ttotal: 512ms\tremaining: 4.58s\n",
+ "300:\tlearn: 0.2589989\ttest: 0.3255054\tbest: 0.3254546 (298)\ttotal: 750ms\tremaining: 4.23s\n",
+ "400:\tlearn: 0.2388187\ttest: 0.3247287\tbest: 0.3245618 (398)\ttotal: 976ms\tremaining: 3.89s\n",
+ "500:\tlearn: 0.2222920\ttest: 0.3240440\tbest: 0.3240057 (499)\ttotal: 1.24s\tremaining: 3.7s\n",
+ "600:\tlearn: 0.2086809\ttest: 0.3226891\tbest: 0.3226891 (600)\ttotal: 1.49s\tremaining: 3.46s\n",
+ "700:\tlearn: 0.1960510\ttest: 0.3210332\tbest: 0.3210024 (698)\ttotal: 1.74s\tremaining: 3.22s\n",
+ "800:\tlearn: 0.1854055\ttest: 0.3204972\tbest: 0.3204972 (800)\ttotal: 2s\tremaining: 2.99s\n",
+ "900:\tlearn: 0.1764789\ttest: 0.3205735\tbest: 0.3200469 (851)\ttotal: 2.28s\tremaining: 2.78s\n",
+ "1000:\tlearn: 0.1678033\ttest: 0.3209024\tbest: 0.3200469 (851)\ttotal: 2.52s\tremaining: 2.52s\n",
+ "1100:\tlearn: 0.1599763\ttest: 0.3211878\tbest: 0.3200469 (851)\ttotal: 2.79s\tremaining: 2.28s\n",
+ "Stopped by overfitting detector (300 iterations wait)\n",
+ "\n",
+ "bestTest = 0.3200468666\n",
+ "bestIteration = 851\n",
+ "\n",
+ "Shrink model to first 852 iterations.\n",
+ "\n",
+ "===== Start working with fold 3 for Lvl_0_Pipe_1_Mod_2_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5652387\ttest: 0.6041895\tbest: 0.6041895 (0)\ttotal: 3.64ms\tremaining: 7.28s\n",
+ "100:\tlearn: 0.3086002\ttest: 0.3976876\tbest: 0.3976876 (100)\ttotal: 264ms\tremaining: 4.97s\n",
+ "200:\tlearn: 0.2756635\ttest: 0.3880626\tbest: 0.3880626 (200)\ttotal: 501ms\tremaining: 4.49s\n",
+ "300:\tlearn: 0.2511624\ttest: 0.3852988\tbest: 0.3849651 (283)\ttotal: 754ms\tremaining: 4.25s\n",
+ "400:\tlearn: 0.2321052\ttest: 0.3859294\tbest: 0.3849651 (283)\ttotal: 997ms\tremaining: 3.97s\n",
+ "500:\tlearn: 0.2175329\ttest: 0.3865094\tbest: 0.3849651 (283)\ttotal: 1.26s\tremaining: 3.77s\n",
+ "Stopped by overfitting detector (300 iterations wait)\n",
+ "\n",
+ "bestTest = 0.3849650533\n",
+ "bestIteration = 283\n",
+ "\n",
+ "Shrink model to first 284 iterations.\n",
+ "\n",
+ "===== Start working with fold 4 for Lvl_0_Pipe_1_Mod_2_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5819025\ttest: 0.5416817\tbest: 0.5416817 (0)\ttotal: 16.8ms\tremaining: 33.5s\n",
+ "100:\tlearn: 0.3282525\ttest: 0.3302178\tbest: 0.3302178 (100)\ttotal: 292ms\tremaining: 5.5s\n",
+ "200:\tlearn: 0.2935099\ttest: 0.3155985\tbest: 0.3154939 (198)\ttotal: 559ms\tremaining: 5s\n",
+ "300:\tlearn: 0.2640830\ttest: 0.3098426\tbest: 0.3098426 (300)\ttotal: 817ms\tremaining: 4.61s\n",
+ "400:\tlearn: 0.2425758\ttest: 0.3053681\tbest: 0.3053681 (400)\ttotal: 1.08s\tremaining: 4.31s\n",
+ "500:\tlearn: 0.2263103\ttest: 0.3027193\tbest: 0.3024671 (488)\ttotal: 1.35s\tremaining: 4.05s\n",
+ "600:\tlearn: 0.2119579\ttest: 0.3012696\tbest: 0.3011144 (582)\ttotal: 1.62s\tremaining: 3.78s\n",
+ "700:\tlearn: 0.1987671\ttest: 0.2997913\tbest: 0.2997913 (700)\ttotal: 1.89s\tremaining: 3.5s\n",
+ "800:\tlearn: 0.1877085\ttest: 0.2993222\tbest: 0.2991978 (796)\ttotal: 2.15s\tremaining: 3.21s\n",
+ "900:\tlearn: 0.1779556\ttest: 0.2988217\tbest: 0.2988217 (900)\ttotal: 2.41s\tremaining: 2.94s\n",
+ "1000:\tlearn: 0.1691039\ttest: 0.2991610\tbest: 0.2988179 (904)\ttotal: 2.67s\tremaining: 2.66s\n",
+ "1100:\tlearn: 0.1609237\ttest: 0.2994798\tbest: 0.2988179 (904)\ttotal: 2.92s\tremaining: 2.38s\n",
+ "1200:\tlearn: 0.1537691\ttest: 0.2996605\tbest: 0.2988179 (904)\ttotal: 3.17s\tremaining: 2.11s\n",
+ "Stopped by overfitting detector (300 iterations wait)\n",
+ "\n",
+ "bestTest = 0.2988178835\n",
+ "bestIteration = 904\n",
+ "\n",
+ "Shrink model to first 905 iterations.\n",
+ "Lvl_0_Pipe_1_Mod_2_CatBoost fitting and predicting completed\n",
+ "Optuna may run 1 secs\n",
+ "Start fitting Lvl_0_Pipe_1_Mod_3_CatBoost ...\n",
+ "\n",
+ "===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_3_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5696811\ttest: 0.5907190\tbest: 0.5907190 (0)\ttotal: 4.77ms\tremaining: 9.53s\n",
+ "100:\tlearn: 0.3328412\ttest: 0.3613108\tbest: 0.3613108 (100)\ttotal: 196ms\tremaining: 3.68s\n",
+ "200:\tlearn: 0.3027316\ttest: 0.3519878\tbest: 0.3519878 (200)\ttotal: 368ms\tremaining: 3.29s\n",
+ "300:\tlearn: 0.2794207\ttest: 0.3487270\tbest: 0.3487270 (300)\ttotal: 582ms\tremaining: 3.29s\n",
+ "400:\tlearn: 0.2634165\ttest: 0.3479933\tbest: 0.3479933 (400)\ttotal: 775ms\tremaining: 3.09s\n",
+ "500:\tlearn: 0.2504892\ttest: 0.3473465\tbest: 0.3470908 (452)\ttotal: 961ms\tremaining: 2.88s\n",
+ "600:\tlearn: 0.2399381\ttest: 0.3461447\tbest: 0.3458834 (593)\ttotal: 1.17s\tremaining: 2.72s\n",
+ "700:\tlearn: 0.2293243\ttest: 0.3446773\tbest: 0.3445460 (694)\ttotal: 1.35s\tremaining: 2.5s\n",
+ "800:\tlearn: 0.2201892\ttest: 0.3445020\tbest: 0.3441724 (728)\ttotal: 1.56s\tremaining: 2.34s\n",
+ "900:\tlearn: 0.2122752\ttest: 0.3439144\tbest: 0.3438057 (883)\ttotal: 1.76s\tremaining: 2.15s\n",
+ "1000:\tlearn: 0.2050941\ttest: 0.3442635\tbest: 0.3438057 (883)\ttotal: 1.95s\tremaining: 1.95s\n",
+ "1100:\tlearn: 0.1984862\ttest: 0.3437866\tbest: 0.3437749 (1051)\ttotal: 2.14s\tremaining: 1.75s\n",
+ "1200:\tlearn: 0.1924283\ttest: 0.3433972\tbest: 0.3433483 (1198)\ttotal: 2.36s\tremaining: 1.57s\n",
+ "1300:\tlearn: 0.1863886\ttest: 0.3431625\tbest: 0.3430420 (1288)\ttotal: 2.56s\tremaining: 1.38s\n",
+ "1400:\tlearn: 0.1807727\ttest: 0.3427477\tbest: 0.3427039 (1394)\ttotal: 2.74s\tremaining: 1.17s\n",
+ "1500:\tlearn: 0.1756772\ttest: 0.3423999\tbest: 0.3423999 (1500)\ttotal: 2.95s\tremaining: 979ms\n",
+ "1600:\tlearn: 0.1709954\ttest: 0.3426386\tbest: 0.3421255 (1541)\ttotal: 3.12s\tremaining: 779ms\n",
+ "1700:\tlearn: 0.1662324\ttest: 0.3423893\tbest: 0.3421255 (1541)\ttotal: 3.35s\tremaining: 589ms\n",
+ "1800:\tlearn: 0.1620149\ttest: 0.3419531\tbest: 0.3419456 (1796)\ttotal: 3.52s\tremaining: 390ms\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1900:\tlearn: 0.1579787\ttest: 0.3420298\tbest: 0.3418931 (1815)\ttotal: 3.71s\tremaining: 193ms\n",
+ "1999:\tlearn: 0.1538067\ttest: 0.3423697\tbest: 0.3418931 (1815)\ttotal: 3.89s\tremaining: 0us\n",
+ "\n",
+ "bestTest = 0.3418931473\n",
+ "bestIteration = 1815\n",
+ "\n",
+ "Shrink model to first 1816 iterations.\n",
+ "Lvl_0_Pipe_1_Mod_3_CatBoost fitting and predicting completed\n",
+ "Start fitting Lvl_0_Pipe_1_Mod_3_CatBoost ...\n",
+ "\n",
+ "===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_3_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5749238\ttest: 0.5962756\tbest: 0.5962756 (0)\ttotal: 3.23ms\tremaining: 9.69s\n",
+ "100:\tlearn: 0.3538624\ttest: 0.3750769\tbest: 0.3750769 (100)\ttotal: 205ms\tremaining: 5.9s\n",
+ "200:\tlearn: 0.3247688\ttest: 0.3579003\tbest: 0.3579003 (200)\ttotal: 397ms\tremaining: 5.53s\n",
+ "300:\tlearn: 0.3067646\ttest: 0.3529264\tbest: 0.3529264 (300)\ttotal: 596ms\tremaining: 5.34s\n",
+ "400:\tlearn: 0.2906869\ttest: 0.3494385\tbest: 0.3494385 (400)\ttotal: 773ms\tremaining: 5.01s\n",
+ "500:\tlearn: 0.2787513\ttest: 0.3480935\tbest: 0.3480935 (500)\ttotal: 940ms\tremaining: 4.69s\n",
+ "600:\tlearn: 0.2682686\ttest: 0.3470561\tbest: 0.3470561 (600)\ttotal: 1.14s\tremaining: 4.56s\n",
+ "700:\tlearn: 0.2590796\ttest: 0.3463205\tbest: 0.3463073 (694)\ttotal: 1.33s\tremaining: 4.36s\n",
+ "800:\tlearn: 0.2514009\ttest: 0.3451208\tbest: 0.3451208 (800)\ttotal: 1.51s\tremaining: 4.16s\n",
+ "900:\tlearn: 0.2444905\ttest: 0.3440930\tbest: 0.3440841 (895)\ttotal: 1.71s\tremaining: 3.98s\n",
+ "1000:\tlearn: 0.2378982\ttest: 0.3435150\tbest: 0.3434602 (992)\ttotal: 1.9s\tremaining: 3.79s\n",
+ "1100:\tlearn: 0.2322087\ttest: 0.3431667\tbest: 0.3431153 (1075)\ttotal: 2.1s\tremaining: 3.63s\n",
+ "1200:\tlearn: 0.2265210\ttest: 0.3425909\tbest: 0.3425467 (1188)\ttotal: 2.29s\tremaining: 3.42s\n",
+ "1300:\tlearn: 0.2213036\ttest: 0.3420838\tbest: 0.3420157 (1288)\ttotal: 2.46s\tremaining: 3.21s\n",
+ "1400:\tlearn: 0.2163428\ttest: 0.3418436\tbest: 0.3417805 (1394)\ttotal: 2.67s\tremaining: 3.05s\n",
+ "1500:\tlearn: 0.2115906\ttest: 0.3414778\tbest: 0.3414778 (1500)\ttotal: 2.88s\tremaining: 2.87s\n",
+ "1600:\tlearn: 0.2070074\ttest: 0.3410625\tbest: 0.3410625 (1600)\ttotal: 3.09s\tremaining: 2.7s\n",
+ "1700:\tlearn: 0.2026507\ttest: 0.3405289\tbest: 0.3405001 (1679)\ttotal: 3.27s\tremaining: 2.5s\n",
+ "1800:\tlearn: 0.1988309\ttest: 0.3401400\tbest: 0.3401251 (1798)\ttotal: 3.48s\tremaining: 2.31s\n",
+ "1900:\tlearn: 0.1952427\ttest: 0.3398704\tbest: 0.3396758 (1880)\ttotal: 3.68s\tremaining: 2.13s\n",
+ "2000:\tlearn: 0.1915851\ttest: 0.3396242\tbest: 0.3396191 (1999)\ttotal: 3.88s\tremaining: 1.94s\n",
+ "2100:\tlearn: 0.1882232\ttest: 0.3395831\tbest: 0.3395527 (2074)\ttotal: 4.07s\tremaining: 1.74s\n",
+ "2200:\tlearn: 0.1848821\ttest: 0.3393809\tbest: 0.3393421 (2165)\ttotal: 4.31s\tremaining: 1.56s\n",
+ "2300:\tlearn: 0.1817400\ttest: 0.3391877\tbest: 0.3391877 (2300)\ttotal: 4.5s\tremaining: 1.37s\n",
+ "2400:\tlearn: 0.1787974\ttest: 0.3391625\tbest: 0.3391625 (2400)\ttotal: 4.69s\tremaining: 1.17s\n",
+ "2500:\tlearn: 0.1756702\ttest: 0.3388642\tbest: 0.3388608 (2499)\ttotal: 4.88s\tremaining: 973ms\n",
+ "2600:\tlearn: 0.1728922\ttest: 0.3389253\tbest: 0.3388231 (2508)\ttotal: 5.09s\tremaining: 781ms\n",
+ "Stopped by overfitting detector (100 iterations wait)\n",
+ "\n",
+ "bestTest = 0.3388231235\n",
+ "bestIteration = 2508\n",
+ "\n",
+ "Shrink model to first 2509 iterations.\n",
+ "\n",
+ "===== Start working with fold 1 for Lvl_0_Pipe_1_Mod_3_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5830078\ttest: 0.5633221\tbest: 0.5633221 (0)\ttotal: 8.6ms\tremaining: 25.8s\n",
+ "100:\tlearn: 0.3589969\ttest: 0.3533597\tbest: 0.3533597 (100)\ttotal: 196ms\tremaining: 5.61s\n",
+ "200:\tlearn: 0.3307810\ttest: 0.3362463\tbest: 0.3362463 (200)\ttotal: 377ms\tremaining: 5.25s\n",
+ "300:\tlearn: 0.3120042\ttest: 0.3301034\tbest: 0.3301034 (300)\ttotal: 551ms\tremaining: 4.94s\n",
+ "400:\tlearn: 0.2950774\ttest: 0.3273159\tbest: 0.3273074 (398)\ttotal: 732ms\tremaining: 4.74s\n",
+ "500:\tlearn: 0.2822802\ttest: 0.3260960\tbest: 0.3260960 (500)\ttotal: 908ms\tremaining: 4.53s\n",
+ "600:\tlearn: 0.2718319\ttest: 0.3244773\tbest: 0.3244541 (597)\ttotal: 1.11s\tremaining: 4.42s\n",
+ "700:\tlearn: 0.2631367\ttest: 0.3236967\tbest: 0.3236967 (700)\ttotal: 1.29s\tremaining: 4.23s\n",
+ "800:\tlearn: 0.2555498\ttest: 0.3241221\tbest: 0.3236444 (718)\ttotal: 1.48s\tremaining: 4.05s\n",
+ "Stopped by overfitting detector (100 iterations wait)\n",
+ "\n",
+ "bestTest = 0.3236443836\n",
+ "bestIteration = 718\n",
+ "\n",
+ "Shrink model to first 719 iterations.\n",
+ "\n",
+ "===== Start working with fold 2 for Lvl_0_Pipe_1_Mod_3_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5784948\ttest: 0.5817224\tbest: 0.5817224 (0)\ttotal: 15.4ms\tremaining: 46.3s\n",
+ "100:\tlearn: 0.3581544\ttest: 0.3571058\tbest: 0.3571058 (100)\ttotal: 211ms\tremaining: 6.05s\n",
+ "200:\tlearn: 0.3282811\ttest: 0.3383338\tbest: 0.3383338 (200)\ttotal: 395ms\tremaining: 5.5s\n",
+ "300:\tlearn: 0.3097713\ttest: 0.3322135\tbest: 0.3320776 (297)\ttotal: 573ms\tremaining: 5.14s\n",
+ "400:\tlearn: 0.2938576\ttest: 0.3299511\tbest: 0.3299511 (400)\ttotal: 769ms\tremaining: 4.98s\n",
+ "500:\tlearn: 0.2805774\ttest: 0.3287883\tbest: 0.3287883 (500)\ttotal: 946ms\tremaining: 4.72s\n",
+ "600:\tlearn: 0.2703739\ttest: 0.3269768\tbest: 0.3269716 (599)\ttotal: 1.13s\tremaining: 4.5s\n",
+ "700:\tlearn: 0.2616548\ttest: 0.3258652\tbest: 0.3257247 (696)\ttotal: 1.31s\tremaining: 4.29s\n",
+ "800:\tlearn: 0.2540794\ttest: 0.3248115\tbest: 0.3246713 (780)\ttotal: 1.49s\tremaining: 4.09s\n",
+ "900:\tlearn: 0.2470055\ttest: 0.3240086\tbest: 0.3239652 (896)\ttotal: 1.68s\tremaining: 3.9s\n",
+ "1000:\tlearn: 0.2407159\ttest: 0.3232904\tbest: 0.3232768 (993)\ttotal: 1.87s\tremaining: 3.74s\n",
+ "1100:\tlearn: 0.2349685\ttest: 0.3226389\tbest: 0.3224739 (1072)\ttotal: 2.08s\tremaining: 3.58s\n",
+ "1200:\tlearn: 0.2291905\ttest: 0.3219706\tbest: 0.3219349 (1195)\ttotal: 2.28s\tremaining: 3.42s\n",
+ "1300:\tlearn: 0.2242843\ttest: 0.3216330\tbest: 0.3216282 (1299)\ttotal: 2.48s\tremaining: 3.24s\n",
+ "1400:\tlearn: 0.2193816\ttest: 0.3213742\tbest: 0.3211652 (1345)\ttotal: 2.71s\tremaining: 3.09s\n",
+ "1500:\tlearn: 0.2150231\ttest: 0.3210364\tbest: 0.3209320 (1480)\ttotal: 2.9s\tremaining: 2.9s\n",
+ "1600:\tlearn: 0.2108812\ttest: 0.3206202\tbest: 0.3206202 (1600)\ttotal: 3.08s\tremaining: 2.69s\n",
+ "1700:\tlearn: 0.2066767\ttest: 0.3199278\tbest: 0.3199118 (1699)\ttotal: 3.27s\tremaining: 2.5s\n",
+ "1800:\tlearn: 0.2027083\ttest: 0.3198993\tbest: 0.3198815 (1726)\ttotal: 3.48s\tremaining: 2.31s\n",
+ "Stopped by overfitting detector (100 iterations wait)\n",
+ "\n",
+ "bestTest = 0.3198814937\n",
+ "bestIteration = 1726\n",
+ "\n",
+ "Shrink model to first 1727 iterations.\n",
+ "\n",
+ "===== Start working with fold 3 for Lvl_0_Pipe_1_Mod_3_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5710290\ttest: 0.6096694\tbest: 0.6096694 (0)\ttotal: 2.86ms\tremaining: 8.58s\n",
+ "100:\tlearn: 0.3444958\ttest: 0.4152803\tbest: 0.4152803 (100)\ttotal: 224ms\tremaining: 6.42s\n",
+ "200:\tlearn: 0.3159040\ttest: 0.3996337\tbest: 0.3996337 (200)\ttotal: 420ms\tremaining: 5.85s\n",
+ "300:\tlearn: 0.2999551\ttest: 0.3924528\tbest: 0.3924528 (300)\ttotal: 601ms\tremaining: 5.38s\n",
+ "400:\tlearn: 0.2851919\ttest: 0.3881614\tbest: 0.3881614 (400)\ttotal: 768ms\tremaining: 4.98s\n",
+ "500:\tlearn: 0.2728422\ttest: 0.3868374\tbest: 0.3867807 (490)\ttotal: 936ms\tremaining: 4.67s\n",
+ "600:\tlearn: 0.2631856\ttest: 0.3861455\tbest: 0.3861455 (600)\ttotal: 1.1s\tremaining: 4.4s\n",
+ "700:\tlearn: 0.2550000\ttest: 0.3857666\tbest: 0.3856754 (668)\ttotal: 1.29s\tremaining: 4.25s\n",
+ "800:\tlearn: 0.2474017\ttest: 0.3856947\tbest: 0.3856013 (792)\ttotal: 1.49s\tremaining: 4.09s\n",
+ "900:\tlearn: 0.2405549\ttest: 0.3855558\tbest: 0.3855096 (888)\ttotal: 1.68s\tremaining: 3.9s\n",
+ "Stopped by overfitting detector (100 iterations wait)\n",
+ "\n",
+ "bestTest = 0.3855095628\n",
+ "bestIteration = 888\n",
+ "\n",
+ "Shrink model to first 889 iterations.\n",
+ "\n",
+ "===== Start working with fold 4 for Lvl_0_Pipe_1_Mod_3_CatBoost =====\n",
+ "\n",
+ "0:\tlearn: 0.5877937\ttest: 0.5433457\tbest: 0.5433457 (0)\ttotal: 2.46ms\tremaining: 7.39s\n",
+ "100:\tlearn: 0.3635841\ttest: 0.3502432\tbest: 0.3502432 (100)\ttotal: 205ms\tremaining: 5.9s\n",
+ "200:\tlearn: 0.3350821\ttest: 0.3277620\tbest: 0.3277620 (200)\ttotal: 394ms\tremaining: 5.48s\n",
+ "300:\tlearn: 0.3163325\ttest: 0.3186592\tbest: 0.3186268 (298)\ttotal: 581ms\tremaining: 5.21s\n",
+ "400:\tlearn: 0.2991828\ttest: 0.3114547\tbest: 0.3114547 (400)\ttotal: 765ms\tremaining: 4.96s\n",
+ "500:\tlearn: 0.2853312\ttest: 0.3067744\tbest: 0.3067744 (500)\ttotal: 961ms\tremaining: 4.79s\n",
+ "600:\tlearn: 0.2746053\ttest: 0.3043128\tbest: 0.3043128 (600)\ttotal: 1.19s\tremaining: 4.75s\n",
+ "700:\tlearn: 0.2651369\ttest: 0.3024191\tbest: 0.3024121 (699)\ttotal: 1.4s\tremaining: 4.6s\n",
+ "800:\tlearn: 0.2573767\ttest: 0.3007672\tbest: 0.3007672 (800)\ttotal: 1.61s\tremaining: 4.43s\n",
+ "900:\tlearn: 0.2503800\ttest: 0.2996296\tbest: 0.2996044 (890)\ttotal: 1.85s\tremaining: 4.3s\n",
+ "1000:\tlearn: 0.2433897\ttest: 0.2990408\tbest: 0.2990408 (1000)\ttotal: 2.05s\tremaining: 4.1s\n",
+ "1100:\tlearn: 0.2373764\ttest: 0.2977549\tbest: 0.2977112 (1099)\ttotal: 2.24s\tremaining: 3.87s\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1200:\tlearn: 0.2311849\ttest: 0.2972003\tbest: 0.2971525 (1196)\ttotal: 2.42s\tremaining: 3.63s\n",
+ "1300:\tlearn: 0.2256356\ttest: 0.2963918\tbest: 0.2963918 (1300)\ttotal: 2.62s\tremaining: 3.42s\n",
+ "1400:\tlearn: 0.2204745\ttest: 0.2958023\tbest: 0.2958023 (1400)\ttotal: 2.81s\tremaining: 3.2s\n",
+ "1500:\tlearn: 0.2158370\ttest: 0.2953656\tbest: 0.2953011 (1496)\ttotal: 2.98s\tremaining: 2.97s\n",
+ "1600:\tlearn: 0.2115284\ttest: 0.2947484\tbest: 0.2947203 (1567)\ttotal: 3.14s\tremaining: 2.75s\n",
+ "1700:\tlearn: 0.2073122\ttest: 0.2944945\tbest: 0.2944290 (1697)\ttotal: 3.37s\tremaining: 2.57s\n",
+ "Stopped by overfitting detector (100 iterations wait)\n",
+ "\n",
+ "bestTest = 0.2944289789\n",
+ "bestIteration = 1697\n",
+ "\n",
+ "Shrink model to first 1698 iterations.\n",
+ "Lvl_0_Pipe_1_Mod_3_CatBoost fitting and predicting completed\n",
+ "Time left 37.47469425201416\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Time limit exceeded in one of the tasks. AutoML will blend level 1 models.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Blending: Optimization starts with equal weights and score -1.7954214419609544\n",
+ "Blending, iter 0: score = -1.4724501895733335, weights = [0. 0.86662203 0.05572809 0.07764989]\n",
+ "Blending, iter 1: score = -1.4661120519816215, weights = [0. 0.7553367 0.10243508 0.14222825]\n",
+ "Blending, iter 2: score = -1.4661120519816215, weights = [0. 0.7553367 0.10243508 0.14222825]\n",
+ "No score update. Terminated\n",
+ "\n",
+ "Automl preset training completed in 66.05 seconds.\n"
+ ]
+ }
+ ],
+ "source": [
+ "automl = TabularUtilizedAutoML(task = task,\n",
+ " timeout = TIMEOUT,\n",
+ " cpu_limit = N_THREADS,\n",
+ " reader_params = {'n_jobs': N_THREADS,\n",
+ " 'cv': N_FOLDS,\n",
+ " 'random_state': RANDOM_STATE})\n",
+ "\n",
+ "oof_pred = automl.fit_predict(train_data, roles = roles)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#deviation_metric(np.array(train_df['per_square_meter_price']), oof_pred.data[:, 0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "output['per_square_meter_price'] = output['per_square_meter_price'] * 0.9\n",
+ "\n",
+ "output.loc[output['per_square_meter_price'] >= 200000, 'per_square_meter_price'] \\\n",
+ " = output.loc[output['per_square_meter_price'] >= 200000, 'per_square_meter_price'] * 0.9"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {
+ "id": "e0d6dfe7"
+ },
+ "outputs": [],
+ "source": [
+ "output = pd.DataFrame({'id': test_data['id'],\n",
+ " 'per_square_meter_price': automl.predict(test_data).data[:, 0]})\n",
+ "output['per_square_meter_price'] = output['per_square_meter_price'] * 0.9\n",
+ "\n",
+ "output.loc[output['per_square_meter_price'] >= 200000, 'per_square_meter_price'] \\\n",
+ " = output.loc[output['per_square_meter_price'] >= 200000, 'per_square_meter_price'] * 0.9\n",
+ "\n",
+ "output.to_csv('raifHack_ki7.csv', index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(2974, 2)"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "output.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "housePrice_AutoML.ipynb",
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Yapiki_publ/Read me.txt b/Yapiki_publ/Read me.txt
new file mode 100644
index 0000000..4f3107e
--- /dev/null
+++ b/Yapiki_publ/Read me.txt
@@ -0,0 +1,39 @@
+Решение основано на алгоритме LightAutoML с дополнительным обогащением данных из открытых источников (Росстат).
+
+
+- Проведен EDA с помощью pandas-profiling и сравнения корреляций метрик, рассчитанных разными методами, в зависимости от фичей.
+  Отчет pandas-profiling: https://drive.google.com/file/d/1xQl3LvpX9J0G6gJoaBjzRcBFKZi6QZXz/view?usp=sharing
+- Введены дополнительные метрики: премиальность этажей и типирование города
+- Использованы дополнительные статистические данные (среднемесячная заработная плата и численность населения по регионам; источник: Росстат)
+- Учтен дисконт, закладываемый при ручном расчете (использовали вариант из публичного решения, ссылка на github: https://github.com/BatyaZhizni/Raifhack-DS)
+
+
+Для обогащения данных использованы дополнительные датасеты:
+
+
+1. zarplaty.xlsx - Среднемесячная номинальная начисленная заработная плата работников в целом по экономике по субъектам Российской Федерации за 2000-2020 гг. Источник: Росстат, https://rosstat.gov.ru/labor_market_employment_salaries. Ссылка: https://docs.google.com/spreadsheets/d/1S1ORmz2W4QTG-d8odUOqT6Czu21NF2Vw/edit?usp=sharing&ouid=108685579276627434305&rtpof=true&sd=true
+2. city_population.rar - Численность населения по населенным пунктам России. Источник: Росстат, https://rosstat.gov.ru/folder/12781. Ссылка: https://drive.google.com/file/d/19hJI_zlTZboxSh_JwPrWt8vYx9lkNlM0/view?usp=sharing
+
+
+Для обучения использовали модель LightAutoML*
+
+
+1. LightAutoML project from Sberbank AI Lab AutoML group is the framework for automatic classification and regression model creation.
+
+
+Authors: Alexander Ryzhkov, Anton Vakhrushev, Dmitry Simakov, Vasilii Bunakov, Rinchin Damdinov, Pavel Shvets, Alexander Kirilin
+
+
+2. Библиотеки
+!pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
+import pandas as pd
+import numpy as np
+import typing
+import torch
+import matplotlib.pyplot as plt
+
+
+!pip install -U lightautoml
+from sklearn.model_selection import train_test_split
+from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML
+from lightautoml.tasks import Task
\ No newline at end of file
diff --git a/Yapiki_publ/city_population.rar b/Yapiki_publ/city_population.rar
new file mode 100644
index 0000000000000000000000000000000000000000..4cad56640f48e616e7e2e15fc706d253f89c5886
GIT binary patch
literal 5238874
zcmV((K;XYpVR9iF2LS*^$S4a90R;&F2mt}`-@k%@06J@sPB#Jr3!>k@f`9-7(7V!&
zfB+!O+APw5LjV(FX>@sCaBpyRY+-b1Z*DGQb9M>?0?o=5&OWl&0gLENbwGARL_;)1
zKvz`v#7xlT2bN-B2aes3qH{v=Ejd)2%1pkL%d7C`nIKUlRvStH9*ZR;fD$qsfBmozquKUy``*vb+3NOq
zIeq_UpV{8!>wB9H`1FI#@ACBdf6#r0v#}%d<@hb>eBRHLuYdx_(>Dvc4uE?61Hz*~S$hwte*^0fYvJ8phtxM4()M=-
z=nJZwx%UG6JHh@9+nw$AhqOAw=`Nqq{U6?tgo?L_92l%Jr
zVUP$^KGHvhmo<@K~|ZCSupg^86bU8~_`K@?ZlFUmB_lX}KWG
zK=nf_XbZ?&P_{S#aqnD5LR=rR@SrjPSA=iJeWoXjuP4hpSTSh4oN&$As$O
zoG?nrbW1W}rjTPmuuSkBFa=AK9Wzmlu87%1Y)633py|@rS2AkdY1P>*(D;w@-eB!Z
z@0Z3tlsnJTSHywVUD)0~a)Z?6{vrO*N|whP>6vBXSnqU(C&q2KsUr=M)~70iwF?bWY_~D
z4?3YK2wYQ;bO2xsMvb6E7z|q#HlbkZQQD{Ax1+=Q%S1ix?1?4a^4HVjyHS9}yUOz9
zgD=E8qY?8%Mq%jU>w_Ws4@NNv1RjBD)9RP;`Oh->q6!isPn2^I0$lT0X0#_b%6$)>
z6jy&JtZEPjwCKeaV*oH+h;wVecmfoUOFC3|46+Z;-gJDyz0sV7!P?l1F1_RS*3_8q
zkU_z~HgJ0|L&iuBtJiif0?0cFh`!OwyjmTrm#lVtzHNYZr;M~N?@S^2N6`95=Uvey
zo{1gezyAF-7--*^>y&1b7F!*G3t)P!EUN+uFW{DrFjdi!Dw+CP}Y1t8GNs02=>^56tUWH&KMB&4jKop)XzQrrRitFCI)
zSOEZ^;JC7l$6F9{{{VG6ZJu=FT}l!9WR)RSY2n3`}g7aVYU3vLjJ2E!#emE5I^My3xzl
zeZAzO5JD;(F|rrfH{@YxX3!AD0rW%F16tf!uv{D>S7roN1wps!wYlz3FFv~(+b#Wu
zTj2ly5-%+W6P@HR1W|;&By}fboCOZIE(8an8Ru}#pW(e5lQI3CAw+_Gk-0D=4Z$By
zdMQwxrD7i>xR4x=@r8KAi&TsS9+ogpp#x<65`1VGl(hhkszWFTgf9OdmBfmL|EW5HhAKe}rV6mMGkwAQ
zpawnub4CxOLcbu0??2XsPnV_h*$1EtIgAk02D}LAnM1gzt=RQolE||FK`6gqNV$YS
zaaAvom`ttzLH<9S4ac`lB7o?TzAU`2rbuy=7)|`9S)ro}4MSZ(>LO0ToHZqi_F!zC
zEebt`sg=P46hmV12?hh9R(=*yGzh4GADB@x^7aR_(V_2heUdm~ps$&T6Ov_@u4rdP
z4KR{Lohd#ABa^@^4_}BHen`HkA(k#_I|~yY&k0=~^qudjsBcR$-h(8km*FI;J>v;V
z^K@Tt&_746y^Bn3yJQUp%@}ExjfonvJy*de!i|aV*m^A0VrEsrauCb<7KrVWgfP<8p)KS?il5xThM!WW4gg(fAP2D%Zmv5jB9|6Vc1q
z=9!!c;|VKLNyTi)d9jzIsA)%ga@>X_oiGcaA6{T<$3U+gasxVLdZ%@e!G0phUe@~l
z+`vh`UWaI<*^MPxq%Zeo*K4`*k%FG}6HRV#Kz+5=
zJC3rgmvmryki3UfTJACCsIxJ$*k1FCT`*TL%A!nagwat}alXsHXGy7?(1_3;SaXA}
z8nC9gk1Nc{FnsVW8Au{xRkjV5H!F@)iCgQ{3SPqN^PZ;|KTrujzsvESR``8NxheP{
zND@BtYObCnnyxuolw-dRtX7pWC1svlh>P?+y9c=E+WZPr&ft#Vvn%13#0HO%wM8E3
zr9u=m#ydbFsl=f4dat;`0kVVAk7-znNOAsCUV-hh4TSB2{9qfezSp?$Mbt4I^AoZc
zz!?H}7r#O#4*8r!Mqy-U%fQj^+?_&EJ~L1gw%iJpDUPRWk@eXxhlYO(R~o{dx@(oEvz~E_=*Id%8=3*VJ2s;58YK280Un49?`>@>v0xSYAStw#U
z=bw%14+r8Vj1OE#VdA)l>_L9%-smF*X_8rxCdua{t{eVC(j%-vFZe1=eRULe$MzDa
zA4z2V7K(3);vbts(fRkw^~7ZUpJ$6gQm_>Wetsr>BPfy32z`MtqGp!6jK9pee|wlq
z_m@-Y9qlPf=&+<1flykt5c2vuUd}`I9cq{Y1%KjZx~gggjMx@7Qwd~^hDdBFN`uvd
z742`un&EjJp0bnzS&s4hKP_6ALCx=$thTBjj0!
zuFShyv9e{z{Ig8NP%2n3e$e*)-i&^X&ld1WU4LhWc=Fi-*J+p^phUP{_*u6*)zL_f9|c5EqCK$GbZ=rn
zd2zc#@v;C>Bsqso?r9_2Z33HUDGu=EX8iOX>%Kt`FHlAO&;w&e#o2i;>zwOqD5#8%57+7+KIGWQuRjgs3e0B%b>
z9aV{M!+EVquyRiTHd3MF6h(Z}a|Iu#ptO*&3rqo>JU%>mB`TU}w&cO4k@-7`9&SQO
z%!s1^1L!I^r>lb27y$nWFOta
zFa;FB*-^4o2kd`H>j)@Qcj#l!U5(ZS1bLl4Uyni~_%z640-N4MEA#{Nd67LlL#v2u
zkX;tR3H}xaKL=!vp_K`lHh0p)o~6wIu<30qBFn9Z=o6rB^?33rl>)LH331c@d`qFc=rMDM8B^L{3?=#ei1Ch;&KN
zN&}U|MUWl-EOAe`HN8-C!w4&-8&RoPg4N6STGV@MzL7;_+zs;%J
z1&Bf{Pw#&OQ%DztHsoG3*C}qAzigoUvui+Uzbr?Vz!712>*@dfw9$T|&vnB{CVVc8N_5^5P3xgJzjD3qUtO>Ba-
z>H%ly^IaiCySrbvj3NcVlZ>~M{(v5T)V}5>36G`p5>+8s+*&-S$@UBL#KuxOKBaC5
z_2R^_00t5wD|tgWBq#VUT-j_{6Y<`lLukC{^sDLJ_H$(B0>;=Gw22^63~yN;o5-O&
z$6%JqOwcXHN;9{D|8ON~nOrsJ8v|08P%qgS=}RUk{4v}e0*sj(huR1~V?zVuFET&_
zf)D}d^9RWX>>V~KC^w0;*mg)tBrx9Dt18RgbwXMSwtP@&ub
zfSr;aZ-{h_B#+y&RJ%p?lOqJMd*jexl8tB1NLN5l&mniGG{ZvBSpn(`8B(}DYrA
zXJoGHdgMMv8GIHNv6*5ASv1=0g7HC+5SyY_+mBfs1*6727zR}UaaJuTb3xBuN6>gB
zB>F!F&V{5Ry`4NC>*MSIOgE=0=V!Pn=eizH0A4R
zh2+uM-sHWZxwYf`psATI28e_8gDG9gN7WJe)|$gd4PtRoH3%E%_w15F)0yAP&>*Z+
zX4(F!up3T&guW0{o=W1b0E1*WKa)}R;jWXoMWSB2a)PS{{Jd#GmI+-kxSWo1~gvVj4CwUHiNo+D&}h;)}_y_rZJaX%(-8Md0{K+Jwgj
zIsP_(T+7z@g4ICv*B+ZD42$_*I1?=CX-Vm!uY2CX`jR3YdFKgdQ9n3mbLB6YyVncI
zG1rY21%WpWB~+u;ZaOn>1hPf|o7mvQ3h-U%I2rM0$rN{vH)2T@I@rX2*oO@#Ba#I=+qWZvx(z)Z*OdK(wF)x;x
znB2BxG24Q|0p&x1G1oDjeNOXDpmSzcS-Vk7r&SNXW*y0DA$VvhINmwi)
z;#j0X0C*vm!mLmj!&1)Y+BS;4%9VQS227`aaWYlShUsLKY%?Sn0i+2l0(8n0q(YG#
z6IOy~ae7}`_A?R5IHnV8$2{%+g&BLIst3tWY4OvukeW2DHY|kk_eM=i5Hd^-K`nCDiYqp{KyAipj{ayBx%fN;zc*
zs|2%R1KWxn@gabf-VIE2fG8_eFD^sC=J%=QN7RX2b#|X59mV5Yak0R8y}sqtTOzIzU9t#O_z%VYlcoLgq
zPA)6&jH+2HAS^q^A-&1DI{QIGTZ_rGP#u5ue}HC(-O{N
z+jeL{UD5|?13if*bzPuMoD)&kZ2CzLELPw$S%_~&g1WKulo~)pjT6QmY)3nd$;iX8
zM2G+v<~9c%t8&2XZx?!f#&TD)(vb-Rz?h4q9gXIb$c4W_%Ef8LpxHbEu!59|FsD5j
z0e=8G9>&Su8d0ld`cuuO>(O@i*(Hj3OujjN^02*$44Dc>v__h>)m&${L|we6$;%
zxlylrFV{AV4rBlRR4NHuBSolEk6@UViG$Tr?@K$H#VPB-akBIcraTM|FS=~!JI73`
zHgXG(dHazg{yQv9eKd^)*kt3)A@?K0l4i_;hjCn;gP&mQ9u3=|Pa^5dT-Y=v7bjTO
zSPpg`Xch$>))_{j9x|elxRsQ%h-h0Ji(rjdYu?Aag{!XVDRAEyp$V{t6lIaW-DKFv
z$Leluy8$e_0EwCD#&D@97pB-gp;qOLleQ&+&E`CV@Ibao!~DbabiK+o1W}L79a#KV
zc47!y=>|kRXWhcK1IKZ%V&nz{oC+B8Z^v}WI~AMx0^fwOYodZ(H?b20(u!~!giBbe
zp$FbW4ErUmcY8z(FKFzGag^}cV8tszDG2A?oW)F*p7f(oV@E_l@Vk5l&K;|ynOzDQ
zljqWnj%)R^I<|XDAm|1d@)HWzJRgH48LYO<1PD$Rja(8jVDpLuu>R^~>XhCrP8Zv-
z9ZvmMoMAC4;YC6!%YeygP`ObUTl_BzOq>^4U~H4Ne=zhtk7umbpna1v!?C>$yViyl
zS|PyuEVW!u&mpOS(~XdI%qJCp3|3x0^@l-z*&J>-MLx&(Hkjy~JXl-!sB75&70}JX
z^&7z3>vuwKgTN;$_WU&px&yf#_zKC=BZb(^Pd>DCD_C9^+Cu`@i)YZz!u2pkyZ!3<#Nm=otC2H?*M=;GQ`U
zw*1?($Y9Z67>BM}13gnx2@ngs#(9~2dacz>sRU498Bn6>hDqE5s}!{u4Uh+95+o|a
zUy68Tt)ra
zFwI44TW#qMG80UMD
z4@o&jCjjFht6jmaxcQ{JZ>s8kqa=TMVJoR~zts_2dd9;@%Fg~l1Q_Q2W9SqUgvf;m-V$b^a_#mK
zj^{413I<`XbEF1hk1hEdH##Yl^p*|U7YKyf%_bx%3rtpnFg}!>2mx4;UMG-J^I5<#
z&-}xlZTtaWLYs@&=Ru=dY{}S7gW$Tj%Q#D65+88tlBMjnY9J@EbS7C9T!DSg_6(l=jjus7@t%y{1&D>b6&Lhcl7x!TLhJtYP
zmh(|v5aI}aMG@rD3HW^Q;)3^*_2%5bTZ~heKr28ASp=-OXXdIYhuO2wUxRmn(j~Xc
z%pBmiO|p+Y#$mjiF1}&y5ba$HL_mpfTX`CVub1>B>}MMcAm4GLcW9rIeV)AwZ`+ME
z@NFSoQc-4vQ*zcw)f2lMY8LCFntx2l7SMzbyUWH
z6PM8?F9~b|e28M&3ul6HdF)fs-2g@4kHG1k6*@x3+2TYaW)#8G=95e{3u@=vL)}KVqtzu7WkLM1O%~z7
zv`=XC`FYKZ`#m
zlItLkR}^yQkM^0d{vJSc)?=Y@iK}~|a-9~hSk9lm~TPkqVC+emXsq1ITVOPal=3$5Wm$76GM7-lg^A>bIc!^A;B
zRy7KttiZO9N3|^Q8aYy|Mb4mAL<}Iim|623oXu;
zl&Pg!KZ3?dY3L+T+H(*H^To+%Z|-LckDMUsB=xqmv)GS9319}y1eaJg*Zk}_KG&j4
z?b863pF>mK7763nOe_NReBxv)DB0#2MnyV<4VE{pTVRf5IgxU?$i3hZ5!EYw);K#r
z`%hO(8z$YuqfvL05Lo%s;{#+#*KBOJBdLXA}A0eRG
z--($t^kxL^VW_Y(A|b?loS_36(XdqALK!N&^=Lb1{bwG)v&j&eOuQz~e(EUi!Nqy5
zlS+b;vd27&AnOx(+S7*$1gbJ|qIPH9$4>neZEAtTKSJQxTW~41ZhCif^QL5US4TsfPr<}Wn9J-
z=zBtp;U{QdxMJFC>ChVfN9p|M{gZrUv$~G}rm2=aRAn-O(fj0R4qv
zzBDcJzNVe
zdBlH5Pn|&&0|J`r5-01;ZV0w{CjtpdeA;`#mkEY;2px3i8(ttj;G
zq8q$YYCLvvl|ZVPNDP|gysEN`K6&GtlB$HReD@q2CS1}`+x0YK#X?l@NM1?fpnQbs
zEIn5#CJvR7BgZ`z5cA-S#Jh!q9C=9kj;7_D*XJXOO{&N~$MP0|qaA#wV>yQ4rqxh;
zBj;&7gXW(U;AnaP$|k}R6cLSA{<4p*=VIaY^&k{Z30c6>n?sg|NZH4|@^{T(BRs+heQ
zS%Tl}D$L>~t~Kbs*ztI*$gDH-=u-kJg^mJ55(8J!2e1X{S^=B6B!NX`1J$i|3UT&=
zC_AQn2QmkwQ42pVXPFv{5amUw>o_F;Mu!w(HgU+XVZ6zLJBbtV65({d
zW%bmO@}g)DHu&(05~_%AUTZ*$s+T1^(&3^ao-#64=g2QyqigFt$8@w1&1e-5d?K0L
z*xs?F)O}fP+m2R~vL!Ae9pu{<$;W}!4Rnkoes3%b^rl1aobB@XZ#{Kgtw8{QO;nmu
zA~nWoCWQiC1@KWJ97;ZMvUUu^o{QQG;iK$VK3AaRV4W9gHy-6_Z@Q+#(a_%C_
zh)Jr|3BOwaE|a=57M65T7$7jPF2UQf`+H^eXk)vV(pl?D-|7)a84EMXRx*#}t2u&R
z#fHc-G~q4?7Da?#j=2JUy`VGd6J_^Iy5bIbse=U?y+JKBfXTo++)6ixs;4J2MP-Ui
zD~Ph@FCxK2I5*)FgBtSnczf*ZQQJPE60$1OGz4avnM`Fd%0}ouPBJcg*vSuSw#X*o
z9tI{mR8R#s3*Has=^no^UlF5faItS?0wbEkK~M)xtt?0*p!QhAA2G+UB%M(69weTh
z<}Ub}Ab$1O+z@HB8PR62htNG+*SN@uy*!VE9(x=);@&=jXs)_#@laRMX&%89VmjT%
zadKjfQ^lqXMpl$lV`ZASmE`Mvx%=snVKuu(!EoB8>6>nvlxj+%Y6}N}YgE*(A
zWVIpP{0zTwN
z#F3hw0b8ABfk9Zfg(~oO0zv585?3At0)0zOSvvdRw&$|M7rrifsc)j30
zkN@f(*y7V5-@1OYrAYF=iC3sF`ekJmwGvDRsvRV`nh&E=B5_Y9F1#rXeifJ+=XgHR
zd>i~G*(tq)Hv7l#xsY3#vyz5m&8mDvxj9BFrJ)bke`o@ShNZIzdm~z}Bd@)SId#Rw
z7sf#msR0;)qea1+45%5J$;u}&EG#1yg}8>$N~RL88jjl@3RH|yE7D$r!;DgI)P+EW
z^BHfUXTNfxCXI#)TFENqXn=|tgtZqNcB3_PL>7-nM8rg`3#&guFxhy{Sh-9MjMi|+
zZ<>rQn4$Zsx*)U=?~(_dv&QkHz`r5hu`mKqle|eJ=$u$ZJ5e>k7FZhQCAD?AjB2!|
z1Zoa2#oQ8i;Wao3^67g%@j?|nA*+F8z3KeIc#ZHZ$k7ATCqQnN1DM%VZp>VS!#=>_
zMT;!UEFM3M6;JohJEA12+#1BM#93!mN9(tkzyNf~g{)>RGqd`Yaar)xI7z4=jFM*>
z`(YNNknbhLS%+o?-ZL4hfN{LwffHk|+cT#X0J8;Npw!JLvR3pnfCiCQ#H++V+&-4MUFcYmNXlWR~YP
zt%6B;D?D*eLIhF`rI(AtbpTo*VWyZb+1q}*ow^$P>!cbc?p4S~deFOqRz
zfpu|B>6Mz?l{`R)M&-odKb`=if52a_e>-|zvo$x9O&|Cxdgn6oIFWG3j!0rTnd>)n
znbfETdW>j1&gWjIifQfB_IJ6;8v!wTu+dBoNt`Px2OlJYg_^olw9rq(WXqVO)<%J?
zA!GR5q;hPRNPiDS4+7InQ3*uJ)+0G
z(Y)olhJiMTS3*8eF@~(%ZU%Dz@w5{O3=2kk6^a61)yPx-nF%Mcy1owF^m8o{GzEeX
zL|M&ANNUY%FwhXmY7fme`K~Z8fD6>4+QRVEJZ_84F@0JX3jTTNy;fd~Oz>cz9Zqgq
z8T%>`^um;j5M106=rIvb0cNZPs}E>eqt~vs*R%Zu8yawEgdPPFMx-zTMotg}DwuHD
zBAAm@hbDo?34ky|4(mXRt5>GkPqDkjB}5rnpPPMvqWgcteFjPw%F1cPqGwxnL+-sy
zlG4TCv1eFaWswV3Z&=My^-SQeIHH-{h5f>7wJk%bfuL3rOw^c7olMB}91Rzha_lEV
zpy0A3b=LP8!YPsa%P78zMXNY>R-rrd#7v~~zxHfk3=zq+Go(ep8HFyYV|(Ld8Ffl4
z0PM#-D#O8zXI6K|&7AURU@as>=2m($9!W0?F~ZF8GD5}U`e%V*bdq2pNlBER9Uelv
zU+cBA!D=N+-ji^W$F2DR<8l7bBrm{q03@#-
z8KKDLAZrvt<0QrjyfA&|r&c*!D0n^x55PA{#pM)YM7ow&z_29bW1br71U_fVA7ROV
z|I60iAa(9eOo3y-SX$LUlD0mI=pibEfLnqZ&@DI_h{4aKmuuyZ0Gm_qEWLSmv_+u=
zRV;7_2Wh6y2&6i)vHFDjDGpj;`B^Aov!DyH!2_e$rCty!l(Ej~kzUY$f0BtSi3Fl?
zvM4#H3eq^d81@m&=Cf%k0IZW0O~?-C6x#OeRpF{y7E}R%ivN0c1z8-tWMLMBB@=eh
zB~Md&QxWDG$UC6ZS&57vDcTh5jt(>bzcG%=)7KMG-AM4Ej&PopFGhx`YETM#eS)3L
z9T4d0&vXBeXW}}BnW_K$k^sYKro1lX<^Dt30ss5eX;A{nLJh|Nkz$kGP;~{$P#XzH
zs+Eud1Xv0*F&GgOS#nB2e5Ll@DLr;6Qn0A*^=L=cTj7Nkn*>4ngg*zDJdKiEO6fg$
z!y1N-*3$OkB*E7=eBM*|mX9a)8PHDtn#vkr8f=v9q)eY~P3tAT$3K%yC{%2rbLoa?
zD5WG7NVjW*nOA}B^Z+4k9}xw{2PB|yPWAk#kWDj;$%wu`HVsI@M^OlLOm3ohX!$2u3{&xy=A
z9W@`I-&6vFs4dvce6|Iph>KlHZ3`u+l|274#s`kA5tK>)=Ojn|trwi;j7H=cFesXl
z#Fr?ZWmc9T0kVO)pS3j#ARR})cToC7gN^MyCF(xXAqL+%c2P+YXvelNzp$DsiYx65
zdfBxlI?JGF-J4*sn0}ptW02|I|9hIl>dC@7i{}{xjiHyqIdcRbw~S4Kk~mngMk|df
z7u-R#SZq~h|3YRtl$OUb=lRUXCM4`?hOmvPJ$36~uEl(MAPzj+355dN{MZ5jVJ6d{
z8S@9A*H%k54~nDJVsqu4Xo9wYUgqWDBMsNW(8WuqQ*%DC-BP~ePf3#xa@2)FB{Auc
z%ktPZK+Fq>(I@u>&DSTB?He#|CCrX_%@ex{?R}8p6-D##
z-}CLyK9Jy+ZVcFT=*=*J^%4s)ztqnohR6`LA_J@qFJgu9mJOq?q|XW-z(9wunNBzN
zX_w@@WQi5y1r8^c)7CmAjwWQws*xKFlv7VLg*f;Vkp@D+mjwT_+Mcg|jr?E>L6s;BO013BQnpV4o(lMljK>>mMA~$2Hy2*?uYHRH|+Dc4)r|>VKFbEXU$g7@d
zG8l3PyoAWhvLAmz(IZa6Qd>~vTnxCtQ7re#+;iGjAbrD2GkE$$H5#Q(sb{R4sab3F
zlLU2mGH|sRrPt`Jo*&p9ZWxp$K4Q)mwy#*9cdl&2#nRp~Zgxy3(A6-_oL{hiNpku!P?+s0|8Izmc7`+)hPc>fwf
z@~~N_8S$mnYJ6t#OldLzxSo{;*i%)UBYX&s`@(?lhi{2K*rZchzl^RD1ZsgTg_}yy
zQigFV3aBDCceWZSG!Q7Mdjps2E_7R6#FcG*A*IKH$y8F^q)IcUhSYurs;#@HvY91p
z_&UK~1o82<^*k5BKX}dOdfv|^VAICjwzAMfnVe#2R_SzD45N|2j9u>F4v
zqIr$Yr9|VreUUY#c+Q;j9;&W@j^iz!6c`4V<>v=S4U2PM=e#6eyGNg!%#jDtazP;C
z8k8qbd$w|FxTX&eCYUvTFb1RRG10BK>gv6Z=2|HEbWD!sPA@QN)TS!gIQkohT)b}&{n5Ip6PEO-i>4EL??oy2hXQ8siS9}~${&48w
zZq`=Mu8`(KXe{CTV5yET9m=d4VzCSrEj0iGk31sD?4?WImZdyedI!^vQ|$3NMGu=xy38=x@`HDFz^&dwd0%SWf%
znaX&J_<5B?Bq*~y2^M84%r_;MbW{PPYQW7Q43a7RZY6zJ9<)crhUR7{=m3|ZG81mmKN>Vjsx
zRW>R0#=hxhuikQPpL|qCr20=T@fjzVX+A>r|InX6nK3jnatOSZI8EhPy13juTqUr<
z@JdN7Qz^O{j%8&_fMMeeDM
z@9kY{>=Lyf7VZlN_ns1Ydb%8wRB}z>c#LioAW=gO;V!HZ;yKLn(W4fu^^z92>ev{$
zHcws9W34f`*?Wm&Ve$uAv#a$689#SN+2dTG^8z=e+#PFCrK}X_$JdqmA;E0
zPq{~9Z;O|)Q;Z>nHC_omUQ`f<;%|%|mDO@AlO3m3;P?sFL+Jf<0??^=KDOy|Y1`=d
z9rlvgoO|FJzz;{v#%OIXjIb-0GeJ_Fs-bfAh#gH!uX=auyGq^W+tohfOxl`wfeV^%
zMInNf+%Oy^QP
zb5+TnM#-sy!i}I78U^L!TkzM_R^bV)@)>elxw_e$rV6`xdUqgKc?MHHXBkO1>@G=;
zCtaqdqKLTBAh3tKT@d#WH@q*g+qY9dV{H$srR?s{)f~~_e)rD(eYdXsA{xV5;ab
za-m-vCY>Hi%I8Qx3>=7us^0x#9N`()o0sC=p~wmy{OK8Q^txYUCs0t){ASN+JynD(
z?0Pq|*bGc1cvy-T!}W<^=h0JN`H|?x(e$Bns=cH_T3z*5+~t9xMqN|1Bkd1*_q{61
z`}U?~2gZqtWZ!;)kxn%26Q5eF)o@wn)I0niq5sd})u87d#hZ}YU>=Iggk6@iOI1E_xV{~c5yK_&u7^Y-0
zM(g6V2|5bk6zP){|Cr(jJEmEl&S;S#`gVy}BU-tfFb)nFM~w{hHcGk-suSkXm*7gl
z!EcK@RsBRzicO!Ke?SOjbxC7YGW_e+HpxXbv25L`EWK_T$Ci%a!iPD91_<+fJk*V(
z+LE#f&KVlwz$}=dZg8+prS4I~5HrCC9>Kc$6V%i7Z&6;Lt9iCWz#8EQDG0c&VgxHQ
zzelR+blZ;t3KPsd!xv)LpzUHSj@;h5vIeG$#EJay(nG>gNOJtJ^f61y
zLh%-vw^hXQ5983X*|1|?7MX!gB$t$|7z#p2eN~kH#c{r{S88i(h(mx2it}j28M_NS
z)2B-Lkg{TY52|
zXt2Ar?jeygr>4AD%SQrHF)_Nni$fwtu4Lq%mJ1QGieF;@a6li09W?6&>erXQbW>w(
zMLCddN52J~0Fo-4DFS{`>7SV4ZNy`=7n}!9g0|O7k=HzRa#0r!Umn0sY|qirrQKm0
zCl)~_%@Q(!sL=2knEdiqa83kZ6z?J+*K3P4#y+0NC+8rNJiTGU2GM8*oj9n5Y#x-C
zMSvojWUF)V|04vgohu-mtgux*fVmZ12RB~AnfUJ$Lyj)IiB63~cJCE4_#IM6`O(u)
zs=x^n)8)1ZWD<^Ax)jCQ*qmL$x7@|RQ25C?=KE+ZF(DfgyMD*z_e0jCJuuLxLr5=(=B=
z>~Ge-m5l7zZpx#2a=H?Uc03&p(&$SFhi3#!U9iv1YQ~D5-|k!2cxghTkZq+zim^1e
zEh3huELc#b$Ty_)xRmCWmav85Odrf%T*qhrU6K|xsZ2s)$nREWcUKdVCeEvd3v=e};oc&46_8z!ZLfIF;K
z=_gVwS*|TFzpGIIv&`5~p5;?+lQz|+9M|vbmr%PgmyH7f=#`
zhvSFv!ocqGtPq9JtX9!a&ivT+fHdzS`a#fuG1wa6%b35Puf%uG#T{}<$@yrh66mM+
za0qg^Y{VTh(!fe+@3$JY#22ZIKU!de`^>-Hv6V2hw_ql1bnDRhY;2<-m>$5B8)FhY
zBO>Yw%kTSBRr%>GF`o#2S6^v4*#$CsRXug_VK$WcUzQ<&^{vY@||WW65N>d
zFRqJ%OfzaAlo@LEqn7z6wBk1*MKnbe$@U7apc1JKn^92f@^_9yGz$BiLx&GIrI39>
zjveFjr96H%5*FL9NH@q$=1JrfYM!zd$Ureh5;#3u_HDS?w$-#~i;z}bBVh6K)B(TQ
zy7}j}X~;CcOE{`}kauP`$S4Fv&TPn#O980aB8#6`!DJd#-$g=3@u&2;to6RDLY3h<
z8fm4mQ?SY^>)6c``RFhks=+J2SyNI-V!`S~2w~cBugcZ#fY_=m!Wo(DBzQL`x}utg
zTeKgu?d%;Du2L9JYk?vnyj#3p`0Q+ynq9JO!QYicmwe*h6gP5hoRmlQ$HDZj~$5^6Q
z==OSc(rc7fj{L_M(JdJ8fGc1O!NzQNZKwC#wlT#{$=7E7GYohI##_}BA9>aEy-Akk
zb39e-$kJ%!
z-gd)dQYa3PsL##Zpk!b=_a?T@Iw;y;0V$#46x5MNWBo~&&2!)tIe{o1t!9`*|^8D~#kEh!H0s9YXqxh001mr54htY1izm7EvcR;`
z)`#*Zt4`xk1`5fRBJoQ&LX;Y}eKnzw_x-||11vvp|5!ko28A;(6C*R}7JA=a(;lTr
zaJGXm!gA@b3X=Up9VK|ojDQH2m5-D3ZJ0u#U{0t02APEO&pJUW`n24jZL=}O^nzY3
z5lopk%Vyc4%MnPnYcG$TTj}*1-oj!+5C&BF)UIr4_N<>My)H1}zmm0z-cra}Ti1lFIx9rn3y_8kI8iaY5_0f;^gQ>JoDr
zPz%bra@K6(AGm!FlXnsjqx8*Js8z;`qY4(#aiyF|YJ9Hgxi}s!nQ54al|b{aY_`XZ
z$_{?HsBB95Pga8oiTsl)D$p+yn%c7K@Dvy=S2l(VWpSmhhfC4_6L>jakeva}{4~Mr
zsLBT#3!L;v#!;6>$|Wvkw+s1wT5m(p>Z#AKpk;4A4)itca{PCgpQA+(q1vYL;0(&t
zuP!jsL>5^Smm&zaS5n}Ztz4U9>neV;bUB@+IGb$~z{8=+K&?VyRv0P~dgD1rW0Jz?
zR$=v2ld!1U!LKz;Mf9&*R9V6Jd>Alg
zWK+{7p-ffv5BYab>7ja6s`LNTn!s8#(0tFl6&psfHcAQD_~T^+(vJW;-a08`
z@+V(kG{dVIzQJW4rs?l}4Wv`=ZDbD0aFf_NKvu2!V>ObpzCu9<)b@CJ#8Q45N|+l;
zaP{+{=E-Ze3zc1?Sy{3&_7ptaQ7{K%GW{#N)q8uno9byKT5K8Gl6Qq76oEnTlT_Ae
zSA4oQPi4Y6aI~9T6Jza_t$k_2PeFQ}q%WovO@W3|BSA|#Tnd}3-Cfj7Xw(b{CvCWJ
zG{o1|W#03(iwSJvk5@?E?|gKM8j8;`(KL2?{M=f?L}EvB4^?i^cEj%3YYQ&H39_r^
za2t3{xULL!9+YgfW01-pY9fVHfj@_q#~+5sNl_1FpWv7l%cLd3+$vST?$@CilXbd#
z_N!llYAS5sxW><02tl(VWOt&8)2!~Y_
zkj8OY5k$vBE8jUMo(@X|gqAYOpf1i8p@Ue~{*XgQR?a1bVB+3vn1_N3g_jnhV|8?!
zt|`@$W+7EaXQuEwjEI4w`nOwIyuQcdENctHS%snbP^6hwEj(2gXpoLu&GR!o4U}#i
zC{ziVr`8mHKSr&ggaP
z5Xz@q>x~xggSIdR#x@dYF;?`j1FlZta*=tMBN;gB5Ds-J;-R{}ZRGXqda?62keZ%Lb^WFSAf9iPKAO7dWcuVDENQ
zB-GMQ*$zxL&mcWn2$l5<6|HSh>+fku;bQFw-QO=xxC_J$h827Ry<`+>TU4Q>OP210
zo(q-=&1gY-Evbt}@PQH}x9Wt+kGI9k@$TH-34y0WAb7&gLKK|?eK$JhmISrt>Lk!>
znxKj>LK_feT9msB9L@@`gguz#>=)qah_{0T(94d06|GQ#ip!~&Q)+*<
zo=>B)qw?wbZ#%}U-VV*{Aw~Z{scKE%mVJB#{)S7+&LH*cLHuw)<*v%--Ad$f-Qkw*O
zu)WVUNucrsVySExW3k|Nv^YAeZCE4iKye;Dy~*hn)wvQ3&<2wP#ODhOD0eKWC@nKg
z$3;c(1+m7(sPJ%YVTZYUQp3vXn9M1=C;}*o+57}KvR-pB9tYqjra*oJEf1reSz_vos1d85K$B%A(PqK!?((hIWkXe^gB9}LuD0s0u%VwR4vx5E+D_pbZBDg
zZ&Yw$VULDbv6xSd3aJ@6+VGVonrAC3l@(+S(~l)&5fLa%oSQ#n8FOhp?DN1Tu&Wfo;h7fA<
zxKn@7Z-|ln&tvu^E!s%NEr4(X!X}*|Q>`ecz`F;qDekQ&WhI6hpBpfOLsZnU_85@2
z@d>{XWI~(2=V`cfCaRRGEQ~r5tF%azMq?IIUFNc)*n;@X-4CxZ+WPk!sMeeW;W=?;
zhG0noU~e{Vgu4cM=~aJl_QhI56Tx+NM&Zw<`!ib-mJz)X?Sc(3qn)g>Hc!1T2o*OI
zhSi!pXk_)w!nN2Jxo@1PD#>Ih>UNq5RzMOa1W&ABQthw6
zZs8E<*@~Z;EHx5$d?VddsA{80OAB0u0IZ(M6oPg>+<^Zx#DF%5@3!!@;3#wdnrdYH
zTiVDo=|8Hw0PGsS#+bV*IVrBL`6L>!wnOD=(3iYSwrWDiaC@N-%QL~Z!FOQb6DCgw
zPU>(pC;u^=Q@xIz_qH>H8bchn6v7X&SfG7tX^x2gn=}WvdHK34g+XsXg@mgF!7l>u{5VJ#UGUEZ_zR6DaS<#ILKTgDRux91vIz2VihB98ga-
z%`!t_ISdqtCX1UJEc7EFQX?s>)zQ!&cK$&}**X_@!dVWvF_VplUAWg`tWChiN;2wN
z$q6jM7_-Zf)@*gIY#|$eQ>umh{N{faiURB}&9m%QV>sdYW_a0M4Ye-X
zkHY$g5G>)vU!(;YpTwA2VwhE-S-Slrp(}5)Xa8z
zpH7jLtV@Gc6iQ7c3rLb5Ao*aOsBAq8skb)^c98i7)X$G{TF+*U(m9>gXD!7p?VqS(g8TR*7H2KTidaV(+Q?aC*R>YYqv
zD4DchAuVIMr^KOYXEL=pv;Nz?QJfz-wmuc9S^kpyBi53pw6({NuQ
zY{xAMIm2m~oF3^^PVY_#^N_^s5C&-AkBRVK04CO`Qt#8X>{$<9)(b4k&FY%mxfHMt
z$bg-+ZF#G)Jh;D)$yIp|Z+3VKB_5QmPg%_BFvf(KFjzF({Qx}AAxae-l}6E#mSYWO
zEQ`_kH^`#=2R_fZv~Vui>RqaDz~+pPiEsXyX}LRtXOVx_qI3i8k>Z;U=(c{gm^~1N
zEhbL6yDq^)S_QgO6v^7g$?4r^2e9qjki#F;)zDe|dzzJ+n~_5#FsQtulP7H(C?->|
zO6k$Su;J$5DOgXJe;YpJq7klUVZ|^7cpggC`r{KBJYHJ{aQx7E;s0tEt6uaI{>J5<
z&QVienUL+k|?twt~041MWPL>mE1ZR0I#4Io#4b({4K$p96M5{XLl!0qXcC7*
zxuM>LS|NjlniGNeOQ7w6#(JxnvMGHh?D>In?+)BCxZpMf1I~
zlGxc@yN4OE4J7|;%DY&Kle%ox`{v8TonRp$JzKV^EkqMraIVVsI#W^0Pl-un9n{4G
z?MW~Xa{tlERt{w|;zYGRd(YG?y)|7z20tBlw-bv21Y-{w?>`r(a_o(eOs2+4lKqa?
z)eT)g5BC@`)7`ZIeq}Fm3=H^ha?cq|a-B^cONB>Es+wMtHkjOCbOCxWMOSCM`Wu9Q
zQPJcYyL?nDEi0<&vXn{;*bH^kRR@@u%#hUXsnuBYdU9{)^!os)Bu?F8^mn1gq4KyB
z93?g`iM9|r4n2#Ii~TAWGzmzj0r#s8EWh-G(`Y@s-cy0rI&hr%*V6f>rLbQ!Jyn0S
z7f_{K93J26Azum_KWM=B<2@ZeK5NDN=9BD=wmMl=c@iIX?V8NF!E-cihzhT4FN*NW
zcVJl&w#!u3d#JB4DiAnjBNhKZ43i>)oTFWp!6`gWZ^=3O2?eKHQKJaNBax{PQBTfg
zC-=BkdCcs38AQMoq|;0ZzY}NoU4so}Rjb@EhtOfwt>?W*$fwi9J3jgGt!r-*?eZ?Ldb#LqvAcrXIFE6e;oi~_j{;pzfPQS_`=Gioeu!L9H8?Utb$Odr
zs@!%zaeVc;FgNWlU`|ey;S0y~c177^eUQ%&R$7-}X6vHYwHKKka%r#j(CfyBG4D?|
zfhd|X&!$xXmcCB-E?|5;#``FMCXNkIN-!f$)!R<=S>6x%T-
zd%#=ig|*RES|HWyWMf(#7x|3|rC#+UB?S2^J%;r94Uo6W6D1)iJr|UIe0KFsB&AJS
zxXfxul*E#){RlLliBnW%U5d@V7YU^Pl+ge!L+~j^q`?6ka<;8}XNpcvNbfYxS$nj5
zrZs@d&6uZ02Fs;vF>T&ZdEHzr8t@hVyxl~%O&%U;kM8;}ytqnWAOkg(|2hd^Q7*Db9}Gag6VRa$38jngH3AtY*aXkGi01rsYgVQ36)
zF&(fzgLW3}Y}+&?k$dV6YIQeXg3wYRj)t`NcSPgd1-x|FzJ#Slz(fv(TqK=W0&Rq6
zMb`KD2Bj)2o%4)j$pj1>!`86(YU}(To}$+q`q`(8K4wcQ;04-QT@@!(my`ub~9o^_j)
z(TUz(Kt>~H9#|*&$(>7wLQ+8Ok0|Bs?uG-fGM}=;9-h#GoUnH}FgbG8uHIB>vjx*zK}cd;VF8}~qX7`D@wBcPXgTBs)Os|urN)o#>KL2`H|kazPpb8P*f#XPz|H#@JdW6z`!9J2F&O8
zkYC(V%?@*U2L1vd<9D~M^fn^q@06Vv8;Zq-ZdWXgmD`oO5?X_9GWss2p{OhI+BiLF
zH5l2fdYWO8lY)^|-f{xW%E?J=5N0N4%L2vd^j%=ja|Va{eV&=rN0qADeqtmu^4{@H
zh5n4MxBi08D-fqHF>NLe4B2fc>DEy=+Q@of>8l-r7_1R~FV($0$TblA*Ke|`jY;08
z(>w1B0gFC?!&X%~-#AD$m8HkOHy9vXtH}d}1~${Nu;X>g2hdC$IcE)#f|Z)Q0&-j7
z?0T?E-l170VBj^%1t|s~sS~t{)QoTb6Up4}2Ybr2FSzXt~n#S%il>nSt
zDXomhVZ!XjzZQtE-FSiSs77rhNei5!rQ!{0*0{6dI@G^O3qOEL)1XE$w#vX*3UEWN)Nxozve{f1ZKNG@vdn
zpABi5xXYgmTj=4e;H43fmk>7@xZI{q*?nIjzgTX>Mo?(~<)L}}XTZ}`r{*I)=#wXQ
zSW3$3k3TT5F-{AK(v7ez@AVjz#
zq-k%6OKp=$BqKAsMN|maYO4=lu!WDl-CaeYSbN@=v%5v=m$p})*%D;sP(=dPJ_0Od
z@~6|X2?=%syiu^WhK`Tv+W&r{%RTr8e9gkL#z)eBVGO!{TPDCjOIL-0e0Z-61{FNH
zdZ-leBBNJXQ^Jo!f()>|G0)iGN9HE^mD-c7zPX5|R^P6%n#Ap?G1bg~
z(8bi@Tfk^k%L8SWf_b#_dPFj5AFPTlcFS8&Ui3QK!IVN|flRV1`U8;B`Q%00GF_IF
z`^$@*1GA|6bl35Xw`tiXwVU{f)HItaxO$L~h65(~MM9x1m6WqRw|Km48E{+t*qLEA
zu$-O83Lg!B$jQJb6zQ`OD(NqblH7*jASRfN8e`Q^soItk8o_~gcenJChhs|PXv7d`
zV)AR+ObXoSh;`-oH7q=eaqPjQhNFNQ5n@r!WVC)%Ys
zYLNYn!wd6ElY*38mbh6rBjdokq4zgCt+M$*pW1sD7t4Oi8)523vRq2Jzx*7X!EzE)
z=40V=0)Q)
zv%7p0W*%(i>e$RmWg>oTuN{~cWK`(OIGO`0j#?ZS5lo*KCU0FGJK71!3$-nkVh`Xm
zVUEUe!V!g@OE}AT&CG7N$b$7l2>)5uF?*1@ydgv2rBEzDEf8#3p0Xx$_e=
zi90g#Vf~6^w^%4=8V64F1k1q27
zpw$EAyB*foNa|X5Mw}lRIJHoPirH6e)#CKYc#sZ&nvM$^+hEs#ufAyI>Bw1~-cYOE
zQb=pHo9NQHZ6bGQBDB2mN*9?T%>}zHWGozOI8vAHE<=^w3q!FxW(`uv*+%UW8a2hzk%CzMy(KLXPJbfZDy8V@69S&ePA0Os(q}}k-!U|f
zrgJSwVuvR;#KqQP!%`e4&p*^T-H=sPq__H9%D@<4v96
zLonm*3|-y5?GG56T;GRc0TvF76G*{?=~MNpd;Mg(t+?WqNv!6Jm{$ccR;C;tIJoN3
zBF@!i1z0LT6d$~-_)FH0#iJDgRIXYRj@y4gM17J{0!{*Na%06bLz}UIa!k)nqtGd3
z=8_yW9RdzHbe7k8lWb0)A*rnqX9Pn@vy7GW2mk2a3+L?!PKB-y@HmbaoZ4EDyE%sy
z(+mgGV69*@8ll1H9d1DRj0hn!QtUl`tCZgW5dj1
zT4S@9d{m;RTC2*Oao5UWX+n5^E&l{v@*f2tUb(248IOhO#fIqKzVNEsk?ec8VSfp1
zB{D@{R@<;2t&g+!3jPVBlhw<1bhP#c*VN@;c&|ldhxA)Ui)s-PHIihIl6K7vo9f9U
znt;YSoazkQf`-x*V*t9_%5yUMK~wH5s?@qLB6Z4~yDaz&uL<$SRGOunFXN2-M}QdZ
zK-4xd;_|$d`WoQOICDhVfdn$6iV*NeUqd-{P`m^4OLbXF@*#o-v#!-R>)wHsh**&g
z{tmpJ%@x1A-o;c@Eo}Dn*rk!}<Vq3*UShmSph$jIdL}SM7#$KKi%v^0jf-iVa~;uz7cT1j_b=-FJ?LORcj!KskJjy~T-qjk$?0^VoW{0w}oh<`*4-Nn8|
zPm}f3G#aUfS((D=BYu$$7JG6&(D-j(TNYgwiubqL%FWI5y|mtix-t04q{JCo#Nq}*
zhz$EQy11r*UIMKXUU^0m*2i+C9nz_o3g4RNWlnN$D>Wx=al3M7`ieGYM
z-j*zOCZBFiyt68V>ik`0Zc0as5}e?nZ<)6kb^N!Hjee%OS~s`3%|yY(PA(
z9dF^rw~4V+;oiHr<_2#0Pn=i-B*WClA^2aj3f!dW3q67;yJPFxBw8Ujgc*ELkAs;4
z%sn3a=Z?FH=EUbh;KV^hoiC>g=!(m$g3?x-o+5lCA64(}!P@26LepKmIQ1KkO4N89
z*a@jafnZ^NGO23`2_jIUOi)?`(*1U1`OS$n_ROuc7KuVoM}khWn3z~~V(yM2@Wm_x
z;h1p7XkczTJcktJd=JF(7?zu`{A+@kh@5WS&YlB8aRC`!#6n72!^;7)<`B}+(bz^`
z*Rpw1n-&J*_Rd4K%wA+&1_rauB?Od3Xh5J
zqhKIu3jYz5FnTFKF)XFGM);f3P>iygwpNpKbR3f@2Wx*meFF4~JiSRzM{xQxJ@D4-LkSiqN3s&=u
z56$1-Xmkh|2jACmZH^m7#5m|QK^M2edn=S~+SSk2GwsDq-sb{9f^hi+%f5!m(#S|H
z@=3|}H>nY;)Ia~RE7mts3
zOAX%jbP3nQX;(NmpfaRmg9{Z(#w5ulg(4(f5WnSv70NX6R$wk)};h32fuwzs@Xk;*Q
z)xo+Y4>6DL>zw;>)|!e;hAJlJ=OWWj-V%ufPVCXl&rco`H?)nQ5RLE|qQ8~G!kp^#a9jWgsqjR>m+wO@CX#0I{Fx!z
zI9l8@cMA^Hdd^KG`N?Q+zrEz%P<{#c%X99c{6)0{XdZDdethDGjVZ=pfgojpC)WOJ
zd2-*tB67Kur)b6Ak9Vd;7zXK&QbPj^JT>O3r!oQAvjacU>I9P{s&QB;TQeADm)o-ePw?AWbl|R
z^to+N5wTRubdZJvH!w)$!MjW&Sb=<*+dosUtp2lCDa@qTjhVC?y{8;C%_WOkNNM7t
zucbo+kfruBSt5cWpp2Uyc09k;+VJy43q#4WZaCuRA%Bvn=!wb(wP6`$Z9<)P|2XWM
zSi)lspJ94Yz!^$45r9zqwK!`|s4>fw*4xmOmZS)UJ0Hc6_89cTY1`~-U7YjnVRfW)6z;4q_YNALI*EI&Jvt6-mxdf#R
z@lJ)bM*4HYb)ezVp8>b3e3Ux#^R@DfOa?$~fFQrIg8CYUn@2NL@gh#_eQoVxqzzh@
z+Ge^Gp1JSnpBN>!n$}a3L!@28cco;l3+%!lO4c&L&OAtkeq`VK)uVbiK6~8Eh+ke5
z<|AvW{~;Lt@Q82`<1nK`qnn58jPwQe4;L@R&5{j+Jo>18F)>BTfG$ctJnG&aZ1eBg
z>CM(sgy?QPr>N(cPi@0S1*r(fH&ul=$DU41x=7<5L7vFA<5-%#T)Jg2#bPOy^SmwN~Zw&
z3E{QwJBsp76$I1F=Ag6ZE|Tnrj)H%Bh;%Tea%v0!SRUh&w2@<7uD
zAVp_z5T6zS4^~@*wkVW@BDXH0H(fX1N*51TNQ@8n#VZDd7MnyGu`WF42uqCMP6N%g
zCj;65i4HEqWbm%c1A?OQxb}AICj7w#6Wna>-?;0ZME!*(QU?_rMzK?4+mA9H*J!<&
zXg6+nV78DkS38PE;OKS#H*vYObL5eCGQ1NY5gBvgfNPLkE>B6Uc(k#_P|;er19vh{
z9hH|jI(yZ{n__PKRCan{Hq|)xT$^)fLBP?vQ&~gcK(GM9CrFf*2
z%W1~Flz^Mscl6Svw@a%opqVyNNl!qZCB~s_I2!NY?Jz8n7;aeiJeo1QD2?3rt0+5aCeSDu{a_DXQJF|EY1O{}P-LT@d`b8ZIB
z-1`+J{$@g3OF{OAEybHW4`q(GYX@1WU%xv&++B0MIHf6lUE=JKmq)GigBFnVgQPC(
zIq_EoYLiER+QG#>DzJF`I7HL`h1E+0;klyiX|$VuO$~cCH{q2vJ|uxZ@Is25JiV~!
z)EH80{&VBMTJV*4a|U-hiN@Rx_I2NI*DFb;3}3_}?c8*MnEfAD2JxiD>nkg3VmO#^$BF)y~tCgX)2Jd;frs
zMRtcYr-5z*l%S6bNayN{xaNKJ=KR0}3$u=Lp`MD#*Yq4*#UW%0?{5>>*cH}V_S8_T
zK^9G@jhOa|WOP_A%uYeGr0Eb_8qEd3kK6dx%bK1?bnFCX&wHIPxHAb^Lf)dF8=`CV1+_Q@O0~8tSjYHMg8HAT
zGk4}pX-n0#)V5@&RPj9(vdI4?pxn3jOS&1nLQh^j72U{IY(2;by5V
z2d6;wIpK=;H+x#&=BnsROte6#(JM_vFR?QsaR8hPfE6=HaD1AVoz3A7;^*KjW`z3#OjRmQ_3I3TrN&1v9)#%*F%um4
zDs*~fV$+KgSBH;W8z8+A)rctJi4!5E^F#-z6yT{gnCTm
zcZ&WzR5XI7<}j)iM&m?+qRvv{;{Czunn3`JuqLlQ7@n=DgvUerp8qwp@k6+4E_4;p
zsLy|*HoWV2)=pj}h3F!rpbRmEztL8N5>SuzK>xH!L_RZP(9#`MI0!R3*J&LcXn%BI
zHzAigLL3M!cE}z&D7>cg$dZN-H?E@SbHAJ@0hKk+yM^SkmfX*rt*zmPuyA
zZ8FP&f?Y~ak-Od6vLE+7_6U`al;DsF?<(NC23L*L6E&9`C#Q6|3QvIuz^}*DCtv7z
zPZ(dyeezN>@a=d42MTTfnxQ^Et>HH@3K;O7b-AV4(P!%1+EiQ?SJc}6f^jZAGq)wN
zIayuR$aIz_%Ya1-I^B<~haUgQQPKO7uwJd_HQBPo3!VNA2v0sN&BEv0@RGMn3}Gl|
za@kzB@S7P}nU?)?_@jr3_H$$^)_w9>M-LucLd@Cl$XjR00Jb1QXX&`w8hG3)
z+ju^9Dsngrc*i#M#v}?K{uo1sd>>20J1rxK>u&YAeh7ZbMiK$P=qQLXdW&@Uj@4xn
zFgmd&gNNRf|w9qlV!w_GsindX8Kez&%iU
zZXSRFNi_H;O|H1#3{}K;tQ6tZItgE4y^*sD?Q5kkE;|e8we_oOy`eAFmd`0dZ1nB@BA=cm8L^>72DW_^tySJ<^<
zQ2^p)*v)OR@Kx||IVt{-E6fj>kWfk4)-Z+|Mvv)dS1lL|ZxxmI1K9ZO27L`+DmMK(
zFouI3?B}_xmW1fbxIXmQMU`9Gcj);dZhL{9G9bf=oK25BxoTqw%1cARS?Lzk`66kS
z&-3`bXL0fj=TsdC5ONe}%G+=jUAe<1lyHaJwj^Ada4kyC!2yP~EMNhhYH_3Qu6liY
zT@d7CM1vgw?|nkS6*(>0$}G8IDJ=`QRnr%7gFXYMH3VlKB`F#ohrf=L%i0yVi}j5w
zo^atQSzNl2cMkuDVh`$Hn&V#{euIST^EVkEHD)DtYbC!>+yMOtV(t|^FigL3CpE=N
zrINBv(c*qL^6>lc!&unUy?F`z3YSo=1wl7qH8OS7#3v|lk!f$9P8Z4@L`C9vSOrk1
zO|IfDIzLnNiatbXJ_lP*NlGB89r*~EHMy}+DXH@O=5A5-J&H6Q!dbYmB~ta_Za3@T;@=?P=XYNs@>jWZwRf(Q0zsF$
zPm%gl#@sTAs{p~305|##ut&f(clQgHY3KT&BX}9xW<3=Q$I&Wz%~OpwHS^;%X$w`z
zA73rV4#aFL1q*&0>21j>TtaWz*+DgX3*KmBu*zW4J6G{$0F<;c>SQ
z;4sVUl4^JnA233D|49A}$j=eZX>DXF*90iugE_;$Pi=T}$2ccGnk7cGVg)H2lK
zKm@$F^=l|+KVJRxQ}G(iKG5E&vCNPwJ>OJ(4ft1S(X1xaej7i@b;wce)wlb_sa$LA=SU(e?HewNA}1y3n&
zYh|&rUCb4y&+Y7;I_&jpxit%tWs&-cj$?GF7x9{ZoARdf=eQ!-a5I4OSx^QS8@D1_
z0a&OFJ8ezudbOJtf48=;x4R|i^dnheIf^c)@{4jRr@t9xsSZtc^Oz21*yZEZ57xLF
zBa@e@SzrCQ4*aN-N)h~SW|FkHI5JLHE(XlP6mPqFqpXq_Zux~{zXj?hG
zbobHTApG!Wu{v8DCZ;l=9{|94{3^I5#*>3TM>_PFYTbLq)*i^=_~mkcp!Gb$i599j
zkAOFeP+>v2Ixv_TvW4mFdaAt1U}la>266IoT*z5oAJlpI{3mPLIJLesI98M5^8Gv)
zgsSbM%OWW8iSzUR5VoOfAVI6bp3q^YPP6g4wjska3WX&IKPTu+|
z{E8DKynmQaiyAMOh(R^bipbUlE|Db{t-KL^gn(P0=5pUxOGP!GX8xE4p#rw38$7Cs
zT$t=1D!Z3*#g5fIixHg+0*M9J;64Q6Zs)VT&SRJ2pROKm@|xA(w7$h9$+oxKn2ioq}=FreD!kSK0#%chK9LQSczmCA^dG=w(i;F3Z=C7vYrjO+bzz1dUiH8Kkf1)=-26865b_
zpkjKz^d0y~I^AJHvs&`8S2vz{mtW_Z5gVBHGaEx5Y0C_byM4Su{(ITX4$f#wWu?Y*
zoT;U#X@>5Jy;t)RRNT8MU!9FHbxs4$q*IN$Hz@(eQ%TywBISYk_HUmz#8MPY#QxPMn)M)2Amel(`kVrMV2j
z48S4vYdLT~kh53QF2di}m6N7%heh?M+flHe>0!kXm}N>!9+6qTvEc!XIIU>iaIa?p
z`Zg!;-_epFz)k!#riQ=uAd;hQMII;=&O;L&^U0G#+A=yLwE6hOmF*d{d5A_
z{)#m|2s5uRc*umaE~om48E~*yuHObfg*DyY{;GB%^rnwb0+sbuG=N=8Pw&GpXg!v7gkFICbXNrOfAo>8$>;B
zI@$=hV0m1~vH_IQn(@)R(Enc=G-NBY5W>Y4>9?4=S_>%LmMkkU<|0RA0!aVSi{h2S
zhg{-E+keq`1hWO?<0(oaWVvvZda%aiw)u*l@`MVML%q0P*nHtV4mTAb&QE@-i$41vdbfV8R9F3ZcW6p+oWcL2!+nd%#(7toA>DFC|
zy2dqjMMa;--V*{ci-X}s!Ghbudt-A2TzRPWrP#(CyAr7!CFZ;IG^wy~Ux6>TVdN#*
zgXzSe^rt~njmTC`zfMkcs4v{Djt6X`gXGB_}6`gGa2xg!6u@TgZYe#`P469Mmi9Bl-B@m}<)fVNeK~}EA
zk0e*0>M~2TWmAerz%N)ezZT;~MhbRYifHA+NVGe-IrR<|&em=}M>|etaumX(ib}k9
zOfAFy`oJC_+EL$6%VT9I4uZ=?YWVkX`{6+q^x&Z!3NVi7uE|nPe7!mg^@GFW@<9Iw
zI$a@U%a|+JhpME*q)N{m5rTY4-(@%!XFEYGnQ$Bskh?Wm)osXHACOv(`e!L+GP75c
zl8Sk;ICra(T4P{i4P16{&EmX)f%T*-oSK|@jOs9-I%gPS3zyVq+Ee@
zVRpM+0AneR#`3!r?v+*5RS`L_}00RbW?i!l{q=7tL}YTbjMi-+O{DhY~KQVR2JYQ8D1~
z@2AteEFk}aYr)h$OqOs-d^aP$~pC&+(teB{EQE4dTqal`Gc
z5B?rKz){^%TMBO3;eCG<0@JK6%(4$5rIGS+^&CGE{fUpl`W!$2wkA43J}!A!fmk>P
zg`2L7)y9^G;a0BZGTo+XfT-KPQR)4tj$GPEt69w~e`h2l2dI=?XBIWOlFwSSETzYJ
zxe4;wqhH@3bA)j6^*XV||^O
zK4G$ZkF8gh%vF+Wc55+5*bCqgovFTC@e;8MV86d&>haNP60ldsC{}Qnsw21um73tT
zD+|h#jr+Tw66-{l`ZqMdt*`u{8=(fS;Et%}W~3DW3i-2N!ET$hMSpzZkKg(0xin=D
zx0+GY&L+v(6`_jjqI~U)9n%
zm`_u+Nu<(g;+uoru$oA!bOcWq%RY8fpyS;5TZ?|_UV{3%3*d{~VNg}ky$O|Wx*|O%
zXZrHXzw;R79r_VZ?;Jh7SA81qO<6%J56NLEh!PT%B~Y?6H>O`^2M@O;n7TC>I}eUz
zTh}u}TPc6^7MsAC&G_zYD1EPe6{mtMw+b9g&N2p*lEP1
zS(|H&M*}0t*`VVn=tC&BcL%Z|fj?3F-PF@&E1?CbX6Fq236a?za{#c9bw&O8p`Zn$
zLP}-mHCGG`Ra!MRS1k8cqrO|W&{YcWlq8>^?C4(f2E~-v7PGOIYb2+YLQB$yAJ=&n
z3epgwzH--^aceWH=-zZi+L&2|g#^2RKC4XWUPdh@_z7lmF&zDAu6TN&q(b>mO=V#8
z-Hk4Afgx};RTfL|Fs+hBmgmU3&ba#Yr$D4j$riMj?$qMm(OW&>8;wsjaed>=li7Fj
zGqgEfHAChla@xAk^U(VEv?5=T7nxWqab!O}JTzX;bCAKeDKu}TYs^I!KO=4}?84Os
zw!}&=YeLFFjB}R&>Dq&0%-Vc&r;k+r{}h6{8b$GQ*N+PjJDyQBuw|epP_Up-1vz}1h-lE*xJp-vkvhWj_3I*YU>gDJYEkM1c4WDm>df`ajT31|FIkh!Gf7AbdP4g
zY+VZ8qL4eI_^3>p2l4@u#1IP29)tKJ3?BtLOh>3#C>vlv1s(xVJ%$&jGT-bB(gss?
z4{C@0AN%@Nn2YFm7XB6Lg78M95~dcxX^WNe1JZEPx}th)r<2~Bfj;SLW_1PSfA)@g
zfs+h@|5}1|w(}x^IN+a%Ve(T>*O6-?T)KBO?SuS)9KL|~4h*Jr;xav*lW(^p;Rvm4
zCvw3*a~YzLGSOw6kBaJamG8LN6yaZ&@}+VdR0@g4Lp%OY=n6A6H5{QX2}`;knI@V9r>2
zrOlOeJaxa{{L|@7`@VuABWiK(s67!cQaTX9_>jM~Bgb4xw%P>VRQ_|c%n;QeiJ=p-
z1royZ8Z_HUE|q+jaGiw**(#
zcY|0H6O2kFwaJbEAc93!As4N&?K+62B@3E4f4bQ5BSOz~zK4I&_ebv*hX>@jMrRou
z8l%1R+=V%awpKNVi;d6U~upMumAa=7I}wNBuhc@)=9I2c;K6
zay<K6Njp7pfL?;2kh(&(+t)d9HKh19uXQCxzsp(3ujf^3QyKDG}-IK03X0JQzL
z|5;(D547(0g#bMM4Ray$OXs(U0%bQ=wV~-}p3IG~QrOx$tYr=fU0d)NesB8iab>05
z)LE3MZ7Ha^+|{Y9qPp60m8pC;F53CJvZq3Nix|ro>ZRp+?~+bQp{+|pu-L2d4WVM*
z4RN`Z^gL`jf65W@kk;RwWI!+~>36ZAh+JCv=dY&)s4fX?wNrks7fU{jHO9A}s7uC^
zP`?PPd|wnFps)Qolmk?{a%`NscBZz3)GeY)#3#Uqg^m}wowtgmg~=UQxggnV63jHN
z;?n-x8kVm8$UHH3Z=){M9QFMZ>;p6Kze&@NvcVN8A(PO0tENV#&brM96C+f!W=3db
z9|a<#sSZA(Mwap)yVf9>UM0QQ?otHV>pzpR#Q>N3YvkJ6_!MezskQtY!-{|Ie^FaM
zwPBJV>O&=2ff5~RV*yq}nYp~xPgwM*-_|Do+ReWQ@10K{l>9Jr2$B9L`k!sKz{KEW
zX=n1dRDcD2w0&MwT_<012Nt+0e`V@kP}775t$WA$ze&Ix5BFoMcN<40vj?vkXkPhLk3DJak0F;3`uA79=1_Q3;5M#oY)-!asn=e|!~nTfITX+}447P5>tmUQg=
zK9n`Q^So$590Do;;3c2NNduz@E*CYK6k?G{9|jtk)yeOI=uqiliMA-#h0BN=e>nf8WiA8P
zv`YB85~$FUEC}xF)m0IPu?V-hgf{Jqdv`uO!0`9StIe~)!n{`ZJXGVesL%r*1?ay=
zVrKXoU+RrGv6~0C$-PO~rQt%a%ydYh*j}z%E^)djc#zF}FMj84^5eR^bMJ@EpXk(v
zOtP!MDyY2c0KB50JKa?d6X!;_Fk4;Q-C#q!xqyk3a`Dku$Wd^f$2i8$=M15EVpE>D
z=bm<{x{b*dd!&zF22t$`4R0aeV)OuKiRCwaoyNkmvRYe3V)8M$xr4q;jc%#0GVCC3
z$p!ww>B4}=(3KGTI$5WshUpdj%d~NHF*vG?qGq+SPi2Q@#!D(${zVT9^7T-=yvUX0
zwaP>95@<&kWIx!NIQ)i9RjrRpHn_8lB;G`L%JWXGH+pX_=%ODWz2BUh~-o%YvH)DPiM_l(!IX2ltUY4Qfwlw(qm&
zie^u^*r}@xmXS4<OB<>l5y|2xES1z>_L>f8?OW*Pp3c|B|F`#H=4JLhY3j_RGwY
zmNIpbfB{2*#C@b6zVTSkA5%cdkAzKEZ?j@?V8Zmv3U>u}I&89stG1l_*9;7g7qsHl
z4a<8380me6;(*uvte;+sp#TB!}@tP2N9NL$hhArI)4@)M3yC{vl%iC4NHbcGwPY*Gk
z7tL(%s8Y|9NhJA;WLhS)2-IZY>6WJPXg@u6@R5rt;l+wa(_|J=vD0964l_X!v1wWv
zRr@xpf=8uB?l;gHd(cnCyD_1f?_(6cc}x`xx~B(MYX_lxfFm0r
z+zeinN5|w%ihZCVnsgh~Spq$&WKdBB3Xnu*s+u`)jSAgzH16th;Z>xCVoK-2{poua
z2~q**(^=;an^P)+yUSL%Q90=oJtP_3SP0+0h6y)h$tPe_Aw)&_W
zymVu*wUr^gNOU_hK66Bx>r?-9j)*^#xrlFq=R>GKuF|e>FfNOGKmk)J8`I(!Kq5g@
zK>p?gM4*rq;dOE;9t0=GuB^tueWECH2j{4w*-j_}g`p3`?I5lhX>Z0-?9u+r=J^QQRm7-FB?_-|)VoKu}`>n$Xy`1jtOv3IQsc*)*fltpclNj6K&wZ6U%)ndu^R
z{!?eU^WQTlDA);Ow?YbuZ0pC>cH;+;lT|PSB4*OD*#nh3Jv|{34)fQ&QemMpB;!5GSh8+w
z2LRtC(LOglFpFzlE47-Z3AcFykl=aRkUe&D_7|6ZL
zve)_kgjez$Cznz@a5P6ueZht{EFaeqNcv9H5#)JTymt86TeFr>;#f_e$pOhMIQwK{
zP|2)AOwC{qB}O%@>uADglPQ+z)ev{!{XO}D#MWy;YzL^OtVOi66}_h;H~rj#5dzN{
zqcOo{!Zc*GHnQ3t6BKRf6cF37PB5?)M(eKE8kFI9Q*~01^4ERRzO>L2@d+xxjdgdWn0sZ
zt@if4-?%Lo_Rx};{(`fUY8g2mAEg!n6?SX@g!AzB46cz(tadsqP1uss^h1S_JsYU}
z`N^J~6+!fS*!q_!%DD`=SKrRY6<%3o!dE%gBE7OLG+JN8m@|m}X*Ei^F0>;&>#cH?
zX2Ldn@eho7F6_RppLT9g9*&@8s~YFVCpNi;n&>ZqZA^6G-UP0K(+zvWB~G}^R~ot1
zz82eORL{}82;t+x*sU|egN~91dPGpCE}}_=!Ed%<{1WQW#H_OLQ)Q)40*CO+f)DEfi$<4(8a6fqz~F9xT}8o5gn<_b
zzhB4}NYR`x-M!H$RD@iyJNF%1n114e&DIM-mcsGHqlj*QB{h!KNdGwCabLU6L+3x-BM9veKK@4Udr3Ha+rV??9iR
zW-X+J*RfbKy(N5D>LlQ4^s98l<1*E)?B0WW_I@8z0!G`FYrVW)^@6OLhvhPLvFT*p
zw=e~1$7zX;>4sc;k`ZxGN623xHYmT3@=`Z9ycMbzcgG=@tya|w4O`VzVzNeO*uKg%
z_^K7#Z-4a5`(LNI6=JU(l)(iw)m0TL_;8>OVii@Y)Fv$(Z-k|q^QS%5mW=hu7&J@SF;VU{q*k*Y;
zFS&OMBF|@pIE;Hn($6pOHndKdRg+<7z^a|v*TXQnL!U^oFL{)@JN0W{
zz+{wfn$SKYy{7>9+kK0Qjh;mTK*Bh>r=mFXM4?&51P__)0HtWv9Q<$3>2ZKW1)3}q+_pN0ZTX|C!HI6wS*6FLT2_E)dLup11RJu^i1Yf@oAL>SBd0ZzAHM
zNV-nE%20+d7nk{3oN2?rVQ=-${Lr6zB0HspH5!vm+CO~!TSd|r`4e`NA^(A8Cl?eY
zn~-s6+i6lzI1xcK>5yv0_Jwu%s03zB(+g7l^3#nj2$s*cVQC}vn>+b}i=h4-bWe;3
zm#Zzoj37v;yGYLcrDT}0Nv-vQ5#d~Po51-pUY`BuE4vz}VJ8WWXvyFxZx&vwiHlA^
zJ)J{w)uI`!m!{j?I9}+1b2-n;qDiOxPSnN%&Q6NE%2O7s-~GC}+n@dbewP7$?`b>y28rD>f_+}&)z>v)R7>jyU}3D
zao8!4viRp-H(O&Wr&FpHf15Gq)Nb=YHxcy5$Xjr3oS(OnJ2gAD5l0cnL6GR+*!gT_
z^@oq_)ic*PZ;#?nkkECz>QV5kjStUzE_U97nYmVd*sWJJ?@}5Pz)0ZUm-z9Cq8LCP
z31^5*?N8W0O3LH~w;xc%cJJiJFM`R`D23pQ`}r8Ot`->ShT$Qo#q=B*Q$E43)$JUH
zSN$FNX&Bay1+z6Mg@BsCqS>F@9aKB4T>ZATRHSEi)vd0M8}gY|;HmzmbYb7N`u00c
z_*w^{lEP(K6tJs%+-5x~nZ+GOHKFs2Hzae@1T6@)Eke|W)U{8-XpU?eZQw_{B1bF&
z0=F$~h=tQ+g}Hj=o?i(br;c%tX9b@=Vnk){+b2eFc?47K8~Jy<^^^>Dh_6nJ^1jeF
zS`Z4frLREc$zsQXJ2>xrg!NCMME^Hx{V9-U4-}5@TK+L8?+%C1GWS+|dGeA>i0{nv
zX_Wll5HHxEDE)`4rzN*IK0opj)?WXiuSq0pM*mwW#G>)}N~r%huAx|lBSa!+=aLQ&
z$`eRWSCgxfEfE+q^<-Xo+M+n|=<4LU|MQ)}yJb)2HzADE09L7Zcf{SLtSjaXOSU9+
zTb2en$1cM-_h;hxFE^`1g(U294uRq=GX-o)hk!OtsZjKsFPj|Wx-iF|&O7HCN2?^L
z+`^gTk};f6Mv~UQwmSr9n)xwkjALM#mSg_Ef0a-GQXZw!;q^n^KFhTN$&mr}AGOQ<
zh*z)ZOhU(-BM?7v2&vKf*&PZz>ZJ`6-`)_{kyRLzP}^#9e2=Au)6XkNI(B9ab0ccx
za9;F+4y2b^{m=e;+NJzx)CDv8Ih4>eY6HUfy&e-JkFlz7T|-EG>Rx;rX{ij7U$fB<
zk%5TulI`;|AF7@(iS72xf6#_RAfQroorUO!^LPbLvL+c^tfq-D7Hv$ca
z*Kec@XkmY6efez!O0
zJJamA19yYGE|TeTKEX?xD{io1C#0S0L~id&(2LtSZ7cb>9YQzL!kWW4T`_8
zB6luYwkIA>APC|p3qK+@r;(1xTk$7c564l34}V6M;O+%L-ze>S`}?kCtBZU+Eq$%v
zLCuV8VGsxMW4z%^>>g4*Dm4U85hvs?C6eR&I8r%l_alv`(9>j6yMz^5Dj~S5A4SAx
zY7xrd74=$44amkr_ZNoG#__kmD)|z@`(o>v#gAjS(9h^0mkE9V~1hs}ciMPI&U{FtSes=>1eq%aW4u%TqSRie#>3VN$4Z2MI;SHZAK
zo3$+GoIeGDcu_Hq=0>uUkqRvY9BAJk+>R|^EmKn-rYrJ$|wnZ9f`?jgPJ);b>BLyKvQS-Oh`)CsAsIj9CNnCL6;
z?R=t`aC5h`lZ7Xb)LpnRF4qWTFKj
zppGfEz&>k9ZD9w%oDna0o0_Z-ZjAadx&ccZ;)lxww+-nc&LaK>?_
zXj{`BE=O{dd||DjG*1eP}XJ_rDK<_MiX1
zY6+VbWSkC+KByRrq_kgcnM&fqO7wCY%iwl1I6mKF!+U{!!(2Dncwz;+Muy6nLl{3m
zPBSI~-y-@{Js_iQ^e2qSd7ibx-@222Ujr)htU|3x2|M9~QRiF%sOz+Z;QhCt0^=
zD}MD$veG2~uogmfWXIsa3TKkN?p<(J69D(pgEj#1
ziD-rs>@rg)X0kQ&g4CT4ZF-mzAdH$3Ye^rgWpOj$$vMK_
zzx}`OmTi)GrGxEJ%EF`3{*2Tf8wZH_dBw?G;z(r66}yX$`c%)CK-20?!PkA^FU;56
zyaMw@{4r>A*d0z~_n?>aRQ1cyaed}0%$%8*6PIKLJ0>+hZo};6Z;q^-DWMMrA#efM
zE*Gle<}`Xv2*8%z-ZHhuth+_)J
zCa*|)(&*;-IRmgT*YsT`p;9c8PDa%ppN>bPFUv7|e(E!S696yj6hk`^Ie@uc^~Iw&
zTX=)qC&>p2BmKCw@L(T9l>{-}X%YjfQMBca+;-|{OZ@ZVkc6IJG5){Yfd%k{(WiCoj+rG5Hvuf2V9jG9W!(DQ8?rvI`$nKev_7l$1_(oCDcFB%4c2<4EhLKqIKvI?QmndMV@lr
zqtsaYMEcBo+19hANN+Ihk+`i0@KsEKtT@*zBhu3mN0);l=C*myh9!{@_@wA%iK&}`
zHOgIuzO>Y7T7_(hWedVcI*!UeG4x~;BfRe)?6k4Lu|mcQbP?5Ukm|lU>
zy?an5Z;1Kgj0Y$)*qHHha_q}G-6PN9P(qEenXR1S++&?8-5y&ive@330W_SUdU4n_
z{>w`X^oQpcdcBopjE1@(#tl%QWk`!RwgNz*
z7uY;|i+AXS)49BS88$hJ@$H%z8bgbDzLFbDwxeVGBn*~ySwFLiOS#2UmX#5FyPo2D
zMPc6g+M36S2fP32CAjLcaM#-;Mdj3zu>&)x%pXkwMc(7XAn?P2ys>Gdyi@pl*1!%N139Bf`t#Pr<|z36u(d95s!{eJ};9av3w2`+i|
ztk1kOvFOcnZOlFTQmhR@(aUl6M5f2<~^k*MI-`@3ymA-TT2tUC8kGv$EXZG0RDwpB?
zjdAQ&$ehYXV)3q4N2MnQhJu}jswA>lAGfCT;R*N4EJM9Fx`)%V+V^VNhEYz3xU@-B
zNyLW6u=T3i+6Kn0#|Ttf(XG=C$@h);%HW598F0GgF60}2Wd}{DRpq>g1RKUxiKXe>gQMuhNf*(8h{~*LogvskFAr@r
zo_|4ZCp+)9z2u6Yz%#K%U5Y(oJF3fQ&}x#RB*`eyl$k0^!C2}T9BD{v!Tdhob$QT(
z?ppp7mLzeQswfc0Egz~#=zIkgoTsdC9tW4Gc!sD
zsVDn4EY=3F+aETY*H7QddkW6-idy@_aX||+1tdW|T|ZMgXK7ViannRAJ{!V}O(Eey
zoW(eF?`G;-&eo18R+ueLk)tu8{5GACha7;#eObNfku5pu`ubuIEe
zkrpTc@g(;|RN8E$AVZcMxsYPaJMrHH$R1Qtxt-Z&PQHkZ#cf$&4p5^fT5gb4LC&gv
z+$?;_;HVB-MqU+XRG`;fgg*ifI9XR(C7KNmM$*y^h-U>G$y8?4O#$h0a5Yj~Em~g5
zOdIc?Y`ZfWw0ET8v>M4Ng2oQaR9=6B$*15)q;RsRqkq;JflPgb;8VIu$AMZ@5Ou)adTU|{;LG+y!lX)Ie
zK#L2WPx?pIF-Vj5o#{ujvyiMnq(1+4$zBhQMs#oqs;LFJ^t7k>_BoX2VLE%i&ENRi
zD({WZ<7-e?ft2D=6cWNTjS-bqQ(MvOs`!$H<+jD?;-k(%%MkzF|29;9mf1;I{Gpa_
zNiF_K-QZV-yxC`)nJWh#bg*SImt_mL{Jhs?JS-_5*3keH1_A*^MuXt8aSnP2r7Kp2
zn*GT??Q`Y*AUxsFu&w3Y$#0qp3yQ21zJH{V`%Kkc@Al^;u-V!=!$tIrtV^~`<~xR=
zxr{L6v*jbMvlbjzI!k#P8b4}?BD@0dAN<|(TMhDLjd;x@H8Uw`1=FYx>9y;pFC+?L
z7830P`P~P66KOLd
za<0}7r1HW%Fc-y^hfB~S>({`MQE9+T^}&Jok_#Q)Lmi}`|C1_X!)W%SU3SQNNc;(=
z+G-al^JkQf);G?3gK|s0#Yi?VdBvMp%qr^@%aTf)M4hHQ(|$uINtKjeFBqWi%m?8H
zq@1nZjq35t5~^z-9A7L}
zevU+kgE#PFjlw+snN?&QOz)SlJS^kOCx{jJ_1IB9^}D%4FVNf
zy*CByAoI#wB5h_`*+k}B%B>!grwV{M@}^_H+T{hq7#Ij`9lV1;AeXIH^ITo$5w9$O
zZtSg0_r@uI#CTddpuNKZ5%b4YKmVBTz{wbzp0H|mmZ@>~$;?dh>B3n$D!dj0Tm6XL
zA%GmBIc$RYduiuNOb+e1#mRJAfjbS0!wjn7vwlM8<~b2xcivgx!>6AQCUtfbBK+m@
zZP*LC5^Jo)iN~bwh6+&O5IrrQW*--r!Q{5QI!5F);P*yf8cQ1+w_If^1Elu?TkNu9
z3a4^s>aMbzqpZ_r1EnzaK(hJq#Zcdrxekf-C21E@QhIA7b0OU^h1E{P+9J$w(>TMI
z1Zy?BH#j@wUk`M&5x+js;X+Hz)s1d5wVN5ri>hY~=Gi%~7
zr}u6*_I~lql-s{s+e*Lex2KneEC5lPh>tHj$tvKhgAQmfD|IjXijg3#Hhwx&&~Z0;
zPhaWK4Jy$XSEavFGef?XPE8{L-bbXbS`v&iVIANoFW0YD;KhQlgw?xBHvxdp{GraG
z{MOV_M$cld=A|{1La6Iw(yd5}IU6M(=}zzT+)6RQqsMHE5SNr=vuW~^c%KUEuCZzZ
z4lcioKKy~ILD-#KgoGFUkJ=~aXTfMKllLuQ*kK0gg7l;WJ+d|4QpTuKK*gr1C!L_9
z(#fE&AaO-nOnhlgJ9l3nWjOtcC4+86D#U4pi$23B%Lj6s8rbxM+iuDEM_$(5^T!WJ
z$Lo*a@H&O`r-9*UHa(0FyXyuh&lDVS*YQ4G6n?k7neXY0I5YgLQU6{7{Ja6SINWv{
z_9RfkxT?4WBbR=JF6c~qJS|G&!I8iX%Y5?_1$D%5QowfmDCG=`^QqY59XeD>NRAT%
zTL|}G8e=XMl;`iCwl+oASR+S3*MHfmDzQ`|D5sLC=vdrE928A4?0~xcT?F)2ajMA;
zarS*pR0ESzw(OoDHJ`K~OF~FAap!y{cVOlA7m8n)d20atcn*0ZhacnURo$T!tR@NdnsDCePkePz^Y<8tc3-f-RGCy^NS4W=}r$H&|RF&DC(UeZ$jAC7q|S
z7pdb;;_RYLKFRWrv$j>OlY`blpL7`vKnVn)ijR0vNz6A!*^Y0G-z$);X?+<
ziaI&aMCw-O+Fu{#AiiF5B{cyk)HVc&cTP;5igVZ`ctc2lkq<bh~|<9qY}gw8+5y=3JT*!s#2~
zk;P$c%UqfInYK{!$Ga_x+8TB;`q8l>r@c^`wh_YK1u|oOIaz40lr}opuyAWUfEK#Y
zQOnptr90qTRl9Pi0r#|;$fL&bi&l$jFgT*9aW9?rn|5n#o}_(}8h21*CW
z;7B4sGr#M!f|Wo#y%D>NawNY>uu}dXpx+zWkec@UiTvrr%ssd25p8$+PiV2WD>c|d
z3IqX>0dv`~fvKt~ho{cWaDc|i*-Un!(NCYg)a=<6nhs52g>SgFz36Yu{8j_3>LN&k
zdYQQ!3sE;2o{=4eOl$U;_DO)uw;f6=XevCUOJJ&OQrLb
zv(JN$37+|1BL9u43aGFK_l>S5OAc?Tkh6S2?AMvj!^XURuCY?(x|Wy>7A0*^dj`Er
zZ|Zd-huzBf%Ws6lh*X+70tQspWUEK2mc7}f(sI%gM+~WL=KD-Md?Nw)O^E!|o4-1f
zG-ut!*}kL7Ts_xoAL~cFrt1j5XL?x<2%nSMR*$QW*CcF#j*0S0-p*B_)bp$nKMW4C
z_31wQVIM+2GPdb;QOz=5ORW==@53L|Vd#mnwySBA*`BwY8C=Z%%9?)h3X?seb~
z3oRkH`agt)XN%^%00F5`H8A)hu!ugRES`@iTA^q(-0cR_N%3j}LJ^_=RuaQDFv?Aj
zn-q^gm35fb)lgWEc3zrp0IdBv8A@XIHNRI)oFfF6=yfS;B5UmmmuJm#Gux(q8|_R~
zg$mP7!d;HPd0Qp;(ZI6Jp_fAyXQ(xaAwQD!|i?2?hfY8dH!Nh`>+=Ok3--_EoRUclA40m
z$LO8EP;XF8;tGl0mG6?9KVN*&E_QOo?vRIN#iOWZU21H&2gAtMymh!4`WtO397xHV
zBNd(6pAsBi_zOQ9YPxDm6(HwJBB{?Ll8Vf=^DSjVM=yKvJtI}c!oKttGopaAAt_uRyx@Hbdq
z=|akP#!KRwrQx9`E}f9P)FVj6z(eUdS4GHEl!4b7MnD1~_N4f^v3&RrIBlla-~DZE
zgr1=;C~7QG7eCsRT0m+vtmH{^uywVuzOPZMQnL3ggGTqPn7MP5tnC2D
zG*Fh(n%wc|Ku6V$d}B@}#KG7<`zve>PgAxmD~g_e!=SR+bCr!P%&g>SCs439!@^cU
zE5W6+@y>}Swxzbqn~dsL%;BY-9C+wJ!|I4vX8C|NHK&C1y(8WD_Wmh%55K;Yb?R%j
zwVDFkjqw#x-`T@}%^u(Xs2?4Ku|3zQEE{_60&)^Sar9g*e&hy0iqVo->0h
z8s?4MlMU+j`L!dvODrJIpK{{l-Kg~817d~j^s;9n2}G8Yx|&xe=(mGZ#0YInq&q3;
zUYuv>&AWmm1io%-pn+KN{%s1Un4;r&;RhvCn9*b3ru}7B-Bvg5{+tHecI{VRv|<*(Pl=~a?!fG|T@N@>Em
zY!*f(yr(=vC7a2OEiHfeEKn?Rh|;CHMW;F?LF42$RaT}r@h2TWG~(%=SB6XGqF5qZ
zpfAJFie#n)?YPQruJ&&-eRVyl`)ogx)E=mc699p+Ep~hTG(487tK*G~9RrB2I}WFU
zHo^18^xL#{iG9ZY4{X*pV=<-vaWNHz;h~>FBoc!fs>~~4_E!ObtW2+7oLdj$<14$q
zbhU|;;xP5oLeeXl7Zi8Ks+$ov22?krOECPh?%Ij=I*Lu1OLKp3#_!;M@1_2ad+ffU
zo>U2BGE&LjXt3>?Us|cz4-qqLflo@JI;CYEkG8mJQb8SnGcpwlKpI(S_Dk>zl3mp_
zF8n)4`3t$ZL6n+2Drnko1MD~R%$YA5S&^ACbp7UgZAZ5yacF=zQ%HoIMUClh|4|5h
zD0LrKGidRB;o5or3ujV;;~YK=ieH+qP?9Un249(5cg%WOWc>`%%wT2=sn63fdGFdT
z!MY`yD+ZGkRo>BMEmoT)YnqzfP7O&GA7N9+Ntd%>Qr|dn_A|SzxeWMI{sIOf=Kp&5Q%b*5LHfI}FqMg_WQeQNE8Y5Y%bGBEhN9NYlvo05Q
zJ@F;g#)8gl25XJwq%`K%?`Cc#Rbfuf2u{@p^qH6Zh*{oehQtC~_GdNDm`x*G?Wy@w
z0@$tAl*{vFoZ5P+_(A)6FnX+!5;1{zWKuqZBlF=8zRjVe>a?N12G@#TFvGQt+$N0_
zeFl69&hAD1ErKL>-S!uTbvNlnv&L?DyMD)VE+|UCF=3Mrnqt>V-6fNQ=F8Mh8T@@K
zJprCmX_JY!#6bb%L&cR(J$)
zB!-skStUoX>1wkm6;XOgF2Z#SfidT);VFl_pU`p3=<4&iQ>CSuu(p*i;J&(uaCYH-ZZbXTUn2p~D5|85qU~5LiMABEKns~zL
z%)a66GM96+FuriH2QFx8_(A72q_NpKpqQ{6eaK_Hp5SkhjtBj~rGIwFy8m~Rxs6Rj
zQz}8yN?B7{n7hq#WrJA5hbrg1hKl?6Q8<>`va%=$P9Q8Q4_@wvem=)C*GAtnGq|u@!2Y`X#1cC;`%LY=d6IP9?u>
z-HVh!^)frF-{@