diff --git a/lectures/_static/lecture_specific/match_transport/acs_data_summary.csv b/lectures/_static/lecture_specific/match_transport/acs_data_summary.csv new file mode 100644 index 00000000..29a0e219 --- /dev/null +++ b/lectures/_static/lecture_specific/match_transport/acs_data_summary.csv @@ -0,0 +1,352 @@ +count,mean_Earnings,std_Earnings +1331,14900.405709992487,16432.77000114295 +113,14945.044247787611,16555.977239779477 +536,16235.858208955224,14390.705658707586 +3788,16620.493928194297,15627.600938733565 +1337,16868.38444278235,20674.271930182003 +1455,17200.240549828177,15666.75943016007 +9900,17485.504646464648,16898.448536979464 +413,17855.375302663437,24934.411743555444 +317,18443.72239747634,18910.04500054648 +12370,18945.816572352465,21375.376002331996 +12031,19723.61815310448,19450.33475965967 +8501,19992.385601693917,26313.652148468223 +8800,20444.996704545454,16164.839628000927 +773,20644.954721862872,13843.979122054649 +7403,20739.541267053897,21147.46175638877 +583,21070.686106346482,13184.831707584104 +275,21266.98181818182,14862.656038103265 +1359,21793.024282560706,18438.68669968874 +2191,21903.929712460063,19776.452810936185 +58,22020.344827586207,11427.21619881441 +2745,22091.814207650274,18025.068526790623 +905,22172.475138121546,26617.647130774334 +654,23896.785932721712,26438.239579467132 +1373,24324.493809176984,21485.325471629778 +4016,24728.254482071712,19186.253404900126 +59,24757.627118644068,13522.576312760239 +1808,24881.766592920354,21619.594437915246 +5611,25004.48957405097,23614.704606262276 +968,25148.421487603307,38377.07442008185 +844,25229.348341232228,29279.511776925534 +276,25384.565217391304,16203.599964522593 +78,25448.71794871795,11606.690229141901 +445,25581.460674157304,27127.679937797322 +7586,25584.14909042974,23230.713830277662 +6467,26015.113653935365,20486.78966825072 +362,26178.701657458565,36192.85405241237 +455,26305.69230769231,36790.92885699715 +2155,26422.19489559165,22525.939195468625 +261,26515.708812260535,18824.594468394233 +193,26571.709844559584,33818.247365493444 +15991,26618.94715777625,25397.867018966135 +8857,26899.661736479622,25346.770850944646 +86,26936.04651162791,18352.143088771747 +189,27107.989417989418,21099.854849251024 +249,27184.176706827307,39977.39939420132 +295,27234.20338983051,21839.87282143491 +235,27331.063829787236,16073.20635525605 +611,27346.85761047463,16419.357945885145 +327,27385.443425076453,17875.40932739134 +16,27528.125,22813.8113778328 +2640,27693.99659090909,26681.361802938936 +166,27747.349397590362,22331.91891953525 +515,27771.844660194176,25404.165049131756 +2467,27878.57965139846,26307.11527752712 +481,28258.89812889813,16958.543502956967 +3803,28274.057323165922,21546.587241382982 +267,28470.674157303372,23340.82883106868 +67,28489.10447761194,46698.42203327656 +364,28600.274725274725,44244.289584147 +266,28997.48120300752,15988.964261640675 +90,29713.333333333332,20165.735763328958 +51,29776.470588235294,11574.29199969128 +81,29792.716049382718,22010.41747743246 +801,29881.373283395755,20583.989667013906 +1021,30278.530852105778,23218.795813219058 +177,30332.20338983051,17935.78926601116 +486,30465.637860082305,28259.00371625996 +3405,30653.36563876652,23401.127379700003 +422,30680.52132701422,27465.065363194262 +227,30769.471365638765,42474.80173020462 +246,30885.731707317074,55546.60613517166 +13320,31045.933183183184,26946.80530452291 +1627,31137.301782421633,24389.434720529644 +2115,31139.739952718675,24737.420133310752 +690,31313.75507246377,39546.37376933845 +234,31333.076923076922,19440.086409031836 +3528,31353.257369614512,30201.88617852633 +55,31432.727272727272,17733.19814636707 +2556,31740.395148669795,23858.236519580147 +226,31883.893805309734,20194.11051365129 +4184,31913.109464627152,20005.96503471116 +3216,32061.293843283584,37659.195701316865 +115,32449.565217391304,33948.000416305025 +124,32482.016129032258,30264.572488880414 +1571,32499.86632718014,22220.855809623023 +459,32765.054466230937,29315.317406751692 +1229,32881.47111472742,30461.849977536294 +214,32882.056074766355,29665.01036379667 +70,32884.28571428572,16767.59846185654 +216,33215.416666666664,40694.92326274207 +2473,33393.8819247877,23015.060530423583 +8822,33526.67671729766,27492.126718365995 +4217,33726.49276737017,26718.033992494773 +307,33792.44299674267,26712.79555993049 +3990,34137.160401002504,22439.7319837709 +229,34149.34497816594,40238.782484030686 +7184,34210.84632516704,26237.520916352805 +80,34211.25,18678.499070633203 +257,34308.21011673152,23132.80010145342 +371,34429.757412398925,37069.96194480972 +941,34452.96493092455,25627.40342289543 +5733,34578.5853828711,32184.155611732942 +127,34662.04724409449,17078.78696012971 +444,34668.80630630631,37250.29975389088 +927,34846.03020496224,26688.921318718658 +898,34897.29510022272,36904.565568355814 +912,35151.304824561405,37031.80265454963 +11425,35212.49426695843,32120.41867256671 +3685,35402.0461329715,23899.45809249939 +935,35720.95935828877,31137.61957437525 +25472,35847.70846419598,27026.4837865098 +125,35863.36,32142.15057301027 +1092,36288.919413919415,19371.86049014382 +69,36692.76811594203,29093.918151669208 +74,36800.0,26658.59752345963 +6188,36829.22915319974,27642.010924837155 +16323,36919.59504992955,36092.85778402987 +814,37040.78746928747,20638.92809201809 +260,37185.846153846156,30210.24643442601 +5042,37199.77072590242,37321.19404953267 +517,37514.85493230174,26726.8595597498 +17530,37572.28807758129,46212.2496496395 +9662,37583.05837300766,29650.724538505343 +375,37587.706666666665,27981.512490403795 +698,37674.140401146135,24760.513994609373 +9364,38058.768154634774,29711.909070934867 +605,38081.90082644628,23280.336767112032 +3214,38167.74113254512,23812.013413820972 +97,38174.742268041235,17783.972951601645 +94,38236.17021276596,42287.02450030028 +194,38319.58762886598,19153.0575714908 +8294,38398.73306004341,30491.30807084761 +558,38404.74910394265,26292.91925062153 +1159,38474.03019844694,42600.93738514701 +1208,38751.82698675497,54174.68548176281 +746,38856.635388739945,40640.23796181628 +1084,39038.4778597786,28601.32669679299 +933,39267.71918542337,32088.15983517525 +643,39439.28460342146,20534.148204346457 +529,39501.32325141777,36024.72238957143 +1391,39717.54852624011,31001.739732344027 +363,39813.691460055095,23568.937152323517 +381,39837.40157480315,29296.054715859427 +348,40133.96551724138,21622.060389069033 +229,40141.18340611354,34957.815699927174 +1374,40187.91848617176,25394.113583574104 +265,40230.83018867925,49287.03812706216 +423,40320.3073286052,35871.12588392239 +843,40352.9418742586,34984.772515512734 +207,40529.371980676326,18488.14469501662 +668,40725.37425149701,28373.754221909596 +431,40830.533642691415,31645.303379706 +280,40862.0,41453.58181687112 +1334,41027.22488755622,21706.293340332068 +111,41052.252252252256,21810.659807646378 +4703,41081.73931533064,30155.6280550901 +655,41335.87786259542,27797.56459945549 +2204,41376.19328493648,27692.15659180858 +175,41430.857142857145,26229.435663736567 +6045,41482.893300248135,36341.09627465994 +600,41527.473333333335,46211.735299325366 +2916,41577.743484224964,31769.736154906233 +691,41609.75397973951,29195.504519399656 +221,41659.72850678733,23336.66755672774 +964,41809.242738589215,25307.404897071097 +257,42811.284046692606,35015.27999474527 +175,42911.08571428571,23269.90880323775 +192,43063.541666666664,21051.575259319146 +117,43194.8717948718,20689.856110872162 +636,43281.588050314465,20160.23022366003 +1489,43330.597716588316,40353.07331609059 +277,43351.73285198556,24633.325751226825 +64,43659.375,18341.753207212383 +197,43745.93908629441,27462.099741286213 +888,43828.04054054054,28074.22863018439 +1069,44094.79981290926,29729.09800287853 +242,44177.396694214876,26173.4245868871 +91,44213.18681318681,26434.641450571416 +272,44218.82352941176,29516.530737921956 +167,44351.796407185626,58675.72744099032 +4611,44567.13294296248,30221.6451884297 +2132,44736.44512195122,31898.371997271137 +198,44871.919191919194,46820.006985967295 +1324,44981.56344410876,26774.104176795416 +5914,45026.84984781873,32824.83888195073 +66,45305.15151515151,28342.115725944735 +97,45441.237113402065,21805.325957670862 +745,45480.040268456374,24964.540446454812 +24474,45618.37149628177,37652.8508249016 +4008,45730.57634730539,25712.302187776186 +184,45741.30434782609,31541.84367018702 +1483,45752.27242076871,33880.20921041752 +292,45850.68493150685,35447.424630448375 +205,46430.73170731707,32978.16392752343 +1987,46552.1791645697,24796.0699878023 +480,46663.333333333336,19953.078511888845 +790,46709.91139240506,26102.47071751405 +724,47023.4544198895,33101.32035161209 +2333,47139.80325760823,27632.913326289738 +2760,47171.37463768116,29721.461675688308 +132,47271.969696969696,18757.46493391808 +420,47279.333333333336,28742.175401487362 +1344,47448.56398809524,34781.02022125067 +2409,47598.87920298879,33112.226785536164 +1182,47735.897631133674,53417.473248027534 +2753,48275.37595350527,25392.883035987077 +355,48282.81690140845,48664.66383493207 +920,48287.891304347824,24054.77273442469 +30711,48485.85624043502,25974.994170520782 +1510,48623.9821192053,58496.75850772877 +912,48924.59429824561,45467.12428353692 +1431,49157.00908455625,67020.1922750646 +182,49536.26373626374,26353.35749305159 +2785,49738.92315978456,22858.619677648265 +3355,49961.701937406855,27507.04969097614 +2910,50379.9587628866,32544.59237225434 +85,50483.529411764706,48670.047616722186 +352,50664.28977272727,39921.32811931963 +431,50763.96751740139,36132.32513053202 +727,50929.40852819807,26898.772377839156 +2581,50996.691204959316,34830.513679188836 +1558,51065.64184852375,46185.647507106645 +3991,51556.95088950138,44608.90529421183 +6397,52829.89057370643,26272.447766654015 +1298,52961.702619414486,30427.788117067503 +3310,53273.045317220545,27325.337718782423 +255,53495.686274509804,40788.49735834336 +1766,54103.9637599094,36140.820368584136 +1161,54156.770025839796,35941.12992327487 +876,55171.57534246575,48064.29058816983 +487,55279.05544147844,39941.18068275288 +126,55473.01587301587,74930.57737687431 +1692,55520.135933806145,37275.07901366944 +6575,55656.67634980989,52923.80018420368 +505,55812.158415841586,30225.258521198244 +87,55926.4367816092,31894.668431653958 +1195,55943.054393305436,26573.791302972855 +60,56070.0,27083.169456262134 +2927,56210.256235052955,37252.18885203767 +104,56230.730769230766,22789.881803682973 +452,56331.50442477876,54095.804251743946 +176,57216.47727272727,36990.113135879896 +445,57314.831460674155,45440.10219663298 +2363,57923.77486246297,39485.808205901696 +131,58089.31297709924,25045.909390890094 +2924,58194.476744186046,30824.85153938926 +318,58424.55974842767,89388.3005190242 +271,58573.87453874539,44868.13525051241 +162,58727.16049382716,30604.836675139595 +1418,58852.99788434415,53490.17855659326 +71,58945.77464788732,48006.99260560076 +569,59005.92267135325,32993.73118382685 +756,59329.40476190476,48712.277259477036 +322,59479.19254658385,34228.098283699226 +199,59486.38190954774,32873.24013763075 +2285,59627.90371991247,44712.67296198911 +1005,59827.164179104475,45586.46427960656 +62,60098.3870967742,53146.91821158291 +185,60263.24324324324,55709.45347798263 +1346,60270.31203566122,47423.885049741664 +254,60493.700787401576,37174.341416732925 +596,61021.54362416107,43967.92213390216 +166,61025.301204819276,54169.58744954755 +367,61102.12534059946,25381.578543313 +217,61188.01843317972,57184.118199863464 +268,61353.35820895522,34154.96997069272 +10816,61562.85965236687,55996.039197531405 +259,61680.50193050193,67521.4391458056 +155,61690.32258064516,29792.071145558497 +1712,62005.94976635514,82004.00011630662 +651,62063.133640552995,57119.25601766312 +704,62695.923295454544,51568.97804067012 +449,62741.98218262806,68033.4473468201 +1025,62830.55609756098,45748.607652384984 +587,63250.68143100511,62428.68316557238 +735,63420.91156462585,32404.27322144226 +162,63453.7037037037,31927.773962062136 +302,63934.17218543046,48465.51401749722 +663,64399.034690799395,95690.3002845804 +388,64507.47422680412,32780.25924528305 +671,64980.76005961252,39938.30308490889 +477,65159.95807127882,40091.931783288856 +212,65817.45283018867,41448.393026466496 +561,65985.20499108735,56380.00701872179 +1071,67327.55368814192,42492.081956091955 +1954,67511.62538382804,71960.90116183538 +5979,67784.04749958187,33363.825168154886 +5857,67795.41061977121,60494.66027027032 +525,68352.09523809524,42952.24731066717 +109,68780.73394495413,29726.978260384523 +443,69287.47178329571,32411.255550407794 +236,70715.25423728813,64920.68762121735 +457,71396.3238512035,49319.07527537085 +1476,71814.65447154471,53727.080294440704 +201,72201.99004975124,42954.59342165748 +889,72983.59955005624,52618.60901769051 +3955,73775.20960809103,83809.58210059197 +2484,74696.73510466989,72022.22140203201 +981,76734.77064220184,40100.00367796557 +5456,76824.88178152492,94015.14422905697 +1920,78107.23958333333,61634.14362029448 +1070,78638.73831775702,36784.3423953724 +999,79330.23023023023,92468.32050622275 +256,79656.6015625,43021.64179250923 +691,80269.60926193921,54467.91771833894 +1949,80882.58081067214,61989.21190635767 +133,81566.31578947368,36265.32857673165 +344,81724.04069767441,70318.5743263255 +396,81917.67676767676,54724.49654573229 +199,82389.94974874372,37013.348663561796 +405,82656.04938271605,31312.455912066045 +1242,83829.34782608696,43418.88769695796 +11079,84068.53678129795,78540.05144529899 +945,84390.61375661376,42853.130060461604 +154,85905.84415584416,66653.03119086035 +1038,85910.73217726397,92576.05740925772 +312,87788.94230769231,79559.47348408098 +4880,88004.68032786885,92082.56051412193 +2025,88097.89135802469,43108.77694566431 +77,88574.02597402598,58833.73853629576 +478,88867.78242677824,36799.042458869335 +1250,88905.3584,93591.33823782875 +1273,89346.09269442262,74507.52912062789 +359,89509.55431754874,54599.991408533984 +1482,90342.52091767882,79346.52467286953 +97,90355.67010309278,34388.97282400326 +460,91395.43478260869,38724.948762694294 +831,92410.26474127558,55721.05171320993 +2570,92907.28171206226,57043.36881076245 +603,93742.68656716419,85058.5568150245 +438,95017.35159817351,79864.18001371343 +105,98062.85714285714,65425.62168239192 +453,98758.25607064017,116134.01267427258 +292,99711.98630136986,53726.51594537541 +4496,100022.6859430605,56900.20721783729 +1690,102356.9349112426,56626.570667655185 +456,104803.48684210527,71311.26958940338 +2404,106926.18552412646,63719.8232169186 +71,110228.1690140845,73937.56668027153 +304,113066.94078947368,64585.43476419163 +505,113803.78217821782,60385.33964300992 +1835,116122.87738419618,116295.86226115789 +301,121848.67109634551,89534.09110628269 +1130,130472.74336283185,97564.62817055204 +337,145268.84272997032,105113.47047426814 +2825,146967.46548672568,148283.85967018566 +1874,151577.6152614728,163188.46424002026 +8874,156727.0739238224,135097.47769384919 +67,174783.58208955225,137227.9739979436 +1224,188562.70506535948,141456.3948276375 +7405,231459.67832545578,166980.91397517134 diff --git a/lectures/_static/quant-econ.bib b/lectures/_static/quant-econ.bib index 81ad8792..38baf626 100644 --- a/lectures/_static/quant-econ.bib +++ b/lectures/_static/quant-econ.bib @@ -3,6 +3,20 @@ Note: Extended Information (like abstracts, doi, url's etc.) can be found in quant-econ-extendedinfo.bib file in _static/ ### +@techreport{boerma2023composite, + title={Composite sorting}, + author={Boerma, Job and Tsyvinski, Aleh and Wang, Ruodu and Zhang, Zhenyuan}, + year={2023}, + institution={National Bureau of Economic Research} +} + +@article{delon2011minimum, + title={Minimum-weight perfect matching for non-intrinsic distances on the line}, + author={Delon, Julie and Salomon, Julien and Sobolevski, Andrei}, + journal={arXiv preprint arXiv:1102.1558}, + year={2011} +} + @article{sargent1973stability, title={The stability of models of money and growth with perfect foresight}, author={Sargent, Thomas J and Wallace, Neil}, diff --git a/lectures/_toc.yml b/lectures/_toc.yml index 37ddefa9..497aa68f 100644 --- a/lectures/_toc.yml +++ b/lectures/_toc.yml @@ -25,6 +25,7 @@ parts: - file: arellano - file: matsuyama - file: coase + - file: match_transport - caption: Dynamic Linear Economies numbered: true chapters: diff --git a/lectures/match_transport.md b/lectures/match_transport.md new file mode 100644 index 00000000..13f518fc --- /dev/null +++ b/lectures/match_transport.md @@ -0,0 +1,2462 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.16.4 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Composite Sorting + ++++ + +## Introduction + +This lecture presents Python code for solving **composite sorting** problems of the kind +studied in *Composite Sorting* by Job Boerma, Aleh Tsyvinski, Ruodo Wang, +and Zhenyuan Zhang {cite}`boerma2023composite`. + +In this lecture, we will use the following imports + +```{code-cell} ipython3 +import numpy as np +from scipy.optimize import linprog +from itertools import chain +import pandas as pd +from collections import namedtuple + + +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from matplotlib.ticker import MaxNLocator +from matplotlib import cm +from matplotlib.colors import Normalize +``` + ++++ {"user_expressions": []} + +## Setup + + +$X$ and $Y$ are finite sets that represent two distinct types of people to be matched. + +For each $x \in X,$ let a positive integer $n_x$ be the number of agents of type $x$. + +Similarly, let a positive integer $m_y$ be the agents of agents of type $y \in Y$. + +We will refer to these two measures as *marginals*. + +We assume that + +$$ +\sum_{x \in X} n_x = \sum_{y \in Y} m_y =: N +$$ + +so that the matching problem is *balanced*. + +Given a *cost function* $c:X \times Y \rightarrow \mathbb{R}$, the (discrete) *optimal transport problem* is + + +$$ +\begin{aligned} +\min_{\mu \geq 0}& \sum_{(x,y) \in X \times Y} \mu_{xy}c_{xy} \\ +\text{s.t. }& \sum_{x \in X} \mu_{xy} = n_x \\ +& \sum_{y \in Y} \mu_{xy} = m_y +\end{aligned} +$$ + +Given our discreteness assumptions about $n$ and $m$, the problem admits an integer solution $\mu \in \mathbb{Z}_+^{X \times Y}$, i.e. $\mu_{xy}$ is a non-negative integer for each $x\in X, y\in Y$. + + +In this notebook, we will focus on integer solutions of the problem. + +Two points on the integer assumption are worth mentioning: + + * it is without loss of generality for computational purposes, since every problem with float marginals can be transformed into an equivalent problem with integer marginals; + * arguments below work for arbitrary real marginals from a mathematical standpoint, but some of the implementations will fail to work with float arithmetic. + + +Our focus in this notebook is a specific instance of the optimal transport problem: + +We assume that $X$ and $Y$ are finite subsets of $\mathbb{R}$ and that the cost function satisfies $c_{xy} = h(|x - y|)$ for all $x,y \in \mathbb{R},$ for an $h: \mathbb{R}_+ \rightarrow \mathbb{R}_+$ that is **strictly concave** and **strictly increasing** and **grounded** (i.e., $h(0)=0$). + +Such an $h$ satisfies the following + + +**Lemma.** Let $h: \mathbb{R}_+ \rightarrow \mathbb{R}_+$ be **strictly concave** and **grounded**. Then $h$ is strictly subadditive, i.e. for all $x,y\in \mathbb{R}_+, 0< x < y,$ we have + +$$ +h(x+y) < h(x) + h(y) +$$ + +*Proof.* For $\alpha \in (0,1)$ and $x >0$ we have, by strict concavity and groundedness, $h(\alpha x) > \alpha h(x) + (1-\alpha) h(0)=\alpha h(x). $ + +Now fix $x,y\in \mathbb{R}_+, 0< x < y,$ and let $\alpha = \frac{x}{x+y};$ the previous observation gives $h(x) = h(\alpha (x+y)) > \alpha h(x+y)$ and $h(y) = h((1-\alpha) (x+y)) > (1-\alpha) h(x+y) $; summing these inequality delivers the result. $\square$ + + + +In the following implementation we assume that the cost function is $c_{xy} = |x-y|^{1/\zeta}$ for $\zeta>1,$ i.e. $h(z) = z^{1/\zeta}$ for $z \in \mathbb{R}_+.$ + +Hence, our problem is + +$$ +\begin{aligned} +\min_{\mu \in \mathbb{Z}_+^{X \times Y}}& \sum_{(x,y) \in X \times Y} \mu_{xy}|x-y|^{1/\zeta} \\ +\text{s.t. }& \sum_{x \in X} \mu_{xy} = n_x \\ +& \sum_{y \in Y} \mu_{xy} = m_y +\end{aligned} +$$ + +The following class takes as inputs sets of types $X,Y \subset \mathbb{R},$ marginals $n, m $ with positive integer entries such that $\sum_{x \in X} n_x = \sum_{y \in Y} m_y $ and cost parameter $\zeta>1$. + + +The cost function is stored as an $|X| \times |Y|$ matrix with $(x,y)$-entry equal to $|x-y|^{1/\zeta},$ i.e., the cost of matching an agent of type $x \in X$ with an agent of type $y \in Y.$ + +```{code-cell} ipython3 +class ConcaveCostOT(): + def __init__(self, X_types=None, Y_types=None, n_x =None, m_y=None, ζ=2): + + # Sets of types + self.X_types, self.Y_types = X_types, Y_types + + # Marginals + if X_types is not None and Y_types is not None: + non_empty_types = True + self.n_x = np.ones(len(X_types), dtype=int) if n_x is None else n_x + self.m_y = np.ones(len(Y_types), dtype=int) if m_y is None else m_y + else: + non_empty_types = False + self.n_x, self.m_y = n_x, m_y + + # Cost function: |X|x|Y| matrix + self.ζ = ζ + if non_empty_types: + self.cost_x_y = np.abs(X_types[:, None] - Y_types[None, :]) \ + ** (1 / ζ) + else: + self.cost_x_y = None +``` + ++++ {"user_expressions": []} + +Let's consider a random instance with given numbers of types $|X|$ and $|Y|$ and a given number of agents. + +First, we generate random types $X$ and $Y.$ + +Then we generate random quantities for each type so that there are $N$ agents for each side. + +```{code-cell} ipython3 +number_of_x_types = 20 +number_of_y_types = 20 +N_agents_per_side = 60 + +np.random.seed(1) + +## Genetate random types +# generate random support for distributions of types +support_size = 50 +random_support = np.unique(np.random.uniform(0,200, size=support_size)) + +# generate types +X_types_example = np.random.choice(random_support, + size=number_of_x_types, replace=False) +Y_types_example = np.random.choice(random_support, + size=number_of_y_types, replace=False) + +## Generate random integer types quantities summing to N_agents_per_side + +# generate integer vectors of lenght n_types summing to n_agents +def random_marginal(n_types, n_agents): + cuts = np.sort(np.random.choice(np.arange(1,n_agents), + size= n_types-1, replace=False)) + segments = np.diff(np.concatenate(([0], cuts, [n_agents]))) + return segments + +# Create a method to assign random marginals to our class +def assign_random_marginals(self,random_seed): + np.random.seed(random_seed) + self.n_x = random_marginal(len(self.X_types), N_agents_per_side) + self.m_y = random_marginal(len(self.Y_types), N_agents_per_side) + +ConcaveCostOT.assign_random_marginals = assign_random_marginals + +# Create an instance of our class and generate random marginals +example_pb = ConcaveCostOT(X_types_example, Y_types_example, ζ=2) +example_pb.assign_random_marginals(random_seed=1) +``` + ++++ {"user_expressions": []} + + + + +We use $F$ (resp. $G$) to denote the cumulative distribution function associated to the measure $n$ (resp. $m$) + +Thus, $F(z) =\sum_{x \leq z: n_x > 0} n_x $ and $G(z) =\sum_{y \leq z: m_y > 0} m_y $ for $z\in \mathbb{R}.$ + +Notice that we not normalizing the measures so $F(\infty) = G(\infty) =N.$ + + +The following method plots the marginals on the real line + + * blue for $X$ types, + + * red for $Y$ types. + +Note that there are possible overlaps between $X$ and $Y.$ + +```{code-cell} ipython3 +def plot_marginals(self, figsize=(15, 8), title='Distributions of types'): + + plt.figure(figsize=figsize) + + # Scatter plot n_x + plt.scatter(self.X_types, self.n_x, color='blue', label='n_x') + plt.vlines(self.X_types, ymin=0, ymax= self.n_x, + color='blue', linestyles='dashed') + + # Scatter plot m_y + plt.scatter(self.Y_types, - self.m_y, color='red', label='m_y') + plt.vlines(self.Y_types, ymin=0, ymax=- self.m_y, + color='red', linestyles='dashed') + + # Add grid and y=0 axis + plt.grid(True) + plt.axhline(0, color='black', linewidth=1) + plt.gca().spines['bottom'].set_position(('data', 0)) + + # Labeling the axes and the title + plt.ylabel('frequency') + plt.title(title) + plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True)) + plt.legend() + plt.show() + +ConcaveCostOT.plot_marginals = plot_marginals +``` + +```{code-cell} ipython3 +example_pb.plot_marginals() +``` + ++++ {"user_expressions": []} + +## Characterization of primal solution + +### Three properties of an optimal solution + +We now indicate important properties that are satisfied by an optimal solution. + +1. Maximal number of perfect pairs + +2. No intersecting pairs + +3. Layering + ++++ {"user_expressions": []} + +**(Maximal number of perfect pairs)** + +If $(z,z) \in X \times Y$ for some $z \in \mathbb{R}$ then in each optimal solution there are $\min\{n_z,m_z\}$ matches between type $z \in X$ and $z \in Y$. + +Indeed, assume by contradiction that at an optimal solution we have $(z,y)$ and $(x,z)$ matched in positive amounts for $y,x \neq z$. + +We can verify that reassigning the minimum of such quantities to the pairs $(z,z)$ and $(x,y)$ improves upon the current matching since + +$$ +h(|x-y|) \leq h(|x-z| +|z - y|) < h(|x-z|)+ h(|z - y|) +$$ + +where the first inequality follows from triangle inequality and the fact that $h$ is increasing and the strict inequality from strict subadditivity. + +We can then repeat the operation for any other analogous pair of matches involving $z,$ while improving the value, until we have mass $\min\{n_z,m_z\}$ on match $(z,z).$ + +Viewing the matching $\mu$ as a measure on $X \times Y$ with marginals $n$ and $m$, this property says that in any optimal $\mu$ we have $\mu_{zz} = n_z \wedge m_z$ for $(z,z)$ in the diagonal $\{(x,y) \in X \times Y: x=y\}$ of $\mathbb{R} \times \mathbb{R}$. + +The following method finds perfect pairs and returns the on-diagonal matchings as well as the residual off-diagonal marginals. + +```{code-cell} ipython3 +def match_perfect_pairs(self): + + # Find pairs on diagonal and related mass + perfect_pairs_x, perfect_pairs_y = np.where( + self.X_types[:,None] == self.Y_types[None,:]) + Δ_q = np.minimum(self.n_x[perfect_pairs_x] ,self.m_y[perfect_pairs_y]) + + # Compute off-diagonal residual masses for each side + n_x_off_diag = self.n_x.copy() + n_x_off_diag[perfect_pairs_x]-= Δ_q + + m_y_off_diag = self.m_y.copy() + m_y_off_diag[perfect_pairs_y] -= Δ_q + + # Compute on-diagonal matching + matching_diag = np.zeros((len(self.X_types), len(self.Y_types)), dtype=int) + matching_diag[perfect_pairs_x, perfect_pairs_y] = Δ_q + + return n_x_off_diag, m_y_off_diag , matching_diag + +ConcaveCostOT.match_perfect_pairs = match_perfect_pairs +``` + +```{code-cell} ipython3 +n_x_off_diag, m_y_off_diag , matching_diag = example_pb.match_perfect_pairs() +print(f"On-diagonal matches: {matching_diag.sum()}") +print(f"Residual types in X: {len(n_x_off_diag[n_x_off_diag >0])}") +print(f"Residual types in Y: {len(m_y_off_diag[m_y_off_diag >0])}") +``` + ++++ {"user_expressions": []} + +We can therefore create a new instance with the residual marginals that will feature no perfect pairs. + +Later we shall add the on-diagonal matching to the solution of this new instance. + +We refer to this instance as "off-diagonal" since the product measure of the residual marginals $n \otimes m$ feature zeros mass on the diagonal of $\mathbb{R} \times \mathbb{R}.$ + +In the rest of this section, we will focus on this instance. + +We create a subclass to study the residual off-diagonal problem. + +The subclass inherits the attributes and the modules from the original class. + +We let $Z := X \sqcup Y ,$ where $\sqcup$ denotes the union of disjoint sets. We will + +* index types $X$ as $\{0, \dots,|X|-1\}$ and types $Y$ as $\{|X|, \dots,|X| + |Y|-1\};$ + +* store the cost function as a $|Z| \times |Z|$ matrix with entry $(z,z')$ equal to $c_{xy}$ if $z=x \in X$ and $z' =y\in Y$ or $z=y \in Y$ and $z' =x\in X$ or equal to $+\infty$ if $z$ and $z'$ belong to the same side + + * (the latter is just customary, since these "infinitely penalized" entries are actually never accessed in the implementation); + +* let $q$ be a vector of size $|Z|$ whose $z$-th entry equals $n_x$ if type $x$ is the $z$-th smallest type in $Z$ and $-m_y$ if type $y$ is the $z$-th smallest type in $Z$; hence $q$ encodes capacities of both sides on the (ascending) sorted set of types. + +Finally, we add a method to flexibly add a pair $(i,j)$ with $i \in \{0, \dots,|X|-1\},j \in \{|X|, \dots,|X| + |Y|-1\}$ or $j \in \{0, \dots,|X|-1\},i \in \{|X|, \dots,|X| + |Y|-1\}$ to a matching matrix of size $|X| \times |Y|$. + +```{code-cell} ipython3 +class OffDiagonal(ConcaveCostOT): + def __init__(self, X_types, Y_types, n_x, m_y, ζ): + super().__init__(X_types, Y_types, n_x, m_y, ζ) + + # Types (unsorted) + self.types_list = np.concatenate((X_types,Y_types)) + + # Cost function: |Z|x|Z| matrix + self.cost_z_z = np.ones((len(self.types_list), + len(self.types_list))) * np.inf + + # upper-right block + self.cost_z_z[:len(self.X_types), len(self.X_types):] = self.cost_x_y + + # lower-left block + self.cost_z_z[len(self.X_types):, :len(self.X_types)] = self.cost_x_y.T + + ## Distributions of types + # sorted types and index identifier for each z in support + self.type_z = np.argsort(self.types_list) + self.support_z = self.types_list[self.type_z] + + # signed quantity for each type z + self.q_z = np.concatenate([n_x, - m_y])[self.type_z] + + # Mathod that adds to matching matrix a pair (i,j) + def add_pair_to_matching(self, pair_ids, matching): + if pair_ids[0] < pair_ids[1]: + # the pair of indices correspond to a pair (x,y) + matching[pair_ids[0], pair_ids[1]-len(self.X_types)] = 1 + else: + # the pair of indices correspond to a pair (y,x) + matching[pair_ids[1], pair_ids[0]-len(self.X_types)] = 1 +``` + ++++ {"user_expressions": []} + +We add a function that returns an instance of the off-diagonal subclass as well as the on-diagonal matching and the indices of the residual off-diagonal types. + +These indices will come handy for adding the off-diagonal matching matrix to the diagonal matching matrix we just found, since the former will have a smaller size if there are perfect pairs in the original problem. + +```{code-cell} ipython3 +def generate_offD_onD_matching(self): + # Match perfect pairs and compute on-diagonal matching + n_x_off_diag, m_y_off_diag , matching_diag = self.match_perfect_pairs() + + # Find indices of residual non-zero quantities for each side + nonzero_id_x = np.flatnonzero(n_x_off_diag) + nonzero_id_y = np.flatnonzero(m_y_off_diag) + + # Create new instance with off-diagonal types + off_diagonal = OffDiagonal(self.X_types[nonzero_id_x], + self.Y_types[nonzero_id_y], + n_x_off_diag[nonzero_id_x], + m_y_off_diag[nonzero_id_y], + self.ζ) + + return off_diagonal, (nonzero_id_x, nonzero_id_y, matching_diag) + +ConcaveCostOT.generate_offD_onD_matching = generate_offD_onD_matching +``` + ++++ {"user_expressions": []} + +We apply it to our example: + +```{code-cell} ipython3 +example_off_diag, _ = example_pb.generate_offD_onD_matching() +``` + ++++ {"user_expressions": []} + +Let's plot the residual marginals to verify visually that there are no overlappings between types from distinct sides in the off-diagonal instance. + +```{code-cell} ipython3 +example_off_diag.plot_marginals(title='Distributions of types: off-diagonal') +``` + ++++ {"user_expressions": []} + +**(No intersecting pairs)** This property summarizes the following fact: + + * represent both types on the real line and draw a semicirle joining $(x,y)$ for all pairs $(x,y) \in X \times Y$ that are matched in a solution + + * these semicirles do not intersect (unless they share one of the endpoints). + +A proof proceeds by contradiction. + +Let's consider types $x,x' \in X$ and $y,y' \in Y.$ + +Matched pairs cain "intersect" (or be tangent). + +We will show that in both cases the partial matching among types $x,x',y,y'$ can be improved by *uncrossing*, i.e. reassigning the quantities while improving on the solution and reducing the number of intersecting pairs. + +The first case of intersecting pairs is + +$$ +x < y < y' < x' +$$ + +with pairs $(x,y')$ and $(x',y)$ matched in positive quantities. + +Then it follows from strict monotonicity of $h$ that $h(|x-y|) < h(|x-y'|)$ and $h(|x'-y'|) < h(|x'-y|),$ hence $h(|x-y|)+ h(|x'-y'|) < h(|x-y'|) + h(|x'-y|).$ + + +Therefore, we can take the minimum of the masses of the matched pairs $(x,y')$ and $(x',y)$ and reallocate it to the pairs $(x,y)$ and $(x',y')$, +therby strictly improving the cost among $x,y,x',y'.$ + +The second case of intersecting pairs is + +$$ +x < x' < y' < y +$$ + +with pairs $(x,y')$ and $(x',y)$ matched. + +In this case we have + +$$ +|x - y'| + |x' - y| = |x - y| + |x' - y'| +$$ + +Letting $\alpha := \frac{|x - y|+|x' - y|}{|x - y'| - |x' - y|} \in (0,1),$ we have $|x - y| = \alpha|x - y'| +(1-\alpha) |x' - y| $ and $|x' - y'| = (1-\alpha)|x - y'| +\alpha |x' - y|. $ + +Hence, by strict concavity of $h,$ + +$$ +h(|x-y|)+ h(|x'-y'|) <\alpha h(|x - y'|) +(1-\alpha) h(|x' - y|) + (1-\alpha) h(|x - y'|) +\alpha h(|x' - y|) = h(|x-y'|) + h(|x'-y|). +$$ + +Therefore, as in the first case, we can strictly improve the cost among $x,y,x',y'$ by uncrossing the pairs. + +Finally, it remains to argue that in both cases *uncrossing* operations do not increase the number of intersections with other matched pairs. + +It can indeed be shown on a case-by-case basis that, in both of the above cases, for any other matched pair $(x'',y'')$ the number of intersections between pairs $(x,y), (x',y')$ and the pair $(x'',y'')$ (i.e., after uncrossing) is not larger than the number of intersections between pairs $(x,y'), (x',y)$ and the pair $(x'',y'')$ (i.e., before uncrossing), hence the uncrossing operations above reduce the number of intersections. + +We conclude that if a matching features intersecting pairs, it can be modified via a sequence of uncrossing operations into a matching without intersecting pairs while improving on the value. + ++++ {"user_expressions": []} + +**(Layering)** Recall that there are $2N$ individual agents, each agent $i$ having type $z_i \in X \sqcup Y.$ + +When we introduce the off diagonal matching, to stress that the types sets are disjoint now. + + +To simplify our explanation of this property, assume for now that each agent has its own distinct type (i.e., |X|=|Y| =N and $n=m= \mathbf{1}_N$), in which case the optimal transport problem is also referred to as *assignment problem*. + +Let's index agents according to their types: + +$$ +z_1 < z_2 \dots< z_{2N-1} < z_{2N}. +$$ + +Suppose that agents $i$ of type $z_i$ and $j$ of type $z_j$, with $z_i < z_j,$ are matched in a particular optimal solution. + +Then there is an equal number of agents from each side in $\{i+1, \dots, j-1\},$ if this set is not empty. + +Indeed, if this were not the case, then some agent $k \in \{i+1,j-1\}$ would be matched with some agent $\ell$ with $\ell \notin \{i,\dots, j\},$ i.e., there would be types + +$$ +z_i < z_k < z_j < z_\ell +$$ + +with matches $(z_i,z_j)$ and $(z_k, z_\ell),$ violating the no intersecting pairs property. + +We conclude that we can define a binary relation on $[N]$ such that $i \sim j$ if there is an equal number of agents of each side in $\{i,i+1,\dots, j\}$ (or if this set is empty). + +This is an equivalence relation, so we can find associated equivalence classes that we call *layers*. + +By the reasoning above, in an optimal solution all pairs $i,j$ (of opposite sides) which are matched belong to the same layer, hence we can solve the assignment problem associated to each layer and then add up the solutions. + +In terms of distributions, $i$ and $j,$ of types $x \in X$ and $y \in Y$ respectively, belong to the same layer (i.e., $x \sim y$) if and only if $F(y-) - F(x) = G(y-) - G(x).$ + + +If $F$ and $G$ were continuous, then $F(y) - F(x) = G(y) - G(x) \iff F(x) - G(x) = F(y) - G(y).$ + +This suggests that the following quantity plays an important role: + +$$ +H(z) := F(z) - G(z), \text{ for } z \in \mathbb{R}. +$$ + +Returning to our general (integer) discrete setting, let's plot $H$. + +Notice that $H$ is right-continuous (being the difference of right-continuous functions) and that upward (resp. downward) jumps correspond to point masses of agents with types from $X$ (resp. $Y$). + +```{code-cell} ipython3 +def plot_H_z(self, figsize=(15, 8), range_x_axis=None, scatter=True): + # Determine H(z) = F(z) - G(z) + H_z = np.cumsum(self.q_z) + + # Plot H(z) + plt.figure(figsize=figsize) + plt.axhline(0, color='black', linewidth=1) + + # determine the step points for horizontal lines + step = np.concatenate(([self.support_z.min() - .05 * self.support_z.ptp()], + self.support_z, + [self.support_z.max() + .05 * self.support_z.ptp()])) + height = np.concatenate(([0], H_z, [0])) + + # plot the horizontal lines of the step function + for i in range(len(step) - 1): + plt.plot([step[i], step[i+1]], [height[i], height[i]], color='black') + + # draw dashed vertical lines for the step function + for i in range(1, len(step) - 1): + plt.plot([step[i], step[i]], [height[i-1], height[i]], + color='black', linestyle='--') + + # plot discontinuities points of H(z) + if scatter: + plt.scatter(np.sort(self.X_types), H_z[self.q_z > 0], color='blue') + plt.scatter(np.sort(self.Y_types), H_z[self.q_z < 0], color='red') + + if range_x_axis is not None: + plt.xlim(range_x_axis) + + # Add labels and title + plt.title('Underqualification Measure (Off-Diagonal)') + plt.xlabel('$z$') + plt.ylabel('$H(z)$') + plt.grid(False) + plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True)) + plt.show() + +OffDiagonal.plot_H_z = plot_H_z +``` + +```{code-cell} ipython3 +example_off_diag.plot_H_z() +``` + ++++ {"user_expressions": []} + +The layering property extends to the general discrete setting. + +There are $|H(\mathbb{R})|-1$ layers in total. + +Enumerating the range of $H$ as $H(\mathbb{R}) = \{h_1,h_2, \dots, h_{|H(\mathbb{R})|}\}$ with $h_1 < h_2 < \dots < h_{|H(\mathbb{R})|},$ we can define layer $L_\ell,$ for $\ell \in \{ 1,\dots,|H(\mathbb{R})|-1\}$ as the collection of types $z \in Z$ such that + +$$ +H(z-) \leq h_{\ell -1} < h_{\ell } \leq H(z), +$$ + +(which are types in $X$), or + +$$ +H(z) \leq h_{\ell -1} < h_{\ell } \leq H(z-), +$$ + +which are types in $Y$. + +The *mass* associated with layer $L_\ell$ is $M_\ell = h_{\ell+1}- h_{\ell}.$ + +Intuitively, a layer $L_\ell$ consists of some mass $M_\ell,$ of multiple types in $Z,$ i.e. the problem within the layer is *unitary*. + +A unitary problem is essentially an assignment problem up to a constant: we can solve the problem with unit mass and then rescale a solution by $M_\ell.$ + +Moreover, each layer $L_\ell$ contains an even number of types $N_\ell \in 2\mathbb{N},$ which are alternating, i.e., ordering them as $z_1 < z_2\dots < z_{ N_\ell-1} < z_{ N_\ell}$ all odd (or even, respectively) indexed types belong to the same side. + + +The following method finds the layers associated with distributions $F$ and $G$. + +Again, types in $X$ are indexed with $\{0, \dots,|X|-1\}$ and types in $Y$ with $\{|X|, \dots,|X| + |Y|-1\}$. + +Using these indices (instead of the types themselves) to represent the layers allows keeping track of sides types in each layer, without adding an additional bit of information that would identify the side of the first type in the layer, which, because a layer is alternating, would then allow identifying sides of all types in the layer. + +In addition, using indices will let us extract the cost function within a layer from the cost function $c_{zz'}$ computed offline. + +```{code-cell} ipython3 +def find_layers(self): + # Compute H(z) on the joint support + H_z = np.concatenate([[0], np.cumsum(self.q_z)]) + + # Compute the range of H, i.e. H(R), stored in ascending order + layers_height = np.unique(H_z) + + # Compute the mass of each layer + layers_mass = np.diff(layers_height) + + # Compute layers + # the following |H(R)|x|Z| matrix has entry (z,l) equal to 1 iff type z belongs to layer l + layers_01 = ((H_z[None, :-1] <= layers_height[:-1, None]) + * (layers_height[1:, None] <= H_z[None, 1:]) | + (H_z[None, 1:] <= layers_height[:-1, None]) + * (layers_height[1:, None] <= H_z[None, :-1])) + + # each layer is reshaped as a list of indices correponding to types + layers = [self.type_z[layers_01[ell]] + for ell in range(len(layers_height)-1)] + + return layers, layers_mass, layers_height, H_z + +OffDiagonal.find_layers = find_layers +``` + +```{code-cell} ipython3 +layers_list_example, layers_mass_example, _, _ = example_off_diag.find_layers() +print(layers_list_example) +``` + ++++ {"user_expressions": []} + +The following method gives a graphical representation of the layers. + +From the picture it is easy to spot two key features described above: + + * types are alternating + + * the layer problem is unitary + +```{code-cell} ipython3 +def plot_layers(self, figsize=(15, 8)): + # Find layers + layers, layers_mass , layers_height, H_z = self.find_layers() + + plt.figure(figsize=figsize) + + # Plot H(z) + step = np.concatenate(([self.support_z.min() - .05 * self.support_z.ptp()], + self.support_z, + [self.support_z.max() + .05 * self.support_z.ptp()])) + height = np.concatenate((H_z, [0])) + plt.step(step, height, where='post', color='black', label='CDF', zorder=1) + + # Plot layers + colors = cm.viridis(np.linspace(0, 1, len(layers))) + for ell, layer in enumerate(layers): + plt.vlines(self.types_list[layer], layers_height[ell] , + layers_height[ell] + layers_mass[ell], + color=colors[ell], linewidth=2) + plt.scatter(self.types_list[layer], + np.ones(len(layer)) * layers_height[ell] + +.5 * layers_mass[ell], + color=colors[ell], s=50) + + plt.axhline(layers_height[ell], color=colors[ell], + linestyle=':', linewidth=1.5, zorder=0) + + # Add labels and title + plt.xlabel('$z$') + plt.title('Layers') + plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True)) + plt.show() + +OffDiagonal.plot_layers = plot_layers +``` + +```{code-cell} ipython3 +example_off_diag.plot_layers() +``` + ++++ {"user_expressions": []} + +### Solving a layer + ++++ {"user_expressions": []} + +Recall that layer $L_\ell$ consists of a list of distinct types from $Y \sqcup X$ + +$$ +z_1 < z_2\dots < z_{N_\ell-1} < z_{N_\ell}, +$$ + +which is alternating. + +The problem within a layer is unitary. + +Hence we can solve the problem with unit masses and later rescale the solution by the layer's mass $M_\ell$. + +Let us select a layer from the example above (we pick the one with maximum number of types) and plot the types on the real line + +```{code-cell} ipython3 +# Pick layer with maximum number of types +layer_id_example = max(enumerate(layers_list_example), + key = lambda x: len(x[1]))[0] +layer_example = layers_list_example[layer_id_example] + + +# Plot layer types +def plot_layer_types(self, layer, mass, figsize=(15, 3)): + + plt.figure(figsize=figsize) + + # Scatter plot n_x + x_layer = layer[layer < len(self.X_types)] + y_layer = layer[layer >= len(self.X_types)] - len(self.X_types) + M_ell = np.ones(len(x_layer))* mass + + plt.scatter(self.X_types[x_layer], M_ell, color='blue', label='X types') + plt.vlines(self.X_types[x_layer], ymin=0, ymax= M_ell, + color='blue', linestyles='dashed') + + # Scatter plot m_y + plt.scatter(self.Y_types[y_layer], - M_ell, color='red', label='Y types') + plt.vlines(self.Y_types[y_layer], ymin=0, ymax=- M_ell, + color='red', linestyles='dashed') + + # Add grid and y=0 axis + # plt.grid(True) + plt.axhline(0, color='black', linewidth=1) + plt.gca().spines['bottom'].set_position(('data', 0)) + + # Labeling the axes and the title + plt.ylabel('mass') + plt.title('Distributions of types in the layer') + plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True)) + plt.gca().spines['top'].set_visible(False) + plt.gca().spines['right'].set_visible(False) + plt.legend() + plt.show() + +ConcaveCostOT.plot_layer_types = plot_layer_types + +example_off_diag.plot_layer_types(layer_example, + layers_mass_example[layer_id_example]) +``` + ++++ {"user_expressions": []} + +Given the structure of a layer and the *no intersecting pairs* property, the optimal matching and value of the layer can be found recursively. + +Indeed, if in certain optimal matching $1$ and $j \in [N_\ell],$ $ j-1 $ odd, are paired, then there is no matching between agents in $[2,j-1]$ and those in $[j+1,N_\ell]$ (if both are non empty, i.e., $j$ is not $2$ or $N_\ell$). + +Hence in such optimal solution agents in $[2,j-1]$ are matched among themselves. + +Since $[z_2,z_{j-1}]$ (as well as $[z_{j+1},z_{N_\ell}]$) is alternating, we can reason recursively. + +Let $V_{ij}$ be the optimal value of matching agents in $[i,j]$ with $i,j \in [N_\ell],$ $j -i \in \{1,3,\dots,N_\ell-1\}$. + + + +Suppose that we computed the value $V_{ij}$ for all $i,j \in [N_\ell]$ with $i-j \in \{1,3,\dots,t-2\}$ for some odd natural number $t$. + +Then, for $i,j \in [N_\ell]$ with $i-j= t$ we have + +$$ +V_{ij} = \min_{k \in \{i+1,i+3,\dots,j\}} \left\{ c_{ik} + V_{i+1,k-1} + V_{k+1,j}\right\} +$$ + +with the RHS depending only on previously computed values. + +We set the boundary conditions at $t=-1$: $V_{i+1,i} = 0$ for each $i \in [N_\ell],$ so that we can apply the same Bellman equation at $t =1.$ + +The following method takes as input the layer types indices and computes the value function as a matrix $[V_{ij}]_{ i \in [N_\ell+1], j \in [N_\ell ]}$. + +In order to distinguish entries that are relevant for our computations from those that are never accessed, we initialize this matrix as full of NaN values. + +```{code-cell} ipython3 +def solve_bellman_eqs(self,layer): + # Recover cost function within the layer + cost_i_j = self.cost_z_z[layer[:,None],layer[None,:]] + + # Initialize value function + V_i_j = np.full((len(layer)+1,len(layer)), np.nan) + + # Add boundary conditions + i_bdry = np.arange(len(layer)) + V_i_j[i_bdry+1, i_bdry] = 0 + + t = 1 + while t < len(layer): + # Select agents i in [n_L-t] (with potential partners j's in [t,n_L]) + i_t = np.arange(len(layer)-t) + + # For each i, select each k with |k-i| <= t + # (potential partners of i within segment) + index_ik = i_t[:,None] + np.arange(1, t+1, 2)[None,:] + + # Compute optimal value for pairs with |i-j| = t + V_i_j[i_t, i_t + t] = (cost_i_j[i_t[:,None], index_ik] + + V_i_j[i_t[:,None] + 1, index_ik - 1] + + V_i_j[index_ik + 1, i_t[:,None] + t]).min(1) + # Go to next odd integer + t += 2 + + return V_i_j + +OffDiagonal.solve_bellman_eqs = solve_bellman_eqs +``` + ++++ {"user_expressions": []} + +Let's compute values for the layer from our example. + +Only non-NaN entries are actually used in the computations. + +```{code-cell} ipython3 +# Compute layer value function +V_i_j = example_off_diag.solve_bellman_eqs(layer_example) + +print(f"Type indices in the layer: {layer_example}") +print('##########################') +print("Section of the Value function of the layer:") +print(V_i_j.round(2)[:min(10, V_i_j.shape[0]), + :min(10, V_i_j.shape[1])]) +``` + ++++ {"user_expressions": []} + +Having computed the value function, we can proceed to compute the optimal matching as the *policy* that attains the value function that solves the Bellman equation (*policy evaluation*). + +Specifically, we start from agent $1$ and match it with the $k$ that achieves the minimum in the equation associated with $V_{1,2N_\ell};$ + +Then we store segments $[2,k-1]$ and $[k+1,2N_\ell]$ (if not empty). + +In general, given a segment $[i,j],$ we match $i$ with $k$ that achieves the minimum in the equation associated with $V_{ij}$ and store the segments $[i,k-1]$ and $[k+1,j]$ (if not empty). + +The algorithm proceeds until there are no segments left. + +```{code-cell} ipython3 +def find_layer_matching(self, V_i_j, layer): + # Initialize + segments_to_process = [np.arange(len(layer))] + matching = np.zeros((len(self.X_types),len(self.Y_types)), bool) + + while segments_to_process: + # Pick i, first agent of the segment + # and potential partners i+1,i+3,..., in the segment + segment = segments_to_process[0] + i_0 = segment[0] + potential_matches = np.arange(i_0, segment[-1], 2) + 1 + + # Compute optimal partner j_i + obj = (self.cost_z_z[layer[i_0],layer[potential_matches]] + + V_i_j[i_0 +1, potential_matches -1] + + V_i_j[potential_matches +1,segment[-1]]) + + j_i_0 = np.argmin(obj)*2 + (i_0 + 1) + + # Add matched pair (i,j_i) + self.add_pair_to_matching(layer[[i_0,j_i_0]], matching) + + # Update segments to process: + # remove current segment + segments_to_process = segments_to_process[1:] + + # add [i+1,j-1] and [j+1,last agent of the segment] + if j_i_0 > i_0 + 1: + segments_to_process.append(np.arange(i_0 + 1, j_i_0)) + if j_i_0 < segment[-1]: + segments_to_process.append(np.arange(j_i_0 + 1, segment[-1] + 1)) + + return matching + +OffDiagonal.find_layer_matching = find_layer_matching +``` + ++++ {"user_expressions": []} + +Lets apply this method our example to find the matching within the layer and then rescale it by $M_\ell$. + +Note that the unscaled value equals $V_{1,N_\ell}.$ + +```{code-cell} ipython3 +matching_layer = example_off_diag.find_layer_matching(V_i_j,layer_example) +print(f"Value of the layer (unscaled): {(matching_layer * example_off_diag.cost_x_y).sum()}") +print(f"Value of the layer (scaled by the mass = {layers_mass_example[layer_id_example]}): " + f"{layers_mass_example[layer_id_example] * (matching_layer * example_off_diag.cost_x_y).sum()}") +``` + ++++ {"user_expressions": []} + +The following method plots the matching within a layer. + +We apply it to the layer from our example. + +```{code-cell} ipython3 +def plot_layer_matching(self, layer, matching_layer): + # Create the figure and axis + fig, ax = plt.subplots(figsize=(15, 15)) + + # Plot the points on the x-axis + X_types_layer = self.X_types[layer[layer < len(self.X_types)]] + Y_types_layer = self.Y_types[layer[layer >= len(self.X_types)] + - len(self.X_types)] + ax.scatter(X_types_layer, np.zeros_like(X_types_layer), color='blue', + s = 20, zorder=5) + ax.scatter(Y_types_layer, np.zeros_like(Y_types_layer), color='red', + s = 20, zorder=5) + + # Draw semicircles for each row in matchings + matched_types = np.where(matching_layer >0) + matched_types_x = self.X_types[matched_types[0]] + matched_types_y = self.Y_types[matched_types[1]] + + for iter in range(len(matched_types_x)): + width = abs(matched_types_x[iter] - matched_types_y[iter]) + center = (matched_types_x[iter] + matched_types_y[iter]) / 2 + height = width + semicircle = patches.Arc((center, 0), width, height, theta1=0, + theta2=180, lw=3) + ax.add_patch(semicircle) + + # Add title and layout settings + plt.title('Optimal Layer Matching' ) + ax.set_aspect('equal') + plt.gca().spines['bottom'].set_position(('data', 0)) + ax.spines['left'].set_color('none') + ax.spines['top'].set_color('none') + ax.spines['right'].set_color('none') + ax.yaxis.set_ticks([]) + ax.set_ylim(bottom= -self.support_z.ptp() / 100) + + plt.show() + +ConcaveCostOT.plot_layer_matching = plot_layer_matching +``` + +```{code-cell} ipython3 +example_off_diag.plot_layer_matching(layer_example, matching_layer) +``` + ++++ {"user_expressions": []} + +#### Solving a layer in a smarter way + ++++ {"user_expressions": []} + +We will now present two key results in the context of OT with concave type costs. + +We refer {cite}`boerma2023composite` and {cite}`delon2011minimum` for proofs. + + +Consider the problem faced within a layer, i.e., types from $Y \sqcup X$ + +$$ +z_1 < z_2\dots < z_{N_\ell-1} < z_{N_\ell}, \quad N_\ell \in 2 \mathbb{N} +$$ + +are alternating and the problem is unitary. + +Given a matching on $[1,k],$ $k \in [N_\ell],$ $k$ even, we say that a matched pair $(i,j)$ within this matching is *hidden* if there is a matched pair $(i',j')$ with $i' < i