Skip to content

Commit 5d3ceba

Browse files
committed
grapheme popularity as hat metric
1 parent 2b596eb commit 5d3ceba

File tree

5 files changed

+48
-74
lines changed

5 files changed

+48
-74
lines changed

packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1-
import { CompositeKeyMap, HatStability, TokenHat } from "@cursorless/common";
2-
import { memoize, min } from "lodash";
1+
import {
2+
CompositeKeyMap,
3+
DefaultMap,
4+
HatStability,
5+
TokenHat,
6+
} from "@cursorless/common";
37
import { HatCandidate } from "./allocateHats";
48

59
/**
@@ -37,26 +41,10 @@ export function hatOldTokenRank(
3741
};
3842
}
3943

40-
/**
41-
* @param tokenRank The rank of the current token, so that we don't consider
42-
* higher ranked tokens (which already have been assigned hats)
43-
* @param graphemeTokenRanks A map from graphemes to an ordered list of the
44-
* ranks of tokens containing the grapheme
45-
* @returns A metric which returns the minimum token rank among lower ranked
46-
* tokens that contain the hat's grapheme (or Infinity if the grapheme doesn't
47-
* appear in any lower ranked tokens)
48-
*/
49-
export function minimumTokenRankContainingGrapheme(
50-
tokenRank: number,
51-
graphemeTokenRanks: { [key: string]: number[] },
44+
export function leastPopularGrapheme(
45+
graphemePopularity: DefaultMap<string, number>,
5246
): HatMetric {
53-
const coreMetric = memoize((graphemeText: string): number => {
54-
return (
55-
min(graphemeTokenRanks[graphemeText].filter((r) => r > tokenRank)) ??
56-
Infinity
57-
);
58-
});
59-
return ({ grapheme: { text } }) => coreMetric(text);
47+
return ({ grapheme: { text } }) => -graphemePopularity.get(text);
6048
}
6149

6250
/**

packages/cursorless-engine/src/util/allocateHats/allocateHats.ts

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,7 @@ export function allocateHats(
6262
* Lookup tables with information about which graphemes / hats appear in which
6363
* tokens
6464
*/
65-
const context = getHatRankingContext(
66-
rankedTokens,
67-
tokenOldHatMap,
68-
tokenGraphemeSplitter,
69-
);
65+
const context = getHatRankingContext(rankedTokens, tokenOldHatMap);
7066

7167
/* All initially enabled hat styles. */
7268
const enabledHatStyleNames = Object.keys(enabledHatStyles);
@@ -83,10 +79,25 @@ export function allocateHats(
8379
() => [...enabledHatStyleNames],
8480
);
8581

82+
// For every token, add one to the popularity count of each grapheme in the token.
83+
// TODO: move "graphemes for tokens" into getRankedTokens
84+
// to avoid recalculating it every time.
85+
const graphemePopularity = new DefaultMap<string, number>(() => 0);
86+
rankedTokens.forEach(({ token }) => {
87+
tokenGraphemeSplitter
88+
.getTokenGraphemes(token.text)
89+
.forEach(({ text: graphemeText }) => {
90+
graphemePopularity.set(
91+
graphemeText,
92+
graphemePopularity.get(graphemeText) + 1,
93+
);
94+
});
95+
});
96+
8697
// Iterate through tokens in order of decreasing rank, assigning each one a
8798
// hat
8899
return rankedTokens
89-
.map<TokenHat | undefined>(({ token, rank: tokenRank }) => {
100+
.map<TokenHat | undefined>(({ token }) => {
90101
/**
91102
* All hats for the graphemes in this token that weren't taken by a
92103
* higher ranked token
@@ -101,11 +112,21 @@ export function allocateHats(
101112
const chosenHat = chooseTokenHat(
102113
context,
103114
hatStability,
104-
tokenRank,
105115
tokenOldHatMap.get(token),
116+
graphemePopularity,
106117
tokenRemainingHatCandidates,
107118
);
108119

120+
// Remove the token from the grapheme popularity contest.
121+
tokenGraphemeSplitter
122+
.getTokenGraphemes(token.text)
123+
.forEach(({ text: graphemeText }) => {
124+
graphemePopularity.set(
125+
graphemeText,
126+
graphemePopularity.get(graphemeText) - 1,
127+
);
128+
});
129+
109130
// If there are no hats left for the graphemes in this token, the token
110131
// will get no hat
111132
if (chosenHat == null) {

packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
import { HatStability, TokenHat } from "@cursorless/common";
1+
import { DefaultMap, HatStability, TokenHat } from "@cursorless/common";
22
import { HatCandidate } from "./allocateHats";
33
import { RankingContext } from "./getHatRankingContext";
44
import {
55
hatOldTokenRank,
66
isOldTokenHat,
7-
minimumTokenRankContainingGrapheme,
7+
leastPopularGrapheme,
88
negativePenalty,
99
penaltyEquivalenceClass,
1010
} from "./HatMetrics";
@@ -48,10 +48,10 @@ import { maxByFirstDiffering } from "./maxByFirstDiffering";
4848
* @returns The chosen hat, or `undefined` if {@link candidates} was empty
4949
*/
5050
export function chooseTokenHat(
51-
{ hatOldTokenRanks, graphemeTokenRanks }: RankingContext,
51+
{ hatOldTokenRanks }: RankingContext,
5252
hatStability: HatStability,
53-
tokenRank: number,
5453
oldTokenHat: TokenHat | undefined,
54+
graphemePopularity: DefaultMap<string, number>,
5555
candidates: HatCandidate[],
5656
): HatCandidate | undefined {
5757
// We narrow down the candidates by a series of criteria until there is only
@@ -71,8 +71,7 @@ export function chooseTokenHat(
7171
// 4. Narrow to the hats with the lowest penalty
7272
negativePenalty,
7373

74-
// 5. Prefer hats that sit on a grapheme that doesn't appear in any highly
75-
// ranked token
76-
minimumTokenRankContainingGrapheme(tokenRank, graphemeTokenRanks),
74+
// 5. Prefer hats on the least popular graphemes, i.e. those with the lowest remaining popularity count
75+
leastPopularGrapheme(graphemePopularity),
7776
])!;
7877
}

packages/cursorless-engine/src/util/allocateHats/getHatRankingContext.ts

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import {
44
Token,
55
TokenHat,
66
} from "@cursorless/common";
7-
import { TokenGraphemeSplitter } from "../../tokenGraphemeSplitter";
87
import { RankedToken } from "./getRankedTokens";
98

109
export interface RankingContext {
@@ -19,53 +18,25 @@ export interface RankingContext {
1918
},
2019
number
2120
>;
22-
23-
/**
24-
* Maps from a grapheme to the list of ranks of the tokens in which the
25-
* given grapheme appears.
26-
*/
27-
graphemeTokenRanks: {
28-
[key: string]: number[];
29-
};
3021
}
3122

3223
export function getHatRankingContext(
3324
tokens: RankedToken[],
3425
oldTokenHatMap: CompositeKeyMap<Token, TokenHat>,
35-
tokenGraphemeSplitter: TokenGraphemeSplitter,
3626
): RankingContext {
37-
const graphemeTokenRanks: {
38-
[key: string]: number[];
39-
} = {};
40-
4127
const hatOldTokenRanks = new CompositeKeyMap<
4228
{ grapheme: string; hatStyle: HatStyleName },
4329
number
4430
>(({ grapheme, hatStyle }) => [grapheme, hatStyle]);
4531

46-
tokens.forEach(({ token, rank }) => {
32+
tokens.forEach(({ token }, index) => {
4733
const existingTokenHat = oldTokenHatMap.get(token);
4834
if (existingTokenHat != null) {
49-
hatOldTokenRanks.set(existingTokenHat, rank);
35+
hatOldTokenRanks.set(existingTokenHat, -index);
5036
}
51-
tokenGraphemeSplitter
52-
.getTokenGraphemes(token.text)
53-
.forEach(({ text: graphemeText }) => {
54-
let tokenRanksForGrapheme: number[];
55-
56-
if (graphemeText in graphemeTokenRanks) {
57-
tokenRanksForGrapheme = graphemeTokenRanks[graphemeText];
58-
} else {
59-
tokenRanksForGrapheme = [];
60-
graphemeTokenRanks[graphemeText] = tokenRanksForGrapheme;
61-
}
62-
63-
tokenRanksForGrapheme.push(rank);
64-
});
6537
});
6638

6739
return {
6840
hatOldTokenRanks,
69-
graphemeTokenRanks,
7041
};
7142
}

packages/cursorless-engine/src/util/allocateHats/getRankedTokens.ts

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ export function getRankedTokens(
4444
),
4545
);
4646

47-
return tokens.map((token, index) => ({ token, rank: -index }));
47+
return tokens.map((token, index) => ({
48+
token,
49+
}));
4850
});
4951
}
5052

@@ -67,11 +69,4 @@ function getRankedEditors(
6769

6870
export interface RankedToken {
6971
token: Token;
70-
71-
/**
72-
* A number indicating how likely the token is to be used. Tokens closer to
73-
* the cursor will be considered more likely to be used, and will receive a
74-
* higher rank, causing them to be assigned better hats.
75-
*/
76-
rank: number;
7772
}

0 commit comments

Comments
 (0)