
Commit

Merge pull request #49 from JetBrains-Research/benchmarking-hotfixes
Benchmarking framework hot-fixes
K-dizzled authored Nov 27, 2024
2 parents b4c0a3b + d5dda34 commit 1048803
Showing 13 changed files with 87 additions and 58 deletions.
1 change: 1 addition & 0 deletions etc/docs/benchmark/BENCHMARKING_FRAMEWORK_GUIDE.md
@@ -40,6 +40,7 @@ This experiment must be run within the workspace environment, which the user act
cd dataset/imm
nix-build # builds the 'imm' project
nix-shell # activates its environment
export COQ_LSP_PATH=$(which coq-lsp) # sets up the proper coq-lsp server path
cd ../..
npm run single-workspace-benchmark
```
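A minimal sketch, assuming the framework reads the variable via `process.env` (the actual resolution logic is not shown in this diff), of how the exported `COQ_LSP_PATH` could be picked up; `resolveCoqLspServerPath` is a hypothetical helper name:

```typescript
// Hypothetical helper (illustrative only): prefer the path exported from the
// nix-shell via `export COQ_LSP_PATH=$(which coq-lsp)`, fall back to PATH lookup.
function resolveCoqLspServerPath(): string {
    const fromEnv = process.env.COQ_LSP_PATH;
    if (fromEnv !== undefined && fromEnv.length > 0) {
        return fromEnv;
    }
    return "coq-lsp"; // rely on whatever `coq-lsp` is first on PATH
}
```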
4 changes: 3 additions & 1 deletion package.json
@@ -397,11 +397,13 @@
"preclean-test": "npm run clean && npm run rebuild-test-resources && npm run compile && npm run lint",
"clean-test": "npm run test-only",
"prebenchmark": "npm run preclean-test",
"premulti-workspace-benchmark": "npm run prebenchmark",
"premulti-workspaces-benchmark": "npm run prebenchmark",
"multi-workspaces-benchmark": "node ./out/benchmark/multiWorkspacesSetup.js",
"presingle-workspace-benchmark": "npm run prebenchmark",
"single-workspace-benchmark": "npm run test-executables-unsafe -- -g=\"[SourceExecutable] Single Workspace Benchmark\"",
"benchmark": "npm run single-workspace-benchmark",
"prebenchmark-test": "rm -rf benchmarksOutput",
"benchmark-test": "npm run benchmark",
"preteamcity-benchmark-setup": "npm run prebenchmark",
"teamcity-benchmark-setup": "node ./out/benchmark/teamCitySetup.js",
"teamcity-benchmark-agent": "npm run test-executables-unsafe -- -g=\"[SourceExecutable] Team City Benchmark Agent\"",
@@ -19,6 +19,7 @@ import { AbstractExperiment, ExecutionContext } from "./abstractExperiment";
* In summary, `MultiWorkspacesExperiment` is designed to work with multiple workspaces per run,
* making it better suited for large benchmarking experiments.
*/
// CRITICAL TODO: support COQ_LSP_PATH switch for the processes of different workspaces
export class MultiWorkspacesExperiment extends AbstractExperiment {
constructor(
bundles: InputBenchmarkingBundle[] = [],
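One possible direction for the TODO above, sketched under the assumption that each workspace gets its own benchmark subprocess; `workspaceRoot` and `coqLspServerPath` are illustrative parameters, not part of this commit:

```typescript
import { spawn } from "child_process";

// Illustrative only: run a per-workspace benchmark subprocess with its own
// coq-lsp server path, instead of relying on a single globally exported value.
function spawnWorkspaceBenchmark(workspaceRoot: string, coqLspServerPath: string) {
    return spawn("npm", ["run", "single-workspace-benchmark"], {
        cwd: workspaceRoot,
        env: { ...process.env, COQ_LSP_PATH: coqLspServerPath },
        stdio: "inherit",
    });
}
```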
55 changes: 28 additions & 27 deletions src/benchmark/framework/parseDataset/cacheHandlers/cacheUpdater.ts
Expand Up @@ -20,7 +20,6 @@ import {
ParsedFileHolder,
ParsedFileTarget,
} from "../coqProjectParser/implementation/parsedWorkspaceHolder";
import { throwOnTheoremWithoutInitialGoal } from "../utils/invariantFailedErrors";

export function updateWorkspaceCache(
workspaceCache: WorkspaceCacheHolder,
@@ -84,32 +83,30 @@ namespace UpdateCacheHolders {
}
cachedFile.updateDocumentVersion(parsedFile.documentVersion);

const newlyInitializedCachedTheorems = new Set<string>();
for (const fileTarget of parsedFileHolder.targets()) {
let cachedTheorem = cachedFile.getCachedTheorem(
fileTarget.sourceTheorem.name
);
const theoremName = fileTarget.sourceTheorem.name;
let cachedTheorem = cachedFile.getCachedTheorem(theoremName);
if (cachedTheorem === undefined) {
newlyInitializedCachedTheorems.add(theoremName);
cacheUpdaterLogger.debug(
`+ cached new theorem: "${fileTarget.sourceTheorem.name}"`
);
cachedTheorem = new CacheHolderData.CachedTheoremData(
fileTarget.sourceTheorem
);
buildInitialTargets(
cachedTheorem,
cacheUpdaterLogger,
parsedFile.filePath
);
buildInitialTargets(cachedTheorem, cacheUpdaterLogger);
cachedFile.addCachedTheorem(cachedTheorem);
} else {
cacheUpdaterLogger.debug(
`* updated theorem: "${fileTarget.sourceTheorem.name}"`
);
}

updateCachedTheoremData(
updateCachedTheoremWithRequestedTarget(
cachedTheorem,
fileTarget,
newlyInitializedCachedTheorems.has(theoremName),
cachedFile.workspacePath,
cacheUpdaterLogger
);
@@ -138,18 +135,15 @@
const cachedTheorem = new CacheHolderData.CachedTheoremData(
theoremData
);
buildInitialTargets(
cachedTheorem,
cachedFileUpdateLogger,
parsedFile.filePath
);
buildInitialTargets(cachedTheorem, cachedFileUpdateLogger);

for (const fileTarget of parsedFileTargetsByTheorem.get(
theoremData.name
) ?? []) {
updateCachedTheoremData(
updateCachedTheoremWithRequestedTarget(
cachedTheorem,
fileTarget,
true,
workspacePath,
cachedFileUpdateLogger
);
@@ -167,8 +161,7 @@

export function buildInitialTargets(
cachedTheorem: CacheHolderData.CachedTheoremData,
cachedFileUpdateLogger: AsOneRecordLogsBuilder,
sourceFilePath: string
cachedFileUpdateLogger: AsOneRecordLogsBuilder
) {
if (!cachedTheorem.hasNoTargets()) {
cachedFileUpdateLogger
@@ -205,17 +198,10 @@
const sourceTheoremData = cachedTheorem.theoremData;

// PROVE_THEOREM target
const initialGoal = sourceTheoremData.sourceTheorem.initial_goal;
if (initialGoal === null) {
throwOnTheoremWithoutInitialGoal(
sourceTheoremData.name,
sourceFilePath
);
}
initializeCachedTarget(
TargetType.PROVE_THEOREM,
extractTheoremFisrtProofStep(sourceTheoremData),
initialGoal
sourceTheoremData.sourceTheorem.initial_goal ?? undefined
);

// ADMIT target
@@ -224,9 +210,15 @@
);
}

export function updateCachedTheoremData(
/**
* _Note:_ additionally, this method checks two invariants.
* 1. `CachedTheoremData` should have its targets initialized before updating them with parsed ones.
* 2. A parsed target should not have been requested if it was already present in the cache.
*/
export function updateCachedTheoremWithRequestedTarget(
cachedTheorem: CacheHolderData.CachedTheoremData,
parsedTarget: ParsedFileTarget,
isNewlyInitialized: boolean,
workspacePath: string,
cachedFileUpdateLogger: AsOneRecordLogsBuilder
) {
@@ -251,6 +243,15 @@
`Cache building invariant failed: \`CachedTheoremData\` is built incorrectly`
);
} else {
const parsedTargetWasBuiltFromInitialGoal =
parsedTarget.targetType === TargetType.PROVE_THEOREM &&
parsedTarget.sourceTheorem.sourceTheorem.initial_goal !== null;
const cachedTargetIsInitializedComplete =
isNewlyInitialized && parsedTargetWasBuiltFromInitialGoal;
if (cachedTargetIsInitializedComplete) {
return;
}

if (cachedTargetToUpdate.hasCachedGoal()) {
cachedFileUpdateLogger
.error(
@@ -33,8 +33,6 @@ export namespace ParseCoqProjectInternalSignature {
export type FilePathToFileTargets = { [key: string]: FileTarget[] };

export interface FileTarget {
// TODO: optimize - `PROVE_THEOREM` targets don't need to be requested explicitly,
// they will be always parsed together with the theorems
requestType: TargetRequestType;
/**
* If `specificTheoremName` is undefined, this target means request on all file theorems.
@@ -25,6 +25,7 @@ import {
SerializedGoal,
serializeGoal,
} from "../../../utils/coqUtils/goalParser";
import { extractSerializedTheoremFisrtProofStep } from "../../../utils/coqUtils/proofTargetExtractor";
import { LogsIPCSender } from "../../../utils/subprocessUtils/ipc/onParentProcessCallExecutor/logsIpcSender";

import { ParseCoqProjectInternalSignature } from "./internalSignature";
@@ -99,15 +100,12 @@ export namespace ParseCoqProjectImpl {
logger: Logger
): Promise<SerializedParsedCoqFile> {
const mockDocumentVersion = 1;
// TODO: [@Gleb Solovev] Do not create this Abort Controller but pass
// the one created at the top
// TODO + check coq-lsp creation in benchmarks
const abortController = new AbortController();
const sourceFileEnvironment = await createSourceFileEnvironment(
mockDocumentVersion,
Uri.fromPath(filePath),
coqLspClient,
abortController.signal
new AbortController().signal, // abort behaviour is not supported at the parsing stage
true // TODO: pass `ContextTheoremsRanker.needsUnwrappedNotations` here to improve performance
);
const serializedParsedFile: SerializedParsedCoqFile = {
serializedTheoremsByNames: packIntoMappedObject(
@@ -141,10 +139,12 @@
for (const fileTarget of fileTargets) {
if (fileTarget.specificTheoremName === undefined) {
// all theorems requests
for (const theorem of mappedObjectValues(theoremsMapping)) {
for (const serializedTheorem of mappedObjectValues(
theoremsMapping
)) {
const parsedTargetsFromTheorem =
await extractTargetsFromTheorem(
theorem,
serializedTheorem,
fileTarget.requestType,
serializedParsedFile,
coqLspClient,
@@ -176,7 +176,7 @@
}

async function extractTargetsFromTheorem(
theorem: SerializedTheorem,
serializedTheorem: SerializedTheorem,
requestType: TargetRequestType,
serializedParsedFile: SerializedParsedCoqFile,
coqLspClient: CoqLspClient,
@@ -192,7 +192,7 @@
knownGoal
) => {
return {
theoremName: theorem.name,
theoremName: serializedTheorem.name,
targetType: targetType,
goalToProve:
knownGoal ??
@@ -207,12 +207,19 @@
};
switch (requestType) {
case TargetRequestType.THEOREM_PROOF:
// THEOREM_PROOF goals are already parsed within the theorems,
// so `ParsedFileTarget`-s for them are redundant
return [];
const theoremProofTarget = await targetBuilder(
extractSerializedTheoremFisrtProofStep(serializedTheorem),
TargetType.PROVE_THEOREM,
serializedTheorem.initial_goal
);
if (serializedTheorem.initial_goal === undefined) {
serializedTheorem.initial_goal =
theoremProofTarget.goalToProve;
}
return [theoremProofTarget];
case TargetRequestType.ALL_ADMITS:
const parsedTargets = [];
for (const holeProofStep of theorem.proof!.holes) {
for (const holeProofStep of serializedTheorem.proof!.holes) {
parsedTargets.push(
await targetBuilder(
holeProofStep,
1 change: 1 addition & 0 deletions src/benchmark/framework/structures/common/inputTargets.ts
@@ -259,6 +259,7 @@ export abstract class FileTarget implements EqualTo<FileTarget> {
abstract equalTo(other: FileTarget): boolean;
abstract hash(): number;
abstract toString(linePrefix: string, itemizeString: string): string;
// TODO: support `needsTheoremInitialGoals` here to improve parsing performance
}

export class SpecificTheoremTarget extends FileTarget {
4 changes: 2 additions & 2 deletions src/core/inspectSourceFile.ts
@@ -18,7 +18,7 @@ export async function inspectSourceFile(
fileUri: Uri,
client: CoqLspClient,
abortSignal: AbortSignal,
needsTheoremInitialGoals: boolean = true,
needsTheoremInitialGoals: boolean,
eventLogger?: EventLogger
): Promise<AnalyzedFile> {
const sourceFileEnvironment = await createSourceFileEnvironment(
@@ -82,7 +82,7 @@ export async function createSourceFileEnvironment(
fileUri: Uri,
client: CoqLspClient,
abortSignal: AbortSignal,
needsTheoremInitialGoals: boolean = true,
needsTheoremInitialGoals: boolean,
eventLogger?: EventLogger
): Promise<SourceFileEnvironment> {
const fileTheorems = await parseCoqFile(
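Since the default was dropped, call sites now pass `needsTheoremInitialGoals` explicitly; a hedged call-site sketch (the surrounding variable names are illustrative, not taken from this commit):

```typescript
// Illustrative only: the caller now decides whether theorem initial goals
// must be parsed, e.g. depending on the context-theorems ranker in use.
const sourceFileEnvironment = await createSourceFileEnvironment(
    documentVersion,
    fileUri,
    coqLspClient,
    abortController.signal,
    rankerNeedsInitialGoals // previously defaulted to `true`
);
```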
6 changes: 5 additions & 1 deletion src/llm/llmServices/grazie/grazieService.ts
@@ -120,7 +120,11 @@ class GrazieServiceInternal extends LLMServiceInternal<
chat: ChatHistory,
modelParams: GrazieModelParams
): GrazieFormattedHistory {
const o1CompatibleChatHistory = toO1CompatibleChatHistory(chat, modelParams.modelName);
const o1CompatibleChatHistory = toO1CompatibleChatHistory(
chat,
modelParams.modelName,
"grazie"
);

return o1CompatibleChatHistory.map((message: ChatMessage) => {
const grazieRoleName =
6 changes: 3 additions & 3 deletions src/llm/llmServices/openai/openAiService.ts
@@ -14,9 +14,9 @@ import { GeneratedProofImpl } from "../generatedProof";
import { LLMServiceImpl } from "../llmService";
import { LLMServiceInternal } from "../llmServiceInternal";
import { OpenAiModelParams } from "../modelParams";
import { toO1CompatibleChatHistory } from "../utils/o1ClassModels";

import { OpenAiModelParamsResolver } from "./openAiModelParamsResolver";
import { toO1CompatibleChatHistory } from "../utils/o1ClassModels";

export class OpenAiService extends LLMServiceImpl<
OpenAiUserModelParams,
@@ -219,9 +219,9 @@ class OpenAiServiceInternal extends LLMServiceInternal<
private static readonly connectionErrorPattern = /^Connection error\.$/;

private formatChatHistory(
chat: ChatHistory,
chat: ChatHistory,
modelParams: OpenAiModelParams
): ChatHistory {
return toO1CompatibleChatHistory(chat, modelParams.modelName);
return toO1CompatibleChatHistory(chat, modelParams.modelName, "openai");
}
}
19 changes: 14 additions & 5 deletions src/llm/llmServices/utils/o1ClassModels.ts
@@ -1,17 +1,26 @@
import { ChatHistory, ChatMessage } from "../commonStructures/chat";

const o1ClassModels = ['o1-preview', 'o1-preview-2024-09-12', 'o1-mini', 'o1-mini-2024-09-12'];
const o1ClassModelsOpenAI = [
"o1-preview",
"o1-preview-2024-09-12",
"o1-mini",
"o1-mini-2024-09-12",
];
const o1ClassModelsGrazie = ["openai-o1", "openai-o1-mini"];

/**
* As of November 2024, o1 model requires a different format of chat history.
* It doesn't support the system prompt, therefore we manually
* change system prompt to a user's message in case of this specific
* change system prompt to a user's message in case of this specific
* model usage.
*/
export function toO1CompatibleChatHistory(
chatHistory: ChatHistory,
modelName: string
chatHistory: ChatHistory,
modelName: string,
service: "openai" | "grazie"
): ChatHistory {
const o1ClassModels =
service === "openai" ? o1ClassModelsOpenAI : o1ClassModelsGrazie;
if (o1ClassModels.includes(modelName)) {
return chatHistory.map((message: ChatMessage) => {
return {
@@ -22,4 +31,4 @@ export function toO1CompatibleChatHistory(
} else {
return chatHistory;
}
}
}
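A small usage sketch of the updated helper, assuming `ChatMessage` is a `{ role, content }` record as used by the services above; the messages themselves are illustrative:

```typescript
import { toO1CompatibleChatHistory } from "../utils/o1ClassModels"; // path as used by the services above

// Illustrative only: for an o1-class Grazie model the system prompt is expected
// to be rewritten as a user message; for non-o1 models the history is unchanged.
const history = toO1CompatibleChatHistory(
    [
        { role: "system", content: "You are a Coq proving assistant." },
        { role: "user", content: "Prove the admitted goal." },
    ],
    "openai-o1",
    "grazie"
);
```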
3 changes: 2 additions & 1 deletion src/test/commonTestFunctions/prepareEnvironment.ts
@@ -46,7 +46,8 @@ export async function withPreparedEnvironment<T>(
(_hole) => true,
fileUri,
client,
new AbortController().signal
new AbortController().signal,
true // to support any ranker
)
);
const preparedEnvironment = {