From 4ce4241d1a82205a09ff835cc935b423d6563bd7 Mon Sep 17 00:00:00 2001 From: Felix Kling Date: Tue, 9 Aug 2022 20:39:27 +0200 Subject: [PATCH 1/3] codemirror file view: Use new /scip highlighting endpoint This commit makes minimal and additive changes to the frontend and Go backend to make the new /scip highlighting endpoint available through GraphQL. There are currently three possible scenarios how a file might get highlighted: - HTML blob view, default: Highlighted HTML is generated by syntect - HTML blob view, tree sitter: SCIP data is generated by tree sitter, HTML is generated on the client side - CodeMirror blob view, default: SCIP is generated by either syntect or tree sitter So far SCIP data was only generated for specific languages, determined by the server. With CodeMirror, SCIP also needs to be generated when the client requests it. My preferred solution would have been to let the server determine this based on the requested fields in the GraphQL request, but the library we are using [does not support that yet](https://github.com/graph-gophers/graphql-go/issues/17). Making the highlighting requests in the field resolvers (i.e. `HTML()` and `LSIF()`) is also not possible without additional changes because the `Aborted()` field depends on the result of the request. This led me change the `formatOnly` field from a boolean to an enum, with which the client can now request: - `HTML_PLAINTEXT` - `HTML_HIGHLIGHT` - `JSON_SCIP` It's not ideal because the server can still return SCIP data depending on whether tree sitter is configured for the language (see second bullet point at the beginning) but I think this is still better than introducing a new field for highlighting. So, if CodeMirror is *disabled*, everything works as before. When CodeMirror is *enabled* it will explicitly request `JSON_SCIP` data and only include the `lsif` field in the GraphQL request. The server will route that request to the new `/scip` endpoint. --- client/web/src/lsif/html.ts | 4 ++ client/web/src/repo/blob/BlobPage.tsx | 13 +++-- client/web/src/repo/blob/backend.ts | 34 +++++++----- cmd/frontend/graphqlbackend/highlight.go | 5 +- cmd/frontend/graphqlbackend/schema.graphql | 30 ++++++++--- cmd/frontend/internal/highlight/highlight.go | 15 +++--- cmd/frontend/internal/highlight/language.go | 10 ++++ internal/gosyntect/gosyntect.go | 57 ++++++++++++++++---- 8 files changed, 126 insertions(+), 42 deletions(-) diff --git a/client/web/src/lsif/html.ts b/client/web/src/lsif/html.ts index ee1340e1ceb7..20abbb76f8e8 100644 --- a/client/web/src/lsif/html.ts +++ b/client/web/src/lsif/html.ts @@ -70,6 +70,10 @@ function highlightSlice(html: HtmlBuilder, kind: SyntaxKind | undefined, slice: // Currently assumes that no ranges overlap in the occurrences. export function render(lsif_json: string, content: string): string { + if (!lsif_json.trim()) { + return '' + } + const occurrences = (JSON.parse(lsif_json) as JsonDocument).occurrences?.map(occ => new Occurrence(occ)) if (!occurrences) { return '' diff --git a/client/web/src/repo/blob/BlobPage.tsx b/client/web/src/repo/blob/BlobPage.tsx index 7aa6119fa6db..7d69918f0267 100644 --- a/client/web/src/repo/blob/BlobPage.tsx +++ b/client/web/src/repo/blob/BlobPage.tsx @@ -13,7 +13,7 @@ import { ErrorLike, isErrorLike, asError } from '@sourcegraph/common' import { SearchContextProps } from '@sourcegraph/search' import { StreamingSearchResultsListProps } from '@sourcegraph/search-ui' import { ExtensionsControllerProps } from '@sourcegraph/shared/src/extensions/controller' -import { Scalars } from '@sourcegraph/shared/src/graphql-operations' +import { HighlightResponseFormat, Scalars } from '@sourcegraph/shared/src/graphql-operations' import { PlatformContextProps } from '@sourcegraph/shared/src/platform/context' import { SettingsCascadeProps } from '@sourcegraph/shared/src/settings/settings' import { TelemetryProps } from '@sourcegraph/shared/src/telemetry/telemetryService' @@ -158,7 +158,7 @@ export const BlobPage: React.FunctionComponent> = return of(undefined) } - return fetchBlob({ repoName, commitID, filePath, formatOnly: true }).pipe( + return fetchBlob({ repoName, commitID, filePath, format: HighlightResponseFormat.HTML_PLAINTEXT }).pipe( map(blob => { if (blob === null) { return blob @@ -166,7 +166,7 @@ export const BlobPage: React.FunctionComponent> = const blobInfo: BlobPageInfo = { content: blob.content, - html: blob.highlight.html, + html: blob.highlight.html ?? '', repoName, revision, commitID, @@ -202,6 +202,9 @@ export const BlobPage: React.FunctionComponent> = commitID, filePath, disableTimeout, + format: enableCodeMirror + ? HighlightResponseFormat.JSON_SCIP + : HighlightResponseFormat.HTML_HIGHLIGHT, }) ), map(blob => { @@ -219,8 +222,8 @@ export const BlobPage: React.FunctionComponent> = const blobInfo: BlobPageInfo = { content: blob.content, - html: blob.highlight.html, - lsif: blob.highlight.lsif, + html: blob.highlight.html ?? '', + lsif: blob.highlight.lsif ?? '', repoName, revision, commitID, diff --git a/client/web/src/repo/blob/backend.ts b/client/web/src/repo/blob/backend.ts index 1cddd71154c7..023709feb1c3 100644 --- a/client/web/src/repo/blob/backend.ts +++ b/client/web/src/repo/blob/backend.ts @@ -6,18 +6,17 @@ import { dataOrThrowErrors, gql } from '@sourcegraph/http-client' import { ParsedRepoURI, makeRepoURI } from '@sourcegraph/shared/src/util/url' import { requestGraphQL } from '../../backend/graphql' -import { BlobFileFields, BlobResult, BlobVariables } from '../../graphql-operations' +import { BlobFileFields, BlobResult, BlobVariables, HighlightResponseFormat } from '../../graphql-operations' -function fetchBlobCacheKey(parsed: ParsedRepoURI & { disableTimeout?: boolean; formatOnly?: boolean }): string { - return `${makeRepoURI(parsed)}?disableTimeout=${parsed.disableTimeout}&formatOnly=${parsed.formatOnly}` +function fetchBlobCacheKey(parsed: ParsedRepoURI & { disableTimeout?: boolean; format?: string }): string { + return `${makeRepoURI(parsed)}?disableTimeout=${parsed.disableTimeout}&=${parsed.format}` } - interface FetchBlobArguments { repoName: string commitID: string filePath: string disableTimeout?: boolean - formatOnly?: boolean + format?: HighlightResponseFormat } export const fetchBlob = memoizeObservable( @@ -26,16 +25,24 @@ export const fetchBlob = memoizeObservable( commitID, filePath, disableTimeout = false, - formatOnly = false, - }: FetchBlobArguments): Observable => - requestGraphQL( + format = HighlightResponseFormat.HTML_HIGHLIGHT, + }: FetchBlobArguments): Observable => { + // We only want to include HTML data if explicitly requested. We always + // include LSIF because this is used for languages that are configured + // to be processed with tree sitter (and is used when explicitly + // requested via JSON_SCIP). + const html = + format === HighlightResponseFormat.HTML_PLAINTEXT || format === HighlightResponseFormat.HTML_HIGHLIGHT + + return requestGraphQL( gql` query Blob( $repoName: String! $commitID: String! $filePath: String! $disableTimeout: Boolean! - $formatOnly: Boolean! + $format: HighlightResponseFormat! + $html: Boolean! ) { repository(name: $repoName) { commit(rev: $commitID) { @@ -49,14 +56,14 @@ export const fetchBlob = memoizeObservable( fragment BlobFileFields on File2 { content richHTML - highlight(disableTimeout: $disableTimeout, formatOnly: $formatOnly) { + highlight(disableTimeout: $disableTimeout, format: $format) { aborted - html + html @include(if: $html) lsif } } `, - { repoName, commitID, filePath, disableTimeout, formatOnly } + { repoName, commitID, filePath, disableTimeout, format, html } ).pipe( map(dataOrThrowErrors), map(data => { @@ -65,6 +72,7 @@ export const fetchBlob = memoizeObservable( } return data.repository.commit.file }) - ), + ) + }, fetchBlobCacheKey ) diff --git a/cmd/frontend/graphqlbackend/highlight.go b/cmd/frontend/graphqlbackend/highlight.go index 9de1cef9b8fb..03acb9b5c46a 100644 --- a/cmd/frontend/graphqlbackend/highlight.go +++ b/cmd/frontend/graphqlbackend/highlight.go @@ -7,6 +7,7 @@ import ( "github.com/gogo/protobuf/jsonpb" "github.com/sourcegraph/sourcegraph/cmd/frontend/internal/highlight" + "github.com/sourcegraph/sourcegraph/internal/gosyntect" "github.com/sourcegraph/sourcegraph/internal/search/result" ) @@ -35,7 +36,7 @@ type HighlightArgs struct { DisableTimeout bool IsLightTheme *bool HighlightLongLines bool - FormatOnly bool + Format string } type highlightedFileResolver struct { @@ -92,7 +93,7 @@ func highlightContent(ctx context.Context, args *HighlightArgs, content, path st HighlightLongLines: args.HighlightLongLines, SimulateTimeout: simulateTimeout, Metadata: metadata, - FormatOnly: args.FormatOnly, + Format: gosyntect.GetResponseFormat(args.Format), }) result.aborted = aborted diff --git a/cmd/frontend/graphqlbackend/schema.graphql b/cmd/frontend/graphqlbackend/schema.graphql index 3dc1bfa24394..e84ef43afe61 100755 --- a/cmd/frontend/graphqlbackend/schema.graphql +++ b/cmd/frontend/graphqlbackend/schema.graphql @@ -4326,6 +4326,24 @@ type GitTree implements TreeEntry { ): Boolean! } +""" +The format and highlighting to use when requesting highlighting information for a file. +""" +enum HighlightResponseFormat { + """ + HTML formatted file content without syntax highlighting. + """ + HTML_PLAINTEXT + """ + HTML formatted file content with syntax highlighting. + """ + HTML_HIGHLIGHT + """ + SCIP highlighting information as JSON. + """ + JSON_SCIP +} + """ A file. In a future version of Sourcegraph, a repository's files may be distinct from a repository's blobs @@ -4393,9 +4411,9 @@ interface File2 { """ highlightLongLines: Boolean = false """ - Return un-highlighted blob contents that are only formatted as a table for use in our blob views. + Specifies which format/highlighting technique to use. """ - formatOnly: Boolean = false + format: HighlightResponseFormat = HTML_HIGHLIGHT ): HighlightedFile! } @@ -4460,9 +4478,9 @@ type VirtualFile implements File2 { """ highlightLongLines: Boolean = false """ - Return un-highlighted blob contents that are only formatted as a table for use in our blob views. + Specifies which format/highlighting technique to use. """ - formatOnly: Boolean = false + format: HighlightResponseFormat = HTML_HIGHLIGHT ): HighlightedFile! } @@ -4565,9 +4583,9 @@ type GitBlob implements TreeEntry & File2 { """ highlightLongLines: Boolean = false """ - Return un-highlighted blob contents that are only formatted as a table for use in our blob views. + Specifies which format/highlighting technique to use. """ - formatOnly: Boolean = false + format: HighlightResponseFormat = HTML_HIGHLIGHT ): HighlightedFile! """ Submodule metadata if this tree points to a submodule diff --git a/cmd/frontend/internal/highlight/highlight.go b/cmd/frontend/internal/highlight/highlight.go index fe93026194b2..ad7e073b6b21 100644 --- a/cmd/frontend/internal/highlight/highlight.go +++ b/cmd/frontend/internal/highlight/highlight.go @@ -106,10 +106,8 @@ type Params struct { // Metadata provides optional metadata about the code we're highlighting. Metadata Metadata - // FormatOnly, if true, will skip highlighting and only return the code - // in a formatted table view. This is useful if we want to display code - // as quickly as possible, without waiting for highlighting. - FormatOnly bool + // Format defines the response format of the syntax highlighting request. + Format gosyntect.HighlightResponseType } // Metadata contains metadata about a request to highlight code. It is used to @@ -405,7 +403,7 @@ func Code(ctx context.Context, p Params) (response *HighlightedCode, aborted boo return plainResponse, true, nil } - if p.FormatOnly { + if p.Format == gosyntect.FormatHTMLPlaintext { return unhighlightedCode(err, code) } @@ -431,6 +429,7 @@ func Code(ctx context.Context, p Params) (response *HighlightedCode, aborted boo Tracer: ot.GetTracer(ctx), LineLengthLimit: maxLineLength, CSS: true, + Engine: getEngineParameter(filetypeQuery.Engine), } // Set the Filetype part of the command if: @@ -443,7 +442,7 @@ func Code(ctx context.Context, p Params) (response *HighlightedCode, aborted boo query.Filetype = filetypeQuery.Language } - resp, err := client.Highlight(ctx, query, filetypeQuery.Engine == EngineTreeSitter) + resp, err := client.Highlight(ctx, query, p.Format) if ctx.Err() == context.DeadlineExceeded { log15.Warn( @@ -487,7 +486,9 @@ func Code(ctx context.Context, p Params) (response *HighlightedCode, aborted boo return unhighlightedCode(err, code) } - if filetypeQuery.Engine == EngineTreeSitter { + // We need to return SCIP data if explicitly requested or if the selected + // engine is tree sitter. + if p.Format == gosyntect.FormatJSONSCIP || filetypeQuery.Engine == EngineTreeSitter { document := new(scip.Document) data, err := base64.StdEncoding.DecodeString(resp.Data) diff --git a/cmd/frontend/internal/highlight/language.go b/cmd/frontend/internal/highlight/language.go index 5fd616f44414..0869cba106a1 100644 --- a/cmd/frontend/internal/highlight/language.go +++ b/cmd/frontend/internal/highlight/language.go @@ -10,6 +10,7 @@ import ( "github.com/sourcegraph/sourcegraph/internal/conf" "github.com/sourcegraph/sourcegraph/internal/conf/conftypes" + "github.com/sourcegraph/sourcegraph/internal/gosyntect" ) type EngineType int @@ -20,6 +21,15 @@ const ( EngineSyntect ) +// Converts an engine type to the corresponding parameter value for the syntax +// highlighting request. Defaults to "syntec". +func getEngineParameter(engine EngineType) string { + if engine == EngineTreeSitter { + return gosyntect.SyntaxEngineTreesitter + } + return gosyntect.SyntaxEngineSyntect +} + type SyntaxEngineQuery struct { Engine EngineType Language string diff --git a/internal/gosyntect/gosyntect.go b/internal/gosyntect/gosyntect.go index 9977966012cf..c65fdbc0cb99 100644 --- a/internal/gosyntect/gosyntect.go +++ b/internal/gosyntect/gosyntect.go @@ -14,6 +14,32 @@ import ( "github.com/sourcegraph/sourcegraph/lib/errors" ) +const ( + SyntaxEngineSyntect = "syntect" + SyntaxEngineTreesitter = "tree-sitter" +) + +type HighlightResponseType string + +// The different response formats supported by the syntax highlighter. +const ( + FormatHTMLPlaintext HighlightResponseType = "HTML_PLAINTEXT" + FormatHTMLHighlight HighlightResponseType = "HTML_HIGHLIGHT" + FormatJSONSCIP HighlightResponseType = "JSON_SCIP" +) + +// Returns corresponding format type for the request format. Defaults to +// FormatHTMLHighlight +func GetResponseFormat(format string) HighlightResponseType { + if format == string(FormatHTMLPlaintext) { + return FormatHTMLPlaintext + } + if format == string(FormatJSONSCIP) { + return FormatJSONSCIP + } + return FormatHTMLHighlight +} + // Query represents a code highlighting query to the syntect_server. type Query struct { // Filepath is the file path of the code. It can be the full file path, or @@ -57,6 +83,9 @@ type Query struct { // Tracer, if not nil, will be used to record opentracing spans associated with the query. Tracer opentracing.Tracer + + // Which highlighting engine to use + Engine string `json:"engine"` } // Response represents a response to a code highlighting query. @@ -64,9 +93,6 @@ type Response struct { // Data is the actual highlighted HTML version of Query.Code. Data string - // LSIF is the base64 encoded byte array of an LSIF Typed payload containing highlighting data. - LSIF string - // Plaintext indicates whether or not a syntax could not be found for the // file and instead it was rendered as plain text. Plaintext bool @@ -94,7 +120,9 @@ var ( type response struct { // Successful response fields. - Data string `json:"data"` + Data string `json:"data"` + // Used by the /scip endpoint + Scip string `json:"scip"` Plaintext bool `json:"plaintext"` // Error response fields. @@ -141,11 +169,11 @@ func (c *Client) IsTreesitterSupported(filetype string) bool { // automatically do this via the query or something else. It feels a bit goofy // to be a separate param. But I need to clean up these other deprecated // options later, so it's OK for the first iteration. -func (c *Client) Highlight(ctx context.Context, q *Query, useTreeSitter bool) (*Response, error) { +func (c *Client) Highlight(ctx context.Context, q *Query, format HighlightResponseType) (*Response, error) { // Normalize filetype q.Filetype = normalizeFiletype(q.Filetype) - if useTreeSitter && !c.IsTreesitterSupported(q.Filetype) { + if q.Engine == SyntaxEngineTreesitter && !c.IsTreesitterSupported(q.Filetype) { return nil, errors.New("Not a valid treesitter filetype") } @@ -156,7 +184,11 @@ func (c *Client) Highlight(ctx context.Context, q *Query, useTreeSitter bool) (* } var url string - if useTreeSitter { + if format == FormatJSONSCIP { + url = "/scip" + } else if q.Engine == SyntaxEngineTreesitter { + // "Legacy SCIP mode" for the HTML blob view and languages configured to + // be processed with tree sitter. url = "/lsif" } else { url = "/" @@ -221,10 +253,17 @@ func (c *Client) Highlight(ctx context.Context, q *Query, useTreeSitter bool) (* } return nil, errors.Wrap(err, c.syntectServer) } - return &Response{ + var response = &Response{ Data: r.Data, Plaintext: r.Plaintext, - }, nil + } + + // If SCIP is set, prefer it over HTML + if r.Scip != "" { + response.Data = r.Scip + } + + return response, nil } func (c *Client) url(path string) string { From ebc7f3d36db99fd91c0d0d78f2007b6ea5e9f210 Mon Sep 17 00:00:00 2001 From: Felix Kling Date: Tue, 9 Aug 2022 21:54:10 +0200 Subject: [PATCH 2/3] Fix TS --- client/web/src/repo/tree/HomeTab.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/web/src/repo/tree/HomeTab.tsx b/client/web/src/repo/tree/HomeTab.tsx index c3c1053ceb26..70dd75188874 100644 --- a/client/web/src/repo/tree/HomeTab.tsx +++ b/client/web/src/repo/tree/HomeTab.tsx @@ -106,7 +106,7 @@ export const HomeTab: React.FunctionComponent> = const blobInfo: BlobInfo & { richHTML: string; aborted: boolean } = { content: blob.content, - html: blob.highlight.html, + html: blob.highlight.html ?? '', repoName: repo.name, revision, commitID, From 11e55b294e862a1142116da3e20f1e6a760492de Mon Sep 17 00:00:00 2001 From: Felix Kling Date: Tue, 9 Aug 2022 21:56:35 +0200 Subject: [PATCH 3/3] Ideomatic Go --- internal/gosyntect/gosyntect.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/gosyntect/gosyntect.go b/internal/gosyntect/gosyntect.go index c65fdbc0cb99..780ceb32b81b 100644 --- a/internal/gosyntect/gosyntect.go +++ b/internal/gosyntect/gosyntect.go @@ -253,7 +253,7 @@ func (c *Client) Highlight(ctx context.Context, q *Query, format HighlightRespon } return nil, errors.Wrap(err, c.syntectServer) } - var response = &Response{ + response := &Response{ Data: r.Data, Plaintext: r.Plaintext, }