Skip to content

Commit

Permalink
Add Markdown conversion of Discussions to Google Action (#1034)
Browse files Browse the repository at this point in the history
  • Loading branch information
isTravis authored Mar 6, 2025
1 parent 33a0f7a commit 33e510b
Show file tree
Hide file tree
Showing 2 changed files with 330 additions and 3 deletions.
318 changes: 318 additions & 0 deletions core/actions/googleDriveImport/discussionSchema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
import type { DOMOutputSpec, Mark, Node, NodeSpec } from "prosemirror-model";

import { Schema } from "prosemirror-model";

/**
 * Node specs for the discussion schema.
 *
 * Each entry maps a node name to its ProseMirror `NodeSpec`: allowed content,
 * attributes, how it is parsed from DOM (`parseDOM`) and how it is rendered
 * back to DOM (`toDOM`). Key order is preserved deliberately, since ProseMirror
 * treats the order of node specs as significant.
 */
export const baseNodes: { [key: string]: NodeSpec } = {
	doc: {
		content: "block+",
		attrs: {
			meta: { default: {} },
		},
	},
	paragraph: {
		selectable: false,
		// reactive: true,
		content: "inline*",
		group: "block",
		attrs: {
			id: { default: null },
			class: { default: null },
			textAlign: { default: null },
			rtl: { default: null },
		},
		parseDOM: [
			{
				tag: "p",
				getAttrs: (node) => {
					return {
						id: (node as Element).getAttribute("id"),
						class: (node as Element).getAttribute("class"),
						textAlign: (node as Element).getAttribute("data-text-align"),
						rtl: (node as Element).getAttribute("data-rtl"),
					};
				},
			},
		],
		toDOM: (node) => {
			// Render a lone <br> instead of a content hole when there is no content.
			// NOTE(review): this check matches a missing `content` or an empty array,
			// i.e. plain-JSON shaped nodes. For a real ProseMirror Node, `content` is
			// presumably a Fragment (truthy, not an array), so the hole is always used
			// in that case -- confirm that this is intended.
			const isEmpty = !node.content || (Array.isArray(node.content) && !node.content.length);
			const children = isEmpty ? ["br"] : 0;
			return [
				"p",
				{
					class: node.attrs.class,
					// Optional attributes are only emitted when they hold a truthy value.
					...(node.attrs.id && { id: node.attrs.id }),
					...(node.attrs.textAlign && { "data-text-align": node.attrs.textAlign }),
					...(node.attrs.rtl && { "data-rtl": node.attrs.rtl.toString() }),
				},
				children,
			] as DOMOutputSpec;
		},
	},
	blockquote: {
		content: "block+",
		group: "block",
		attrs: {
			id: { default: null },
		},
		selectable: false,
		parseDOM: [
			{
				tag: "blockquote",
				getAttrs: (node) => {
					return {
						id: (node as Element).getAttribute("id"),
					};
				},
			},
		],
		toDOM: (node) => {
			return [
				"blockquote",
				{ ...(node.attrs.id && { id: node.attrs.id }) },
				0,
			] as DOMOutputSpec;
		},
	},
	horizontal_rule: {
		group: "block",
		parseDOM: [{ tag: "hr" }],
		selectable: true,
		toDOM: () => {
			// The <hr> is rendered wrapped in a <div>.
			return ["div", ["hr"]] as DOMOutputSpec;
		},
	},
	heading: {
		attrs: {
			level: { default: 1 },
			fixedId: { default: "" },
			id: { default: "" },
			textAlign: { default: null },
			rtl: { default: null },
		},
		content: "inline*",
		group: "block",
		defining: true,
		selectable: false,
		// One parse rule per heading tag, h1 through h6; the tag decides `level`.
		parseDOM: [1, 2, 3, 4, 5, 6].map((level) => {
			return {
				tag: `h${level}`,
				getAttrs: (node) => {
					return {
						id: (node as Element).getAttribute("id"),
						textAlign: (node as Element).getAttribute("data-text-align"),
						rtl: (node as Element).getAttribute("data-rtl"),
						level,
					};
				},
			};
		}),
		toDOM: (node) => {
			return [
				`h${node.attrs.level}`,
				{
					// `fixedId` wins over `id` whenever it is non-empty.
					id: node.attrs.fixedId || node.attrs.id,
					...(node.attrs.textAlign && { "data-text-align": node.attrs.textAlign }),
					...(node.attrs.rtl && { "data-rtl": node.attrs.rtl.toString() }),
				},
				0,
			] as DOMOutputSpec;
		},
	},
	ordered_list: {
		content: "list_item+",
		group: "block",
		attrs: {
			id: { default: null },
			order: { default: 1 },
			rtl: { default: null },
		},
		selectable: false,
		parseDOM: [
			{
				tag: "ol",
				getAttrs: (node) => {
					const element = node as Element;
					// A non-numeric `start` (e.g. start="abc") would coerce to NaN and
					// later be written back out as start="NaN"; fall back to the
					// default order of 1 in that case.
					const parsedStart = element.hasAttribute("start")
						? Number(element.getAttribute("start"))
						: 1;
					return {
						id: element.getAttribute("id"),
						order: Number.isFinite(parsedStart) ? parsedStart : 1,
						rtl: element.getAttribute("data-rtl"),
					};
				},
			},
		],
		toDOM: (node) => {
			return [
				"ol",
				{
					...(node.attrs.id && { id: node.attrs.id }),
					// NOTE(review): `textAlign` is not declared in this node's attrs,
					// so this spread looks like a no-op here -- confirm.
					...(node.attrs.textAlign && { "data-text-align": node.attrs.textAlign }),
					...(node.attrs.rtl && { "data-rtl": node.attrs.rtl.toString() }),
					// The default order (1) is passed as null rather than as a number.
					start: node.attrs.order === 1 ? null : node.attrs.order,
				},
				0,
			] as DOMOutputSpec;
		},
	},
	bullet_list: {
		content: "list_item+",
		group: "block",
		attrs: {
			id: { default: null },
			rtl: { default: null },
		},
		selectable: false,
		parseDOM: [
			{
				tag: "ul",
				getAttrs: (node) => {
					return {
						id: (node as Element).getAttribute("id"),
						rtl: (node as Element).getAttribute("data-rtl"),
					};
				},
			},
		],
		toDOM: (node) => {
			return [
				"ul",
				{
					...(node.attrs.id && { id: node.attrs.id }),
					// NOTE(review): `textAlign` is not declared in this node's attrs,
					// so this spread looks like a no-op here -- confirm.
					...(node.attrs.textAlign && { "data-text-align": node.attrs.textAlign }),
					...(node.attrs.rtl && { "data-rtl": node.attrs.rtl.toString() }),
				},
				0,
			] as DOMOutputSpec;
		},
	},
	list_item: {
		content: "paragraph block*",
		defining: true,
		selectable: false,
		parseDOM: [{ tag: "li" }],
		toDOM: () => {
			return ["li", 0] as DOMOutputSpec;
		},
	},
	text: {
		inline: true,
		group: "inline",
		toDOM: (node) => {
			return node.text!;
		},
	},
	hard_break: {
		inline: true,
		group: "inline",
		selectable: false,
		parseDOM: [{ tag: "br" }],
		toDOM: () => {
			return ["br"] as DOMOutputSpec;
		},
	},
};

/**
 * Mark specs for the discussion schema: emphasis, strong, links, sub/superscript,
 * strikethrough and inline code. Each `getAttrs` below returns `null` to accept a
 * match (no attributes) or `false` to reject it.
 */
export const baseMarks = {
	em: {
		parseDOM: [
			{ tag: "i" },
			{ tag: "em" },
			{
				style: "font-style",
				// Only an italic font-style counts as emphasis.
				getAttrs: (value: string) => (value === "italic" ? null : false),
			},
		],
		toDOM: () => ["em"] as DOMOutputSpec,
	},

	strong: {
		parseDOM: [
			{ tag: "strong" },
			/*
			Google Docs sometimes wraps pasted content in `<b>` tags that carry
			`font-weight: normal`; such tags must not be treated as bold.
			*/
			{
				tag: "b",
				getAttrs: (node: HTMLElement) =>
					node.style.fontWeight === "normal" ? false : null,
			},
			{
				style: "font-weight",
				// Accept "bold", "bolder", or a numeric weight of 500 and above.
				getAttrs: (value: string) =>
					/^(bold(er)?|[5-9]\d{2,})$/.test(value) ? null : false,
			},
		],
		toDOM: () => ["strong"] as DOMOutputSpec,
	},
	link: {
		inclusive: false,
		attrs: {
			href: { default: "" },
			title: { default: null },
			target: { default: null },
			pubEdgeId: { default: null },
		},
		parseDOM: [
			{
				tag: "a[href]",
				getAttrs: (anchor: HTMLElement) => {
					// Anchors flagged as reference nodes are not parsed as link marks.
					const isReference = anchor.getAttribute("data-node-type") === "reference";
					if (isReference) {
						return false;
					}
					const href = anchor.getAttribute("href");
					const title = anchor.getAttribute("title");
					const target = anchor.getAttribute("target");
					const pubEdgeId = anchor.getAttribute("data-pub-edge-id");
					return { href, title, target, pubEdgeId };
				},
			},
		],
		toDOM: (mark: Mark, inline: boolean) => {
			/* Links seem to be receiving a target attr that is a DOM element, */
			/* coming from the wrong source in some interfaces. Any truthy,    */
			/* non-string target is therefore replaced with null.              */
			const targetIsInvalid =
				Boolean(mark.attrs.target) && typeof mark.attrs.target !== "string";
			const safeAttrs = targetIsInvalid ? { ...mark.attrs, target: null } : mark.attrs;
			// `pubEdgeId` is exposed as a data attribute; everything else passes through.
			const { pubEdgeId, ...domAttrs } = safeAttrs;
			return ["a", { "data-pub-edge-id": pubEdgeId, ...domAttrs }] as DOMOutputSpec;
		},
	},
	sub: {
		parseDOM: [{ tag: "sub" }],
		toDOM: () => ["sub"] as DOMOutputSpec,
	},
	sup: {
		parseDOM: [{ tag: "sup" }],
		toDOM: () => ["sup"] as DOMOutputSpec,
	},
	strike: {
		// <s>, <strike> and <del> are all parsed as strike; output is always <s>.
		parseDOM: [{ tag: "s" }, { tag: "strike" }, { tag: "del" }],
		toDOM: () => ["s"] as DOMOutputSpec,
	},
	code: {
		parseDOM: [{ tag: "code" }],
		toDOM: () => ["code"] as DOMOutputSpec,
	},
};

// Combine the node and mark specs defined in this module into one schema spec.
const schemaSpec = { nodes: baseNodes, marks: baseMarks };

/** ProseMirror schema built from `baseNodes` and `baseMarks`. */
const mySchema = new Schema(schemaSpec);

export default mySchema;
15 changes: 12 additions & 3 deletions core/actions/googleDriveImport/formatDriveData.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
// import { writeFile } from "fs/promises";
import type { Element, Root } from "hast";
import type { Root } from "hast";

import { defaultMarkdownSerializer } from "prosemirror-markdown";
import { Node } from "prosemirror-model";
import { rehype } from "rehype";
import rehypeFormat from "rehype-format";
import { visit } from "unist-util-visit";

import type { PubsId } from "db/public";
import { logger } from "logger";

import type { DriveData } from "./getGDriveFiles";
import { uploadFileToS3 } from "~/lib/server";
import schema from "./discussionSchema";
import {
appendFigureAttributes,
cleanUnusedSpans,
Expand Down Expand Up @@ -224,14 +226,21 @@ export const formatDriveData = async (
: comment.commenter && comment.commenter.orcid
? `https://orcid.org/${comment.commenter.orcid}`
: null;
// Turn a ProseMirror JSON document into Markdown: rebuild a Node against the
// discussion schema, then run it through the stock Markdown serializer.
// NOTE(review): `defaultMarkdownSerializer` presumably only knows the marks of the
// default markdown schema; `sub`, `sup` and `strike` marks from the discussion
// schema may make `serialize` throw -- confirm against prosemirror-markdown.
const prosemirrorToMarkdown = (content: any): string =>
	defaultMarkdownSerializer.serialize(Node.fromJSON(schema, content));

const markdownContent = prosemirrorToMarkdown(comment.content);
const commentObject: any = {
id: comment.id,
values: {
[`${communitySlug}:anchor`]:
index === 0 && discussion.anchors.length
? JSON.stringify(discussion.anchors[0])
: undefined,
[`${communitySlug}:content`]: comment.text,
// [`${communitySlug}:content`]: comment.text,
[`${communitySlug}:content`]: markdownContent,
[`${communitySlug}:publication-date`]: comment.createdAt,
[`${communitySlug}:full-name`]: commentAuthorName,
[`${communitySlug}:orcid`]: commentAuthorORCID,
Expand Down

0 comments on commit 33e510b

Please sign in to comment.