Skip to content

[node] support git sources #382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions node/flatpak_node_generator/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
'git': {},
'git+http': {'scheme': 'http'},
'git+https': {'scheme': 'https'},
'git+ssh': {'scheme': 'https'},
}


Expand All @@ -33,6 +34,14 @@ def parse_git_source(self, version: str, from_: Optional[str] = None) -> GitSour
if not new_url.netloc:
path = new_url.path.split('/')
new_url = new_url._replace(netloc=path[0], path='/'.join(path[1:]))
# Replace https://[email protected]:ianstormtaylor/to-camel-case.git
# with https://[email protected]/ianstormtaylor/to-camel-case.git
# for git+ssh URLs
if ':' in new_url.netloc:
netloc_split = new_url.netloc.split(':')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will break if there are extra colons (I mean, there really shouldn't be, but alas)

Suggested change
netloc_split = new_url.netloc.split(':')
netloc_split = new_url.netloc.split(':', 1)

(note that the branch immediately above also handles this, but...it does so kinda poorly, by comparison)

new_url = new_url._replace(
netloc=netloc_split[0], path=f'/{netloc_split[1]}{new_url.path}'
)

return GitSource(
original=original_url.geturl(),
Expand Down
185 changes: 112 additions & 73 deletions node/flatpak_node_generator/providers/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
NamedTuple,
Optional,
Set,
Tuple,
Type,
)

Expand Down Expand Up @@ -79,11 +80,17 @@ def _process_packages_v1(
elif version_url.scheme == 'file':
source = LocalSource(path=version_url.path)
else:
integrity = Integrity.parse(info['integrity'])
integrity = None
if 'integrity' in info:
integrity = Integrity.parse(info['integrity'])

if 'resolved' in info:
source = ResolvedSource(
resolved=info['resolved'], integrity=integrity
)
if info['resolved'].startswith('git+'):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

info['resolved'] should probably be in an intermediate variable so the same string constant isn't copy-pasted in 3 places. (this is also something the rest of the code admittedly does poorly.)

source = self.parse_git_source(info['resolved'])
else:
source = ResolvedSource(
resolved=info['resolved'], integrity=integrity
)
elif version_url.scheme in {'http', 'https'}:
source = PackageURLSource(resolved=version, integrity=integrity)
else:
Expand All @@ -104,20 +111,15 @@ def _process_packages_v2(
# NOTE We're not interested in symlinks, NPM will create them at install time
# but we still could collect package symlinks anyway just for completeness
continue
if install_path == '':
# The root project is typically listed with a key of ''
continue

name = info.get('name')

source: PackageSource
package_json_path = lockfile.parent / install_path / 'package.json'
if (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's the reason behind this change? if "entry without a resolved is always a local path" is reliable, then yeah it would make removing this hack doable...but that would ideally be in a separate commit before this one, to be able to tell the logic apart more easily.

'node_modules' not in install_path.split('/')
and package_json_path.exists()
):
source = LocalSource(path=install_path)
if name is None:
with package_json_path.open('rb') as fp:
name = json.load(fp)['name']
elif 'resolved' in info:

if 'resolved' in info:
resolved_url = urllib.parse.urlparse(info['resolved'])
if resolved_url.scheme == 'file':
source = LocalSource(path=resolved_url.path)
Expand All @@ -130,14 +132,10 @@ def _process_packages_v2(
integrity=integrity, resolved=info['resolved']
)
elif resolved_url.scheme.startswith('git+'):
raise NotImplementedError(
'Git sources in lockfile v2 format are not supported yet'
f' (package {install_path} in {lockfile})'
)
source = self.parse_git_source(info['resolved'])
else:
raise NotImplementedError(
f"Don't know how to handle package {install_path} in {lockfile}"
)
source = LocalSource(path=install_path)
name = install_path

# NOTE We can't reliably determine the package name from the lockfile v2 syntax,
# but we need it for registry queries and special source processing;
Expand Down Expand Up @@ -208,7 +206,7 @@ def __init__(
self.all_lockfiles: Set[Path] = set()
# Mapping of lockfiles to a dict of the Git source target paths and GitSource objects.
self.git_sources: DefaultDict[
Path, Dict[Path, GitSource]
Path, Dict[Path, Tuple[str, GitSource]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should update the comment above to indicate what this is, since it's no longer just a GitSource. or maybe the value a dataclass like NamedGitSource or something instead, to make it clearer what the str is (and then you can just use attributes to get the innards, instead of having to do unpacking assignments)

] = collections.defaultdict(lambda: {})
# FIXME better pass the same provider object we created in main
self.rcfile_provider = NpmRCFileProvider()
Expand Down Expand Up @@ -369,9 +367,49 @@ async def generate_package(self, package: Package) -> None:
# Get a unique name to use for the Git repository folder.
name = f'{package.name}-{source.commit}'
path = self.gen.data_root / 'git-packages' / name
self.git_sources[package.lockfile][path] = source
self.git_sources[package.lockfile][path] = (package.name, source)
self.gen.add_git_source(source.url, source.commit, path)

url = urllib.parse.urlparse(source.url)
Copy link
Collaborator

@bbhtt bbhtt May 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the git source here always going to github.com and gitlab.com? because only those two seems to be handled.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I was working on this github and gitlab were the only two forges on which I could easily find packages to test, however npm seems to also support bitbucket and gists from their docs: https://docs.npmjs.com/cli/v11/commands/npm-install.

I agree the code should be fixed though as any other url will fail. Is it ok to throw an exception if it's not a url we can handle? My suspicion is that gitlab/github covers 80%+ of use cases. If anyone can provide a failing url it could be addressed in future as we need to determine where to get the tarball from and how npm wants it indexed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s it ok to throw an exception if it's not a url we can handle?

Ok let's do that for now.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the full list of stuff to handle would be in: https://github.com/npm/hosted-git-info

if url.netloc == '[email protected]' or url.netloc == 'github.com':
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could be url.hostname == 'github.com' (and maybe also check url.username in ('git', '') or similar? do you know if that's semantically important in any way? if not, then just checking the hostname is a bit cleaner.) ditto for the gitlab.com check below.

url = url._replace(
netloc='codeload.github.com', path=re.sub('.git$', '', url.path)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should the . in .git here and below be escaped? might be useful to extract this into a separate git_suffix = re.compile(r'\.git$') and then use that for the substitutions below.

)
tarball_url = url._replace(
path=re.sub('$', f'/tar.gz/{source.commit}', url.path)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this just...replacing the empty string? couldn't it just be a string concat then? ditto for gitlab.com below.

)
index_url = tarball_url.geturl()
elif url.netloc == '[email protected]' or url.netloc == 'gitlab.com':
url = url._replace(
netloc='gitlab.com', path=re.sub('.git$', '', url.path)
)
tarball_url = url._replace(
path=re.sub(
'$',
f'/-/archive/{source.commit}/{package.name}-{source.commit}.tar.gz',
url.path,
)
)
index_url = url._replace(
path=re.sub(
'$', f'/repository/archive.tar.gz?ref={source.commit}', url.path
)
).geturl()
metadata = await RemoteUrlMetadata.get(
tarball_url.geturl(), cachable=True, integrity_algorithm='sha512'
)

self.gen.add_url_source(
url=tarball_url.geturl(),
integrity=metadata.integrity,
destination=self.get_cacache_content_path(metadata.integrity),
)

self.add_index_entry(
url=index_url,
metadata=metadata,
)

elif isinstance(source, LocalSource):
pass

Expand Down Expand Up @@ -432,8 +470,8 @@ def _finalize(self) -> None:
if type == "object"
then
to_entries | map(
if (.value | type == "string") and $data[.value]
then .value = "git+file:\($buildroot)/\($data[.value])"
if (.key | type == "string") and $data[.key]
then .value = "git+file://\($buildroot)/\($data[.key])"
else .
end
) | from_entries
Expand All @@ -445,7 +483,7 @@ def _finalize(self) -> None:
walk(
if type == "object" and (.version | type == "string") and $data[.version]
then
.version = "git+file:\($buildroot)/\($data[.version])"
.resolved = "git+file:\($buildroot)/\($data[.version])"
else .
end
)
Expand All @@ -459,56 +497,57 @@ def _finalize(self) -> None:
'package-lock.json': {},
}

for path, source in sources.items():
GIT_URL_PREFIX = 'git+'

new_version = f'{path}#{source.commit}'
assert source.from_ is not None
data['package.json'][source.from_] = new_version
data['package-lock.json'][source.original] = new_version

if source.from_.startswith(GIT_URL_PREFIX):
data['package.json'][
source.from_[len(GIT_URL_PREFIX) :]
] = new_version

if source.original.startswith(GIT_URL_PREFIX):
data['package-lock.json'][
source.original[len(GIT_URL_PREFIX) :]
] = new_version

for filename, script in scripts.items():
target = Path('$FLATPAK_BUILDER_BUILDDIR') / prefix / filename
script = (
textwrap.dedent(script.lstrip('\n')).strip().replace('\n', '')
)
json_data = json.dumps(data[filename])
patch_commands[lockfile].append(
'jq'
' --arg buildroot "$FLATPAK_BUILDER_BUILDDIR"'
f' --argjson data {shlex.quote(json_data)}'
f' {shlex.quote(script)} {target}'
f' > {target}.new'
)
patch_commands[lockfile].append(f'mv {target}{{.new,}}')

patch_all_commands: List[str] = []
for lockfile in self.all_lockfiles:
patch_dest = (
self.gen.data_root / 'patch' / self.relative_lockfile_dir(lockfile)
)
# Don't use with_extension to avoid problems if the package has a . in its name.
patch_dest = patch_dest.with_name(patch_dest.name + '.sh')
with open(lockfile) as fp:
lockfile_v1 = json.load(fp)['lockfileVersion'] == 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could probably be saved inside self.git_sources' values, right? and then no need to re-open the lockfile here.


if lockfile_v1:
for path, name_source in sources.items():
GIT_URL_PREFIX = 'git+'
name, source = name_source
new_version = f'{path}#{source.commit}'
data['package.json'][name] = new_version
data['package-lock.json'][source.original] = new_version

if source.original.startswith(GIT_URL_PREFIX):
data['package-lock.json'][
source.original[len(GIT_URL_PREFIX) :]
] = new_version

for filename, script in scripts.items():
target = Path('$FLATPAK_BUILDER_BUILDDIR') / prefix / filename
script = (
textwrap.dedent(script.lstrip('\n'))
.strip()
.replace('\n', '')
)
json_data = json.dumps(data[filename])
patch_commands[lockfile].append(
'jq'
' --arg buildroot "$FLATPAK_BUILDER_BUILDDIR"'
f' --argjson data {shlex.quote(json_data)}'
f' {shlex.quote(script)} {target}'
f' > {target}.new'
)
patch_commands[lockfile].append(f'mv {target}{{.new,}}')

if len(patch_commands) > 0:
patch_all_commands: List[str] = []
for lockfile in self.all_lockfiles:
patch_dest = (
self.gen.data_root / 'patch' / self.relative_lockfile_dir(lockfile)
)
# Don't use with_extension to avoid problems if the package has a . in its name.
patch_dest = patch_dest.with_name(patch_dest.name + '.sh')

self.gen.add_script_source(patch_commands[lockfile], patch_dest)
patch_all_commands.append(f'"$FLATPAK_BUILDER_BUILDDIR"/{patch_dest}')
self.gen.add_script_source(patch_commands[lockfile], patch_dest)
patch_all_commands.append(f'"$FLATPAK_BUILDER_BUILDDIR"/{patch_dest}')

patch_all_dest = self.gen.data_root / 'patch-all.sh'
self.gen.add_script_source(patch_all_commands, patch_all_dest)
patch_all_dest = self.gen.data_root / 'patch-all.sh'
self.gen.add_script_source(patch_all_commands, patch_all_dest)

if not self.no_autopatch:
# FLATPAK_BUILDER_BUILDDIR isn't defined yet for script sources.
self.gen.add_command(f'FLATPAK_BUILDER_BUILDDIR="$PWD" {patch_all_dest}')
if not self.no_autopatch:
# FLATPAK_BUILDER_BUILDDIR isn't defined yet for script sources.
self.gen.add_command(f'FLATPAK_BUILDER_BUILDDIR="$PWD" {patch_all_dest}')

if self.index_entries:
for path, entry in self.index_entries.items():
Expand Down
94 changes: 94 additions & 0 deletions node/tests/data/packages/git/package-lock.v1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very nice to see the tests for this!

"name": "@flatpak-node-generator-tests/git",
"version": "1.0.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"@flatpak-node-generator-tests/subdir": {
"version": "file:subdir",
"requires": {
"arr-diff": "github:jonschlinkert/arr-diff#f39e5f10f55fd0cc38fa686c6ebc11747815850c",
"array.chunk": "git+https://github.com/zhiyelee/array.chunk.git",
"decamelize": "git+https://github.com/sindresorhus/decamelize.git#da8552d335f9067354ca9b93378482eb9f1fb821",
"filled-array": "github:sindresorhus/filled-array#v2.1.0",
"in-array": "git+ssh://[email protected]/jonschlinkert/in-array.git",
"sorted-object": "github:domenic/sorted-object",
"text-encoding-shim": "gitlab:t-affeldt/text-encoding-shim",
"unordered-array-remove": "git+ssh://[email protected]/mafintosh/unordered-array-remove.git#9ecfd7d63da6e5499b3a530574aca2d8c826af93"
}
},
"arr-diff": {
"version": "github:jonschlinkert/arr-diff#f39e5f10f55fd0cc38fa686c6ebc11747815850c",
"from": "github:jonschlinkert/arr-diff#f39e5f10f55fd0cc38fa686c6ebc11747815850c"
},
"array-range": {
"version": "github:mattdesl/array-range#d8b6fbdfc29caf846be02229325dfb4967f1dcd6",
"from": "github:mattdesl/array-range#master"
},
"array.chunk": {
"version": "git+https://github.com/zhiyelee/array.chunk.git#1ba8011a64448210e334a17642e395690a7164c0",
"from": "git+https://github.com/zhiyelee/array.chunk.git"
},
"decamelize": {
"version": "git+https://github.com/sindresorhus/decamelize.git#da8552d335f9067354ca9b93378482eb9f1fb821",
"from": "git+https://github.com/sindresorhus/decamelize.git#da8552d335f9067354ca9b93378482eb9f1fb821"
},
"filled-array": {
"version": "github:sindresorhus/filled-array#3529bc985247d0f84db4080fecd8276643838d0c",
"from": "github:sindresorhus/filled-array#v2.1.0"
},
"in-array": {
"version": "git+ssh://[email protected]/jonschlinkert/in-array.git#47a5e55362098646b56a3ec6775bd5198df1c7ed",
"from": "git+ssh://[email protected]/jonschlinkert/in-array.git"
},
"is-empty-object": {
"version": "github:gummesson/is-empty-object#7b50c8eb4e14135631f7c94e01c0c8a36e5d75f8",
"from": "github:gummesson/is-empty-object#7b50c8eb4e14135631f7c94e01c0c8a36e5d75f8"
},
"is-number": {
"version": "github:jonschlinkert/is-number#98e8ff1da1a89f93d1397a24d7413ed15421c139",
"from": "github:jonschlinkert/is-number"
},
"person-lib": {
"version": "gitlab:volodymyrkr/person-lib#752fd1828b1eb3a9635bf725ae5e1704a375e524",
"from": "gitlab:volodymyrkr/person-lib"
},
"sorted-object": {
"version": "github:domenic/sorted-object#87105deb13d4f4151b2abd1a78d27a5216e3e79d",
"from": "github:domenic/sorted-object"
},
"text-encoding-shim": {
"version": "gitlab:t-affeldt/text-encoding-shim#33b05934b4e4e6c65fc06260eaa3b2aa2909e7b5",
"from": "gitlab:t-affeldt/text-encoding-shim"
},
"to-camel-case": {
"version": "git+ssh://[email protected]/ianstormtaylor/to-camel-case.git#00a20429b600ddb6e4f8ff5b17c52914f40fe67d",
"from": "git+ssh://[email protected]/ianstormtaylor/to-camel-case.git",
"requires": {
"to-space-case": "^1.0.0"
}
},
"to-capital-case": {
"version": "git+https://[email protected]/ianstormtaylor/to-capital-case.git#b82f61e00e099b01514e25177bb2d56d0f64b157",
"from": "git+https://[email protected]/ianstormtaylor/to-capital-case.git",
"requires": {
"to-space-case": "^1.0.0"
}
},
"to-no-case": {
"version": "1.0.0",
"resolved": "git+https://[email protected]/ianstormtaylor/to-no-case.git#9078578dcf394e63f34fd7c6666772192e537b90"
},
"to-space-case": {
"version": "1.0.0",
"resolved": "git+ssh://[email protected]/ianstormtaylor/to-space-case.git#aa68213d1211745ce7c6c725ba072e6b13bef640",
"requires": {
"to-no-case": "^1.0.0"
}
},
"unordered-array-remove": {
"version": "git+ssh://[email protected]/mafintosh/unordered-array-remove.git#9ecfd7d63da6e5499b3a530574aca2d8c826af93",
"from": "git+ssh://[email protected]/mafintosh/unordered-array-remove.git#9ecfd7d63da6e5499b3a530574aca2d8c826af93"
}
}
}
Loading