Skip to content

Commit 745306d

Browse files
authored
ci(jscpd): check changed lines only, to avoid false positives #5950
## Problem The `jscpd` check checks changed *files*. So duplicate code may be reported on lines outside of the actual changes (in the same file), which may be considered false positives. ## Solution - Only report "clones" (duplicate code) on actually changed lines. - Refactor the action to simplify the bash scripting. - Add tests for `filterDuplicates.ts`.
1 parent f98308c commit 745306d

File tree

3 files changed

+467
-38
lines changed

3 files changed

+467
-38
lines changed

.github/workflows/filterDuplicates.js

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
/**
2+
* Filters the report produced by jscpd to only include clones that involve changes from the given git diff.
3+
* If the filtered report is non-empty, i.e. there exists a clone in the changes,
4+
* the program exits with an error and logs the filtered report to console.
5+
*
6+
* Usage:
7+
* node filterDuplicates.js run [path_to_git_diff] [path_to_jscpd_report]
8+
*
9+
* Tests:
10+
* node filterDuplicates.js test
11+
*/
12+
13+
const fs = require('fs/promises')
14+
const path = require('path')
15+
16+
function parseDiffFilePath(filePathLine) {
17+
return filePathLine.split(' ')[2].split('/').slice(1).join('/')
18+
}
19+
20+
function parseDiffRange(rangeLine) {
21+
const [_fromRange, toRange] = rangeLine.split(' ').slice(1, 3)
22+
const [startLine, numLines] = toRange.slice(1).split(',').map(Number)
23+
const range = [startLine, startLine + numLines]
24+
return range
25+
}
26+
27+
async function parseDiff(diffPath) {
28+
const diff = await fs.readFile(diffPath, 'utf8')
29+
const lines = diff.split('\n')
30+
let currentFile = null
31+
let currentFileChanges = []
32+
const fileChanges = new Map()
33+
34+
for (const line of lines) {
35+
if (line.startsWith('diff')) {
36+
if (currentFile) {
37+
fileChanges.set(currentFile, currentFileChanges)
38+
}
39+
currentFile = parseDiffFilePath(line)
40+
currentFileChanges = []
41+
}
42+
if (line.startsWith('@@')) {
43+
currentFileChanges.push(parseDiffRange(line))
44+
}
45+
}
46+
47+
fileChanges.set(currentFile, currentFileChanges)
48+
49+
return fileChanges
50+
}
51+
52+
function doesOverlap(range1, range2) {
53+
const [start1, end1] = range1
54+
const [start2, end2] = range2
55+
return (
56+
(start1 >= start2 && start1 <= end2) || (end1 >= start2 && end1 <= end2) || (start2 >= start1 && end2 <= end1)
57+
)
58+
}
59+
60+
function isCloneInChanges(changes, cloneInstance) {
61+
const fileName = cloneInstance.name
62+
const cloneStart = cloneInstance.start
63+
const cloneEnd = cloneInstance.end
64+
const lineChangeRanges = changes.get(fileName)
65+
66+
if (!lineChangeRanges) {
67+
return false
68+
}
69+
70+
return lineChangeRanges.some((range) => doesOverlap([cloneStart, cloneEnd], range))
71+
}
72+
73+
function isInChanges(changes, dupe) {
74+
return isCloneInChanges(changes, dupe.firstFile) || isCloneInChanges(changes, dupe.secondFile)
75+
}
76+
77+
function filterDuplicates(report, changes) {
78+
duplicates = []
79+
for (const dupe of report.duplicates) {
80+
if (isInChanges(changes, dupe)) {
81+
duplicates.push(dupe)
82+
}
83+
}
84+
return duplicates
85+
}
86+
87+
async function run() {
88+
const rawDiffPath = process.argv[3]
89+
const jscpdReportPath = process.argv[4]
90+
const changes = await parseDiff(rawDiffPath)
91+
const jscpdReport = JSON.parse(await fs.readFile(jscpdReportPath, 'utf8'))
92+
const filteredDuplicates = filterDuplicates(jscpdReport, changes)
93+
94+
console.log('%s files changes', changes.size)
95+
console.log('%s duplicates found', filteredDuplicates.length)
96+
if (filteredDuplicates.length > 0) {
97+
console.log(filteredDuplicates)
98+
process.exit(1)
99+
}
100+
}
101+
102+
/**
103+
* Mini-test Suite
104+
*/
105+
console.log(__dirname)
106+
const testDiffFile = path.resolve(__dirname, 'test/test_diff.txt')
107+
let testCounter = 0
108+
function assertEqual(actual, expected) {
109+
if (actual !== expected) {
110+
throw new Error(`Expected ${expected} but got ${actual}`)
111+
}
112+
testCounter += 1
113+
}
114+
115+
async function test() {
116+
test_parseDiffFilePath()
117+
test_parseDiffRange()
118+
test_doesOverlap()
119+
await test_parseDiff()
120+
await test_isCloneInChanges()
121+
await test_isInChanges()
122+
await test_filterDuplicates()
123+
console.log('All tests passed (%s)', testCounter)
124+
}
125+
126+
function test_parseDiffFilePath() {
127+
assertEqual(
128+
parseDiffFilePath(
129+
'diff --git a/.github/workflows/copyPasteDetection.yml b/.github/workflows/copyPasteDetection.yml'
130+
),
131+
'.github/workflows/copyPasteDetection.yml'
132+
)
133+
assertEqual(
134+
parseDiffFilePath('diff --git a/.github/workflows/filterDuplicates.js b/.github/workflows/filterDuplicates.js'),
135+
'.github/workflows/filterDuplicates.js'
136+
)
137+
}
138+
139+
function test_parseDiffRange() {
140+
assertEqual(parseDiffRange('@@ -1,4 +1,4 @@').join(','), '1,5')
141+
assertEqual(parseDiffRange('@@ -10,4 +10,4 @@').join(','), '10,14')
142+
assertEqual(parseDiffRange('@@ -10,4 +10,5 @@').join(','), '10,15')
143+
}
144+
145+
function test_doesOverlap() {
146+
assertEqual(doesOverlap([1, 5], [2, 4]), true)
147+
assertEqual(doesOverlap([2, 3], [2, 4]), true)
148+
assertEqual(doesOverlap([2, 3], [1, 4]), true)
149+
assertEqual(doesOverlap([1, 5], [5, 6]), true)
150+
assertEqual(doesOverlap([1, 5], [6, 7]), false)
151+
assertEqual(doesOverlap([6, 7], [1, 5]), false)
152+
assertEqual(doesOverlap([2, 5], [4, 5]), true)
153+
}
154+
155+
async function test_parseDiff() {
156+
const changes = await parseDiff(testDiffFile)
157+
assertEqual(changes.size, 2)
158+
assertEqual(changes.get('.github/workflows/copyPasteDetection.yml').length, 1)
159+
assertEqual(changes.get('.github/workflows/filterDuplicates.js').length, 1)
160+
assertEqual(changes.get('.github/workflows/filterDuplicates.js')[0].join(','), '1,86')
161+
assertEqual(changes.get('.github/workflows/copyPasteDetection.yml')[0].join(','), '26,73')
162+
}
163+
164+
async function test_isCloneInChanges() {
165+
const changes = await parseDiff(testDiffFile)
166+
assertEqual(
167+
isCloneInChanges(changes, {
168+
name: '.github/workflows/filterDuplicates.js',
169+
start: 1,
170+
end: 86,
171+
}),
172+
true
173+
)
174+
assertEqual(
175+
isCloneInChanges(changes, {
176+
name: '.github/workflows/filterDuplicates.js',
177+
start: 80,
178+
end: 95,
179+
}),
180+
true
181+
)
182+
assertEqual(
183+
isCloneInChanges(changes, {
184+
name: '.github/workflows/filterDuplicates.js',
185+
start: 87,
186+
end: 95,
187+
}),
188+
false
189+
)
190+
assertEqual(
191+
isCloneInChanges(changes, {
192+
name: 'some-fake-file',
193+
start: 1,
194+
end: 100,
195+
}),
196+
false
197+
)
198+
}
199+
200+
async function test_isInChanges() {
201+
const changes = await parseDiff(testDiffFile)
202+
const dupe = {
203+
firstFile: {
204+
name: '.github/workflows/filterDuplicates.js',
205+
start: 1,
206+
end: 86,
207+
},
208+
secondFile: {
209+
name: '.github/workflows/filterDuplicates.js',
210+
start: 80,
211+
end: 95,
212+
},
213+
}
214+
assertEqual(isInChanges(changes, dupe), true)
215+
dupe.secondFile.start = 87
216+
assertEqual(isInChanges(changes, dupe), true)
217+
dupe.firstFile.name = 'some-fake-file'
218+
assertEqual(isInChanges(changes, dupe), false)
219+
}
220+
221+
async function test_filterDuplicates() {
222+
assertEqual(
223+
filterDuplicates(
224+
{
225+
duplicates: [
226+
{
227+
firstFile: {
228+
name: '.github/workflows/filterDuplicates.js',
229+
start: 1,
230+
end: 86,
231+
},
232+
secondFile: {
233+
name: '.github/workflows/filterDuplicates.js',
234+
start: 80,
235+
end: 95,
236+
},
237+
},
238+
],
239+
},
240+
await parseDiff(testDiffFile)
241+
).length,
242+
1
243+
)
244+
assertEqual(
245+
filterDuplicates(
246+
{
247+
duplicates: [
248+
{
249+
firstFile: {
250+
name: 'some-other-file',
251+
start: 1,
252+
end: 86,
253+
},
254+
secondFile: {
255+
name: '.github/workflows/filterDuplicates.js',
256+
start: 90,
257+
end: 95,
258+
},
259+
},
260+
],
261+
},
262+
await parseDiff(testDiffFile)
263+
).length,
264+
0
265+
)
266+
}
267+
268+
async function main() {
269+
const mode = process.argv[2]
270+
if (mode === 'run') {
271+
await run()
272+
} else if (mode === 'test') {
273+
await test()
274+
} else {
275+
throw new Error('Invalid mode')
276+
}
277+
}
278+
279+
void main()

.github/workflows/node.js.yml

Lines changed: 3 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ jobs:
8888
env:
8989
CURRENT_BRANCH: ${{ github.head_ref }}
9090
TARGET_BRANCH: ${{ github.event.pull_request.base.ref }}
91-
run: git diff --name-only origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt
91+
run: git diff origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt
9292

9393
- run: npm install -g jscpd
9494

@@ -100,43 +100,8 @@ jobs:
100100
name: unfiltered-jscpd-report
101101
path: ./jscpd-report.json
102102

103-
- name: Filter jscpd report for changed files
104-
run: |
105-
if [ ! -f ./jscpd-report.json ]; then
106-
echo "jscpd-report.json not found"
107-
exit 1
108-
fi
109-
echo "Filtering jscpd report for changed files..."
110-
CHANGED_FILES=$(jq -R -s -c 'split("\n")[:-1]' diff_output.txt)
111-
echo "Changed files: $CHANGED_FILES"
112-
jq --argjson changed_files "$CHANGED_FILES" '
113-
.duplicates | map(select(
114-
(.firstFile?.name as $fname | $changed_files | any(. == $fname)) or
115-
(.secondFile?.name as $sname | $changed_files | any(. == $sname))
116-
))
117-
' ./jscpd-report.json > filtered-jscpd-report.json
118-
cat filtered-jscpd-report.json
119-
120-
- name: Check for duplicates
121-
run: |
122-
if [ $(wc -l < ./filtered-jscpd-report.json) -gt 1 ]; then
123-
echo "filtered_report_exists=true" >> $GITHUB_ENV
124-
else
125-
echo "filtered_report_exists=false" >> $GITHUB_ENV
126-
fi
127-
- name: upload filtered report (if applicable)
128-
if: env.filtered_report_exists == 'true'
129-
uses: actions/upload-artifact@v4
130-
with:
131-
name: filtered-jscpd-report
132-
path: ./filtered-jscpd-report.json
133-
134-
- name: Fail and log found duplicates.
135-
if: env.filtered_report_exists == 'true'
136-
run: |
137-
cat ./filtered-jscpd-report.json
138-
echo "Duplications found, failing the check."
139-
exit 1
103+
- name: Check for Duplicates
104+
run: node "$GITHUB_WORKSPACE/.github/workflows/filterDuplicates.js" run diff_output.txt jscpd-report.json
140105

141106
macos:
142107
needs: lint-commits

0 commit comments

Comments
 (0)