diff --git a/src/analyze-action.ts b/src/analyze-action.ts index 112e069a8..ff678b964 100644 --- a/src/analyze-action.ts +++ b/src/analyze-action.ts @@ -268,7 +268,7 @@ async function run() { pull_request && (await setupDiffInformedQueryRun( pull_request.base.ref as string, - pull_request.head.ref as string, + pull_request.head.label as string, codeql, logger, features, diff --git a/src/analyze.ts b/src/analyze.ts index 7e82df4a5..2c3b714dc 100644 --- a/src/analyze.ts +++ b/src/analyze.ts @@ -7,6 +7,7 @@ import del from "del"; import * as yaml from "js-yaml"; import * as actionsUtil from "./actions-util"; +import { getApiClient } from "./api-client"; import { setupCppAutobuild } from "./autobuild"; import { CODEQL_VERSION_ANALYSIS_SUMMARY_V2, @@ -17,7 +18,6 @@ import * as configUtils from "./config-utils"; import { addDiagnostic, makeDiagnostic } from "./diagnostics"; import { EnvVar } from "./environment"; import { FeatureEnablement, Feature } from "./feature-flags"; -import * as gitUtils from "./git-utils"; import { isScannedLanguage, Language } from "./languages"; import { Logger, withGroupAsync } from "./logging"; import { DatabaseCreationTimings, EventReport } from "./status-report"; @@ -240,7 +240,8 @@ async function finalizeDatabaseCreation( * Set up the diff-informed analysis feature. * * @param baseRef The base branch name, used for calculating the diff range. - * @param headRef The head branch name, used for calculating the diff range. + * @param headLabel The label that uniquely identifies the head branch across + * repositories, used for calculating the diff range. * @param codeql * @param logger * @param features @@ -249,7 +250,7 @@ async function finalizeDatabaseCreation( */ export async function setupDiffInformedQueryRun( baseRef: string, - headRef: string, + headLabel: string, codeql: CodeQL, logger: Logger, features: FeatureEnablement, @@ -262,7 +263,7 @@ export async function setupDiffInformedQueryRun( async () => { const diffRanges = await getPullRequestEditedDiffRanges( baseRef, - headRef, + headLabel, logger, ); return writeDiffRangeDataExtensionPack(logger, diffRanges); @@ -280,7 +281,8 @@ interface DiffThunkRange { * Return the file line ranges that were added or modified in the pull request. * * @param baseRef The base branch name, used for calculating the diff range. - * @param headRef The head branch name, used for calculating the diff range. + * @param headLabel The label that uniquely identifies the head branch across + * repositories, used for calculating the diff range. * @param logger * @returns An array of tuples, where each tuple contains the absolute path of a * file, the start line and the end line (both 1-based and inclusive) of an @@ -289,107 +291,61 @@ interface DiffThunkRange { */ async function getPullRequestEditedDiffRanges( baseRef: string, - headRef: string, + headLabel: string, logger: Logger, ): Promise { - const checkoutPath = actionsUtil.getOptionalInput("checkout_path"); - if (checkoutPath === undefined) { - return undefined; - } - - // To compute the merge bases between the base branch and the PR topic branch, - // we need to fetch the commit graph from the branch heads to those merge - // babes. The following 6-step procedure does so while limiting the amount of - // history fetched. - - // Step 1: Deepen from the PR merge commit to the base branch head and the PR - // topic branch head, so that the PR merge commit is no longer considered a - // grafted commit. - await gitUtils.deepenGitHistory(); - // Step 2: Fetch the base branch shallow history. This step ensures that the - // base branch name is present in the local repository. Normally the base - // branch name would be added by Step 4. However, if the base branch head is - // an ancestor of the PR topic branch head, Step 4 would fail without doing - // anything, so we need to fetch the base branch explicitly. - await gitUtils.gitFetch(baseRef, ["--depth=1"]); - // Step 3: Fetch the PR topic branch history, stopping when we reach commits - // that are reachable from the base branch head. - await gitUtils.gitFetch(headRef, [`--shallow-exclude=${baseRef}`]); - // Step 4: Fetch the base branch history, stopping when we reach commits that - // are reachable from the PR topic branch head. - await gitUtils.gitFetch(baseRef, [`--shallow-exclude=${headRef}`]); - // Step 5: Repack the history to remove the shallow grafts that were added by - // the previous fetches. This step works around a bug that causes subsequent - // deepening fetches to fail with "fatal: error in object: unshallow ". - // See https://stackoverflow.com/q/63878612 - await gitUtils.gitRepack(["-d"]); - // Step 6: Deepen the history so that we have the merge bases between the base - // branch and the PR topic branch. - await gitUtils.deepenGitHistory(); - - // To compute the exact same diff as GitHub would compute for the PR, we need - // to use the same merge base as GitHub. That is easy to do if there is only - // one merge base, which is by far the most common case. If there are multiple - // merge bases, we stop without producing a diff range. - const mergeBases = await gitUtils.getAllGitMergeBases([baseRef, headRef]); - logger.info(`Merge bases: ${mergeBases.join(", ")}`); - if (mergeBases.length !== 1) { - logger.info( - "Cannot compute diff range because baseRef and headRef " + - `have ${mergeBases.length} merge bases (instead of exactly 1).`, - ); - return undefined; - } - - const diffHunkHeaders = await gitUtils.getGitDiffHunkHeaders( - mergeBases[0], - headRef, - ); - if (diffHunkHeaders === undefined) { - return undefined; - } + await getFileDiffsWithBasehead(baseRef, headLabel, logger); + return undefined; +} - const results = new Array(); - - let changedFile = ""; - for (const line of diffHunkHeaders) { - if (line.startsWith("+++ ")) { - const filePath = gitUtils.decodeGitFilePath(line.substring(4)); - if (filePath.startsWith("b/")) { - // The file was edited: track all hunks in the file - changedFile = filePath.substring(2); - } else if (filePath === "/dev/null") { - // The file was deleted: skip all hunks in the file - changedFile = ""; - } else { - logger.warning(`Failed to parse diff hunk header line: ${line}`); - return undefined; - } - continue; - } - if (line.startsWith("@@ ")) { - if (changedFile === "") continue; +/** + * This interface is an abbreviated version of the file diff object returned by + * the GitHub API. + */ +interface FileDiff { + filename: string; + changes: number; + // A patch may be absent if the file is binary, if the file diff is too large, + // or if the file is unchanged. + patch?: string | undefined; +} - const match = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/); - if (match === null) { - logger.warning(`Failed to parse diff hunk header line: ${line}`); - return undefined; - } - const startLine = parseInt(match[1], 10); - const numLines = parseInt(match[2], 10); - if (numLines === 0) { - // The hunk was a deletion: skip it - continue; - } - const endLine = startLine + (numLines || 1) - 1; - results.push({ - path: path.join(checkoutPath, changedFile), - startLine, - endLine, - }); +async function getFileDiffsWithBasehead( + baseRef: string, + headLabel: string, + logger: Logger, +): Promise { + const ownerRepo = util.getRequiredEnvParam("GITHUB_REPOSITORY").split("/"); + const owner = ownerRepo[0]; + const repo = ownerRepo[1]; + const basehead = `${baseRef}...${headLabel}`; + try { + const response = await getApiClient().rest.repos.compareCommitsWithBasehead( + { + owner, + repo, + basehead, + per_page: 1, + }, + ); + logger.debug( + `Response from compareCommitsWithBasehead(${basehead}):` + + `\n${JSON.stringify(response, null, 2)}`, + ); + return response.data.files; + } catch (error: any) { + if (error.status) { + logger.warning(`Error retrieving diff ${basehead}: ${error.message}`); + logger.debug( + `Error running compareCommitsWithBasehead(${basehead}):` + + `\nRequest: ${JSON.stringify(error.request, null, 2)}` + + `\nError Response: ${JSON.stringify(error.response, null, 2)}`, + ); + return undefined; + } else { + throw error; } } - return results; } /**