/**
 * Explore PR triage commenter (body of the actions/github-script step).
 *
 * Collects the topic and collection slugs touched by the pull request, looks
 * up the facts reported for each (repository count per topic; stars, last
 * push date, owner type and notes per collection item) and writes them into a
 * single sticky comment that is edited in place on later runs.
 *
 * `github`, `context` and `core` are not defined here; they are expected to be
 * provided by the github-script action that runs this script.
 */

// Used when the MARKER env var is empty or unset. An empty marker must never
// be used: `body.startsWith('')` is true for every comment, so the sticky
// comment lookup would select (and overwrite) the first comment on the PR.
const DEFAULT_MARKER = '<!-- explore-triage-commenter -->';

// Only accept the slug shape we'd ever expect on disk.
const SLUG = /^[a-z0-9](?:[a-z0-9-]{0,80}[a-z0-9])?$/i;

await main();

/**
 * Entry point: gather PR facts, render the comment body, upsert the comment.
 * Returns early (after an info log) when no topic or collection is touched.
 */
async function main() {
  const marker = (process.env.MARKER ?? '').trim() || DEFAULT_MARKER;
  const pr = context.payload.pull_request;
  const prNumber = pr.number;
  const prAuthor = pr.user.login.toLowerCase();
  const headSha = pr.head.sha;
  const baseOwner = context.repo.owner;
  const baseRepo = context.repo.repo;

  // List files in the PR (paginated).
  const files = await github.paginate(github.rest.pulls.listFiles, {
    owner: baseOwner,
    repo: baseRepo,
    pull_number: prNumber,
    per_page: 100,
  });

  const { topics, collections } = collectSlugs(files);
  if (topics.size === 0 && collections.size === 0) {
    core.info('No topic or collection changes detected; nothing to do.');
    return;
  }

  const sections = [];
  if (topics.size > 0) {
    sections.push(await buildTopicsSection(topics));
  }
  for (const slug of collections) {
    sections.push(
      await buildCollectionSection(slug, { baseOwner, baseRepo, headSha, prAuthor }),
    );
  }

  const body = [
    marker,
    '',
    '',
    '## Maintainer triage',
    '',
    ...sections,
  ].join('\n');

  await upsertStickyComment({
    owner: baseOwner,
    repo: baseRepo,
    issueNumber: prNumber,
    marker,
    body,
  });
}

/**
 * Detect the topic and collection slugs touched by a list of PR files.
 * Removed files and paths whose slug does not match SLUG are skipped.
 *
 * @param {Array<{filename: string, status: string}>} files
 * @returns {{topics: Set<string>, collections: Set<string>}}
 */
function collectSlugs(files) {
  const topics = new Set();
  const collections = new Set();
  for (const f of files) {
    if (f.status === 'removed') continue;
    const m = f.filename.match(/^(topics|collections)\/([^\/]+)\//);
    if (!m) continue;
    const slug = m[2];
    if (!SLUG.test(slug)) continue;
    if (m[1] === 'topics') topics.add(slug);
    else collections.add(slug);
  }
  return { topics, collections };
}

/**
 * Render the "### Topics" section: one bullet per slug with its repository
 * count from the search API. A failed search is logged as a warning and
 * reported inline instead of aborting the run.
 *
 * @param {Iterable<string>} topics
 * @returns {Promise<string>}
 */
async function buildTopicsSection(topics) {
  const lines = ['### Topics', ''];
  for (const slug of topics) {
    let count = null;
    try {
      const res = await github.rest.search.repos({
        q: `topic:${slug}`,
        per_page: 1,
      });
      count = res.data.total_count;
    } catch (err) {
      core.warning(`Search failed for topic '${slug}': ${err.message}`);
    }
    const url = `https://github.com/topics/${encodeURIComponent(slug)}`;
    if (count == null) {
      lines.push(`- **${slug}** — [topic page](${url}) _(repo count lookup failed)_`);
    } else {
      lines.push(`- **${slug}** — ${count.toLocaleString()} repositories — [topic page](${url})`);
    }
  }
  return lines.join('\n');
}

/**
 * Render the section for one collection: a table with one row per entry of
 * the `items:` list in `collections/<slug>/index.md` at the PR head SHA.
 * If the file cannot be read or has no items, the section carries a one-line
 * explanation instead of a table.
 *
 * @param {string} slug
 * @param {{baseOwner: string, baseRepo: string, headSha: string, prAuthor: string}} ctx
 * @returns {Promise<string>}
 */
async function buildCollectionSection(slug, { baseOwner, baseRepo, headSha, prAuthor }) {
  const lines = [`### Collection \`${slug}\``, ''];

  // Read collection's index.md at the PR head SHA.
  // PR commits from forks are mirrored into the base repo's network,
  // so we can fetch from the base repo with the head SHA — simpler
  // and avoids any cross-repo token concerns.
  let content;
  try {
    const res = await github.rest.repos.getContent({
      owner: baseOwner,
      repo: baseRepo,
      path: `collections/${slug}/index.md`,
      ref: headSha,
    });
    content = Buffer.from(res.data.content, 'base64').toString('utf8');
  } catch (err) {
    lines.push(`_Could not read \`collections/${slug}/index.md\` at PR head (\`${err.status || 'error'}\`)._`);
    return lines.join('\n');
  }

  const items = parseCollectionItems(content);
  if (items.length === 0) {
    lines.push('_No `items:` list found in frontmatter._');
    return lines.join('\n');
  }

  lines.push('| Item | Stars | Last push | Owner type | Notes |');
  lines.push('| --- | ---: | --- | --- | --- |');
  for (const item of items) {
    lines.push(await describeItem(item, prAuthor));
  }
  lines.push('');
  return lines.join('\n');
}

/**
 * Render one table row for a collection item.
 * Items that are not `owner/repo` shaped are escaped and flagged as
 * "invalid format"; repository lookup failures are reported in the Notes
 * column ("not found" for 404, otherwise the status code).
 *
 * @param {string} item
 * @param {string} prAuthor lower-cased login of the PR author
 * @returns {Promise<string>}
 */
async function describeItem(item, prAuthor) {
  if (!/^[\w.-]+\/[\w.-]+$/.test(item)) {
    // Neutralise characters that would break the code span or the table cell.
    const safeItem = item.replace(/`/g, "'").replace(/\\/g, '\\\\').replace(/\|/g, '\\|');
    return `| \`${safeItem}\` | – | – | – | invalid format |`;
  }
  const [owner, repo] = item.split('/');
  try {
    const r = await github.rest.repos.get({ owner, repo });
    const stars = r.data.stargazers_count.toLocaleString();
    const pushed = r.data.pushed_at ? r.data.pushed_at.slice(0, 10) : '–';
    const ownerType = r.data.owner.type;
    const notes = [];
    if (owner.toLowerCase() === prAuthor) notes.push('⚠️ possible self-submission');
    if (r.data.archived) notes.push('archived');
    if (r.data.disabled) notes.push('disabled');
    return `| [\`${item}\`](https://github.com/${item}) | ${stars} | ${pushed} | ${ownerType} | ${notes.join(', ') || '–'} |`;
  } catch (err) {
    const note = err.status === 404 ? 'not found' : `error (${err.status || '?'})`;
    return `| \`${item}\` | – | – | – | ${note} |`;
  }
}

/**
 * Edit-in-place via marker: update the first comment whose body starts with
 * `marker`, or create a new comment when none exists.
 *
 * @param {{owner: string, repo: string, issueNumber: number, marker: string, body: string}} args
 *   `marker` must be non-empty (see DEFAULT_MARKER).
 * @returns {Promise<void>}
 */
async function upsertStickyComment({ owner, repo, issueNumber, marker, body }) {
  const comments = await github.paginate(github.rest.issues.listComments, {
    owner,
    repo,
    issue_number: issueNumber,
    per_page: 100,
  });
  const existing = comments.find((c) => c.body && c.body.startsWith(marker));

  if (existing) {
    await github.rest.issues.updateComment({
      owner,
      repo,
      comment_id: existing.id,
      body,
    });
    core.info(`Updated comment ${existing.id}`);
    return;
  }

  await github.rest.issues.createComment({
    owner,
    repo,
    issue_number: issueNumber,
    body,
  });
  core.info('Created new comment');
}
/**
 * Extract the entries of the top-level `items:` list from the YAML
 * frontmatter at the start of a Markdown document.
 *
 * This is a deliberately small line-based reader, not a YAML parser: it only
 * understands a block list (`- value` lines) under an `items:` key.
 *
 * - Frontmatter is the text between a leading `---` line and the next line
 *   starting with `---`; LF and CRLF line endings and a leading BOM are
 *   accepted.
 * - The list ends at the first unindented line that is neither a list entry
 *   nor a comment (i.e. the next top-level key, whatever characters it uses).
 * - An entry wrapped in single or double quotes is returned without the
 *   quotes; an unquoted entry runs up to the first whitespace or `#`.
 *
 * @param {string} text Full file contents.
 * @returns {string[]} Item strings in file order; empty when the input is not
 *   a string, has no frontmatter, or has no `items:` list.
 */
function parseCollectionItems(text) {
  if (typeof text !== 'string') return [];

  // Frontmatter between leading --- lines (BOM stripped so `^---` can match).
  const fmMatch = text.replace(/^\uFEFF/, '').match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!fmMatch) return [];

  const items = [];
  let inItems = false;
  for (const line of fmMatch[1].split(/\r?\n/)) {
    if (!inItems) {
      // `items:` alone on its line, optionally followed by a comment.
      inItems = /^items:\s*(?:#.*)?$/.test(line);
      continue;
    }
    // Next top-level key ends the items block. Keys are not restricted to
    // \w characters, so `other-key:` or `"quoted key":` end it as well.
    if (/^[^\s#-]/.test(line)) break;

    const m = line.match(/^\s*-\s*(?:"([^"]*)"|'([^']*)'|([^\s#]+))/);
    if (!m) continue;
    const value = m[1] ?? m[2] ?? m[3];
    if (value) items.push(value);
  }
  return items;
}
the files changed in the PR - const { data: files } = await github.rest.pulls.listFiles({ - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: prNumber, - }); - - // Extract topics from any file changed in the "topics/" folder. - // Assumes the file name (e.g. "python.md") indicates the topic "python" - const topics = []; - for (const file of files) { - if (file.filename.startsWith('topics/')) { - const parts = file.filename.split('/'); - const topicName = parts[parts.length - 2]; - topics.push(topicName); - } - } - - if (topics.length === 0) { - console.log('No topics found in changed files.'); - return; - } - - // Remove duplicate topic names (in case multiple files reference the same topic) - const uniqueTopics = [...new Set(topics)]; - - // Prepare the body of the comment - let commentBody = '## Topic Information\n\n'; - - for (const topic of uniqueTopics) { - // Query the GitHub Search API for repositories with the topic. - // Note: The Search API endpoint returns a JSON with a total_count field. 
- const searchResponse = await github.request('GET /search/repositories', { - q: `topic:${topic}` - }); - const repoCount = searchResponse.data.total_count; - - // Append topic details to the comment body - commentBody += `### ${topic}\n`; - commentBody += `- [Topic Page](https://github.com/topics/${topic})\n`; - commentBody += `- Repositories: ${repoCount}\n\n`; - } - - // Post the comment on the PR - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - body: commentBody - }); diff --git a/topics/ai-for-science/index.md b/topics/ai-for-science/index.md new file mode 100644 index 000000000000..84bccaa59778 --- /dev/null +++ b/topics/ai-for-science/index.md @@ -0,0 +1,10 @@ +--- +aliases: ai4science, ml-for-science, artificial-intelligence-for-science +display_name: AI for science +related: machine-learning, deep-learning, simulation, bioinformatics, chemistry, physics, data-science, ai +short_description: AI for science applies machine learning and artificial intelligence to accelerate scientific discovery across disciplines. +topic: ai-for-science +--- +AI for science is the application of machine learning and artificial intelligence methods to accelerate research and discovery across scientific domains. It encompasses work in protein structure prediction, climate modeling, drug discovery, materials design, and particle physics, among others. + +Rather than replacing traditional scientific methods, AI for science augments them by learning patterns from experimental and simulation data to generate hypotheses, design experiments, and build fast surrogate models. Landmark examples include AlphaFold for protein structure prediction, GraphCast for weather forecasting, and FermiNet for quantum chemistry.