From 285b9b0468760bb513ef469d688415e8eb70ab21 Mon Sep 17 00:00:00 2001 From: "Thomas F. K. Jorna" Date: Wed, 13 May 2026 12:07:07 +0200 Subject: [PATCH] fix: normalize orcid use everywhere --- .../ContributorsList/Contributor.tsx | 9 ++- client/containers/User/UserHeader.tsx | 7 ++- deposit/transform/collection.ts | 3 +- deposit/transform/pub.ts | 3 +- server/user/queries.ts | 14 +++-- .../migrations/2026_05_13_normalizeOrcids.js | 23 ++++++++ utils/api/schemas/user.ts | 9 ++- utils/crossref/schema/contributors.js | 13 +++- utils/orcid.ts | 12 ++++ workers/tasks/communityExport.tsx | 59 +++++++++++-------- workers/tasks/export/pandoc.ts | 5 +- 11 files changed, 117 insertions(+), 40 deletions(-) create mode 100644 tools/migrations/2026_05_13_normalizeOrcids.js diff --git a/client/components/ContributorsList/Contributor.tsx b/client/components/ContributorsList/Contributor.tsx index 1ab6357e2..e6313797a 100644 --- a/client/components/ContributorsList/Contributor.tsx +++ b/client/components/ContributorsList/Contributor.tsx @@ -3,6 +3,7 @@ import React from 'react'; import PropTypes from 'prop-types'; import { Avatar, Icon } from 'components'; +import { normalizeOrcid } from 'utils/orcid'; import './contributor.scss'; @@ -38,20 +39,22 @@ const Contributor = function (props) { return curr; }, ''); + const orcid = normalizeOrcid(user.orcid); + return (
{avatarElement}
{nameElement}
- {user.orcid && ( + {orcid && (
- {user.orcid} + {orcid}
)} diff --git a/client/containers/User/UserHeader.tsx b/client/containers/User/UserHeader.tsx index eaa93e808..8150545ce 100644 --- a/client/containers/User/UserHeader.tsx +++ b/client/containers/User/UserHeader.tsx @@ -10,6 +10,7 @@ import Avatar from 'components/Avatar/Avatar'; import Icon from 'components/Icon/Icon'; import SpamStatusMenu from 'components/SpamStatusMenu'; import { usePageContext } from 'utils/hooks'; +import { normalizeOrcid } from 'utils/orcid'; import './userHeader.scss'; @@ -43,6 +44,8 @@ const UserHeader = function (props) { setSpamStatus(status); }, []); + const orcid = normalizeOrcid(props.userData.orcid); + const links = [ { value: props.userData.location, icon: 'map-marker' as const, url: '' }, { @@ -51,9 +54,9 @@ const UserHeader = function (props) { url: props.userData.website, }, { - value: props.userData.orcid, + value: orcid as string, icon: 'orcid' as const, - url: `https://www.orcid.org/${props.userData.orcid}`, + url: orcid ? `https://orcid.org/${orcid}` : '', }, { value: props.userData.github, diff --git a/deposit/transform/collection.ts b/deposit/transform/collection.ts index 209a9faf7..221d704b9 100644 --- a/deposit/transform/collection.ts +++ b/deposit/transform/collection.ts @@ -12,6 +12,7 @@ import { fetchFacetsForScope } from 'server/facets'; import { expect } from 'utils/assert'; import { collectionUrl } from 'utils/canonicalUrls'; import { licenseDetailsByKind } from 'utils/licenses'; +import { normalizeOrcid } from 'utils/orcid'; const attributionRoleToResourceContributorRole: Record = { 'Writing – Review & Editing': 'Editor', @@ -46,7 +47,7 @@ function transformCollectionAttributionToResourceContribution( return { contributor: { name: attribution.user?.fullName ?? expect(attribution.name), - orcid: attribution.orcid, + orcid: normalizeOrcid(attribution.orcid), }, contributorAffiliation: attribution.affiliation, contributorRole: transformAttributionRoleToResourceContributorRole(role), diff --git a/deposit/transform/pub.ts b/deposit/transform/pub.ts index daa216ae8..fb3a8c0d5 100644 --- a/deposit/transform/pub.ts +++ b/deposit/transform/pub.ts @@ -21,6 +21,7 @@ import { exists, expect } from 'utils/assert'; import { pubUrl } from 'utils/canonicalUrls'; import { getPrimaryCollection } from 'utils/collections/primary'; import { licenseDetailsByKind } from 'utils/licenses'; +import { normalizeOrcid } from 'utils/orcid'; import { getWordAndCharacterCountsFromDoc } from 'utils/pub/metadata'; import { RelationType, type relationTypeDefinitions } from 'utils/pubEdge'; import { sortByRank } from 'utils/rank'; @@ -63,7 +64,7 @@ function transformPubAttributionToResourceContribution( return { contributor: { name: attribution.user?.fullName ?? expect(attribution.name), - orcid: attribution.user?.orcid ?? attribution.orcid, + orcid: normalizeOrcid(attribution.user?.orcid ?? attribution.orcid), }, contributorAffiliation: attribution.affiliation, contributorRole: transformAttributionRoleToResourceContributorRole(role), diff --git a/server/user/queries.ts b/server/user/queries.ts index 1cbdaf60c..2be5adfce 100644 --- a/server/user/queries.ts +++ b/server/user/queries.ts @@ -4,7 +4,7 @@ import { promisify } from 'util'; import { Signup, User } from 'server/models'; import { subscribeUser } from 'server/utils/mailchimp'; import { expect } from 'utils/assert'; -import { ORCID_PATTERN } from 'utils/orcid'; +import { normalizeOrcid } from 'utils/orcid'; import { slugifyString } from 'utils/strings'; type InputValues = CreationAttributes & { @@ -35,7 +35,7 @@ export const createUser = async (inputValues: InputValues) => { bio: inputValues.bio, location: inputValues.location, website: inputValues.website, - orcid: inputValues.orcid, + orcid: normalizeOrcid(inputValues.orcid), github: inputValues.github, twitter: inputValues.twitter, facebook: inputValues.facebook, @@ -93,8 +93,14 @@ export const updateUser = ( filteredValues.initials = `${filteredValues.firstName[0]}${filteredValues.lastName[0]}`; } - if (filteredValues.orcid && (filteredValues.orcid as string).match(ORCID_PATTERN) === null) { - throw new Error('Invalid ORCID'); + if (filteredValues.orcid) { + const normalized = normalizeOrcid(filteredValues.orcid as string); + + if (!normalized) { + throw new Error('Invalid ORCID'); + } + + filteredValues.orcid = normalized; } // A bit of extra paranoia diff --git a/tools/migrations/2026_05_13_normalizeOrcids.js b/tools/migrations/2026_05_13_normalizeOrcids.js new file mode 100644 index 000000000..55dec791f --- /dev/null +++ b/tools/migrations/2026_05_13_normalizeOrcids.js @@ -0,0 +1,23 @@ +export const up = async ({ sequelize }) => { + // strip orcid.org URL prefixes from all orcid columns, leaving just the bare identifier. + // handles http/https and optional www prefix. + const tables = ['Users', 'PubAttributions', 'CollectionAttributions']; + + for (const table of tables) { + const [, meta] = await sequelize.query( + `UPDATE "${table}" + SET orcid = regexp_replace(orcid, '^https?://(?:www\\.)?orcid\\.org/', '') + WHERE orcid LIKE '%orcid.org/%'`, + ); + + const count = meta?.rowCount ?? meta; + if (count > 0) { + console.info(`${table}: normalized ${count} orcid(s)`); + } + } +}; + +export const down = async () => { + // not reversible -- the bare identifiers are strictly more correct than the URLs + throw new Error('Irreversible migration: orcid normalization cannot be undone'); +}; diff --git a/utils/api/schemas/user.ts b/utils/api/schemas/user.ts index 31e275314..8fd6cbf78 100644 --- a/utils/api/schemas/user.ts +++ b/utils/api/schemas/user.ts @@ -2,6 +2,8 @@ import type { MinimalUser, User, UserWithPrivateFields } from 'types'; import { z } from 'zod'; +import { ORCID_ID_OR_URL_PATTERN, ORCID_PATTERN } from 'utils/orcid'; + export const privateUserSchema = z.object({ id: z.string().uuid(), slug: z.string(), @@ -20,7 +22,12 @@ export const privateUserSchema = z.object({ facebook: z.string().nullable(), twitter: z.string().nullable(), github: z.string().nullable(), - orcid: z.string().nullable(), + orcid: z + .string() + .regex(ORCID_ID_OR_URL_PATTERN) + .transform((orcid) => orcid.match(ORCID_PATTERN)?.[0]!) + .nullable() + .or(z.literal('')), googleScholar: z.string().nullable(), resetHashExpiration: z.coerce .date() diff --git a/utils/crossref/schema/contributors.js b/utils/crossref/schema/contributors.js index 626bcbf26..b998f75bb 100644 --- a/utils/crossref/schema/contributors.js +++ b/utils/crossref/schema/contributors.js @@ -1,5 +1,7 @@ /** Renders a list of contributors */ +import { normalizeOrcid } from 'utils/orcid'; + const roleList = { 'Writing – Review & Editing': 'editor', Editor: 'editor', @@ -16,9 +18,12 @@ export default (attributions) => { if (attributions.length === 0) { return {}; } + return { contributors: { person_name: attributions.map((attribution, attributionIndex) => { + const orcid = normalizeOrcid(attribution.user.orcid); + const personNameOutput = { '@contributor_role': attribution.isAuthor ? checkRole(attribution) : 'reader', '@sequence': attributionIndex === 0 ? 'first' : 'additional', @@ -27,17 +32,21 @@ export default (attributions) => { ? attribution.user.lastName : attribution.user.firstName, affiliation: attribution.affiliation, - ORCID: `https://orcid.org/${attribution.user.orcid}`, + ORCID: orcid ? `https://orcid.org/${orcid}` : undefined, }; + if (!personNameOutput.affiliation) { delete personNameOutput.affiliation; } + if (!personNameOutput.given_name) { delete personNameOutput.given_name; } - if (!attribution.user.orcid) { + + if (!personNameOutput.ORCID) { delete personNameOutput.ORCID; } + return personNameOutput; }), }, diff --git a/utils/orcid.ts b/utils/orcid.ts index 35c4e2e88..e10245e1d 100644 --- a/utils/orcid.ts +++ b/utils/orcid.ts @@ -2,3 +2,15 @@ export const ORCID_PATTERN = /(\d{4}-){3}\d{3}(\d|X)/g; export const ORCID_ID_OR_URL_PATTERN = /^(?:(?:https?:\/\/)?(?:www\.)?orcid\.org\/)?(\d{4}-){3}\d{3}(\d|X)$/g; + +/** + * extracts the bare ORCID identifier (e.g. 0000-0001-2345-6789) from a string + * that may be a full URL, a bare ID, or anything in between. returns null if no + * valid ORCID can be found. + */ +export const normalizeOrcid = (value: string | null | undefined): string | null => { + if (!value) return null; + + const match = value.match(/(\d{4}-){3}\d{3}(\d|X)/); + return match?.[0] ?? null; +}; diff --git a/workers/tasks/communityExport.tsx b/workers/tasks/communityExport.tsx index ae1c3059e..9222efb77 100644 --- a/workers/tasks/communityExport.tsx +++ b/workers/tasks/communityExport.tsx @@ -44,6 +44,7 @@ import ensureUserForAttribution from 'utils/ensureUserForAttribution'; import { isProd } from 'utils/environment'; import { getAssetUrlFromResizedUrl } from 'utils/images'; import { licenseDetailsByKind } from 'utils/licenses'; +import { normalizeOrcid } from 'utils/orcid'; import { getTextAbstract } from 'utils/pub/metadata'; // for some reason when imported from utils/notes, it tries to import the client/utils/notes.ts file instead @@ -96,18 +97,22 @@ const renderPubFooter = (metadata: PubMetadata) => {

Authors

    - {authors.map((a: any) => ( -
  • - {a.user?.fullName || a.name} - {a.affiliation && ({a.affiliation})} - {a.orcid && ( - - {' — '} - ORCID - - )} -
  • - ))} + {authors.map((a: any) => { + const orcid = normalizeOrcid(a.orcid); + + return ( +
  • + {a.user?.fullName || a.name} + {a.affiliation && ({a.affiliation})} + {orcid && ( + + {' — '} + ORCID + + )} +
  • + ); + })}
)} @@ -115,19 +120,23 @@ const renderPubFooter = (metadata: PubMetadata) => {

Contributors

    - {contributors.map((a: any) => ( -
  • - {a.user?.fullName || a.name} - {a.roles?.length > 0 && — {a.roles.join(', ')}} - {a.affiliation && ({a.affiliation})} - {a.orcid && ( - - {' — '} - ORCID - - )} -
  • - ))} + {contributors.map((a: any) => { + const orcid = normalizeOrcid(a.orcid); + + return ( +
  • + {a.user?.fullName || a.name} + {a.roles?.length > 0 && — {a.roles.join(', ')}} + {a.affiliation && ({a.affiliation})} + {orcid && ( + + {' — '} + ORCID + + )} +
  • + ); + })}
)} diff --git a/workers/tasks/export/pandoc.ts b/workers/tasks/export/pandoc.ts index 9d530e121..7db436fc6 100644 --- a/workers/tasks/export/pandoc.ts +++ b/workers/tasks/export/pandoc.ts @@ -14,6 +14,7 @@ import YAML from 'yaml'; import { editorSchema, getReactedDocFromJson } from 'components/Editor'; import { getPathToCslFileForCitationStyleKind } from 'server/utils/citations'; +import { normalizeOrcid } from 'utils/orcid'; import { rules } from '../import/rules'; import { @@ -94,11 +95,13 @@ const createYamlMetadataFile = async (pubMetadata: PubMetadata, pandocTarget: Pa const affiliationIds = getAffiliations(attr).map((aff) => { return dedupedAffiliations.indexOf(aff); }); + const orcid = normalizeOrcid(attr.user.orcid); + return { ...(attr.user.lastName && { surname: attr.user.lastName }), ...(attr.user.firstName && { 'given-names': attr.user.firstName }), ...(publicEmail && { email: publicEmail }), - ...(attr.user.orcid && { orcid: attr.user.orcid }), + ...(orcid && { orcid }), ...(attr.affiliation && { affiliation: affiliationIds }), }; }