From 90b3b66031836b1b24a72faa1e27517ddc1f133c Mon Sep 17 00:00:00 2001 From: alec_dev Date: Tue, 30 Sep 2025 12:33:26 -0500 Subject: [PATCH 01/38] Refactor picklist handling to improve null handling and type coercion when building a case statement for the QB. --- specifyweb/backend/stored_queries/format.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/specifyweb/backend/stored_queries/format.py b/specifyweb/backend/stored_queries/format.py index 1bd017c7c49..c70607bd537 100644 --- a/specifyweb/backend/stored_queries/format.py +++ b/specifyweb/backend/stored_queries/format.py @@ -8,7 +8,7 @@ from xml.sax.saxutils import quoteattr from specifyweb.specify.api.utils import get_picklists -from sqlalchemy import Table as SQLTable, inspect, case +from sqlalchemy import Table as SQLTable, inspect, case, type_coerce from sqlalchemy.orm import aliased, Query from sqlalchemy.sql.expression import func, cast, literal, Label from sqlalchemy.sql.functions import concat @@ -408,9 +408,20 @@ def _fieldformat(self, table: Table, specify_field: Field, if self.format_picklist: picklists, _ = get_picklists(self.collection, table.table, specify_field.name) if picklists: - cases = [(field == item.value, item.title) for item in picklists[0].picklistitems.all()] - _case = case(cases, else_=field) - + # cases = [(field == item.value, item.title) for item in picklists[0].picklistitems.all()] + # _case = case(cases, else_=field) + items = list(picklists[0].picklistitems.all()) + if not items: + expr = cast(field, types.String()) + return blank_nulls(expr) if self.replace_nulls else expr + + cases = [ + (field == item.value, literal(item.title or "", type_=types.String())) + for item in items + ] + _case = case(cases, else_=cast(field, types.String())) + _case = type_coerce(_case, types.String()) + return blank_nulls(_case) if self.replace_nulls else _case if self.format_types and specify_field.type == "java.lang.Boolean": From ee9d106629cc990070f444ffe8abda3d761894f9 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Wed, 15 Oct 2025 16:03:30 -0500 Subject: [PATCH 02/38] Apply format.py changes from issue-7456 --- specifyweb/backend/stored_queries/format.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/specifyweb/backend/stored_queries/format.py b/specifyweb/backend/stored_queries/format.py index c70607bd537..9a06926678c 100644 --- a/specifyweb/backend/stored_queries/format.py +++ b/specifyweb/backend/stored_queries/format.py @@ -321,13 +321,7 @@ def aggregate(self, query: QueryConstruct, aliased_orm_table = aliased(orm_table) if is_self_join_aggregation: # Handle self join aggregation - if field.name in {'children', 'components'} and field.relatedModelName == 'CollectionObject': - # Child = aliased(orm_table) - subquery_query = Query([]) \ - .select_from(aliased_orm_table) \ - .filter(aliased_orm_table.ComponentParentID == rel_table._id) \ - .correlate(rel_table) - elif field.is_relationship and \ + if field.is_relationship and \ field.type == 'one-to-many' and \ field.otherSideName in [fld.name for fld in specify_model.relationships]: # Handle self join aggregation in the general case From 29e01a28156e978d449e045be478b52d045c85c9 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 20 Oct 2025 13:02:42 -0500 Subject: [PATCH 03/38] Apply changes from adding stats_counts context api --- specifyweb/backend/context/urls.py | 1 + specifyweb/backend/context/views.py | 17 +++- .../components/InitialContext/systemInfo.ts | 81 +++++++++++++++---- specifyweb/settings/specify_settings.py | 1 + 4 files changed, 84 insertions(+), 16 deletions(-) diff --git a/specifyweb/backend/context/urls.py b/specifyweb/backend/context/urls.py index 59ee69b16c1..a9b4e875b20 100644 --- a/specifyweb/backend/context/urls.py +++ b/specifyweb/backend/context/urls.py @@ -19,6 +19,7 @@ re_path(r'^api_endpoints.json$', views.api_endpoints), re_path(r'^api_endpoints_all.json$', views.api_endpoints_all), re_path(r'^user.json$', views.user), + re_path(r'^stats_counts.json$', views.stats_counts), re_path(r'^system_info.json$', views.system_info), re_path(r'^server_time.json$', views.get_server_time), re_path(r'^domain.json$', views.domain), diff --git a/specifyweb/backend/context/views.py b/specifyweb/backend/context/views.py index bed109ce506..a94f29eacfe 100644 --- a/specifyweb/backend/context/views.py +++ b/specifyweb/backend/context/views.py @@ -26,7 +26,7 @@ PermissionTargetAction, \ check_permission_targets, skip_collection_access_check, query_pt, \ CollectionAccessPT -from specifyweb.specify.models import Collection, Institution, \ +from specifyweb.specify.models import Collection, Collectionobject, Institution, \ Specifyuser, Spprincipal, Spversion, Collectionobjecttype from specifyweb.specify.models_utils.schema import base_schema from specifyweb.specify.models_utils.serialize_datamodel import datamodel_to_json @@ -655,6 +655,7 @@ def system_info(request): database_version=spversion.appversion, schema_version=spversion.schemaversion, stats_url=settings.STATS_URL, + stats_2_url=settings.STATS_2_URL, database=settings.DATABASE_NAME, institution=institution.name, institution_guid=institution.guid, @@ -1018,3 +1019,17 @@ def schema_language(request): dict(zip(('language', 'country', 'variant'), row)) for row in schema_languages ], safe=False) + +@require_http_methods(['GET', 'HEAD']) +@cache_control(max_age=86400, public=True) +@login_maybe_required +def stats_counts(request): + """Get the count of collection objects, collections, and users.""" + co_count = Collectionobject.objects.count() + collection_count = Collection.objects.count() + user_count = Specifyuser.objects.count() + return JsonResponse({ + 'Collectionobject': co_count, + 'Collection': collection_count, + 'Specifyuser': user_count, + }) diff --git a/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts b/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts index 8ff7a789077..da2414c3854 100644 --- a/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts +++ b/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts @@ -22,31 +22,67 @@ type SystemInfo = { readonly institution_guid: LocalizedString; readonly isa_number: LocalizedString; readonly stats_url: string | null; + readonly stats_2_url: string | null; readonly discipline_type: string; }; +type StatsCounts = { + readonly Collectionobject: number; + readonly Collection: number; + readonly Specifyuser: number; +}; + let systemInfo: SystemInfo; +function buildStatsLambdaUrl(base: string | null | undefined): string | null { + if (!base) return null; + let u = base.trim(); + + if (!/^https?:\/\//i.test(u)) u = `https://${u}`; + + const hasRoute = /\/(prod|default)\/[^\s/]+/.test(u); + if (!hasRoute) { + const stage = 'prod'; + const route = 'AggrgatedSp7Stats'; + u = `${u.replace(/\/$/, '') }/${stage}/${route}`; + } + return u; +} + export const fetchContext = load( '/context/system_info.json', 'application/json' -).then((data) => { +).then(async (data) => { systemInfo = data; - if (systemInfo.stats_url !== null) - ping( + + if (systemInfo.stats_url !== null) { + let counts: StatsCounts | null = null; + try { + counts = await load('/context/stats_counts.json', 'application/json'); + } catch { + // If counts fetch fails, proceed without them. + counts = null; + } + + const parameters = { + version: systemInfo.version, + dbVersion: systemInfo.database_version, + institution: systemInfo.institution, + institutionGUID: systemInfo.institution_guid, + discipline: systemInfo.discipline, + collection: systemInfo.collection, + collectionGUID: systemInfo.collection_guid, + isaNumber: systemInfo.isa_number, + disciplineType: systemInfo.discipline_type, + collectionObjectCount: counts?.Collectionobject ?? 0, + collectionCount: counts?.Collection ?? 0, + userCount: counts?.Specifyuser ?? 0, + }; + + await ping( formatUrl( systemInfo.stats_url, - { - version: systemInfo.version, - dbVersion: systemInfo.database_version, - institution: systemInfo.institution, - institutionGUID: systemInfo.institution_guid, - discipline: systemInfo.discipline, - collection: systemInfo.collection, - collectionGUID: systemInfo.collection_guid, - isaNumber: systemInfo.isa_number, - disciplineType: systemInfo.discipline_type, - }, + parameters, /* * I don't know if the receiving server handles GET parameters in a * case-sensitive way. Thus, don't convert keys to lower case, but leave @@ -56,7 +92,22 @@ export const fetchContext = load( ), { errorMode: 'silent' } ).catch(softFail); + + /* + * Await ping( + * formatUrl(systemInfo.stats_2_url, parameters, false), + * { errorMode: 'silent' } + * ).catch(softFail); + */ + + const lambdaUrl = buildStatsLambdaUrl(systemInfo.stats_2_url); + if (lambdaUrl) { + await ping(formatUrl(lambdaUrl, parameters, false), { errorMode: 'silent' }) + .catch(softFail); + } + } + return systemInfo; }); -export const getSystemInfo = (): SystemInfo => systemInfo; +export const getSystemInfo = (): SystemInfo => systemInfo; \ No newline at end of file diff --git a/specifyweb/settings/specify_settings.py b/specifyweb/settings/specify_settings.py index b83bdda9007..62515074c25 100644 --- a/specifyweb/settings/specify_settings.py +++ b/specifyweb/settings/specify_settings.py @@ -102,6 +102,7 @@ # Usage stats are transmitted to the following address. # Set to None to disable. STATS_URL = "https://stats.specifycloud.org/capture" +STATS_2_URL = "pj9lpoo1pc.execute-api.us-east-1.amazonaws.com" # "https://stats-2.specifycloud.org/prod/AggrgatedSp7Stats" # Workbench uploader log directory. # Must exist and be writeable by the web server process. From 2d9dd2c782d25089ad9a8f0bb3d77aeefb232a1e Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 20 Oct 2025 13:26:43 -0500 Subject: [PATCH 04/38] Apply group_concat and blank_nulls changes --- .../backend/stored_queries/blank_nulls.py | 2 + specifyweb/backend/stored_queries/format.py | 46 ++++++--- .../backend/stored_queries/group_concat.py | 43 ++++---- specifyweb/backend/stored_queries/utils.py | 97 ++++++++++++++++--- 4 files changed, 140 insertions(+), 48 deletions(-) diff --git a/specifyweb/backend/stored_queries/blank_nulls.py b/specifyweb/backend/stored_queries/blank_nulls.py index 48aa2195936..1f532a5f16f 100644 --- a/specifyweb/backend/stored_queries/blank_nulls.py +++ b/specifyweb/backend/stored_queries/blank_nulls.py @@ -16,6 +16,8 @@ """ class blank_nulls(expression.FunctionElement): name = 'blank_nulls' + type = String() + inherit_cache = True """ The `@compiles` decorator tells sqlalchemy to run this function whenever a blank_nulls object needs to be compiled. diff --git a/specifyweb/backend/stored_queries/format.py b/specifyweb/backend/stored_queries/format.py index 9a06926678c..b671a3e3ef5 100644 --- a/specifyweb/backend/stored_queries/format.py +++ b/specifyweb/backend/stored_queries/format.py @@ -7,6 +7,7 @@ from xml.etree.ElementTree import Element from xml.sax.saxutils import quoteattr +from specifyweb.backend.stored_queries.utils import ensure_string_type_if_null from specifyweb.specify.api.utils import get_picklists from sqlalchemy import Table as SQLTable, inspect, case, type_coerce from sqlalchemy.orm import aliased, Query @@ -208,9 +209,14 @@ def make_expr(self, new_expr = self._fieldformat(formatter_field_spec.table, formatter_field_spec.get_field(), new_expr) if 'trimzeros' in fieldNodeAttrib: + # new_expr = case( + # [(new_expr.op('REGEXP')('^-?[0-9]+(\\.[0-9]+)?$'), cast(new_expr, types.Numeric(65)))], + # else_=new_expr + # ) + numeric_str = cast(cast(new_expr, types.Numeric(65)), types.String()) new_expr = case( - [(new_expr.op('REGEXP')('^-?[0-9]+(\\.[0-9]+)?$'), cast(new_expr, types.Numeric(65)))], - else_=new_expr + (new_expr.op('REGEXP')('^-?[0-9]+(\\.[0-9]+)?$'), numeric_str), + else_=cast(new_expr, types.String()), ) if 'format' in fieldNodeAttrib: @@ -377,17 +383,25 @@ def _dateformat(self, specify_field, field): prec_fld = getattr(field.class_, specify_field.name + 'Precision', None) - format_expr = ( - case( - [ - (prec_fld == 2, self.date_format_month), - (prec_fld == 3, self.date_format_year), - ], - else_=self.date_format, + # format_expr = ( + # case( + # [ + # (prec_fld == 2, self.date_format_month), + # (prec_fld == 3, self.date_format_year), + # ], + # else_=self.date_format, + # ) + # if prec_fld is not None + # else self.date_format + # ) + if prec_fld is not None: + format_expr = case( + (prec_fld == 2, literal(self.date_format_month, type_=types.String())), + (prec_fld == 3, literal(self.date_format_year, type_=types.String())), + else_=ensure_string_type_if_null(literal(self.date_format, type_=types.String())), ) - if prec_fld is not None - else self.date_format - ) + else: + format_expr = literal(self.date_format, type_=types.String()) return func.date_format(field, format_expr) @@ -395,8 +409,10 @@ def _fieldformat(self, table: Table, specify_field: Field, field: InstrumentedAttribute | Extract): if self.format_agent_type and specify_field is Agent_model.get_field("agenttype"): - cases = [(field == _id, name) for (_id, name) in enumerate(agent_types)] - _case = case(cases) + # cases = [(field == _id, name) for (_id, name) in enumerate(agent_types)] + # _case = case(cases) + cases = [(field == _id, ensure_string_type_if_null(literal(name, type_=types.String()))) for (_id, name) in enumerate(agent_types)] + _case = case(cases, else_=ensure_string_type_if_null(literal('', type_=types.String()))) return blank_nulls(_case) if self.replace_nulls else _case if self.format_picklist: @@ -410,7 +426,7 @@ def _fieldformat(self, table: Table, specify_field: Field, return blank_nulls(expr) if self.replace_nulls else expr cases = [ - (field == item.value, literal(item.title or "", type_=types.String())) + (field == item.value, ensure_string_type_if_null(literal(item.title or ""))) for item in items ] _case = case(cases, else_=cast(field, types.String())) diff --git a/specifyweb/backend/stored_queries/group_concat.py b/specifyweb/backend/stored_queries/group_concat.py index 2f1825345ab..b3466c88619 100644 --- a/specifyweb/backend/stored_queries/group_concat.py +++ b/specifyweb/backend/stored_queries/group_concat.py @@ -2,46 +2,45 @@ # https://stackoverflow.com/questions/19205850/how-do-i-write-a-group-concat-function-in-sqlalchemy import sqlalchemy -from sqlalchemy.sql import expression +from sqlalchemy.sql import expression, bindparam from sqlalchemy.ext import compiler +from sqlalchemy.sql.elements import ClauseElement +from sqlalchemy.types import String from specifyweb.backend.stored_queries.query_construct import QueryConstruct -# class changed from FunctionElement to ColumnElement class group_concat(expression.ColumnElement): + # MySQL GROUP_CONCAT(expr [ORDER BY ...] [SEPARATOR ...]) name = "group_concat" - def __init__(self, expr, separator=None, order_by=None): + type = String() + inherit_cache = True + + def __init__(self, expr: ClauseElement, separator=None, order_by: ClauseElement | None = None): + super().__init__() self.expr = expr self.separator = separator self.order_by = order_by @compiler.compiles(group_concat) def _group_concat_mysql(element, compiler, **kwargs): - # Old way of extracting clauses, when group_concat was a FunctionElement - # expr, separator, order_by = extract_clauses(element, compiler) - - inner_expr= compiler.process(element.expr, **kwargs) + inner_expr = compiler.process(element.expr, **kwargs) if element.order_by is not None: - order_by = compiler.process(element.order_by, **kwargs) - inner_expr+= " ORDER BY %s" % order_by + if isinstance(element.order_by, (list, tuple, set)): + ob = ", ".join(compiler.process(ob, **kwargs) for ob in element.order_by) + else: + ob = compiler.process(element.order_by, **kwargs) + inner_expr += f" ORDER BY {ob}" + if element.separator is not None: - # Resorting to text() + bindparams to avoid SQL injection and fix parameter ordering bug - separator = compiler.process(sqlalchemy.text(" SEPARATOR :sep").bindparams(sep=element.separator), **kwargs) - inner_expr+= " %s" % separator + if isinstance(element.separator, ClauseElement): + sep_sql = compiler.process(element.separator, **kwargs) + else: + sep_sql = compiler.process(bindparam("sep", element.separator, type_=String()), **kwargs) + inner_expr += f" SEPARATOR {sep_sql}" return 'GROUP_CONCAT(%s)' % inner_expr -def extract_clauses(element, compiler): - expr = compiler.process(element.clauses.clauses[0]) - def process_clause(idx): - return compiler.process(element.clauses.clauses[idx]) - - separator = process_clause(1) if len(element.clauses) > 1 else None - order_by = process_clause(2) if len(element.clauses) > 2 else None - - return expr, separator, order_by - def group_by_displayed_fields(query: QueryConstruct, fields, ignore_cat_num=False): for field in fields: if ( diff --git a/specifyweb/backend/stored_queries/utils.py b/specifyweb/backend/stored_queries/utils.py index 218423c48f0..64cd49b1944 100644 --- a/specifyweb/backend/stored_queries/utils.py +++ b/specifyweb/backend/stored_queries/utils.py @@ -1,15 +1,90 @@ import logging +from typing import Any, Optional + +from sqlalchemy import type_coerce, types +from sqlalchemy.dialects import mysql as mysql_dialect +from sqlalchemy.sql.elements import ClauseElement +from sqlalchemy.sql.selectable import Select +from sqlalchemy.sql.sqltypes import NullType logger = logging.getLogger(__name__) -def log_sqlalchemy_query(query): - # Call this function to debug the raw SQL query generated by SQLAlchemy - # TODO: verify theis import - from sqlalchemy.dialects import mysql - compiled_query = query.statement.compile(dialect=mysql.dialect(), compile_kwargs={"literal_binds": True}) - raw_sql = str(compiled_query).replace('\n', ' ') + ';' - logger.debug('='.join(['' for _ in range(80)])) - logger.debug(raw_sql) - logger.debug('='.join(['' for _ in range(80)])) - # Run in the storred_queries.execute file, in the execute function, right before the return statement, line 546 - # from specifyweb.specify.utils import log_sqlalchemy_query; log_sqlalchemy_query(query) \ No newline at end of file +def _coerce_statement(obj: Any) -> ClauseElement: + """ + Accepts an ORM Query (legacy), a Select, or any ClauseElement. + Returns a ClauseElement suitable for compilation. + """ + # Legacy ORM Query has .statement + stmt = getattr(obj, "statement", None) + if stmt is not None: + return stmt + if isinstance(obj, ClauseElement): + return obj + raise TypeError(f"Unsupported query/select type: {type(obj)!r}") + +def _debug_nulltype_columns(stmt: ClauseElement) -> None: + """ + Scan for expressions with NullType before compiling, to + help explain TypeError crashes when using literal_binds=True. + """ + try: + raw_cols = getattr(stmt, "_raw_columns", None) + if not raw_cols: + return + for i, expr in enumerate(raw_cols): + t = getattr(expr, "type", None) + if t is None or isinstance(t, NullType): + logger.debug("[SA-NullType] col #%s expr=%r type=%r", i, expr, t) + except Exception: + pass + +def log_sqlalchemy_query( + query_or_stmt: Any, + *, + literal_binds: bool = True, + dialect: Optional[Any] = None, + level: int = logging.DEBUG, +) -> Optional[str]: + """ + Log the SQL for a query/statement. + Run in the stored_queries.execute file, in the execute function, right before the return statement: + from specifyweb.specify.utils import log_sqlalchemy_query; log_sqlalchemy_query(query) + """ + if not logger.isEnabledFor(level): + return None # skip compiling and logging if we're not logging at this level + + dialect = dialect or mysql_dialect.dialect() + stmt = _coerce_statement(query_or_stmt) + + # help trace NullType roots issues + _debug_nulltype_columns(stmt) + + try: + compiled = stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": literal_binds}) + sql = str(compiled).replace("\n", " ") + ";" + sep = "=" * 80 + logger.log(level, "%s\n%s\n%s", sep, sql, sep) + return sql + + except TypeError as e: + try: + compiled_fb = stmt.compile(dialect=dialect) + sql_fb = str(compiled_fb) + logger.log( + level, + "[fallback no-literal-binds]\n%s\n[compile TypeError: %s]", + sql_fb, + e, + ) + return sql_fb + except Exception as e2: + logger.log(level, "[query logging failed after TypeError: %s] (fallback err: %s)", e, e2) + return None + + except Exception as e: + logger.log(level, "[query logging failed: %s]", e, exc_info=True) + return None + +def ensure_string_type_if_null(expr): + t = getattr(expr, "type", None) + return type_coerce(expr, types.String()) if t is None or isinstance(t, NullType) else expr \ No newline at end of file From cb1b9dd0013e89ff3e2eead12b1b9d56042e7d75 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 24 Oct 2025 15:41:37 -0500 Subject: [PATCH 05/38] apply changes from issue-7510 --- .../backend/stored_queries/execution.py | 22 ++++++++++++------- specifyweb/backend/stored_queries/format.py | 8 +++---- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py index 14121c0d676..4efea97fde6 100644 --- a/specifyweb/backend/stored_queries/execution.py +++ b/specifyweb/backend/stored_queries/execution.py @@ -51,6 +51,15 @@ class QuerySort: def by_id(sort_id: QUREYFIELD_SORT_T): return QuerySort.SORT_TYPES[sort_id] +def DefaultQueryFormatterProps(): + return ObjectFormatterProps( + format_agent_type=False, + format_picklist=False, + format_types=True, + numeric_catalog_number=True, + format_expr=True + ) + class BuildQueryProps(NamedTuple): recordsetid: int | None = None replace_nulls: bool = False @@ -58,13 +67,7 @@ class BuildQueryProps(NamedTuple): distinct: bool = False series: bool = False implicit_or: bool = True - formatter_props: ObjectFormatterProps = ObjectFormatterProps( - format_agent_type = False, - format_picklist = False, - format_types = True, - numeric_catalog_number = True, - format_expr = True, - ) + formatter_props: ObjectFormatterProps = DefaultQueryFormatterProps() def set_group_concat_max_len(connection): @@ -789,10 +792,13 @@ def execute( offset, recordsetid=None, formatauditobjs=False, - formatter_props=ObjectFormatterProps(), + formatter_props=None, ): "Build and execute a query, returning the results as a data structure for json serialization" + if formatter_props is None: + formatter_props = DefaultQueryFormatterProps() + set_group_concat_max_len(session.info["connection"]) query, order_by_exprs = build_query( session, diff --git a/specifyweb/backend/stored_queries/format.py b/specifyweb/backend/stored_queries/format.py index b671a3e3ef5..65fc4388c30 100644 --- a/specifyweb/backend/stored_queries/format.py +++ b/specifyweb/backend/stored_queries/format.py @@ -41,10 +41,10 @@ Spauditlog_model = datamodel.get_table('SpAuditLog') class ObjectFormatterProps(NamedTuple): - format_agent_type: bool = False, - format_picklist: bool = False, - format_types: bool = True, - numeric_catalog_number: bool = True, + format_agent_type: bool = False + format_picklist: bool = False + format_types: bool = True + numeric_catalog_number: bool = True # format_expr determines if make_expr should call _fieldformat, like in versions before 7.10.2. # Batch edit expects it to be false to correctly handle some edge cases. format_expr: bool = False From fb5d5e98b9553c8994de2b98464e9a54b6ddf6b6 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 3 Nov 2025 17:06:09 -0600 Subject: [PATCH 06/38] Apply new changes for new renumber_tree function --- specifyweb/backend/trees/extras.py | 81 +++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/specifyweb/backend/trees/extras.py b/specifyweb/backend/trees/extras.py index b37158d25b9..7e8e7857188 100644 --- a/specifyweb/backend/trees/extras.py +++ b/specifyweb/backend/trees/extras.py @@ -1,7 +1,6 @@ import re from contextlib import contextmanager import logging -from typing import List from specifyweb.backend.trees.ranks import RankOperation, post_tree_rank_save, pre_tree_rank_deletion, \ verify_rank_parent_chain_integrity, pre_tree_rank_init, post_tree_rank_deletion @@ -529,7 +528,7 @@ def fullname_expr(depth, reverse): def parent_joins(table, depth): return '\n'.join([ - "left join {table} t{1} on t{0}.parentid = t{1}.{table}id".format(j-1, j, table=table) + "LEFT JOIN {table} t{1} ON t{0}.parentid = t{1}.{table}id".format(j-1, j, table=table) for j in range(1, depth) ]) @@ -640,6 +639,7 @@ def print_paths(table, depth): print(sql) def renumber_tree(table): + # NOTE: This old implementation doesn't work in MariaDB 11.8 logger.info('renumbering tree') cursor = connection.cursor() @@ -742,3 +742,80 @@ def is_treedef(obj): return issubclass(obj.__class__, Tree) or bool( re.search(r"treedef'>$", str(obj.__class__), re.IGNORECASE) ) + +def renumber_tree(table: str) -> None: + cursor = connection.cursor() + logger.debug(f"[renumber_tree] running for {table}") + + # cursor.execute("SET SESSION sql_safe_updates = 0") # might be needed for MariaDB 11, uncomment if updates don't occur + + # Sync rankid + sql_sync = ( + f"UPDATE {table} t " + f"JOIN {table}treedefitem d ON t.{table}treedefitemid = d.{table}treedefitemid " + f"SET t.rankid = d.rankid" + ) + logger.debug(sql_sync) + cursor.execute(sql_sync) + + # Compute max logical depth to size the LEFT JOIN ladder + cursor.execute(f"SELECT COUNT(DISTINCT rankid) FROM {table}") + depth = cursor.fetchone()[0] or 1 + + def tree_parent_joins(tbl: str, d: int) -> str: # replace parent_joins if join errors occur + if d <= 1: + return "" # only t0 exists + return "\n".join( + f"LEFT JOIN {tbl} t{j} ON t{j-1}.parentid = t{j}.{tbl}id" + for j in range(1, d) + ) + + def tree_path_expr(tbl: str, d: int) -> str: # replace path_expr if ordering issues arise + # Highest ancestor first, leaf last; LPAD stabilizes lexical order + parts = ", ".join([f"LPAD(t{i}.{tbl}id, 12, '0')" for i in reversed(range(d))]) + return f"CONCAT_WS(',', {parts})" + + # Preorder numbering using ROW_NUMBER() + sql_preorder = ( + f"UPDATE {table} t\n" + f"JOIN (\n" + f" SELECT id, rn FROM (\n" + f" SELECT\n" + f" t0.{table}id AS id,\n" + f" ROW_NUMBER() OVER (ORDER BY {path_expr(table, depth)}, t0.{table}id) AS rn\n" + f" FROM {table} t0\n" + f"{parent_joins(table, depth)}\n" + f" ) ordered\n" + f") r ON r.id = t.{table}id\n" + f"SET t.nodenumber = r.rn,\n" + f" t.highestchildnodenumber = r.rn" + ) + logger.debug(sql_preorder) + cursor.execute(sql_preorder) + + # Compute highestchildnodenumber bottom-up + sql_ranks = f"SELECT DISTINCT rankid FROM {table} ORDER BY rankid DESC" + logger.debug(sql_ranks) + cursor.execute(sql_ranks) + ranks = [row[0] for row in cursor.fetchall()] + + for rank in ranks[1:]: + sql_hcnn = ( + f"UPDATE {table} t\n" + f"JOIN (\n" + f" SELECT parentid, MAX(highestchildnodenumber) AS hcnn\n" + f" FROM {table}\n" + f" WHERE rankid > %(rank)s\n" + f" GROUP BY parentid\n" + f") sub ON sub.parentid = t.{table}id\n" + f"SET t.highestchildnodenumber = sub.hcnn\n" + f"WHERE t.rankid = %(rank)s" + ) + logger.debug(sql_hcnn, {"rank": rank}) + cursor.execute(sql_hcnn, {"rank": rank}) + + # Clear task semaphores + from specifyweb.specify.models import datamodel, Sptasksemaphore + tree_model = datamodel.get_table(table) + tasknames = [name.format(tree_model.name) for name in ("UpdateNodes{}", "BadNodes{}")] + Sptasksemaphore.objects.filter(taskname__in=tasknames).update(islocked=False) From 9151d0a43f6dcac347c0e0f97320c37203a66f00 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 3 Nov 2025 17:06:59 -0600 Subject: [PATCH 07/38] Update docker-compose.yml for mariadb 11.8 --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 41c149c0f16..bc94997ef05 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,7 +15,7 @@ services: mariadb: container_name: mariadb - image: mariadb:10.11 + image: mariadb:11.8 volumes: - "database:/var/lib/mysql" # the data directory for mariadb - "./seed-database:/docker-entrypoint-initdb.d" From b77fb62038bab0220e4c13f2800bdad5541a6d4d Mon Sep 17 00:00:00 2001 From: alec_dev Date: Wed, 5 Nov 2025 11:43:54 -0600 Subject: [PATCH 08/38] Fix blank_nulls wrapping numeric CatalogNumber cast instead of raw column --- specifyweb/backend/stored_queries/format.py | 23 ++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/specifyweb/backend/stored_queries/format.py b/specifyweb/backend/stored_queries/format.py index 65fc4388c30..d9e3abc9381 100644 --- a/specifyweb/backend/stored_queries/format.py +++ b/specifyweb/backend/stored_queries/format.py @@ -200,15 +200,17 @@ def make_expr(self, formatter, aggregator, previous_tables) + raw_expr = new_expr else: new_query, table, model, specify_field = query.build_join( specify_model, orm_table, formatter_field_spec.join_path) new_expr = getattr(table, specify_field.name) - + raw_expr = new_expr + if self.format_expr: new_expr = self._fieldformat(formatter_field_spec.table, formatter_field_spec.get_field(), new_expr) - if 'trimzeros' in fieldNodeAttrib: + if 'trimzeros' in fieldNodeAttrib and fieldNodeAttrib['trimzeros'] == 'true': # new_expr = case( # [(new_expr.op('REGEXP')('^-?[0-9]+(\\.[0-9]+)?$'), cast(new_expr, types.Numeric(65)))], # else_=new_expr @@ -225,7 +227,22 @@ def make_expr(self, if 'sep' in fieldNodeAttrib: new_expr = concat(fieldNodeAttrib['sep'], new_expr) - return new_query, blank_nulls(new_expr) if do_blank_null else new_expr, formatter_field_spec + if do_blank_null: + sf = formatter_field_spec.get_field() + is_catalog_num = ( + sf is not None + and sf is CollectionObject_model.get_field('catalogNumber') + ) + if ( + is_catalog_num + and self.numeric_catalog_number + and all_numeric_catnum_formats(self.collection) + ): + return new_query, blank_nulls(raw_expr), formatter_field_spec + + return new_query, blank_nulls(new_expr), formatter_field_spec + + return new_query, new_expr, formatter_field_spec def objformat(self, query: QueryConstruct, orm_table: SQLTable, formatter_name, cycle_detector=[]) -> tuple[QueryConstruct, blank_nulls]: From dcecec6c3ada3c8bdb40705e08e82170cc3a5f71 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Wed, 5 Nov 2025 14:28:42 -0600 Subject: [PATCH 09/38] fix customized formatters --- specifyweb/backend/stored_queries/format.py | 50 ++++++++++++--------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/specifyweb/backend/stored_queries/format.py b/specifyweb/backend/stored_queries/format.py index d9e3abc9381..b3b42b8d831 100644 --- a/specifyweb/backend/stored_queries/format.py +++ b/specifyweb/backend/stored_queries/format.py @@ -207,25 +207,33 @@ def make_expr(self, new_expr = getattr(table, specify_field.name) raw_expr = new_expr + # Apply field-level formatting, which may include numeric cast if self.format_expr: - new_expr = self._fieldformat(formatter_field_spec.table, formatter_field_spec.get_field(), new_expr) - - if 'trimzeros' in fieldNodeAttrib and fieldNodeAttrib['trimzeros'] == 'true': - # new_expr = case( - # [(new_expr.op('REGEXP')('^-?[0-9]+(\\.[0-9]+)?$'), cast(new_expr, types.Numeric(65)))], - # else_=new_expr - # ) - numeric_str = cast(cast(new_expr, types.Numeric(65)), types.String()) - new_expr = case( - (new_expr.op('REGEXP')('^-?[0-9]+(\\.[0-9]+)?$'), numeric_str), - else_=cast(new_expr, types.String()), - ) - - if 'format' in fieldNodeAttrib: - new_expr = self.pseudo_sprintf(fieldNodeAttrib['format'], new_expr) - - if 'sep' in fieldNodeAttrib: - new_expr = concat(fieldNodeAttrib['sep'], new_expr) + new_expr = self._fieldformat( + formatter_field_spec.table, + formatter_field_spec.get_field(), + new_expr + ) + + # Helper function to apply only string-ish transforms with no numeric casts + def apply_stringish(expr): + e = expr + if fieldNodeAttrib.get('trimzeros') == 'true': + numeric_str = cast(cast(e, types.Numeric(65)), types.String()) + e = case( + (e.op('REGEXP')('^-?[0-9]+(\\.[0-9]+)?$'), numeric_str), + else_=cast(e, types.String()), + ) + fmt = fieldNodeAttrib.get('format') + if fmt is not None: + e = self.pseudo_sprintf(fmt, e) + sep = fieldNodeAttrib.get('sep') + if sep is not None: + e = concat(sep, e) + return e + + stringish_expr = apply_stringish(raw_expr) + formatted_expr = apply_stringish(new_expr) if do_blank_null: sf = formatter_field_spec.get_field() @@ -238,11 +246,11 @@ def make_expr(self, and self.numeric_catalog_number and all_numeric_catnum_formats(self.collection) ): - return new_query, blank_nulls(raw_expr), formatter_field_spec + return new_query, blank_nulls(stringish_expr), formatter_field_spec - return new_query, blank_nulls(new_expr), formatter_field_spec + return new_query, blank_nulls(formatted_expr), formatter_field_spec - return new_query, new_expr, formatter_field_spec + return new_query, formatted_expr, formatter_field_spec def objformat(self, query: QueryConstruct, orm_table: SQLTable, formatter_name, cycle_detector=[]) -> tuple[QueryConstruct, blank_nulls]: From 4b9f0c3cc18b2e533b1b5e67a4476fd25561b9f1 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 22 Jan 2026 10:41:51 -0600 Subject: [PATCH 10/38] fix: Use named locks when autonumbering See #6490, #5337 Replaces #5404 Fixes #4148, #7560 This branch is largely the application of #7455 on `v7.11.3` --- .../specify/models_utils/lock_tables.py | 119 ++++++++++++++++++ specifyweb/specify/utils/autonumbering.py | 41 +++++- 2 files changed, 157 insertions(+), 3 deletions(-) diff --git a/specifyweb/specify/models_utils/lock_tables.py b/specifyweb/specify/models_utils/lock_tables.py index e6c2e75fbce..d1cf202fd38 100644 --- a/specifyweb/specify/models_utils/lock_tables.py +++ b/specifyweb/specify/models_utils/lock_tables.py @@ -1,4 +1,5 @@ from django.db import connection +from django.conf import settings from contextlib import contextmanager import logging @@ -8,6 +9,7 @@ @contextmanager def lock_tables(*tables): cursor = connection.cursor() + # REFACTOR: Extract this functionality to a decorator or contextmanager if cursor.db.vendor != 'mysql': logger.warning("unable to lock tables") yield @@ -18,3 +20,120 @@ def lock_tables(*tables): yield finally: cursor.execute('unlock tables') + +@contextmanager +def named_lock(raw_lock_name: str, timeout: int = 5, retry_attempts: int = 0): + """ + Handles acquiring and finally releasing a named user advisory lock. + While the lock is held, no other connection can acquire the same named lock. + + Use this sparingly: these locks do not impose any behavior on the database + like normal locks--agents interacting with the database can opt to not + follow traditional application flow and circumnavigate application behavior + + Raises a TimeoutError if timeout seconds have elapsed without acquiring the + lock (another connection holds the lock), and a ConnectionError if the + database was otherwise unable to acquire the lock. + + Example: + ``` + try: + with named_lock('my_lock') as lock: + ... # do something + except TimeoutError: + ... # handle case when lock is held by other connection + ``` + + :param raw_lock_name: The name of lock to acquire + :type raw_lock_name: str + :param timeout: The time in seconds to wait for lock release if another + connection holds the lock + :type timeout: int + :return: yields True if the lock was obtained successfully and None + otherwise + :rtype: Generator[Literal[True] | None, Any, None] + """ + + # REFACTOR: Extract this functionality to a decorator or contextmanager + if connection.vendor != "mysql": + yield + return + + db_name = getattr(settings, "DATABASE_NAME") + lock_name = f"{db_name}_{raw_lock_name}" + + acquired = acquired_named_lock(lock_name, timeout) + + while retry_attempts > 0 and acquired != True: + acquired = acquired_named_lock(lock_name, timeout) + retry_attempts -= 1 + + if acquired == False: + raise TimeoutError(f"Unable to acquire named lock: '{lock_name}'. Held by other connection") + if acquired is None: + raise ConnectionError(f"Unable to acquire named lock: '{lock_name}'. The process might have run out of memory") + + try: + yield acquired + finally: + release_named_lock(lock_name) + +def acquired_named_lock(lock_name: str, timeout: int) -> bool | None: + """ + Attempts to acquire a named lock in the database. Will wait for timeout + seconds for the lock to be released if held by another connection. + + See https://mariadb.com/docs/server/reference/sql-functions/secondary-functions/miscellaneous-functions/get_lock + + :param lock_name: The name of the lock to acquire + :type lock_name: str + :param timeout: The time in seconds to wait for lock release if another + connection holds the lock + :type timeout: int + :return: returns True if the lock was obtained successfully, False if timeout + seconds have elapsed without acquiring the lock, and None otherwise + :rtype: bool | None + """ + with connection.cursor() as cur: + cur.execute("SELECT GET_LOCK(%s, %s)", [lock_name, timeout]) + acquired_row = cur.fetchone() + + if acquired_row is None: + return None + + acquired = acquired_row[0] + + if acquired == 1: + return True + elif acquired == 0: + return False + + return None + +def release_named_lock(lock_name: str) -> bool | None: + """ + Attempt to release one instance of a held named lock. Note that multiple + instances of the same lock can be held by a single connection, in which + case each instance of the lock needs to be released separately. + + See https://mariadb.com/docs/server/reference/sql-functions/secondary-functions/miscellaneous-functions/release_lock + + :param lock_name: The name of the lock to attempt to release + :type lock_name: str + :return: returns True if one instance of the lock was sucessfully released, False + if the lock is held by another connection, and None otherwise + :rtype: bool | None + """ + with connection.cursor() as cur: + cur.execute("SELECT RELEASE_LOCK(%s)", [lock_name]) + released_row = cur.fetchone() + + if released_row is None: + return None + + released = released_row[0] + if released == 1: + return True + elif released == 0: + return False + return None diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 3576910cdf0..524cd6369ae 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -2,11 +2,12 @@ Autonumbering logic """ +from contextlib import contextmanager from .uiformatters import UIFormatter, get_uiformatters -from ..models_utils.lock_tables import lock_tables +from ..models_utils.lock_tables import named_lock import logging -from typing import List, Tuple, Set +from typing import Generator, Literal, Any from collections.abc import Sequence from specifyweb.specify.utils.scoping import Scoping @@ -32,6 +33,39 @@ def autonumber_and_save(collection, user, obj) -> None: obj.save() +@contextmanager +def autonumbering_lock(table_name: str, timeout: int = 10) -> Generator[Literal[True] | None, Any, None]: + """ + A convienent wrapper for the named_lock generator that adds the 'autonumber' + prefix to the table_name string argument for the resulting lock name. + + Raises a TimeoutError if timeout seconds have elapsed without acquiring the + lock, and a ConnectionError if the database was otherwise unable to acquire + the lock. + + Example: + ``` + try: + with autonumbering_lock('Collectionobject') as lock: + ... # do something + except TimeoutError: + ... # handle case when lock is held by other connection for > timeout + ``` + + :param table_name: The name of the table that is being autonumbered + :type table_name: str + :param timeout: The time in seconds to wait for lock release if another + connection holds the lock + :type timeout: int + :return: yields True if the lock was obtained successfully and None + otherwise + :rtype: Generator[Literal[True] | None, Any, None] + """ + lock_name = f"autonumber_{table_name.lower()}" + with named_lock(lock_name, timeout) as lock: + yield lock + + def do_autonumbering(collection, obj, fields: list[tuple[UIFormatter, Sequence[str]]]) -> None: logger.debug("autonumbering %s fields: %s", obj, fields) @@ -43,13 +77,14 @@ def do_autonumbering(collection, obj, fields: list[tuple[UIFormatter, Sequence[s for formatter, vals in fields ] - with lock_tables(*get_tables_to_lock(collection, obj, [formatter.field_name for formatter, _ in fields])): + with autonumbering_lock(obj._meta.db_table): for apply_autonumbering_to in thunks: apply_autonumbering_to(obj) obj.save() +# REFACTOR: Remove this funtion as it is no longer used def get_tables_to_lock(collection, obj, field_names) -> set[str]: # TODO: Include the fix for https://github.com/specify/specify7/issues/4148 from specifyweb.backend.businessrules.models import UniquenessRule From 12eb46322b4106ee12bd54679b85c9fea898199f Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 27 Jan 2026 09:47:38 -0600 Subject: [PATCH 11/38] chore: simplify autonumber interface --- specifyweb/specify/utils/autonumbering.py | 12 ++-------- specifyweb/specify/utils/uiformatters.py | 29 ++++------------------- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 524cd6369ae..994187ab480 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -69,17 +69,9 @@ def autonumbering_lock(table_name: str, timeout: int = 10) -> Generator[Literal[ def do_autonumbering(collection, obj, fields: list[tuple[UIFormatter, Sequence[str]]]) -> None: logger.debug("autonumbering %s fields: %s", obj, fields) - # The autonumber action is prepared and thunked outside the locked table - # context since it looks at other tables and that is not allowed by mysql - # if those tables are not also locked. - thunks = [ - formatter.prepare_autonumber_thunk(collection, obj.__class__, vals) - for formatter, vals in fields - ] - with autonumbering_lock(obj._meta.db_table): - for apply_autonumbering_to in thunks: - apply_autonumbering_to(obj) + for formatter, vals in fields: + formatter.apply_autonumbering(collection, obj, vals) obj.save() diff --git a/specifyweb/specify/utils/uiformatters.py b/specifyweb/specify/utils/uiformatters.py index 1d8ede523ac..a633f8916e5 100644 --- a/specifyweb/specify/utils/uiformatters.py +++ b/specifyweb/specify/utils/uiformatters.py @@ -142,30 +142,11 @@ def _autonumber_queryset(self, collection, model, fieldname: str, with_year: lis objs = model.objects.filter(**{ fieldname + '__regex': self.autonumber_regexp(with_year) }) return group_filter(objs).order_by('-' + fieldname) - def prepare_autonumber_thunk(self, collection, model, vals: Sequence[str], year: int | None=None): - with_year = self.fillin_year(vals, year) - fieldname = self.field_name.lower() - - filtered_objs = self._autonumber_queryset(collection, model, fieldname, with_year) - # At this point the query for the autonumber is defined but not yet executed. - - # The actual lookup and setting of the autonumbering value - # is thunked so that it can be executed in context of locked tables. - def apply_autonumbering_to(obj): - try: - biggest = filtered_objs[0] # actual lookup occurs here - except IndexError: - filled_vals = self.fill_vals_no_prior(with_year) - else: - filled_vals = self.fill_vals_after(getattr(biggest, fieldname)) - - # And here the new value is assigned to the object. It is - # the callers responsibilty to save the object within the - # same locked tables context because there maybe multiple - # autonumber fields. - setattr(obj, self.field_name.lower(), ''.join(filled_vals)) - - return apply_autonumbering_to + def apply_autonumbering(self, collection, obj, vals): + field_name = self.field_name.lower() + with_year = self.fillin_year(vals) + field_value = self.autonumber_now(collection, obj.__class__, vals, with_year) + setattr(obj, field_name, field_value) def fill_vals_after(self, prior: str) -> list[str]: From dfa7d6ae0f559c1c65deb7c7ab63233d3428f0d8 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 27 Jan 2026 20:48:38 -0600 Subject: [PATCH 12/38] feat: backport Redis to v7.11.3 base Initially from de6d9ec5178eb2e2dd5dcb659cf9d6f0aa96a98e in #7399 --- .env | 7 +++++++ Dockerfile | 3 +++ requirements.txt | 3 ++- specifyweb/settings/specify_settings.py | 7 +++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.env b/.env index 89dea676cc5..c37cd50d416 100644 --- a/.env +++ b/.env @@ -19,6 +19,13 @@ ASSET_SERVER_URL=http://host.docker.internal/web_asset_store.xml # Make sure to set the `ASSET_SERVER_KEY` to a unique value ASSET_SERVER_KEY=your_asset_server_access_key +# Information to connect to a Redis database +# Specify will use this database as a process broker and storage for temporary +# values +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DB_INDEX=0 + REPORT_RUNNER_HOST=report-runner REPORT_RUNNER_PORT=8080 diff --git a/Dockerfile b/Dockerfile index f6fcdd985b6..3ab5748e357 100644 --- a/Dockerfile +++ b/Dockerfile @@ -163,6 +163,9 @@ RUN echo \ "\nDEPOSITORY_DIR = '/volumes/static-files/depository'" \ "\nREPORT_RUNNER_HOST = os.getenv('REPORT_RUNNER_HOST', '')" \ "\nREPORT_RUNNER_PORT = os.getenv('REPORT_RUNNER_PORT', '')" \ + "\nREDIS_HOST = os.getenv('REDIS_HOST', 'redis')" \ + "\nREDIS_PORT = os.getenv('REDIS_PORT', 6379)" \ + "\nREDIS_DB_INDEX = os.getenv('REDIS_DB_INDEX', 0)" \ "\nWEB_ATTACHMENT_URL = os.getenv('ASSET_SERVER_URL', None)" \ "\nWEB_ATTACHMENT_KEY = os.getenv('ASSET_SERVER_KEY', None)" \ "\nWEB_ATTACHMENT_COLLECTION = os.getenv('ASSET_SERVER_COLLECTION', None)" \ diff --git a/requirements.txt b/requirements.txt index dfd58894219..3ec4c05ab78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,8 @@ tzdata wheel # backports.zoneinfo==0.2.1 kombu==5.5.2 -celery[redis]==5.5.1 +redis==6.4.0 +celery==5.5.1 Django==4.2.22 mysqlclient==2.1.1 SQLAlchemy==1.4.54 diff --git a/specifyweb/settings/specify_settings.py b/specifyweb/settings/specify_settings.py index 62515074c25..a8e6a8884e9 100644 --- a/specifyweb/settings/specify_settings.py +++ b/specifyweb/settings/specify_settings.py @@ -82,6 +82,13 @@ REPORT_RUNNER_HOST = '' REPORT_RUNNER_PORT = '' +# Information to connect to a Redis database +# Specify will use this database as a process broker and storage for temporary +# values +REDIS_HOST="redis" +REDIS_PORT=6379 +REDIS_DB_INDEX=0 + # The message queue for the Specify 7 worker(s). # This should point to a Redis server for sending jobs # and retrieving results from the worker. From f294d5b00901c47ab8241bd79eb764feac8d9aec Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 27 Jan 2026 20:57:27 -0600 Subject: [PATCH 13/38] feat: add utilities for interacting with Redis Based largely off the initial implementation at 5ed701ab8c21afad9a25f0bbcc933846098324b0 from #7399 --- specifyweb/backend/redis_cache/__init__.py | 0 specifyweb/backend/redis_cache/connect.py | 78 ++++++++++++++++++++++ specifyweb/backend/redis_cache/rqueue.py | 25 +++++++ 3 files changed, 103 insertions(+) create mode 100644 specifyweb/backend/redis_cache/__init__.py create mode 100644 specifyweb/backend/redis_cache/connect.py create mode 100644 specifyweb/backend/redis_cache/rqueue.py diff --git a/specifyweb/backend/redis_cache/__init__.py b/specifyweb/backend/redis_cache/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/specifyweb/backend/redis_cache/connect.py b/specifyweb/backend/redis_cache/connect.py new file mode 100644 index 00000000000..b40df1698b4 --- /dev/null +++ b/specifyweb/backend/redis_cache/connect.py @@ -0,0 +1,78 @@ +from redis import Redis +from django.conf import settings + +class RedisConnection: + def __init__(self, + host=getattr(settings, "REDIS_HOST", None), + port=getattr(settings, "REDIS_PORT", None), + db_index=getattr(settings, "REDIS_DB_INDEX", 0), + decode_responses=True): + if None in (host, port, db_index): + raise ValueError( + "Redis is not correctly configured", host, port, db_index) + self.host = host + self.port = port + self.db_index = db_index + self.decode_responses = decode_responses + self.connection = Redis( + host=self.host, + port=self.port, + db=self.db_index, + decode_responses=self.decode_responses + ) + + +class RedisDataType(RedisConnection): + + @classmethod + def from_connection(cls, connection: RedisConnection): + return cls( + host=connection.host, + port=connection.port, + db_index=connection.db_index, + decode_responses=connection.decode_responses + ) + +class RedisList(RedisDataType): + """ + See https://redis.io/docs/latest/develop/data-types/lists/ + """ + + def lpush(self, key: str, value) -> int: + return self.connection.lpush(key, value) + + def rpush(self, key: str, value) -> int: + return self.connection.rpush(key, value) + + def rpop(self, key: str) -> str | bytes | None: + return self.connection.rpop(key) + + def lpop(self, key: str) -> str | bytes | None: + return self.connection.lpop(key) + + def llen(self, key: str) -> int: + return self.connection.llen(key) + + def lrange(self, key: str, start_index: int, end_index: int) -> list[str] | list[bytes]: + return self.connection.lrange(key, start_index, end_index) + + def ltrim(self, key: str, start_index: int, end_index: int) -> list[str] | list[bytes]: + return self.connection.ltrim(key, start_index, end_index) + + +class RedisString(RedisDataType): + """ + See https://redis.io/docs/latest/develop/data-types/strings/ + """ + + def set(self, key, value, time_to_live=None, override_existing=True): + flags = { + "ex": time_to_live, + "nx": not override_existing + } + self.connection.set(key, value, **flags) + + def get(self, key, delete_key=False) -> str | bytes | None: + if delete_key: + return self.connection.getdel(key) + return self.connection.get(key) diff --git a/specifyweb/backend/redis_cache/rqueue.py b/specifyweb/backend/redis_cache/rqueue.py new file mode 100644 index 00000000000..5d57f76da5f --- /dev/null +++ b/specifyweb/backend/redis_cache/rqueue.py @@ -0,0 +1,25 @@ +from typing import Callable, Generator, cast + +from specifyweb.backend.redis_cache.connect import RedisList + + +type Serializer[T] = Callable[[T], str] + +def default_serializer(obj) -> str: + return str(obj) + +class RQueue[T]: + def __init__(self, connection: RedisList, key: str, + serializer: Serializer[T] | None = None, key_prefix: str = "specify"): + self.connection = connection + self.key = key + self.serializer = serializer or cast(Serializer[T], default_serializer) + + def push(self, *objs: T) -> int: + return self.connection.rpush(self.key, *self._serialize_objs(*objs)) + + def pop(self) -> str | bytes | None: + return self.connection.lpop(self.key) + + def _serialize_objs(self, *objs: T) -> Generator[str, None, None]: + return (self.serializer(obj) for obj in objs) From 43035455bf4e79111f215ba0367fa7e929d72258 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 27 Jan 2026 21:02:47 -0600 Subject: [PATCH 14/38] feat: allow using a Manager to handle named locking --- specifyweb/backend/workbench/upload/parse.py | 13 +++- .../backend/workbench/upload/scope_context.py | 5 ++ .../backend/workbench/upload/scoping.py | 7 +- specifyweb/backend/workbench/upload/upload.py | 13 ++-- .../backend/workbench/upload/upload_table.py | 8 ++- .../backend/workbench/upload/uploadable.py | 7 +- .../specify/models_utils/lock_tables.py | 71 +++++++++++++++++++ specifyweb/specify/utils/autonumbering.py | 12 ++-- specifyweb/specify/utils/uiformatters.py | 5 +- 9 files changed, 122 insertions(+), 19 deletions(-) diff --git a/specifyweb/backend/workbench/upload/parse.py b/specifyweb/backend/workbench/upload/parse.py index 42a2d2e1931..d187f81e87d 100644 --- a/specifyweb/backend/workbench/upload/parse.py +++ b/specifyweb/backend/workbench/upload/parse.py @@ -44,7 +44,12 @@ class ParseSucess(NamedTuple): ParseResult = ParseSucess | ParseFailure -def parse_field(table_name: str, field_name: str, raw_value: str, formatter: ScopedFormatter | None = None) -> ParseResult: +def parse_field( + table_name: str, + field_name: str, + raw_value: str, + formatter: ScopedFormatter | None = None + ) -> ParseResult: table = datamodel.get_table_strict(table_name) field = table.get_field_strict(field_name) @@ -170,7 +175,11 @@ def parse_date(table: Table, field_name: str, dateformat: str, value: str) -> Pa return ParseFailure('badDateFormat', {'value': value, 'format': dateformat}) -def parse_formatted(uiformatter: ScopedFormatter, table: Table, field: Field | Relationship, value: str) -> ParseResult: +def parse_formatted( + uiformatter: ScopedFormatter, + table: Table, + field: Field | Relationship, + value: str) -> ParseResult: try: canonicalized = uiformatter(table, value) except FormatMismatch as e: diff --git a/specifyweb/backend/workbench/upload/scope_context.py b/specifyweb/backend/workbench/upload/scope_context.py index dec68e0668a..32458f2f3ad 100644 --- a/specifyweb/backend/workbench/upload/scope_context.py +++ b/specifyweb/backend/workbench/upload/scope_context.py @@ -2,6 +2,7 @@ from typing import Any, TypedDict from specifyweb.specify.utils.uiformatters import UIFormatter +from specifyweb.specify.models_utils.lock_tables import LockDispatcher # This stores some info we can reuse for caching. If this is empty, logic doesn't change, just it is slower. # IMPORTANT: In the current implementation, if there are no CRs or COTypes, it _automatically_ caches scoped upload plan @@ -22,6 +23,7 @@ class ScopeContext: def __init__(self): self.cache = {} + self.lock_dispatcher: LockDispatcher | None = None self.cache['cotypes'] = {} self.cache['date_format'] = None self.cache['fields'] = {} @@ -35,6 +37,9 @@ def set_is_variable(self): # for more info. We don't bother calling this function when we know we aren't variable so we # don't need any parameter to this function. self._is_variable = True + + def set_lock_dispatcher(self, dispatcher: LockDispatcher): + self.lock_dispatcher = dispatcher @property def is_variable(self): diff --git a/specifyweb/backend/workbench/upload/scoping.py b/specifyweb/backend/workbench/upload/scoping.py index 495971d3569..f6ed44b1a48 100644 --- a/specifyweb/backend/workbench/upload/scoping.py +++ b/specifyweb/backend/workbench/upload/scoping.py @@ -125,7 +125,7 @@ def extend_columnoptions( ui_formatter = get_or_defer_formatter(collection, tablename, fieldname, row, toOne, context) scoped_formatter = ( - None if ui_formatter is None else ui_formatter.apply_scope(collection) + None if ui_formatter is None else ui_formatter.apply_scope(collection, context.lock_dispatcher) ) if tablename.lower() == "collectionobjecttype" and fieldname.lower() == "name": @@ -255,7 +255,10 @@ def get_or_defer_formatter( def apply_scoping_to_uploadtable( - ut: UploadTable, collection, context: ScopeContext | None = None, row=None + ut: UploadTable, + collection, + context: ScopeContext | None = None, + row=None ) -> ScopedUploadTable: # IMPORTANT: # before this comment, collection is untrusted and unreliable diff --git a/specifyweb/backend/workbench/upload/upload.py b/specifyweb/backend/workbench/upload/upload.py index d74e11ed46c..71ae2b7f240 100644 --- a/specifyweb/backend/workbench/upload/upload.py +++ b/specifyweb/backend/workbench/upload/upload.py @@ -5,13 +5,8 @@ from contextlib import contextmanager from datetime import datetime, timezone from typing import ( - List, - Dict, - Union, Callable, - Optional, Sized, - Tuple, ) from collections.abc import Callable from collections.abc import Sized @@ -33,6 +28,7 @@ BatchEditPrefs, ) from specifyweb.backend.trees.views import ALL_TREES +from specifyweb.specify.utils.autonumbering import AutonumberingLockDispatcher from . import disambiguation from .upload_plan_schema import schema, parse_plan_with_basetable @@ -351,10 +347,15 @@ def do_upload( _cache = cache.copy() if cache is not None and allow_partial else cache da = disambiguations[i] if disambiguations else None batch_edit_pack = batch_edit_packs[i] if batch_edit_packs else None - with savepoint("row upload") if allow_partial else no_savepoint(): + with ( + savepoint("row upload") if allow_partial else no_savepoint() as _, + AutonumberingLockDispatcher() as autonum_dispatcher + ): # the fact that upload plan is cachable, is invariant across rows. # so, we just apply scoping once. Honestly, see if it causes enough overhead to even warrant caching + scope_context.set_lock_dispatcher(autonum_dispatcher) + if has_attachments(row): # If there's an attachments column, add attachments to upload plan attachments_valid, result = validate_attachment(row, upload_plan) # type: ignore diff --git a/specifyweb/backend/workbench/upload/upload_table.py b/specifyweb/backend/workbench/upload/upload_table.py index 4493015486a..0328ecd850b 100644 --- a/specifyweb/backend/workbench/upload/upload_table.py +++ b/specifyweb/backend/workbench/upload/upload_table.py @@ -1,6 +1,6 @@ from decimal import Decimal import logging -from typing import Any, NamedTuple, Literal, Union +from typing import Any, NamedTuple, Literal, Union, Callable from django.db import transaction, IntegrityError @@ -17,6 +17,7 @@ resolve_reference_attributes, safe_fetch, ) +from specifyweb.specify.models_utils.lock_tables import LockDispatcher from specifyweb.backend.workbench.upload.scope_context import ScopeContext from .column_options import ColumnOptions, ExtendedColumnOptions from .parsing import parse_many, ParseResult, WorkBenchParseFailure @@ -68,7 +69,10 @@ class UploadTable(NamedTuple): overrideScope: dict[Literal["collection"], int | None] | None = None def apply_scoping( - self, collection, context: ScopeContext | None = None, row=None + self, + collection, + context: ScopeContext | None = None, + row=None ) -> "ScopedUploadTable": from .scoping import apply_scoping_to_uploadtable diff --git a/specifyweb/backend/workbench/upload/uploadable.py b/specifyweb/backend/workbench/upload/uploadable.py index d1526713a4e..1fc5d93488c 100644 --- a/specifyweb/backend/workbench/upload/uploadable.py +++ b/specifyweb/backend/workbench/upload/uploadable.py @@ -1,4 +1,4 @@ -from typing import Any, TypedDict, Optional, Union +from typing import Any, TypedDict, Optional, Union, Literal from collections.abc import Callable from typing_extensions import Protocol @@ -44,7 +44,10 @@ class Uploadable(Protocol): # we cannot cache. well, we can make this more complicated by recursviely caching # static parts of even a non-entirely-cachable uploadable. def apply_scoping( - self, collection, context: ScopeContext | None = None, row=None + self, + collection, + context: ScopeContext | None = None, + row=None ) -> "ScopedUploadable": ... def get_cols(self) -> set[str]: ... diff --git a/specifyweb/specify/models_utils/lock_tables.py b/specifyweb/specify/models_utils/lock_tables.py index d1cf202fd38..4255471596a 100644 --- a/specifyweb/specify/models_utils/lock_tables.py +++ b/specifyweb/specify/models_utils/lock_tables.py @@ -1,9 +1,11 @@ from django.db import connection from django.conf import settings from contextlib import contextmanager +from typing import Iterable import logging logger = logging.getLogger(__name__) +LOCK_NAME_SEPARATOR = "_" @contextmanager @@ -137,3 +139,72 @@ def release_named_lock(lock_name: str) -> bool | None: elif released == 0: return False return None + +class Lock: + def __init__(self, name: str, timeout: int): + self.name = name + self.timeout = timeout + + def acquire(self): + acquired = acquired_named_lock(self.name, self.timeout) + + if acquired == False: + raise TimeoutError(f"Unable to acquire named lock: '{self.name}'. Held by other connection") + if acquired is None: + raise ConnectionError(f"Unable to acquire named lock: '{self.name}'. The process might have run out of memory") + + return acquired + + def release(self): + released = release_named_lock(self.name) + return released + +class LockDispatcher: + def __init__(self, lock_prefix: str | None = None, case_sensitive_names = False): + db_name = getattr(settings, "DATABASE_NAME") + self.lock_prefix_parts: list[str] = [db_name] + + if lock_prefix is not None: + self.lock_prefix_parts.append(lock_prefix) + + self.case_sensitive_names = case_sensitive_names + self.locks: list[Lock] = [] + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.release_all() + + def lock_name(self, name: str): + final_name = LOCK_NAME_SEPARATOR.join((*self.lock_prefix_parts, name.lower())) + + return final_name.lower() if self.case_sensitive_names else final_name + + @contextmanager + def lock_and_release(self, name: str, timeout: int = 5): + try: + yield self.acquire(name, timeout) + finally: + self.release(name) + + def acquire(self, name: str, timeout: int = 5): + lock_name = self.lock_name(name) + lock = Lock(lock_name, timeout) + self.locks.append(lock) + return lock.acquire() + + def release_all(self): + for lock in self.locks: + lock.release() + self.locks = [] + + def release(self, name: str): + lock_name = self.lock_name(name) + remaining: list[Lock] = [] + for lock in self.locks: + if lock.name == lock_name: + lock.release() + else: + remaining.append(lock) + self.locks = remaining diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 994187ab480..866f1c28a94 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -5,7 +5,7 @@ from contextlib import contextmanager from .uiformatters import UIFormatter, get_uiformatters -from ..models_utils.lock_tables import named_lock +from ..models_utils.lock_tables import LockDispatcher import logging from typing import Generator, Literal, Any from collections.abc import Sequence @@ -32,6 +32,10 @@ def autonumber_and_save(collection, user, obj) -> None: logger.debug("no fields to autonumber for %s", obj) obj.save() +class AutonumberingLockDispatcher(LockDispatcher): + def __init__(self): + lock_prefix = "autonumbering" + super().__init__(lock_prefix=lock_prefix, case_sensitive_names=False) @contextmanager def autonumbering_lock(table_name: str, timeout: int = 10) -> Generator[Literal[True] | None, Any, None]: @@ -61,9 +65,9 @@ def autonumbering_lock(table_name: str, timeout: int = 10) -> Generator[Literal[ otherwise :rtype: Generator[Literal[True] | None, Any, None] """ - lock_name = f"autonumber_{table_name.lower()}" - with named_lock(lock_name, timeout) as lock: - yield lock + with AutonumberingLockDispatcher() as locks: + locks.acquire(name=table_name.lower(), timeout=timeout) + yield def do_autonumbering(collection, obj, fields: list[tuple[UIFormatter, Sequence[str]]]) -> None: diff --git a/specifyweb/specify/utils/uiformatters.py b/specifyweb/specify/utils/uiformatters.py index a633f8916e5..7a36b8b2162 100644 --- a/specifyweb/specify/utils/uiformatters.py +++ b/specifyweb/specify/utils/uiformatters.py @@ -18,6 +18,7 @@ logger = logging.getLogger(__name__) from specifyweb.backend.context.app_resource import get_app_resource +from specifyweb.specify.models_utils.lock_tables import LockDispatcher from specifyweb.specify.datamodel import Table from specifyweb.specify import models @@ -173,10 +174,12 @@ def fill_vals_no_prior(self, vals: Sequence[str]) -> list[str]: def canonicalize(self, values: Sequence[str]) -> str: return ''.join([field.canonicalize(value) for field, value in zip(self.fields, values)]) - def apply_scope(self, collection): + def apply_scope(self, collection, lock_dispatcher: LockDispatcher | None) -> ScopedFormatter: def parser(table: Table, value: str) -> str: parsed = self.parse(value) if self.needs_autonumber(parsed): + if lock_dispatcher is not None: + lock_dispatcher.acquire(table.name, timeout=10) canonicalized = self.autonumber_now( collection, getattr(models, table.django_name), From 94fd6571f5315c238588197857b7c4e7fd0ee5f7 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 27 Jan 2026 21:12:31 -0600 Subject: [PATCH 15/38] fix: better handle subsequent requests to get same lock --- .../specify/models_utils/lock_tables.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/specifyweb/specify/models_utils/lock_tables.py b/specifyweb/specify/models_utils/lock_tables.py index 4255471596a..35cda94e9ef 100644 --- a/specifyweb/specify/models_utils/lock_tables.py +++ b/specifyweb/specify/models_utils/lock_tables.py @@ -168,7 +168,7 @@ def __init__(self, lock_prefix: str | None = None, case_sensitive_names = False) self.lock_prefix_parts.append(lock_prefix) self.case_sensitive_names = case_sensitive_names - self.locks: list[Lock] = [] + self.locks: dict[str, Lock] = dict() def __enter__(self): return self @@ -190,21 +190,20 @@ def lock_and_release(self, name: str, timeout: int = 5): def acquire(self, name: str, timeout: int = 5): lock_name = self.lock_name(name) + if self.locks.get(lock_name) is not None: + return lock = Lock(lock_name, timeout) - self.locks.append(lock) + self.locks[lock_name] = lock return lock.acquire() def release_all(self): - for lock in self.locks: + for lock in self.locks.values(): lock.release() - self.locks = [] + self.locks = dict() def release(self, name: str): lock_name = self.lock_name(name) - remaining: list[Lock] = [] - for lock in self.locks: - if lock.name == lock_name: - lock.release() - else: - remaining.append(lock) - self.locks = remaining + lock = self.locks.pop(lock_name, None) + if lock is None: + return + lock.release() From 0dd769a44181a75ded5820b7e9036bfe8b81556c Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 27 Jan 2026 21:28:58 -0600 Subject: [PATCH 16/38] fix: resolve erronous fillin_year in apply_autonumbering --- specifyweb/specify/utils/uiformatters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/specifyweb/specify/utils/uiformatters.py b/specifyweb/specify/utils/uiformatters.py index 7a36b8b2162..4b84819a807 100644 --- a/specifyweb/specify/utils/uiformatters.py +++ b/specifyweb/specify/utils/uiformatters.py @@ -145,8 +145,7 @@ def _autonumber_queryset(self, collection, model, fieldname: str, with_year: lis def apply_autonumbering(self, collection, obj, vals): field_name = self.field_name.lower() - with_year = self.fillin_year(vals) - field_value = self.autonumber_now(collection, obj.__class__, vals, with_year) + field_value = self.autonumber_now(collection, obj.__class__, vals) setattr(obj, field_name, field_value) def fill_vals_after(self, prior: str) -> list[str]: From 6fad8f9a677d6e829899a7ef35a073e52ce3e135 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 27 Jan 2026 21:32:15 -0600 Subject: [PATCH 17/38] refactor: specify type in apply_autonumbering function --- specifyweb/specify/utils/uiformatters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specifyweb/specify/utils/uiformatters.py b/specifyweb/specify/utils/uiformatters.py index 4b84819a807..7b298d2061d 100644 --- a/specifyweb/specify/utils/uiformatters.py +++ b/specifyweb/specify/utils/uiformatters.py @@ -143,7 +143,7 @@ def _autonumber_queryset(self, collection, model, fieldname: str, with_year: lis objs = model.objects.filter(**{ fieldname + '__regex': self.autonumber_regexp(with_year) }) return group_filter(objs).order_by('-' + fieldname) - def apply_autonumbering(self, collection, obj, vals): + def apply_autonumbering(self, collection, obj, vals: Sequence[str]): field_name = self.field_name.lower() field_value = self.autonumber_now(collection, obj.__class__, vals) setattr(obj, field_name, field_value) From 4de5c06859fa7117ecbbeea793ebd059c2497bd4 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Wed, 28 Jan 2026 19:16:16 -0600 Subject: [PATCH 18/38] feat: integrate autonumbering locks with Workbench Addresses #5337 Fixes #4148, #7560 --- specifyweb/backend/redis_cache/connect.py | 70 +++++++-- specifyweb/backend/redis_cache/rqueue.py | 60 +++++-- .../backend/workbench/upload/scope_context.py | 5 - .../backend/workbench/upload/scoping.py | 31 +++- .../backend/workbench/upload/treerecord.py | 19 ++- specifyweb/backend/workbench/upload/upload.py | 10 +- .../backend/workbench/upload/upload_table.py | 22 ++- .../backend/workbench/upload/uploadable.py | 4 +- .../specify/models_utils/lock_tables.py | 91 ++++++++--- specifyweb/specify/utils/autonumbering.py | 146 ++++++++++++++---- specifyweb/specify/utils/uiformatters.py | 12 +- 11 files changed, 353 insertions(+), 117 deletions(-) diff --git a/specifyweb/backend/redis_cache/connect.py b/specifyweb/backend/redis_cache/connect.py index b40df1698b4..571f47a2b3f 100644 --- a/specifyweb/backend/redis_cache/connect.py +++ b/specifyweb/backend/redis_cache/connect.py @@ -21,44 +21,82 @@ def __init__(self, decode_responses=self.decode_responses ) + def delete(self, key: str): + return self.connection.delete(key) -class RedisDataType(RedisConnection): - @classmethod - def from_connection(cls, connection: RedisConnection): - return cls( - host=connection.host, - port=connection.port, - db_index=connection.db_index, - decode_responses=connection.decode_responses - ) +class RedisDataType: + def __init__(self, established: RedisConnection) -> None: + self._established = established + + @property + def connection(self): + return self._established.connection + + def delete(self, key: str): + return self._established.delete(key) class RedisList(RedisDataType): """ See https://redis.io/docs/latest/develop/data-types/lists/ """ - def lpush(self, key: str, value) -> int: + def left_push(self, key: str, value) -> int: return self.connection.lpush(key, value) - def rpush(self, key: str, value) -> int: + def right_push(self, key: str, value) -> int: return self.connection.rpush(key, value) - def rpop(self, key: str) -> str | bytes | None: + def right_pop(self, key: str) -> str | bytes | None: return self.connection.rpop(key) - def lpop(self, key: str) -> str | bytes | None: + def left_pop(self, key: str) -> str | bytes | None: return self.connection.lpop(key) - def llen(self, key: str) -> int: + def length(self, key: str) -> int: return self.connection.llen(key) - def lrange(self, key: str, start_index: int, end_index: int) -> list[str] | list[bytes]: + def range(self, key: str, start_index: int, end_index: int) -> list[str] | list[bytes]: return self.connection.lrange(key, start_index, end_index) - def ltrim(self, key: str, start_index: int, end_index: int) -> list[str] | list[bytes]: + def trim(self, key: str, start_index: int, end_index: int) -> list[str] | list[bytes]: return self.connection.ltrim(key, start_index, end_index) + + def blocking_left_pop(self, key: str, timeout: int) -> str | bytes | None: + response = self.connection.blpop(key, timeout=timeout) + if response is None: + return None + _filled_list_key, item = response + return item + +class RedisSet(RedisDataType): + """ + See https://redis.io/docs/latest/develop/data-types/sets/ + """ + def add(self, key: str, *values: str) -> int: + return self.connection.sadd(key, *values) + + def is_member(self, key: str, value: str) -> bool: + is_member = int(self.connection.sismember(key, value)) + return is_member == 1 + + def remove(self, key: str, value: str): + return self.connection.srem(key, value) + + def size(self, key: str) -> int: + return self.connection.scard(key) + + def members(self, key: str) -> set[str]: + return self.connection.smembers(key) + + def union(self, *keys: str) -> set[str]: + return self.connection.sunion(*keys) + + def intersection(self, *keys: str) -> set[str]: + return self.connection.sinter(*keys) + def difference(self, *keys: str) -> set[str]: + return self.connection.sdiff(*keys) class RedisString(RedisDataType): """ diff --git a/specifyweb/backend/redis_cache/rqueue.py b/specifyweb/backend/redis_cache/rqueue.py index 5d57f76da5f..2e1b1d38206 100644 --- a/specifyweb/backend/redis_cache/rqueue.py +++ b/specifyweb/backend/redis_cache/rqueue.py @@ -1,25 +1,61 @@ -from typing import Callable, Generator, cast +import json -from specifyweb.backend.redis_cache.connect import RedisList +from typing import Callable, Generator, Iterable, cast +from specifyweb.backend.redis_cache.connect import RedisConnection, RedisList, RedisSet + +type Serialized = str | bytes | bytearray type Serializer[T] = Callable[[T], str] +type Deserializer[T] = Callable[[Serialized], T] def default_serializer(obj) -> str: return str(obj) -class RQueue[T]: - def __init__(self, connection: RedisList, key: str, - serializer: Serializer[T] | None = None, key_prefix: str = "specify"): - self.connection = connection +def default_deserializer(serialized: Serialized): + return serialized + +class RedisQueue[T]: + def __init__(self, connection: RedisConnection, key: str, + serializer: Serializer[T] | None = None, + deserializer: Deserializer[T] | None = None): + self.connection = RedisList(connection) self.key = key self.serializer = serializer or cast(Serializer[T], default_serializer) - - def push(self, *objs: T) -> int: - return self.connection.rpush(self.key, *self._serialize_objs(*objs)) - - def pop(self) -> str | bytes | None: - return self.connection.lpop(self.key) + self.deserializer = deserializer or cast(Deserializer[T], default_deserializer) + + def key_name(self, *name_parts: str | None): + key_name = "_".join([self.key, *(part for part in name_parts if part is not None)]) + return key_name + + def push(self, *objs: T, sub_key: str | None = None) -> int: + key_name = self.key_name(sub_key) + return self.connection.right_push(key_name, *self._serialize_objs(*objs)) + + def pop(self, sub_key: str | None = None) -> T | None: + key_name = self.key_name(sub_key) + popped = self.connection.left_pop(key_name) + if popped is None: + return None + return self.deserializer(popped) + + def wait_and_pop(self, timeout: int = 0, sub_key: str | None = None) -> T: + key_name = self.key_name(sub_key) + popped = self.connection.blocking_left_pop(key_name, timeout) + if popped is None: + raise TimeoutError("No items in queue after timeout") + return self.deserializer(popped) + + def peek(self, sub_key: str | None = None) -> T | None: + key_name = self.key_name(sub_key) + top_value = self._deserialize_objs(*self.connection.range(key_name, 0, 0)) + if len(top_value) == 0: + return None + return top_value[0] def _serialize_objs(self, *objs: T) -> Generator[str, None, None]: return (self.serializer(obj) for obj in objs) + + def _deserialize_objs(self, *serialized: Serialized): + return tuple(self.deserializer(obj) for obj in serialized) + diff --git a/specifyweb/backend/workbench/upload/scope_context.py b/specifyweb/backend/workbench/upload/scope_context.py index 32458f2f3ad..dec68e0668a 100644 --- a/specifyweb/backend/workbench/upload/scope_context.py +++ b/specifyweb/backend/workbench/upload/scope_context.py @@ -2,7 +2,6 @@ from typing import Any, TypedDict from specifyweb.specify.utils.uiformatters import UIFormatter -from specifyweb.specify.models_utils.lock_tables import LockDispatcher # This stores some info we can reuse for caching. If this is empty, logic doesn't change, just it is slower. # IMPORTANT: In the current implementation, if there are no CRs or COTypes, it _automatically_ caches scoped upload plan @@ -23,7 +22,6 @@ class ScopeContext: def __init__(self): self.cache = {} - self.lock_dispatcher: LockDispatcher | None = None self.cache['cotypes'] = {} self.cache['date_format'] = None self.cache['fields'] = {} @@ -37,9 +35,6 @@ def set_is_variable(self): # for more info. We don't bother calling this function when we know we aren't variable so we # don't need any parameter to this function. self._is_variable = True - - def set_lock_dispatcher(self, dispatcher: LockDispatcher): - self.lock_dispatcher = dispatcher @property def is_variable(self): diff --git a/specifyweb/backend/workbench/upload/scoping.py b/specifyweb/backend/workbench/upload/scoping.py index f6ed44b1a48..5f21464dde0 100644 --- a/specifyweb/backend/workbench/upload/scoping.py +++ b/specifyweb/backend/workbench/upload/scoping.py @@ -1,11 +1,12 @@ from functools import reduce -from typing import Any, cast +from typing import Any, Callable, cast from collections.abc import Callable from specifyweb.specify.datamodel import datamodel, Table, is_tree_table from specifyweb.specify.utils.func import CustomRepr +from specifyweb.specify.utils.autonumbering import AutonumberingLockDispatcher from specifyweb.specify.models_utils.load_datamodel import DoesNotExistError from specifyweb.specify import models from specifyweb.backend.trees.utils import get_default_treedef @@ -113,7 +114,8 @@ def extend_columnoptions( fieldname: str, row: Row | None = None, toOne: dict[str, Uploadable] | None = None, - context: ScopeContext | None = None + context: ScopeContext | None = None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None ) -> ExtendedColumnOptions: context = context or ScopeContext() @@ -125,7 +127,7 @@ def extend_columnoptions( ui_formatter = get_or_defer_formatter(collection, tablename, fieldname, row, toOne, context) scoped_formatter = ( - None if ui_formatter is None else ui_formatter.apply_scope(collection, context.lock_dispatcher) + None if ui_formatter is None else ui_formatter.apply_scope(collection, lock_dispatcher) ) if tablename.lower() == "collectionobjecttype" and fieldname.lower() == "name": @@ -258,7 +260,8 @@ def apply_scoping_to_uploadtable( ut: UploadTable, collection, context: ScopeContext | None = None, - row=None + row=None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None ) -> ScopedUploadTable: # IMPORTANT: # before this comment, collection is untrusted and unreliable @@ -301,7 +304,14 @@ def _backref(key): scoped_table = ScopedUploadTable( name=ut.name, wbcols={ - f: extend_columnoptions(colopts, collection, table.name, f, row, ut.toOne, context) + f: extend_columnoptions(colopts, + collection, + table.name, + f, + row, + ut.toOne, + context, + lock_dispatcher=lock_dispatcher) for f, colopts in ut.wbcols.items() }, static=ut.static, @@ -349,7 +359,10 @@ def set_order_number( return tmr._replace(strong_ignore=[*tmr.strong_ignore, *to_ignore]) -def apply_scoping_to_treerecord(tr: TreeRecord, collection, context: ScopeContext | None = None) -> ScopedTreeRecord: +def apply_scoping_to_treerecord(tr: TreeRecord, + collection, + context: ScopeContext | None = None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None) -> ScopedTreeRecord: table = datamodel.get_table_strict(tr.name) treedef = get_default_treedef(table, collection) @@ -378,7 +391,11 @@ def apply_scoping_to_treerecord(tr: TreeRecord, collection, context: ScopeContex else r._replace(treedef_id=treedef.id) if r.treedef_id is None # Adjust treeid for parsed JSON plans else r ): { - f: extend_columnoptions(colopts, collection, table.name, f) + f: extend_columnoptions(colopts, + collection, + table.name, + f, + lock_dispatcher=lock_dispatcher) for f, colopts in cols.items() } for r, cols in tr.ranks.items() diff --git a/specifyweb/backend/workbench/upload/treerecord.py b/specifyweb/backend/workbench/upload/treerecord.py index ffd2c250f8c..f12a74405bf 100644 --- a/specifyweb/backend/workbench/upload/treerecord.py +++ b/specifyweb/backend/workbench/upload/treerecord.py @@ -3,13 +3,14 @@ """ import logging -from typing import Any, NamedTuple, Union, Optional +from typing import Any, NamedTuple, Union, Optional, Callable from django.db import transaction, IntegrityError from typing_extensions import TypedDict from specifyweb.backend.businessrules.exceptions import BusinessRuleException from specifyweb.specify import models +from specifyweb.specify.utils.autonumbering import AutonumberingLockDispatcher from specifyweb.backend.workbench.upload.clone import clone_record from specifyweb.backend.workbench.upload.predicates import ( SPECIAL_TREE_FIELDS_TO_SKIP, @@ -149,11 +150,15 @@ class TreeRecord(NamedTuple): ranks: dict[str | TreeRankRecord, dict[str, ColumnOptions]] def apply_scoping( - self, collection, context: ScopeContext | None = None, row=None + self, + collection, + context: ScopeContext | None = None, + row=None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None ) -> "ScopedTreeRecord": from .scoping import apply_scoping_to_treerecord as apply_scoping - return apply_scoping(self, collection, context) + return apply_scoping(self, collection, context, lock_dispatcher=lock_dispatcher) def get_cols(self) -> set[str]: return { @@ -464,9 +469,13 @@ def bind( class MustMatchTreeRecord(TreeRecord): def apply_scoping( - self, collection, context: ScopeContext | None = None, row=None + self, + collection, + context: ScopeContext | None = None, + row=None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None ) -> "ScopedMustMatchTreeRecord": - s = super().apply_scoping(collection, context, row) + s = super().apply_scoping(collection, context, row, lock_dispatcher=lock_dispatcher) return ScopedMustMatchTreeRecord(*s) diff --git a/specifyweb/backend/workbench/upload/upload.py b/specifyweb/backend/workbench/upload/upload.py index 71ae2b7f240..a0d0f59e071 100644 --- a/specifyweb/backend/workbench/upload/upload.py +++ b/specifyweb/backend/workbench/upload/upload.py @@ -351,11 +351,11 @@ def do_upload( savepoint("row upload") if allow_partial else no_savepoint() as _, AutonumberingLockDispatcher() as autonum_dispatcher ): + get_lock_dispatcher = lambda: autonum_dispatcher + # the fact that upload plan is cachable, is invariant across rows. # so, we just apply scoping once. Honestly, see if it causes enough overhead to even warrant caching - scope_context.set_lock_dispatcher(autonum_dispatcher) - if has_attachments(row): # If there's an attachments column, add attachments to upload plan attachments_valid, result = validate_attachment(row, upload_plan) # type: ignore @@ -364,9 +364,9 @@ def do_upload( cache = _cache raise Rollback("failed row") row, row_upload_plan = add_attachments_to_plan(row, upload_plan) # type: ignore - scoped_table = row_upload_plan.apply_scoping(collection, scope_context, row) + scoped_table = row_upload_plan.apply_scoping(collection, scope_context, row, lock_dispatcher=get_lock_dispatcher) elif cached_scope_table is None: - scoped_table = upload_plan.apply_scoping(collection, scope_context, row) + scoped_table = upload_plan.apply_scoping(collection, scope_context, row, lock_dispatcher=get_lock_dispatcher) if not scope_context.is_variable: # This forces every row to rescope when not variable cached_scope_table = scoped_table @@ -400,6 +400,8 @@ def do_upload( if result.contains_failure(): cache = _cache raise Rollback("failed row") + + autonum_dispatcher.commit_highest() toc = time.perf_counter() logger.info(f"finished upload of {len(results)} rows in {toc-tic}s") diff --git a/specifyweb/backend/workbench/upload/upload_table.py b/specifyweb/backend/workbench/upload/upload_table.py index 0328ecd850b..00709cb36ef 100644 --- a/specifyweb/backend/workbench/upload/upload_table.py +++ b/specifyweb/backend/workbench/upload/upload_table.py @@ -8,6 +8,7 @@ from specifyweb.specify import models from specifyweb.specify.utils.func import Func from specifyweb.specify.utils.field_change_info import FieldChangeInfo +from specifyweb.specify.utils.autonumbering import AutonumberingLockDispatcher from specifyweb.backend.workbench.upload.clone import clone_record from specifyweb.backend.workbench.upload.predicates import ( ContetRef, @@ -72,11 +73,12 @@ def apply_scoping( self, collection, context: ScopeContext | None = None, - row=None + row=None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None ) -> "ScopedUploadTable": from .scoping import apply_scoping_to_uploadtable - return apply_scoping_to_uploadtable(self, collection, context, row) + return apply_scoping_to_uploadtable(self, collection, context, row, lock_dispatcher=lock_dispatcher) def get_cols(self) -> set[str]: return ( @@ -274,9 +276,13 @@ def bind( class OneToOneTable(UploadTable): def apply_scoping( - self, collection, context: ScopeContext | None = None, row=None + self, + collection, + context: ScopeContext | None = None, + row=None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None ) -> "ScopedOneToOneTable": - s = super().apply_scoping(collection, context, row) + s = super().apply_scoping(collection, context, row, lock_dispatcher=lock_dispatcher) return ScopedOneToOneTable(*s) def to_json(self) -> dict: @@ -297,9 +303,13 @@ def bind( class MustMatchTable(UploadTable): def apply_scoping( - self, collection, context: ScopeContext | None = None, row=None + self, + collection, + context: ScopeContext | None = None, + row=None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None ) -> "ScopedMustMatchTable": - s = super().apply_scoping(collection, context, row) + s = super().apply_scoping(collection, context, row, lock_dispatcher=lock_dispatcher) return ScopedMustMatchTable(*s) def to_json(self) -> dict: diff --git a/specifyweb/backend/workbench/upload/uploadable.py b/specifyweb/backend/workbench/upload/uploadable.py index 1fc5d93488c..d3444dd08e2 100644 --- a/specifyweb/backend/workbench/upload/uploadable.py +++ b/specifyweb/backend/workbench/upload/uploadable.py @@ -3,6 +3,7 @@ from typing_extensions import Protocol from specifyweb.backend.workbench.upload.predicates import DjangoPredicates, ToRemove +from specifyweb.specify.utils.autonumbering import AutonumberingLockDispatcher from specifyweb.backend.workbench.upload.scope_context import ScopeContext @@ -47,7 +48,8 @@ def apply_scoping( self, collection, context: ScopeContext | None = None, - row=None + row=None, + lock_dispatcher: Callable[[], AutonumberingLockDispatcher] | None = None ) -> "ScopedUploadable": ... def get_cols(self) -> set[str]: ... diff --git a/specifyweb/specify/models_utils/lock_tables.py b/specifyweb/specify/models_utils/lock_tables.py index 35cda94e9ef..2b3c3289856 100644 --- a/specifyweb/specify/models_utils/lock_tables.py +++ b/specifyweb/specify/models_utils/lock_tables.py @@ -3,6 +3,7 @@ from contextlib import contextmanager from typing import Iterable import logging +import json logger = logging.getLogger(__name__) LOCK_NAME_SEPARATOR = "_" @@ -23,6 +24,7 @@ def lock_tables(*tables): finally: cursor.execute('unlock tables') + @contextmanager def named_lock(raw_lock_name: str, timeout: int = 5, retry_attempts: int = 0): """ @@ -71,22 +73,25 @@ def named_lock(raw_lock_name: str, timeout: int = 5, retry_attempts: int = 0): retry_attempts -= 1 if acquired == False: - raise TimeoutError(f"Unable to acquire named lock: '{lock_name}'. Held by other connection") + raise TimeoutError( + f"Unable to acquire named lock: '{lock_name}'. Held by other connection") if acquired is None: - raise ConnectionError(f"Unable to acquire named lock: '{lock_name}'. The process might have run out of memory") + raise ConnectionError( + f"Unable to acquire named lock: '{lock_name}'. The process might have run out of memory") try: yield acquired finally: release_named_lock(lock_name) + def acquired_named_lock(lock_name: str, timeout: int) -> bool | None: """ Attempts to acquire a named lock in the database. Will wait for timeout seconds for the lock to be released if held by another connection. - + See https://mariadb.com/docs/server/reference/sql-functions/secondary-functions/miscellaneous-functions/get_lock - + :param lock_name: The name of the lock to acquire :type lock_name: str :param timeout: The time in seconds to wait for lock release if another @@ -112,6 +117,7 @@ def acquired_named_lock(lock_name: str, timeout: int) -> bool | None: return None + def release_named_lock(lock_name: str) -> bool | None: """ Attempt to release one instance of a held named lock. Note that multiple @@ -119,7 +125,7 @@ def release_named_lock(lock_name: str) -> bool | None: case each instance of the lock needs to be released separately. See https://mariadb.com/docs/server/reference/sql-functions/secondary-functions/miscellaneous-functions/release_lock - + :param lock_name: The name of the lock to attempt to release :type lock_name: str :return: returns True if one instance of the lock was sucessfully released, False @@ -140,27 +146,61 @@ def release_named_lock(lock_name: str) -> bool | None: return False return None + class Lock: def __init__(self, name: str, timeout: int): self.name = name self.timeout = timeout + @classmethod + def from_json_str(cls, string: str | bytes | bytearray): + deserialized = json.loads(string) + return cls( + deserialized["name"], + deserialized["timeout"] + ) + def acquire(self): acquired = acquired_named_lock(self.name, self.timeout) if acquired == False: - raise TimeoutError(f"Unable to acquire named lock: '{self.name}'. Held by other connection") + raise TimeoutError( + f"Unable to acquire named lock: '{self.name}'. Held by other connection") if acquired is None: - raise ConnectionError(f"Unable to acquire named lock: '{self.name}'. The process might have run out of memory") - + raise ConnectionError( + f"Unable to acquire named lock: '{self.name}'. The process might have run out of memory") + return acquired def release(self): released = release_named_lock(self.name) return released + @staticmethod + def serializer(lock: "Lock") -> str: + return lock.as_json_str() + + @staticmethod + def deserializer(lock_as_string: str | bytes | bytearray) -> "Lock": + return Lock.from_json_str(lock_as_string) + + def as_json(self): + return { + "name": self.name, + "timeout": self.timeout + } + + def as_json_str(self) -> str: + return json.dumps(self.as_json()) + + def __eq__(self, other): + if isinstance(other, Lock): + return self.name == other.name and self.timeout == other.timeout + return False + + class LockDispatcher: - def __init__(self, lock_prefix: str | None = None, case_sensitive_names = False): + def __init__(self, lock_prefix: str | None = None, case_sensitive_names=False): db_name = getattr(settings, "DATABASE_NAME") self.lock_prefix_parts: list[str] = [db_name] @@ -169,18 +209,25 @@ def __init__(self, lock_prefix: str | None = None, case_sensitive_names = False) self.case_sensitive_names = case_sensitive_names self.locks: dict[str, Lock] = dict() + self.in_context = False + + def close(self): + self.release_all() def __enter__(self): + self.in_context = True return self def __exit__(self, exc_type, exc_val, exc_tb): - self.release_all() + self.close() + self.in_context = False - def lock_name(self, name: str): - final_name = LOCK_NAME_SEPARATOR.join((*self.lock_prefix_parts, name.lower())) + def lock_name(self, *name_parts: str): + final_name = LOCK_NAME_SEPARATOR.join( + (*self.lock_prefix_parts, *name_parts)) return final_name.lower() if self.case_sensitive_names else final_name - + @contextmanager def lock_and_release(self, name: str, timeout: int = 5): try: @@ -188,22 +235,24 @@ def lock_and_release(self, name: str, timeout: int = 5): finally: self.release(name) - def acquire(self, name: str, timeout: int = 5): + def create_lock(self, name: str, timeout: int = 5): lock_name = self.lock_name(name) - if self.locks.get(lock_name) is not None: + return Lock(lock_name, timeout) + + def acquire(self, name: str, timeout: int = 5): + if self.locks.get(name) is not None: return - lock = Lock(lock_name, timeout) - self.locks[lock_name] = lock + lock = self.create_lock(name, timeout) + self.locks[name] = lock return lock.acquire() def release_all(self): - for lock in self.locks.values(): - lock.release() + for lock_name in list(self.locks.keys()): + self.release(lock_name) self.locks = dict() def release(self, name: str): - lock_name = self.lock_name(name) - lock = self.locks.pop(lock_name, None) + lock = self.locks.pop(name, None) if lock is None: return lock.release() diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 866f1c28a94..4ec85ce7b61 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -2,16 +2,19 @@ Autonumbering logic """ -from contextlib import contextmanager +from collections import defaultdict + +from django.db.models import Value, Case, When from .uiformatters import UIFormatter, get_uiformatters from ..models_utils.lock_tables import LockDispatcher import logging -from typing import Generator, Literal, Any +from typing import MutableMapping, Callable from collections.abc import Sequence from specifyweb.specify.utils.scoping import Scoping from specifyweb.specify.datamodel import datamodel +from specifyweb.backend.redis_cache.connect import RedisConnection, RedisString logger = logging.getLogger(__name__) @@ -32,52 +35,127 @@ def autonumber_and_save(collection, user, obj) -> None: logger.debug("no fields to autonumber for %s", obj) obj.save() + class AutonumberingLockDispatcher(LockDispatcher): def __init__(self): lock_prefix = "autonumbering" super().__init__(lock_prefix=lock_prefix, case_sensitive_names=False) -@contextmanager -def autonumbering_lock(table_name: str, timeout: int = 10) -> Generator[Literal[True] | None, Any, None]: + # We use Redis for IPC, to maintain the current "highest" autonumbering + # value for each table + field + self.redis = RedisConnection(decode_responses=True) + # Before the records are created within a transaction, they're stored + # locally within this dictonary + # The whole dictonary can be committed to Redis via commit_highest + # The key hierarchy is generally: + # table -> field -> collection = "highest value" + self.highest_in_flight: MutableMapping[str, MutableMapping[str, MutableMapping[int, str]]] = defaultdict( + lambda: defaultdict(lambda: defaultdict(str))) + + def __exit__(self, exc_type, exc_val, exc_tb): + super().__exit__(exc_type, exc_val, exc_tb) + + def highest_stored_value(self, table_name: str, field_name: str, collection_id: int) -> str | None: + key_name = self.lock_name( + table_name, field_name, "highest", str(collection_id)) + highest = RedisString(self.redis).get(key_name) + if isinstance(highest, bytes): + return highest.decode() + elif highest is None: + return None + return str(highest) + + def cache_highest(self, table_name: str, field_name: str, collection_id: int, value: str): + self.highest_in_flight[table_name.lower( + )][field_name.lower()][collection_id] = value + + def commit_highest(self): + for table_name, tables in self.highest_in_flight.items(): + for field_name, fields in tables.items(): + for collection, value in fields.items(): + self.set_highest_value( + table_name, field_name, collection, value) + self.highest_in_flight.clear() + + def set_highest_value(self, table_name: str, field_name: str, collection_id: int, value: str, time_to_live: int = 10): + key_name = self.lock_name( + table_name, field_name, "highest", str(collection_id)) + RedisString(self.redis).set(key_name, value, + time_to_live, override_existing=True) + + +def highest_autonumbering_value( + collection, + model, + formatter: UIFormatter, + values: Sequence[str], + get_lock_dispatcher: Callable[[], + AutonumberingLockDispatcher] | None = None, + wait_for_lock=10) -> str: """ - A convienent wrapper for the named_lock generator that adds the 'autonumber' - prefix to the table_name string argument for the resulting lock name. - - Raises a TimeoutError if timeout seconds have elapsed without acquiring the - lock, and a ConnectionError if the database was otherwise unable to acquire - the lock. - - Example: - ``` - try: - with autonumbering_lock('Collectionobject') as lock: - ... # do something - except TimeoutError: - ... # handle case when lock is held by other connection for > timeout - ``` - - :param table_name: The name of the table that is being autonumbered - :type table_name: str - :param timeout: The time in seconds to wait for lock release if another - connection holds the lock - :type timeout: int - :return: yields True if the lock was obtained successfully and None - otherwise - :rtype: Generator[Literal[True] | None, Any, None] + Retrieves the next highest number in the autonumbering sequence for a given + autonumbering field format """ - with AutonumberingLockDispatcher() as locks: - locks.acquire(name=table_name.lower(), timeout=timeout) - yield + + if not formatter.needs_autonumber(values): + raise ValueError( + f"Formatter {formatter.format_name} does not need need autonumbered with {values}") + + if get_lock_dispatcher is not None: + lock_dispatcher = get_lock_dispatcher() + lock_dispatcher.acquire(model._meta.db_table, timeout=wait_for_lock) + + field_name = formatter.field_name.lower() + + stored_highest_value = (lock_dispatcher.highest_stored_value( + model._meta.db_table, field_name, collection.id) + if lock_dispatcher is not None else None) + + with_year = formatter.fillin_year(values) + largest_in_database = formatter._autonumber_queryset(collection, model, field_name, with_year).annotate( + greater_than_stored=Case( + When(**{field_name + '__gt': stored_highest_value}, + then=Value(True)), + default=Value(False)) + if stored_highest_value is not None + else Value(False)) + + if not largest_in_database.exists(): + if stored_highest_value is not None: + filled_values = formatter.fill_vals_after(stored_highest_value) + else: + filled_values = formatter.fill_vals_no_prior(with_year) + else: + largest = largest_in_database[0] + database_larger = largest.greater_than_stored + value_to_inc = (getattr(largest, field_name) + if database_larger or stored_highest_value is None + else stored_highest_value) + filled_values = formatter.fill_vals_after(value_to_inc) + + highest = ''.join(filled_values) + + if lock_dispatcher is not None and lock_dispatcher.in_context: + lock_dispatcher.cache_highest( + model._meta.db_table, field_name, collection.id, highest) + + return highest def do_autonumbering(collection, obj, fields: list[tuple[UIFormatter, Sequence[str]]]) -> None: logger.debug("autonumbering %s fields: %s", obj, fields) - with autonumbering_lock(obj._meta.db_table): + with AutonumberingLockDispatcher() as locks: for formatter, vals in fields: - formatter.apply_autonumbering(collection, obj, vals) - + new_field_value = highest_autonumbering_value( + collection, + obj.__class__, + formatter, + vals, + get_lock_dispatcher=lambda: locks) + setattr(obj, formatter.field_name.lower(), new_field_value) obj.save() + locks.commit_highest() # REFACTOR: Remove this funtion as it is no longer used diff --git a/specifyweb/specify/utils/uiformatters.py b/specifyweb/specify/utils/uiformatters.py index 7b298d2061d..55119aff53b 100644 --- a/specifyweb/specify/utils/uiformatters.py +++ b/specifyweb/specify/utils/uiformatters.py @@ -18,7 +18,6 @@ logger = logging.getLogger(__name__) from specifyweb.backend.context.app_resource import get_app_resource -from specifyweb.specify.models_utils.lock_tables import LockDispatcher from specifyweb.specify.datamodel import Table from specifyweb.specify import models @@ -173,16 +172,17 @@ def fill_vals_no_prior(self, vals: Sequence[str]) -> list[str]: def canonicalize(self, values: Sequence[str]) -> str: return ''.join([field.canonicalize(value) for field, value in zip(self.fields, values)]) - def apply_scope(self, collection, lock_dispatcher: LockDispatcher | None) -> ScopedFormatter: + def apply_scope(self, collection, autonumbering_lock_dispatcher = None) -> ScopedFormatter: + from specifyweb.specify.utils.autonumbering import highest_autonumbering_value def parser(table: Table, value: str) -> str: parsed = self.parse(value) if self.needs_autonumber(parsed): - if lock_dispatcher is not None: - lock_dispatcher.acquire(table.name, timeout=10) - canonicalized = self.autonumber_now( + canonicalized = highest_autonumbering_value( collection, getattr(models, table.django_name), - parsed + self, + parsed, + get_lock_dispatcher=autonumbering_lock_dispatcher ) else: canonicalized = self.canonicalize(parsed) From 8fbec8ba2771ce9af401de5cb0e4d0c9e6c15e83 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 29 Jan 2026 19:43:09 -0600 Subject: [PATCH 19/38] feat: allow getting scope from model classes --- .../businessrules/rules/attachment_rules.py | 2 +- specifyweb/specify/tests/test_api.py | 12 +- specifyweb/specify/utils/autonumbering.py | 2 +- specifyweb/specify/utils/scoping.py | 196 ++++++++++++------ 4 files changed, 143 insertions(+), 69 deletions(-) diff --git a/specifyweb/backend/businessrules/rules/attachment_rules.py b/specifyweb/backend/businessrules/rules/attachment_rules.py index 991fc0ac704..cf7495b5b76 100644 --- a/specifyweb/backend/businessrules/rules/attachment_rules.py +++ b/specifyweb/backend/businessrules/rules/attachment_rules.py @@ -30,7 +30,7 @@ def attachment_jointable_save(sender, obj): attachee = get_attachee(obj) obj.attachment.tableid = attachee.specify_model.tableId - scopetype, scope = Scoping(attachee)() + scopetype, scope = Scoping.from_instance(attachee) obj.attachment.scopetype, obj.attachment.scopeid = scopetype, scope.id obj.attachment.save() diff --git a/specifyweb/specify/tests/test_api.py b/specifyweb/specify/tests/test_api.py index f6be54f4984..fb0438ec26d 100644 --- a/specifyweb/specify/tests/test_api.py +++ b/specifyweb/specify/tests/test_api.py @@ -1091,7 +1091,7 @@ def test_explicitly_defined_scope(self): accessionnumber="ACC_Test", division=self.division ) - accession_scope = scoping.Scoping(accession).get_scope_model() + accession_scope = scoping.Scoping.model_from_instance(accession) self.assertEqual(accession_scope.id, self.institution.id) loan = Loan.objects.create( @@ -1099,14 +1099,14 @@ def test_explicitly_defined_scope(self): discipline=self.other_discipline ) - loan_scope = scoping.Scoping(loan).get_scope_model() + loan_scope = scoping.Scoping.model_from_instance(loan) self.assertEqual(loan_scope.id, self.other_discipline.id) def test_infered_scope(self): disposal = Disposal.objects.create( disposalnumber = "DISPOSAL_TEST" ) - disposal_scope = scoping.Scoping(disposal).get_scope_model() + disposal_scope = scoping.Scoping.model_from_instance(disposal) self.assertEqual(disposal_scope.id, self.institution.id) loan = Loan.objects.create( @@ -1114,12 +1114,12 @@ def test_infered_scope(self): division=self.other_division, discipline=self.other_discipline, ) - inferred_loan_scope = scoping.Scoping(loan)._infer_scope()[1] + inferred_loan_scope = scoping.Scoping.model_from_instance(loan) self.assertEqual(inferred_loan_scope.id, self.other_division.id) - collection_object_scope = scoping.Scoping( + collection_object_scope = scoping.Scoping.model_from_instance( self.collectionobjects[0] - ).get_scope_model() + ) self.assertEqual(collection_object_scope.id, self.collection.id) def test_in_same_scope(self): diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 4ec85ce7b61..99e87088707 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -164,7 +164,7 @@ def get_tables_to_lock(collection, obj, field_names) -> set[str]: from specifyweb.backend.businessrules.models import UniquenessRule obj_table = obj._meta.db_table - scope_table = Scoping(obj).get_scope_model() + scope_table = Scoping.model_from_instance(obj) tables = {obj._meta.db_table, 'django_migrations', UniquenessRule._meta.db_table, 'discipline', scope_table._meta.db_table} diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index 285cf6e6858..0aa9397ca7a 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -1,84 +1,155 @@ +from inspect import isclass -from collections import namedtuple -from typing import Tuple +from enum import Enum from django.db.models import Model from django.core.exceptions import ObjectDoesNotExist from .. import models -class ScopeType: +class ScopeType(Enum): COLLECTION = 0 DISCIPLINE = 1 DIVISION = 2 INSTITUTION = 3 GLOBAL = 10 - -class Scoping(namedtuple('Scoping', 'obj')): - def __call__(self) -> tuple[int, Model]: - """ - Returns the ScopeType and related Model instance of the - hierarchical position the `obj` occupies. - Tries and infers the scope based on the fields/relationships - on the model, and resolves the 'higher' scope before a more - specific scope if applicable for the object - """ - table = self.obj.__class__.__name__.lower() + def from_model(obj) -> "ScopeType": + clazz = obj.__class__ + + if clazz is models.Institution: + return ScopeType.INSTITUTION + if clazz is models.Division: + return ScopeType.DIVISION + if clazz is models.Discipline: + return ScopeType.DISCIPLINE + if clazz is models.Collection: + return ScopeType.COLLECTION + raise TypeError(f"{clazz.__name__} is not a hierarchy table") + +class ModelClassScope: + def __init__(self, model_class): + if not isclass(model_class): + raise TypeError(f"model_class: {model_class} is not a class!") + self.model_class = model_class + + @property + def scope_type(self) -> ScopeType: + table = self.model_class.__name__.lower() scope = getattr(self, table, lambda: None)() if scope is None: return self._infer_scope() - return scope - def get_scope_model(self) -> Model: - return self.__call__()[1] - - -################################################################################ - - def accession(self): institution = models.Institution.objects.get() if institution.isaccessionsglobal: - return ScopeType.INSTITUTION, institution + return ScopeType.INSTITUTION else: - return self._simple_division_scope() + return ScopeType.DIVISION - def conservevent(self): return Scoping(self.obj.conservdescription)() + def conservevent(self): return ModelClassScope(models.Conservdescription).scope_type - def fieldnotebookpage(self): return Scoping(self.obj.pageset)() + def fieldnotebookpage(self): return ModelClassScope(models.Fieldnotebookpageset).scope_type - def fieldnotebookpageset(self): return Scoping(self.obj.fieldnotebook)() + def fieldnotebookpageset(self): return ModelClassScope(models.Fieldnotebook).scope_type - def gift(self): - if has_related(self.obj, 'discipline'): - return self._simple_discipline_scope() + def gift(self): return ScopeType.DISCIPLINE - def loan(self): - if has_related(self.obj, 'discipline'): - return self._simple_discipline_scope() + def loan(self): return ScopeType.DISCIPLINE def permit(self): - if has_related(self.obj, 'institution'): - return ScopeType.INSTITUTION, self.obj.institution + return ScopeType.INSTITUTION def referencework(self): - if has_related(self.obj, 'institution'): - return ScopeType.INSTITUTION, self.obj.institution + return ScopeType.INSTITUTION def taxon(self): - return ScopeType.DISCIPLINE, self.obj.definition.discipline + ScopeType.DISCIPLINE ############################################################################# - def _simple_discipline_scope(self) -> tuple[int, Model]: - return ScopeType.DISCIPLINE, self.obj.discipline + def _infer_scope(self): + if hasattr(self.model_class, "division"): + return ScopeType.DIVISION + if hasattr(self.model_class, "discipline"): + return ScopeType.DISCIPLINE + if hasattr(self.model_class, "collectionmemberid") or hasattr(self.model_class, "collection"): + return ScopeType.COLLECTION + + return ScopeType.INSTITUTION + +class ModelInstanceScope: + def __init__(self, model_instance): + if isclass(model_instance): + raise ValueError(f"Expected object instead instead of class") + self.obj = model_instance + + @property + def scope_type(self) -> ScopeType: + return self._infer_scope_type() + + @property + def scope_model(self) -> Model: + return self._infer_scope_model() + + def _infer_scope_model(self) -> Model: + table = self.obj.__class__.__name__.lower() + scope = getattr(self, table, lambda: None)() + if scope is None: + return self._infer_scope() + + return scope + + def _infer_scope_type(self) -> ScopeType: + scope_obj = self._infer_scope_model() + return ScopeType.from_model(scope_obj) + + def accession(self) -> Model: + institution = models.Institution.objects.get() + if institution.isaccessionsglobal: + return models.Institution.objects.get() + return self.obj.division + + def conservevent(self) -> Model: + return ModelInstanceScope(self.obj.conservdescription).scope_model + + def fieldnotebookpage(self) -> Model: + return ModelInstanceScope(self.obj.pageset).scope_model + + def fieldnotebookpageset(self) -> Model: + return ModelInstanceScope(self.obj.fieldnotebook).scope_model + + def gift(self) -> Model: + if has_related(self.obj, "discipline"): + return self.obj.discipline + + def loan(self) -> Model: + if has_related(self.obj, 'discipline'): + return self.obj.discipline + + def permit(self) -> Model: + if has_related(self.obj, 'institution'): + return self.obj.institution + + def referencework(self) -> Model: + if has_related(self.obj, 'institution'): + return self.obj.institution - def _simple_division_scope(self) -> tuple[int, Model]: - return ScopeType.DIVISION, self.obj.division + def taxon(self) -> Model: + return self.obj.definition.discipline + + def _infer_scope_model(self) -> Model: + if has_related(self.obj, "division"): + return self.obj.division + if has_related(self.obj, "discipline"): + return self.obj.discipline + if has_related(self.obj, "collectionmemberid") or has_related(self.obj, "collection"): + return self._simple_collection_scope() - def _simple_collection_scope(self) -> tuple[int, Model]: + return models.Institution.objects.get() + + def _simple_collection_scope(self) -> Model: if hasattr(self.obj, "collectionmemberid"): try: """ @@ -95,22 +166,25 @@ def _simple_collection_scope(self) -> tuple[int, Model]: else: collection = self.obj.collection - return ScopeType.COLLECTION, collection + return collection - def _infer_scope(self): - if has_related(self.obj, "division"): - return self._simple_division_scope() - if has_related(self.obj, "discipline"): - return self._simple_discipline_scope() - if has_related(self.obj, "collectionmemberid") or has_related(self.obj, "collection"): - return self._simple_collection_scope() +class Scoping: + def __init__(self): + pass - return self._default_institution_scope() - - # If the table has no scope, and scope can not be inferred then scope to institution - def _default_institution_scope(self) -> tuple[int, Model]: - institution = models.Institution.objects.get() - return ScopeType.INSTITUTION, institution + @staticmethod + def from_model(model) -> ScopeType: + return ModelClassScope(model).scope_type + + @staticmethod + def from_instance(obj: Model) -> tuple[ScopeType, Model]: + instance = ModelInstanceScope(obj) + return instance.scope_type, instance.scope_model + + @staticmethod() + def model_from_instance(obj: Model) -> Model: + instance = ModelInstanceScope(obj) + return instance.scope_model def has_related(model_instance, field_name: str) -> bool: @@ -122,14 +196,14 @@ def in_same_scope(object1: Model, object2: Model) -> bool: Determines whether two Model Objects are in the same scope. Travels up the scoping heirarchy until a matching scope can be resolved """ - scope1_type, scope1 = Scoping(object1)() - scope2_type, scope2 = Scoping(object2)() + scope1_type, scope1 = Scoping.from_instance(object1) + scope2_type, scope2 = Scoping.from_instance(object2) if scope1_type > scope2_type: while scope2_type != scope1_type: - scope2_type, scope2 = Scoping(scope2)() + scope2_type, scope2 = Scoping.from_instance(scope2) elif scope1_type < scope2_type: while scope2_type != scope1_type: - scope1_type, scope1 = Scoping(scope1)() + scope1_type, scope1 = Scoping.from_instance(scope1) return scope1.id == scope2.id From 3870a0fd962bbf8e8c3800949c935b3d9dabb02b Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 29 Jan 2026 19:50:13 -0600 Subject: [PATCH 20/38] feat: add more scoping tables --- specifyweb/specify/utils/scoping.py | 33 ++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index 0aa9397ca7a..db775f7db7d 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -65,7 +65,23 @@ def referencework(self): return ScopeType.INSTITUTION def taxon(self): - ScopeType.DISCIPLINE + return ScopeType.DISCIPLINE + + def geography(self): + return ScopeType.DISCIPLINE + + def geologictimeperiod(self): + return ScopeType.DISCIPLINE + + def lithostrat(self): + return ScopeType.DISCIPLINE + + def tectonicunit(self): + return ScopeType.DISCIPLINE + + def storage(self): + return ScopeType.INSTITUTION + ############################################################################# @@ -138,6 +154,21 @@ def referencework(self) -> Model: def taxon(self) -> Model: return self.obj.definition.discipline + + def geography(self): + return self.obj.definition.discipline + + def geologictimeperiod(self): + return self.obj.definition.discipline + + def lithostrat(self): + return self.obj.definition.discipline + + def tectonicunit(self): + return self.obj.definition.discipline + + def storage(self): + return self.obj.definition.institution def _infer_scope_model(self) -> Model: if has_related(self.obj, "division"): From e2edb111afae16899171ad99b2da9e1cfada553f Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 29 Jan 2026 20:13:36 -0600 Subject: [PATCH 21/38] fix: properly respect scope when setting autonumbering keys --- specifyweb/specify/utils/autonumbering.py | 62 +++++++++++++++++------ specifyweb/specify/utils/scoping.py | 59 +++++++++++++-------- 2 files changed, 83 insertions(+), 38 deletions(-) diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 99e87088707..6a9d7b4bc61 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -12,7 +12,7 @@ from typing import MutableMapping, Callable from collections.abc import Sequence -from specifyweb.specify.utils.scoping import Scoping +from specifyweb.specify.utils.scoping import Scoping, ScopeType from specifyweb.specify.datamodel import datamodel from specifyweb.backend.redis_cache.connect import RedisConnection, RedisString @@ -48,16 +48,20 @@ def __init__(self): # locally within this dictonary # The whole dictonary can be committed to Redis via commit_highest # The key hierarchy is generally: - # table -> field -> collection = "highest value" - self.highest_in_flight: MutableMapping[str, MutableMapping[str, MutableMapping[int, str]]] = defaultdict( - lambda: defaultdict(lambda: defaultdict(str))) + # table -> field -> scope -> scope_id = "highest value" + self.highest_in_flight: MutableMapping[str, MutableMapping[str, MutableMapping[str, MutableMapping[int, str]]]] = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(str)))) def __exit__(self, exc_type, exc_val, exc_tb): super().__exit__(exc_type, exc_val, exc_tb) - def highest_stored_value(self, table_name: str, field_name: str, collection_id: int) -> str | None: - key_name = self.lock_name( - table_name, field_name, "highest", str(collection_id)) + def highest_stored_value(self, + table_name: str, + field_name: str, + scope_type: ScopeType, + scope_id: int) -> str | None: + key_name = self.autonumbering_redis_key( + table_name, field_name, scope_type, scope_id) highest = RedisString(self.redis).get(key_name) if isinstance(highest, bytes): return highest.decode() @@ -65,24 +69,47 @@ def highest_stored_value(self, table_name: str, field_name: str, collection_id: return None return str(highest) - def cache_highest(self, table_name: str, field_name: str, collection_id: int, value: str): + def cache_highest(self, + table_name: str, + field_name: str, + scope_type: ScopeType, + scope_id: int, + value: str): self.highest_in_flight[table_name.lower( - )][field_name.lower()][collection_id] = value + )][field_name.lower()][scope_type.name][scope_id] = value def commit_highest(self): for table_name, tables in self.highest_in_flight.items(): for field_name, fields in tables.items(): - for collection, value in fields.items(): - self.set_highest_value( - table_name, field_name, collection, value) + for scope_type, scope_ids in fields.items(): + for scope_id, value in scope_ids.items(): + self.set_highest_value( + table_name, field_name, scope_type, scope_id, value) self.highest_in_flight.clear() - def set_highest_value(self, table_name: str, field_name: str, collection_id: int, value: str, time_to_live: int = 10): - key_name = self.lock_name( - table_name, field_name, "highest", str(collection_id)) + def set_highest_value(self, + table_name: str, + field_name: str, + scope_type: ScopeType, + scope_id: int, + value: str, + time_to_live: int = 10): + key_name = self.autonumbering_redis_key( + table_name, field_name, scope_type, scope_id) RedisString(self.redis).set(key_name, value, time_to_live, override_existing=True) + def autonumbering_redis_key(self, + table_name: str, + field_name: str, + scope_type: ScopeType, + scope_id: int): + return self.lock_name(table_name, + field_name, + "highest", + scope_type.name, + str(scope_id)) + def highest_autonumbering_value( collection, @@ -107,8 +134,11 @@ def highest_autonumbering_value( field_name = formatter.field_name.lower() + scope_type = Scoping.from_model(model) + hierarchy_model = Scoping.get_hierarchy_model(collection, scope_type) + stored_highest_value = (lock_dispatcher.highest_stored_value( - model._meta.db_table, field_name, collection.id) + model._meta.db_table, field_name, scope_type, hierarchy_model.id) if lock_dispatcher is not None else None) with_year = formatter.fillin_year(values) diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index db775f7db7d..da9fbf15932 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -27,6 +27,7 @@ def from_model(obj) -> "ScopeType": return ScopeType.COLLECTION raise TypeError(f"{clazz.__name__} is not a hierarchy table") + class ModelClassScope: def __init__(self, model_class): if not isclass(model_class): @@ -48,11 +49,14 @@ def accession(self): else: return ScopeType.DIVISION - def conservevent(self): return ModelClassScope(models.Conservdescription).scope_type + def conservevent(self): return ModelClassScope( + models.Conservdescription).scope_type - def fieldnotebookpage(self): return ModelClassScope(models.Fieldnotebookpageset).scope_type + def fieldnotebookpage(self): return ModelClassScope( + models.Fieldnotebookpageset).scope_type - def fieldnotebookpageset(self): return ModelClassScope(models.Fieldnotebook).scope_type + def fieldnotebookpageset(self): return ModelClassScope( + models.Fieldnotebook).scope_type def gift(self): return ScopeType.DISCIPLINE @@ -69,16 +73,16 @@ def taxon(self): def geography(self): return ScopeType.DISCIPLINE - + def geologictimeperiod(self): return ScopeType.DISCIPLINE - + def lithostrat(self): return ScopeType.DISCIPLINE - + def tectonicunit(self): return ScopeType.DISCIPLINE - + def storage(self): return ScopeType.INSTITUTION @@ -95,6 +99,7 @@ def _infer_scope(self): return ScopeType.INSTITUTION + class ModelInstanceScope: def __init__(self, model_instance): if isclass(model_instance): @@ -116,7 +121,7 @@ def _infer_scope_model(self) -> Model: return self._infer_scope() return scope - + def _infer_scope_type(self) -> ScopeType: scope_obj = self._infer_scope_model() return ScopeType.from_model(scope_obj) @@ -129,25 +134,25 @@ def accession(self) -> Model: def conservevent(self) -> Model: return ModelInstanceScope(self.obj.conservdescription).scope_model - + def fieldnotebookpage(self) -> Model: return ModelInstanceScope(self.obj.pageset).scope_model - + def fieldnotebookpageset(self) -> Model: return ModelInstanceScope(self.obj.fieldnotebook).scope_model - + def gift(self) -> Model: if has_related(self.obj, "discipline"): return self.obj.discipline - + def loan(self) -> Model: if has_related(self.obj, 'discipline'): return self.obj.discipline - + def permit(self) -> Model: if has_related(self.obj, 'institution'): return self.obj.institution - + def referencework(self) -> Model: if has_related(self.obj, 'institution'): return self.obj.institution @@ -157,19 +162,19 @@ def taxon(self) -> Model: def geography(self): return self.obj.definition.discipline - + def geologictimeperiod(self): return self.obj.definition.discipline - + def lithostrat(self): return self.obj.definition.discipline - + def tectonicunit(self): return self.obj.definition.discipline - + def storage(self): return self.obj.definition.institution - + def _infer_scope_model(self) -> Model: if has_related(self.obj, "division"): return self.obj.division @@ -179,7 +184,7 @@ def _infer_scope_model(self) -> Model: return self._simple_collection_scope() return models.Institution.objects.get() - + def _simple_collection_scope(self) -> Model: if hasattr(self.obj, "collectionmemberid"): try: @@ -199,6 +204,7 @@ def _simple_collection_scope(self) -> Model: return collection + class Scoping: def __init__(self): pass @@ -206,17 +212,26 @@ def __init__(self): @staticmethod def from_model(model) -> ScopeType: return ModelClassScope(model).scope_type - + @staticmethod def from_instance(obj: Model) -> tuple[ScopeType, Model]: instance = ModelInstanceScope(obj) return instance.scope_type, instance.scope_model - + @staticmethod() def model_from_instance(obj: Model) -> Model: instance = ModelInstanceScope(obj) return instance.scope_model + @staticmethod + def get_hierarchy_model(collection, scope_type: ScopeType) -> Model: + steps = [ScopeType.COLLECTION, ScopeType.DISCIPLINE, + ScopeType.DIVISION, ScopeType.INSTITUTION] + num_steps = steps.index(scope_type) + model = collection + for _ in range(num_steps): + model = Scoping.model_from_instance(model) + return model def has_related(model_instance, field_name: str) -> bool: return hasattr(model_instance, field_name) and getattr(model_instance, field_name, None) is not None From d48a7a0401bedb5f96b8ccd90eea7468e0e6e63f Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 29 Jan 2026 20:37:46 -0600 Subject: [PATCH 22/38] fix: resolve typo in static method decorator --- specifyweb/specify/utils/scoping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index da9fbf15932..f815e996836 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -218,7 +218,7 @@ def from_instance(obj: Model) -> tuple[ScopeType, Model]: instance = ModelInstanceScope(obj) return instance.scope_type, instance.scope_model - @staticmethod() + @staticmethod def model_from_instance(obj: Model) -> Model: instance = ModelInstanceScope(obj) return instance.scope_model From 957ba26a37d9bcc4104c6d2eb7692e6e9d0a1c3d Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 29 Jan 2026 20:52:46 -0600 Subject: [PATCH 23/38] fix: implement comparison operators for scope type --- specifyweb/specify/utils/autonumbering.py | 2 +- specifyweb/specify/utils/scoping.py | 27 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 6a9d7b4bc61..bf266d7b6a6 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -167,7 +167,7 @@ def highest_autonumbering_value( if lock_dispatcher is not None and lock_dispatcher.in_context: lock_dispatcher.cache_highest( - model._meta.db_table, field_name, collection.id, highest) + model._meta.db_table, field_name, scope_type, hierarchy_model.id, highest) return highest diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index f815e996836..0103885245b 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -27,6 +27,31 @@ def from_model(obj) -> "ScopeType": return ScopeType.COLLECTION raise TypeError(f"{clazz.__name__} is not a hierarchy table") + def __gt__(self, other): + if not isinstance(other, ScopeType): + return NotImplemented + return self.value > other.value + + def __ge__(self, other): + if not isinstance(other, ScopeType): + return NotImplemented + return self.value >= other.value + + def __lt__(self, other): + if not isinstance(other, ScopeType): + return NotImplemented + return self.value < other.value + + def __le__(self, other): + if not isinstance(other, ScopeType): + return NotImplemented + return self.value <= other.value + + def __eq__(self, other): + if not isinstance(other, ScopeType): + return NotImplemented + return self.value == other.value + class ModelClassScope: def __init__(self, model_class): @@ -89,6 +114,7 @@ def storage(self): ############################################################################# + def _infer_scope(self): if hasattr(self.model_class, "division"): return ScopeType.DIVISION @@ -233,6 +259,7 @@ def get_hierarchy_model(collection, scope_type: ScopeType) -> Model: model = Scoping.model_from_instance(model) return model + def has_related(model_instance, field_name: str) -> bool: return hasattr(model_instance, field_name) and getattr(model_instance, field_name, None) is not None From 00398297c2523d4ae653f505acfa73a14a9f8afd Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 29 Jan 2026 21:01:30 -0600 Subject: [PATCH 24/38] fix: internally always use strings for scope types when autonumbering --- specifyweb/specify/utils/autonumbering.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index bf266d7b6a6..ec078cb266b 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -58,10 +58,10 @@ def __exit__(self, exc_type, exc_val, exc_tb): def highest_stored_value(self, table_name: str, field_name: str, - scope_type: ScopeType, + scope_name: str, scope_id: int) -> str | None: key_name = self.autonumbering_redis_key( - table_name, field_name, scope_type, scope_id) + table_name, field_name, scope_name, scope_id) highest = RedisString(self.redis).get(key_name) if isinstance(highest, bytes): return highest.decode() @@ -72,11 +72,11 @@ def highest_stored_value(self, def cache_highest(self, table_name: str, field_name: str, - scope_type: ScopeType, + scope_name: str, scope_id: int, value: str): self.highest_in_flight[table_name.lower( - )][field_name.lower()][scope_type.name][scope_id] = value + )][field_name.lower()][scope_name][scope_id] = value def commit_highest(self): for table_name, tables in self.highest_in_flight.items(): @@ -90,24 +90,24 @@ def commit_highest(self): def set_highest_value(self, table_name: str, field_name: str, - scope_type: ScopeType, + scope_name: str, scope_id: int, value: str, time_to_live: int = 10): key_name = self.autonumbering_redis_key( - table_name, field_name, scope_type, scope_id) + table_name, field_name, scope_name, scope_id) RedisString(self.redis).set(key_name, value, time_to_live, override_existing=True) def autonumbering_redis_key(self, table_name: str, field_name: str, - scope_type: ScopeType, + scope_name: str, scope_id: int): return self.lock_name(table_name, field_name, "highest", - scope_type.name, + scope_name, str(scope_id)) @@ -138,7 +138,7 @@ def highest_autonumbering_value( hierarchy_model = Scoping.get_hierarchy_model(collection, scope_type) stored_highest_value = (lock_dispatcher.highest_stored_value( - model._meta.db_table, field_name, scope_type, hierarchy_model.id) + model._meta.db_table, field_name, scope_type.name, hierarchy_model.id) if lock_dispatcher is not None else None) with_year = formatter.fillin_year(values) @@ -167,7 +167,7 @@ def highest_autonumbering_value( if lock_dispatcher is not None and lock_dispatcher.in_context: lock_dispatcher.cache_highest( - model._meta.db_table, field_name, scope_type, hierarchy_model.id, highest) + model._meta.db_table, field_name, scope_type.name, hierarchy_model.id, highest) return highest From 75964c1c5999e74fe87f81cb779f60d33b78c4a3 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Fri, 30 Jan 2026 15:35:44 -0600 Subject: [PATCH 25/38] feat: spin up a redis instance during backend tests --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7f661d6ff35..adc0f32420c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -68,6 +68,9 @@ jobs: --health-cmd="mariadb-admin ping" --health-interval=5s --health-timeout=2s --health-retries=3 + redis: + image: redis:latest + steps: - uses: actions/checkout@v4 From f6e5550d29b4e95f0f6bd1811c6d2bd192e226e6 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Fri, 30 Jan 2026 17:27:14 -0600 Subject: [PATCH 26/38] fix: resolve more failing tests --- .github/workflows/test.yml | 6 +++++- specifyweb/backend/businessrules/rules/attachment_rules.py | 2 +- specifyweb/specify/utils/scoping.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index adc0f32420c..7ec558b027a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -67,9 +67,11 @@ jobs: options: --health-cmd="mariadb-admin ping" --health-interval=5s --health-timeout=2s --health-retries=3 - redis: image: redis:latest + ports: + - 6379 + steps: - uses: actions/checkout@v4 @@ -119,6 +121,8 @@ jobs: echo "THICK_CLIENT_LOCATION = '${{ github.workspace }}/Specify6'" >> specifyweb/settings/local_specify_settings.py echo "DATABASE_HOST = '127.0.0.1'" >> specifyweb/settings/local_specify_settings.py echo "DATABASE_PORT = ${{ job.services.mariadb.ports[3306] }}" >> specifyweb/settings/local_specify_settings.py + echo "REDIS_HOST = '127.0.0.1'" >> specifyweb/settings/local_specify_settings.py + echo "REDIS_PORT = ${{ job.services.redis.ports[6379] }}" >> specifyweb/settings/local_specify_settings.py - name: Need these files to be present run: diff --git a/specifyweb/backend/businessrules/rules/attachment_rules.py b/specifyweb/backend/businessrules/rules/attachment_rules.py index cf7495b5b76..fc52e109979 100644 --- a/specifyweb/backend/businessrules/rules/attachment_rules.py +++ b/specifyweb/backend/businessrules/rules/attachment_rules.py @@ -31,7 +31,7 @@ def attachment_jointable_save(sender, obj): attachee = get_attachee(obj) obj.attachment.tableid = attachee.specify_model.tableId scopetype, scope = Scoping.from_instance(attachee) - obj.attachment.scopetype, obj.attachment.scopeid = scopetype, scope.id + obj.attachment.scopetype, obj.attachment.scopeid = scopetype.value, scope.id obj.attachment.save() diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index 0103885245b..4864afa11cd 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -14,7 +14,7 @@ class ScopeType(Enum): INSTITUTION = 3 GLOBAL = 10 - def from_model(obj) -> "ScopeType": + def from_model(self, obj) -> "ScopeType": clazz = obj.__class__ if clazz is models.Institution: From 43fc35db037eab272145480e30f6ee5265c9f013 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Fri, 30 Jan 2026 22:12:16 -0600 Subject: [PATCH 27/38] fix: make scopetype helper method static --- specifyweb/specify/utils/scoping.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index 4864afa11cd..b6c6f004349 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -14,7 +14,8 @@ class ScopeType(Enum): INSTITUTION = 3 GLOBAL = 10 - def from_model(self, obj) -> "ScopeType": + @staticmethod + def from_model(obj) -> "ScopeType": clazz = obj.__class__ if clazz is models.Institution: From 654c0cd43baaedce6d8d2b639b4f94326ec1bcf5 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Fri, 30 Jan 2026 22:36:40 -0600 Subject: [PATCH 28/38] fix: use ScopeType ints when creating attachments --- specifyweb/specify/api/filter_by_col.py | 10 +++++----- .../test_filter_by_col/test_filter_by_collection.py | 8 ++++---- specifyweb/specify/utils/autonumbering.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/specifyweb/specify/api/filter_by_col.py b/specifyweb/specify/api/filter_by_col.py index ef83f347414..9056683c83b 100644 --- a/specifyweb/specify/api/filter_by_col.py +++ b/specifyweb/specify/api/filter_by_col.py @@ -28,12 +28,12 @@ def filter_by_collection(queryset, collection, strict=True): if queryset.model is Attachment: return queryset.filter( Q(scopetype=None) - | Q(scopetype=ScopeType.GLOBAL) - | Q(scopetype=ScopeType.COLLECTION, scopeid=collection.id) - | Q(scopetype=ScopeType.DISCIPLINE, scopeid=collection.discipline.id) - | Q(scopetype=ScopeType.DIVISION, scopeid=collection.discipline.division.id) + | Q(scopetype=ScopeType.GLOBAL.value) + | Q(scopetype=ScopeType.COLLECTION.value, scopeid=collection.id) + | Q(scopetype=ScopeType.DISCIPLINE.value, scopeid=collection.discipline.id) + | Q(scopetype=ScopeType.DIVISION.value, scopeid=collection.discipline.division.id) | Q( - scopetype=ScopeType.INSTITUTION, + scopetype=ScopeType.INSTITUTION.value, scopeid=collection.discipline.division.institution.id, ) ) diff --git a/specifyweb/specify/tests/test_filter_by_col/test_filter_by_collection.py b/specifyweb/specify/tests/test_filter_by_col/test_filter_by_collection.py index 7f62d2b8a85..282b534ada9 100644 --- a/specifyweb/specify/tests/test_filter_by_col/test_filter_by_collection.py +++ b/specifyweb/specify/tests/test_filter_by_col/test_filter_by_collection.py @@ -39,17 +39,17 @@ def test_attachment(self): attachment_1 = Attachment.objects.create(scopetype=None) attachment_2 = Attachment.objects.create( - scopetype=ScopeType.COLLECTION, scopeid=self.collection.id + scopetype=ScopeType.COLLECTION.value, scopeid=self.collection.id ) attachment_3 = Attachment.objects.create( - scopetype=ScopeType.COLLECTION, scopeid=collection_2.id + scopetype=ScopeType.COLLECTION.value, scopeid=collection_2.id ) attachment_4 = Attachment.objects.create( - scopetype=ScopeType.DISCIPLINE, scopeid=self.discipline.id + scopetype=ScopeType.DISCIPLINE.value, scopeid=self.discipline.id ) attachment_5 = Attachment.objects.create( - scopetype=ScopeType.DISCIPLINE, scopeid=discipline_2.id + scopetype=ScopeType.DISCIPLINE.value, scopeid=discipline_2.id ) queryset = filter_by_collection(Attachment.objects.all(), self.collection) diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index ec078cb266b..07aa734a13c 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -12,7 +12,7 @@ from typing import MutableMapping, Callable from collections.abc import Sequence -from specifyweb.specify.utils.scoping import Scoping, ScopeType +from specifyweb.specify.utils.scoping import Scoping from specifyweb.specify.datamodel import datamodel from specifyweb.backend.redis_cache.connect import RedisConnection, RedisString From 04a6922b64d2bbaa7f0cac48706b074a33a7a96d Mon Sep 17 00:00:00 2001 From: melton-jason Date: Mon, 2 Feb 2026 15:05:29 -0600 Subject: [PATCH 29/38] fix: handle edge case where inferred scope is not related object --- specifyweb/specify/utils/scoping.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index b6c6f004349..193274b252b 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -117,11 +117,11 @@ def storage(self): def _infer_scope(self): - if hasattr(self.model_class, "division"): + if is_related(self.model_class, "division"): return ScopeType.DIVISION - if hasattr(self.model_class, "discipline"): + if is_related(self.model_class, "discipline"): return ScopeType.DISCIPLINE - if hasattr(self.model_class, "collectionmemberid") or hasattr(self.model_class, "collection"): + if hasattr(self.model_class, "collectionmemberid") or is_related(self.model_class, "collection"): return ScopeType.COLLECTION return ScopeType.INSTITUTION @@ -139,13 +139,10 @@ def scope_type(self) -> ScopeType: @property def scope_model(self) -> Model: - return self._infer_scope_model() - - def _infer_scope_model(self) -> Model: table = self.obj.__class__.__name__.lower() scope = getattr(self, table, lambda: None)() if scope is None: - return self._infer_scope() + return self._infer_scope_model() return scope @@ -203,11 +200,11 @@ def storage(self): return self.obj.definition.institution def _infer_scope_model(self) -> Model: - if has_related(self.obj, "division"): + if is_related(self.obj.__class__, "division") and has_related(self.obj, "division"): return self.obj.division - if has_related(self.obj, "discipline"): + if is_related(self.obj.__class__, "discipline") and has_related(self.obj, "discipline"): return self.obj.discipline - if has_related(self.obj, "collectionmemberid") or has_related(self.obj, "collection"): + if has_related(self.obj, "collectionmemberid") or (is_related(self.obj.__class__, "collection") and has_related(self.obj, "collection")): return self._simple_collection_scope() return models.Institution.objects.get() @@ -233,8 +230,6 @@ def _simple_collection_scope(self) -> Model: class Scoping: - def __init__(self): - pass @staticmethod def from_model(model) -> ScopeType: @@ -261,9 +256,14 @@ def get_hierarchy_model(collection, scope_type: ScopeType) -> Model: return model -def has_related(model_instance, field_name: str) -> bool: +def has_related(model_instance: Model, field_name: str) -> bool: return hasattr(model_instance, field_name) and getattr(model_instance, field_name, None) is not None +def is_related(model_class: Model, field_name: str) -> bool: + if not hasattr(model_class, field_name): + return False + field = getattr(model_class, field_name) + return getattr(field, "is_relation", False) def in_same_scope(object1: Model, object2: Model) -> bool: """ From 79428175084dcea5992d24d46b88cfc3138fa3a9 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Mon, 2 Feb 2026 15:31:52 -0600 Subject: [PATCH 30/38] fix: handle case when lock dispatcher is not present when autonumbering --- specifyweb/backend/workbench/upload/scoping.py | 2 +- specifyweb/specify/utils/autonumbering.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/specifyweb/backend/workbench/upload/scoping.py b/specifyweb/backend/workbench/upload/scoping.py index 5f21464dde0..b23ac1cda41 100644 --- a/specifyweb/backend/workbench/upload/scoping.py +++ b/specifyweb/backend/workbench/upload/scoping.py @@ -281,7 +281,7 @@ def apply_scoping_to_uploadtable( apply_scoping = lambda key, value: get_deferred_scoping( key, table.django_name, value, row, ut, context - ).apply_scoping(collection, context, row) + ).apply_scoping(collection, context, row, lock_dispatcher=lock_dispatcher) to_ones = { key: adjuster(apply_scoping(key, value), key) diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 07aa734a13c..7957a1cc3c3 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -128,7 +128,9 @@ def highest_autonumbering_value( raise ValueError( f"Formatter {formatter.format_name} does not need need autonumbered with {values}") - if get_lock_dispatcher is not None: + if get_lock_dispatcher is None: + lock_dispatcher = None + else: lock_dispatcher = get_lock_dispatcher() lock_dispatcher.acquire(model._meta.db_table, timeout=wait_for_lock) From d5f8120901787745c3b5401506d90056c635002c Mon Sep 17 00:00:00 2001 From: melton-jason Date: Mon, 2 Feb 2026 18:34:03 -0600 Subject: [PATCH 31/38] chore: add doc-strings to scoping functions --- .../specify/models_utils/lock_tables.py | 2 +- specifyweb/specify/utils/autonumbering.py | 2 +- specifyweb/specify/utils/scoping.py | 77 ++++++++++++++++++- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/specifyweb/specify/models_utils/lock_tables.py b/specifyweb/specify/models_utils/lock_tables.py index 2b3c3289856..ef6762b0c17 100644 --- a/specifyweb/specify/models_utils/lock_tables.py +++ b/specifyweb/specify/models_utils/lock_tables.py @@ -226,7 +226,7 @@ def lock_name(self, *name_parts: str): final_name = LOCK_NAME_SEPARATOR.join( (*self.lock_prefix_parts, *name_parts)) - return final_name.lower() if self.case_sensitive_names else final_name + return final_name.lower() if not self.case_sensitive_names else final_name @contextmanager def lock_and_release(self, name: str, timeout: int = 5): diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 7957a1cc3c3..63badeb3c7d 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -136,7 +136,7 @@ def highest_autonumbering_value( field_name = formatter.field_name.lower() - scope_type = Scoping.from_model(model) + scope_type = Scoping.scope_type_from_class(model) hierarchy_model = Scoping.get_hierarchy_model(collection, scope_type) stored_highest_value = (lock_dispatcher.highest_stored_value( diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index 193274b252b..8b5d683e482 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -153,7 +153,7 @@ def _infer_scope_type(self) -> ScopeType: def accession(self) -> Model: institution = models.Institution.objects.get() if institution.isaccessionsglobal: - return models.Institution.objects.get() + return institution return self.obj.division def conservevent(self) -> Model: @@ -232,8 +232,26 @@ def _simple_collection_scope(self) -> Model: class Scoping: @staticmethod - def from_model(model) -> ScopeType: - return ModelClassScope(model).scope_type + def scope_type_from_class(model_class) -> ScopeType: + """ + Returns the ScopeType that a particular class can be scoped to. + If you have an instantiated instance of the class, prefer using the + other methods like `from_instance` or `model_from_instance` + + Example: + ``` + loan_scope = Scoping.scope_type_from_class(models.Loan) + # ScopeType.Discipline + + accession_scope = Scoping.scope_type_from_class(models.Accession) + # ScopeType.Institution if accessions are global else ScopeType.Division + ``` + + :param model_class: + :return: + :rtype: ScopeType + """ + return ModelClassScope(model_class).scope_type @staticmethod def from_instance(obj: Model) -> tuple[ScopeType, Model]: @@ -242,11 +260,44 @@ def from_instance(obj: Model) -> tuple[ScopeType, Model]: @staticmethod def model_from_instance(obj: Model) -> Model: + """ + Returns the Model that the provided Model instance can be scoped to. + Usually always one of: Collection, Discipline, Division, or Institution + + Example: + ``` + my_co = Collectionobject.objects.get(some_filters) + scoped = Scoping.model_from_instance(my_co) + isinstance(scoped, models.Collection) #-> True + ``` + + :param obj: + :type obj: Model + :return: + :rtype: Model + """ instance = ModelInstanceScope(obj) return instance.scope_model @staticmethod def get_hierarchy_model(collection, scope_type: ScopeType) -> Model: + """ + Given a collection and desired ScopeType, returns the model associated + with the ScopeType. + + Example: + ``` + my_collection = Collection.objects.get(some_filters) + my_div = Scoping.get_hierarchy_model(ny_collection, ScopeType.Division) + my_dis = Scoping.get_hierarchy_model(ny_collection, ScopeType.Discipline) + ``` + + :param collection: Description + :param scope_type: Description + :type scope_type: ScopeType + :return: + :rtype: Model + """ steps = [ScopeType.COLLECTION, ScopeType.DISCIPLINE, ScopeType.DIVISION, ScopeType.INSTITUTION] num_steps = steps.index(scope_type) @@ -257,9 +308,29 @@ def get_hierarchy_model(collection, scope_type: ScopeType) -> Model: def has_related(model_instance: Model, field_name: str) -> bool: + """ + + :param model_instance: Description + :type model_instance: Model + :param field_name: Description + :type field_name: str + :return: Returns true if the model instance contains some non-None value in + the given field name + :rtype: bool + """ return hasattr(model_instance, field_name) and getattr(model_instance, field_name, None) is not None def is_related(model_class: Model, field_name: str) -> bool: + """ + + :param model_class: Description + :type model_class: Model + :param field_name: Description + :type field_name: str + :return: Returns true if the field name for the model class is a + relationship + :rtype: bool + """ if not hasattr(model_class, field_name): return False field = getattr(model_class, field_name) From ac87330d8433cf6e849be6ccdb70864a5aaa98cf Mon Sep 17 00:00:00 2001 From: melton-jason Date: Mon, 2 Feb 2026 21:15:38 -0600 Subject: [PATCH 32/38] fix: use proper field object in is_related --- specifyweb/specify/utils/autonumbering.py | 2 +- specifyweb/specify/utils/scoping.py | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index 63badeb3c7d..aeefda37c37 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -93,7 +93,7 @@ def set_highest_value(self, scope_name: str, scope_id: int, value: str, - time_to_live: int = 10): + time_to_live: int = 5): key_name = self.autonumbering_redis_key( table_name, field_name, scope_name, scope_id) RedisString(self.redis).set(key_name, value, diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index 8b5d683e482..458e8217a79 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -135,7 +135,7 @@ def __init__(self, model_instance): @property def scope_type(self) -> ScopeType: - return self._infer_scope_type() + return ScopeType.from_model(self.scope_model) @property def scope_model(self) -> Model: @@ -146,10 +146,6 @@ def scope_model(self) -> Model: return scope - def _infer_scope_type(self) -> ScopeType: - scope_obj = self._infer_scope_model() - return ScopeType.from_model(scope_obj) - def accession(self) -> Model: institution = models.Institution.objects.get() if institution.isaccessionsglobal: @@ -333,7 +329,8 @@ def is_related(model_class: Model, field_name: str) -> bool: """ if not hasattr(model_class, field_name): return False - field = getattr(model_class, field_name) + field_wrapper = getattr(model_class, field_name) + field = getattr(field_wrapper, "field") return getattr(field, "is_relation", False) def in_same_scope(object1: Model, object2: Model) -> bool: From 967617302676578220ddfd3d2abc7365eafcf586 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 3 Feb 2026 08:03:00 -0600 Subject: [PATCH 33/38] chore: update loan autonumber test --- specifyweb/specify/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specifyweb/specify/tests/test_api.py b/specifyweb/specify/tests/test_api.py index fb0438ec26d..41ee93c34ea 100644 --- a/specifyweb/specify/tests/test_api.py +++ b/specifyweb/specify/tests/test_api.py @@ -1115,7 +1115,7 @@ def test_infered_scope(self): discipline=self.other_discipline, ) inferred_loan_scope = scoping.Scoping.model_from_instance(loan) - self.assertEqual(inferred_loan_scope.id, self.other_division.id) + self.assertEqual(inferred_loan_scope.id, self.other_discipline.id) collection_object_scope = scoping.Scoping.model_from_instance( self.collectionobjects[0] From 0c3e863bfb1967c176e5858c7f7f3ea5d6108ff8 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 3 Feb 2026 08:11:36 -0600 Subject: [PATCH 34/38] fix: correct collection being passed to autonumbering function in tests --- .../specify/tests/test_autonumbering/test_do_autonumbering.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specifyweb/specify/tests/test_autonumbering/test_do_autonumbering.py b/specifyweb/specify/tests/test_autonumbering/test_do_autonumbering.py index 50b3580a937..d9524caaf78 100644 --- a/specifyweb/specify/tests/test_autonumbering/test_do_autonumbering.py +++ b/specifyweb/specify/tests/test_autonumbering/test_do_autonumbering.py @@ -137,7 +137,7 @@ def test_increment_across_collections(self, group_filter: Mock): collection=second_collection, catalognumber="#########" ) - do_autonumbering(self.collection, third_co_different_collection, fields) + do_autonumbering(second_collection, third_co_different_collection, fields) third_co_different_collection.refresh_from_db() # This third CO must be created now. self.assertIsNotNone(third_co_different_collection.id) @@ -158,7 +158,7 @@ def test_increment_across_collections(self, group_filter: Mock): collection=third_collection, catalognumber="#########" ) - do_autonumbering(self.collection, fourth_co_irrelevant_collection, fields) + do_autonumbering(third_collection, fourth_co_irrelevant_collection, fields) fourth_co_irrelevant_collection.refresh_from_db() # This CO must be created now. From 07d003e89a4fe91abc09c0f133a242272596f71e Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 3 Feb 2026 08:33:38 -0600 Subject: [PATCH 35/38] fix: respect accession scope when filtering query by collection --- specifyweb/specify/api/filter_by_col.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/specifyweb/specify/api/filter_by_col.py b/specifyweb/specify/api/filter_by_col.py index 9056683c83b..ffef18caa9f 100644 --- a/specifyweb/specify/api/filter_by_col.py +++ b/specifyweb/specify/api/filter_by_col.py @@ -5,7 +5,7 @@ from django.core.exceptions import FieldError from django.db.models import Q -from ..utils.scoping import ScopeType +from ..utils.scoping import Scoping, ScopeType from specifyweb.specify.models import ( Geography, Geologictimeperiod, @@ -13,7 +13,8 @@ Taxon, Storage, Attachment, - Tectonicunit + Tectonicunit, + Accession ) CONCRETE_HIERARCHY = ["collection", "discipline", "division", "institution"] @@ -24,6 +25,7 @@ class HierarchyException(Exception): pass +# REFACTOR: Using Scoping here where possible def filter_by_collection(queryset, collection, strict=True): if queryset.model is Attachment: return queryset.filter( @@ -38,6 +40,13 @@ def filter_by_collection(queryset, collection, strict=True): ) ) + if queryset.model is Accession: + scope = Scoping.scope_type_from_class(Accession) + filters = ({"division": collection.discipline.division} + if scope == ScopeType.DIVISION + else {"division__institution": collection.discipline.division.institution}) + return queryset.filter(**filters) + if queryset.model in (Geography, Geologictimeperiod, Lithostrat, Tectonicunit): return queryset.filter(definition__disciplines=collection.discipline) From efa93966773c53b25950fb329b83f9dcc368b2c5 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Tue, 3 Feb 2026 09:26:14 -0600 Subject: [PATCH 36/38] fix: respect regexp when reading highest stored value --- specifyweb/specify/utils/autonumbering.py | 54 ++++++++++++----------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/specifyweb/specify/utils/autonumbering.py b/specifyweb/specify/utils/autonumbering.py index aeefda37c37..6e2ae871f66 100644 --- a/specifyweb/specify/utils/autonumbering.py +++ b/specifyweb/specify/utils/autonumbering.py @@ -2,6 +2,8 @@ Autonumbering logic """ +import re + from collections import defaultdict from django.db.models import Value, Case, When @@ -44,13 +46,9 @@ def __init__(self): # We use Redis for IPC, to maintain the current "highest" autonumbering # value for each table + field self.redis = RedisConnection(decode_responses=True) - # Before the records are created within a transaction, they're stored - # locally within this dictonary - # The whole dictonary can be committed to Redis via commit_highest - # The key hierarchy is generally: - # table -> field -> scope -> scope_id = "highest value" - self.highest_in_flight: MutableMapping[str, MutableMapping[str, MutableMapping[str, MutableMapping[int, str]]]] = defaultdict( - lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(str)))) + # Before the records are created within a transaction, and committed to + # Redis, they're stored locally within this dictonary + self.highest_in_flight: dict[str, str] = dict() def __exit__(self, exc_type, exc_val, exc_tb): super().__exit__(exc_type, exc_val, exc_tb) @@ -58,10 +56,11 @@ def __exit__(self, exc_type, exc_val, exc_tb): def highest_stored_value(self, table_name: str, field_name: str, + auto_num_regex: str, scope_name: str, scope_id: int) -> str | None: key_name = self.autonumbering_redis_key( - table_name, field_name, scope_name, scope_id) + table_name, field_name, auto_num_regex, scope_name, scope_id) highest = RedisString(self.redis).get(key_name) if isinstance(highest, bytes): return highest.decode() @@ -72,40 +71,34 @@ def highest_stored_value(self, def cache_highest(self, table_name: str, field_name: str, + auto_num_regex: str, scope_name: str, scope_id: int, value: str): - self.highest_in_flight[table_name.lower( - )][field_name.lower()][scope_name][scope_id] = value + key_name = self.autonumbering_redis_key(table_name, field_name, auto_num_regex, scope_name, scope_id) + self.highest_in_flight[key_name] = value def commit_highest(self): - for table_name, tables in self.highest_in_flight.items(): - for field_name, fields in tables.items(): - for scope_type, scope_ids in fields.items(): - for scope_id, value in scope_ids.items(): - self.set_highest_value( - table_name, field_name, scope_type, scope_id, value) + for key_name, value in self.highest_in_flight.items(): + self.set_highest_value(key_name, value) self.highest_in_flight.clear() def set_highest_value(self, - table_name: str, - field_name: str, - scope_name: str, - scope_id: int, + key_name: str, value: str, time_to_live: int = 5): - key_name = self.autonumbering_redis_key( - table_name, field_name, scope_name, scope_id) RedisString(self.redis).set(key_name, value, time_to_live, override_existing=True) def autonumbering_redis_key(self, table_name: str, field_name: str, + auto_num_regex: str, scope_name: str, scope_id: int): return self.lock_name(table_name, field_name, + auto_num_regex, "highest", scope_name, str(scope_id)) @@ -139,11 +132,17 @@ def highest_autonumbering_value( scope_type = Scoping.scope_type_from_class(model) hierarchy_model = Scoping.get_hierarchy_model(collection, scope_type) + with_year = formatter.fillin_year(values) + auto_number_regex = formatter.autonumber_regexp(with_year) + stored_highest_value = (lock_dispatcher.highest_stored_value( - model._meta.db_table, field_name, scope_type.name, hierarchy_model.id) + model._meta.db_table, + field_name, + auto_number_regex, + scope_type.name, + hierarchy_model.id) if lock_dispatcher is not None else None) - with_year = formatter.fillin_year(values) largest_in_database = formatter._autonumber_queryset(collection, model, field_name, with_year).annotate( greater_than_stored=Case( When(**{field_name + '__gt': stored_highest_value}, @@ -169,7 +168,12 @@ def highest_autonumbering_value( if lock_dispatcher is not None and lock_dispatcher.in_context: lock_dispatcher.cache_highest( - model._meta.db_table, field_name, scope_type.name, hierarchy_model.id, highest) + model._meta.db_table, + field_name, + auto_number_regex, + scope_type.name, + hierarchy_model.id, + highest) return highest From cdf0bd16226a36593ff5981c036abcff7a745b70 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 5 Feb 2026 19:22:37 -0600 Subject: [PATCH 37/38] fix: use app and label name instead of object reference to determine scope model --- specifyweb/specify/utils/scoping.py | 30 ++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/specifyweb/specify/utils/scoping.py b/specifyweb/specify/utils/scoping.py index 458e8217a79..71eebc5c709 100644 --- a/specifyweb/specify/utils/scoping.py +++ b/specifyweb/specify/utils/scoping.py @@ -16,17 +16,25 @@ class ScopeType(Enum): @staticmethod def from_model(obj) -> "ScopeType": - clazz = obj.__class__ - - if clazz is models.Institution: - return ScopeType.INSTITUTION - if clazz is models.Division: - return ScopeType.DIVISION - if clazz is models.Discipline: - return ScopeType.DISCIPLINE - if clazz is models.Collection: - return ScopeType.COLLECTION - raise TypeError(f"{clazz.__name__} is not a hierarchy table") + app_and_model_name = obj._meta.label_lower + + # We can't directly use `obj.__class__ is SomeScopeModel` here because + # that will break historical fake models during migrations + # Using the app and model name means this will work in both migration + # and normal runtimes + # See https://docs.djangoproject.com/en/6.0/topics/migrations/#historical-models + # for more information about Django's Histroical models + mapping = { + 'specify.institution': ScopeType.INSTITUTION, + 'specify.division': ScopeType.DIVISION, + 'specify.discipline': ScopeType.DISCIPLINE, + 'specify.collection': ScopeType.COLLECTION + } + + scope_type = mapping.get(app_and_model_name, None) + if scope_type is None: + raise TypeError(f"{app_and_model_name} is not a hierarchy table") + return scope_type def __gt__(self, other): if not isinstance(other, ScopeType): From 997724e240963aa5461c91f554ebc7699ed707d4 Mon Sep 17 00:00:00 2001 From: melton-jason Date: Thu, 5 Feb 2026 19:38:43 -0600 Subject: [PATCH 38/38] chore: remove unused imports --- .../components/InitialContext/systemInfo.ts | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts b/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts index 282d49e6eba..da17d142718 100644 --- a/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts +++ b/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts @@ -23,29 +23,8 @@ type SystemInfo = { readonly discipline_type: string; }; -type StatsCounts = { - readonly Collectionobject: number; - readonly Collection: number; - readonly Specifyuser: number; -}; - let systemInfo: SystemInfo; -function buildStatsLambdaUrl(base: string | null | undefined): string | null { - if (!base) return null; - let u = base.trim(); - - if (!/^https?:\/\//i.test(u)) u = `https://${u}`; - - const hasRoute = /\/(prod|default)\/[^\s/]+/.test(u); - if (!hasRoute) { - const stage = 'prod'; - const route = 'AggrgatedSp7Stats'; - u = `${u.replace(/\/$/, '') }/${stage}/${route}`; - } - return u; -} - export const fetchContext = load( '/context/system_info.json', 'application/json'