From 2dd3964d9f116aff6a0f8a4a73e9650a37d08eae Mon Sep 17 00:00:00 2001 From: shaohuzhang1 Date: Tue, 26 May 2026 18:29:39 +0800 Subject: [PATCH] feat: Add homepage trend export related interface --- apps/common/utils/common.py | 21 ++- apps/homepage/api/home_page_api.py | 37 ++++ apps/homepage/serializers/homepage.py | 260 ++++++++++++++++++-------- apps/homepage/urls.py | 5 +- apps/homepage/views/homepage.py | 67 ++++++- 5 files changed, 303 insertions(+), 87 deletions(-) diff --git a/apps/common/utils/common.py b/apps/common/utils/common.py index 5c6e17b35bb..13c4e482e94 100644 --- a/apps/common/utils/common.py +++ b/apps/common/utils/common.py @@ -6,6 +6,7 @@ @date:2025/4/14 18:23 @desc: """ +import datetime import hashlib import io import json @@ -17,13 +18,15 @@ import uuid from functools import reduce from typing import List, Dict - +import pytz from django.contrib.auth.hashers import check_password, make_password from django.core.files.uploadedfile import InMemoryUploadedFile from django.db.models import QuerySet from django.utils.translation import gettext as _ +from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE from pydub import AudioSegment +from maxkb.settings import TIME_ZONE from ..database_model_manage.database_model_manage import DatabaseModelManage from ..exception.app_exception import AppApiException @@ -168,7 +171,8 @@ def markdown_to_plain_text(md: str) -> str: # 使用正则表达式去除所有 HTML 标签 text = re.sub(r'<[^>]+>', '', text) # 先移除特定媒体标签(优先级高于通用HTML标签移除) - text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>.*?(?:)?', '', text, flags=re.DOTALL | re.IGNORECASE) + text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>.*?(?:)?', '', text, + flags=re.DOTALL | re.IGNORECASE) text = re.sub(r']*>', '', text) # 匹配图片标签 # 去除多余的空白字符(包括换行符、制表符等) text = re.sub(r'\s+', ' ', text) @@ -409,6 +413,7 @@ def is_valid_uuid(uuid_string): except ValueError: return False + def common_convert_value(_type, value): if value is None: return None @@ -436,3 +441,15 @@ def common_convert_value(_type, value): return v raise Exception(_('type error')) return value + + +def reset_value(value): + if isinstance(value, str): + value = re.sub(ILLEGAL_CHARACTERS_RE, '', value) + if value.startswith(('=', '+', '-', '@')): + value = "'" + value + if isinstance(value, datetime.datetime): + eastern = pytz.timezone(TIME_ZONE) + c = datetime.timezone(eastern._utcoffset) + value = value.astimezone(c) + return value diff --git a/apps/homepage/api/home_page_api.py b/apps/homepage/api/home_page_api.py index 261acf63720..8afba490fa1 100644 --- a/apps/homepage/api/home_page_api.py +++ b/apps/homepage/api/home_page_api.py @@ -105,6 +105,43 @@ def get_parameters(): ] +class RankingBaseExportAPI(APIMixin): + + @staticmethod + def get_request(): + return None + + @staticmethod + def get_parameters(): + return [ + OpenApiParameter( + name="workspace_id", + type=OpenApiTypes.STR, + location=OpenApiParameter.PATH, + required=True, + description=_("Workspace ID"), + ), + OpenApiParameter( + name="start_time", + description="start Time", + type=OpenApiTypes.STR, + required=True, + ), + OpenApiParameter( + name="name", + description="Name", + type=OpenApiTypes.STR, + required=False, + ), + OpenApiParameter( + name="end_time", + description="end Time", + type=OpenApiTypes.STR, + required=True, + ), + ] + + class ApplicationTokensRankingAPI(RankingBaseAPI): @staticmethod diff --git a/apps/homepage/serializers/homepage.py b/apps/homepage/serializers/homepage.py index df4e8139ea2..26a2040ad35 100644 --- a/apps/homepage/serializers/homepage.py +++ b/apps/homepage/serializers/homepage.py @@ -10,12 +10,14 @@ import os from typing import List, Dict +import openpyxl from django.db import models from django.db.models import QuerySet, Count, Q, UUIDField, Sum, F, BigIntegerField, Value, ExpressionWrapper, \ - IntegerField, OuterRef, Subquery, JSONField -from django.db.models.functions import Cast, Coalesce + IntegerField, Window +from django.db.models.functions import Cast, Coalesce, RowNumber +from django.http import HttpResponse from django.utils import timezone -from django.utils.translation import gettext_lazy as _ +from django.utils.translation import gettext_lazy as _, gettext from rest_framework import serializers from application.models import Application, ApplicationChatUserStats, Chat, ChatRecord @@ -29,6 +31,10 @@ from system_manage.models import WorkspaceUserResourcePermission from tools.models import Tool, ToolType +_PERM_WITH_ROLE = ["VIEW", "MANAGE", "ROLE"] +_PERM_DEFAULT = ["VIEW", "MANAGE"] +TOKEN_EXPR = F("chatrecord__message_tokens") + F("chatrecord__answer_tokens") + def hasPermission(auth, permission): if 'USER' in auth.role_list: @@ -169,89 +175,55 @@ class ApplicationUserTokenRanking(serializers.Serializer): name = serializers.CharField(required=False, allow_null=True, allow_blank=True, label=_("User Name")) end_time = serializers.DateField(format='%Y-%m-%d', label=_("End time")) - def ranking(self, auth, current_page, page_size, with_valid=True): - if with_valid: - self.is_valid(raise_exception=True) - + def get_queryset(self, auth): workspace_id = self.validated_data.get("workspace_id") user_id = self.validated_data.get("user_id") start_time = get_format_time(self.data.get("start_time")) end_time = get_format_time(self.data.get("end_time")) name = self.data.get("name") - base_queryset = Chat.objects.filter( - is_deleted=False, - chat_user_id__isnull=False, - create_time__gte=start_time, - create_time__lte=end_time - ).exclude( - chat_user_id="" - ) - if name: - base_queryset = base_queryset.filter(asker__username__contains=name) - workspace_manage = is_workspace_manage(auth, workspace_id) - if workspace_manage: - base_queryset = base_queryset.filter( - application__workspace_id=workspace_id - ) - else: - permission_list = ( - ["VIEW", "MANAGE", "ROLE"] - if hasPermission(auth, "APPLICATION:READ") - else ["VIEW", "MANAGE"] - ) - - application_id_queryset = QuerySet(WorkspaceUserResourcePermission).filter( - workspace_id=workspace_id, - user_id=user_id, - auth_type="APPLICATION", - permission_list__overlap=permission_list, - ).annotate( - target_uuid=Cast("target", output_field=UUIDField()) - ).values_list( - "target_uuid", - flat=True + # ---- 基础查询 ---- + base_queryset = ( + Chat.objects.filter( + is_deleted=False, + chat_user_id__isnull=False, + create_time__gte=start_time, + create_time__lte=end_time, ) + .exclude(chat_user_id="") + ) - base_queryset = base_queryset.filter( - application_id__in=application_id_queryset - ) + if name: + base_queryset = base_queryset.filter(asker__username__contains=name) - token_expr = ExpressionWrapper( - F("chatrecord__message_tokens") + F("chatrecord__answer_tokens"), - output_field=BigIntegerField() + # ---- 权限过滤 ---- + base_queryset = self._apply_permission_filter( + base_queryset, auth, workspace_id, user_id ) - latest_asker_queryset = base_queryset.filter( - chat_user_id=OuterRef("chat_user_id"), - chat_user_type=OuterRef("chat_user_type"), - ).order_by( - "-create_time" - ).values( - "asker" - )[:1] - - queryset = base_queryset.values( - "chat_user_id", - "chat_user_type", - ).annotate( - total_tokens=Coalesce( - Sum(token_expr), - Value(0), - output_field=BigIntegerField() - ), - chat_record_count=Count( - "chatrecord__id", - distinct=True - ), - asker=Subquery( - latest_asker_queryset, - output_field=JSONField() + # ---- 窗口函数:一次查询拿到每个用户最新的 asker ---- + asker_map = self._build_asker_map(base_queryset) + + # ---- 聚合统计 ---- + queryset = ( + base_queryset + .values("chat_user_id", "chat_user_type") + .annotate( + total_tokens=Coalesce( + Sum(TOKEN_EXPR), + Value(0), + output_field=BigIntegerField(), + ), + chat_record_count=Count("chatrecord__id", distinct=True), ) - ).order_by( - "-total_tokens" + .order_by("-total_tokens") ) + return queryset, asker_map + def ranking(self, auth, current_page, page_size, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + queryset, asker_map = self.get_queryset(auth) return page_search( current_page, page_size, @@ -259,12 +231,87 @@ def ranking(self, auth, current_page, page_size, with_valid=True): lambda item: { "chat_user_id": item["chat_user_id"], "chat_user_type": item["chat_user_type"], - "asker": item["asker"], + "asker": asker_map.get( + (item["chat_user_id"], item["chat_user_type"]) + ), "total_tokens": item["total_tokens"], "chat_record_count": item["chat_record_count"], - } + }, ) + def export(self, auth, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + queryset, asker_map = self.get_queryset(auth) + workbook = openpyxl.Workbook(write_only=True) + worksheet = workbook.create_sheet(title='Sheet1') + headers = [gettext('ranking'), + gettext('User Name'), + gettext('Token consumption'), + gettext('number of questions'), + ] + worksheet.append(headers) + index = 0 + for item in queryset: + index += 1 + row = [index, asker_map.get( + (item["chat_user_id"], item["chat_user_type"]) + ).get('username'), item['total_tokens'], item['chat_record_count']] + worksheet.append(row) + response = HttpResponse(content_type="application/vnd.ms-excel") + response["Content-Disposition"] = f'attachment; filename="data.xlsx"' + workbook.save(response) + return response + + def _apply_permission_filter(self, queryset, auth, workspace_id, user_id): + """根据用户角色过滤可见的应用范围""" + if is_workspace_manage(auth, workspace_id): + return queryset.filter(application__workspace_id=workspace_id) + + permission_list = ( + _PERM_WITH_ROLE + if hasPermission(auth, "APPLICATION:READ") + else _PERM_DEFAULT + ) + + allowed_app_ids = ( + QuerySet(WorkspaceUserResourcePermission) + .filter( + workspace_id=workspace_id, + user_id=user_id, + auth_type="APPLICATION", + permission_list__overlap=permission_list, + ) + .annotate(target_uuid=Cast("target", output_field=UUIDField())) + .values_list("target_uuid", flat=True) + ) + + return queryset.filter(application_id__in=allowed_app_ids) + + @staticmethod + def _build_asker_map(base_queryset): + """ + 用窗口函数一次查询拿到每个 (chat_user_id, chat_user_type) 最新的 asker, + 替代原来每行一次的 Subquery。 + """ + latest_rows = ( + base_queryset + .annotate( + _rn=Window( + expression=RowNumber(), + partition_by=[F("chat_user_id"), F("chat_user_type")], + order_by=F("create_time").desc(), + ) + ) + .filter(_rn=1) + .values("chat_user_id", "chat_user_type", "asker") + ) + + return { + (row["chat_user_id"], row["chat_user_type"]): row["asker"] + for row in latest_rows + } + class ApplicationQuestionRanking(serializers.Serializer): workspace_id = serializers.CharField(required=False, label=_('Workspace ID')) user_id = serializers.UUIDField(required=True, label=_("User ID")) @@ -272,10 +319,7 @@ class ApplicationQuestionRanking(serializers.Serializer): start_time = serializers.DateField(format='%Y-%m-%d', label=_("Start time")) end_time = serializers.DateField(format='%Y-%m-%d', label=_("End time")) - def ranking(self, auth, current_page, page_size, with_valid=True): - if with_valid: - self.is_valid(raise_exception=True) - + def get_queryset(self, auth): workspace_id = self.validated_data.get("workspace_id") user_id = self.validated_data.get("user_id") queryset = Application.objects.filter(workspace_id=workspace_id) @@ -309,7 +353,7 @@ def ranking(self, auth, current_page, page_size, with_valid=True): .values_list("target_uuid", flat=True) ) - queryset = queryset.annotate( + return queryset.annotate( # 问题数 / 对话轮次数量 chat_record_count_total=Coalesce( Sum( @@ -334,6 +378,10 @@ def ranking(self, auth, current_page, page_size, with_valid=True): "-chat_record_count_total" ) + def ranking(self, auth, current_page, page_size, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + queryset = self.get_queryset(auth) return page_search( current_page, page_size, @@ -346,6 +394,28 @@ def ranking(self, auth, current_page, page_size, with_valid=True): }, ) + def export(self, auth, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + queryset = self.get_queryset(auth) + workbook = openpyxl.Workbook(write_only=True) + worksheet = workbook.create_sheet(title='Sheet1') + headers = [gettext('ranking'), + gettext('Application Name'), + gettext('number of questions'), + gettext('active users') + ] + worksheet.append(headers) + index = 0 + for item in queryset: + index += 1 + row = [index, item.name, item.chat_record_count_total, item.chat_user_count] + worksheet.append(row) + response = HttpResponse(content_type="application/vnd.ms-excel") + response["Content-Disposition"] = f'attachment; filename="data.xlsx"' + workbook.save(response) + return response + class ApplicationTokensRanking(serializers.Serializer): workspace_id = serializers.CharField(required=False, label=_('Workspace ID')) user_id = serializers.UUIDField(required=True, label=_("User ID")) @@ -353,9 +423,7 @@ class ApplicationTokensRanking(serializers.Serializer): start_time = serializers.DateField(format='%Y-%m-%d', label=_("Start time")) end_time = serializers.DateField(format='%Y-%m-%d', label=_("End time")) - def ranking(self, auth, current_page, page_size, with_valid=True): - if with_valid: - self.is_valid(raise_exception=True) + def get_queryset(self, auth): start_time = get_format_time(self.data.get('start_time')) end_time = get_format_time(self.data.get('end_time')) name = self.data.get("name") @@ -395,7 +463,7 @@ def ranking(self, auth, current_page, page_size, with_valid=True): .values_list("target_uuid", flat=True) ) - queryset = queryset.annotate( + return queryset.annotate( total_tokens=Coalesce( Sum( token_expr, @@ -411,6 +479,10 @@ def ranking(self, auth, current_page, page_size, with_valid=True): ) ).order_by("-total_tokens") + def ranking(self, auth, current_page, page_size, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + queryset = self.get_queryset(auth) return page_search( current_page, page_size, @@ -423,6 +495,28 @@ def ranking(self, auth, current_page, page_size, with_valid=True): } ) + def export(self, auth, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + queryset = self.get_queryset(auth) + workbook = openpyxl.Workbook(write_only=True) + worksheet = workbook.create_sheet(title='Sheet1') + headers = [gettext('ranking'), + gettext('Application Name'), + gettext('Token consumption'), + gettext('number of questions') + ] + worksheet.append(headers) + index = 0 + for item in queryset: + index += 1 + row = [index, item.name, item.total_tokens, item.chat_record_count_total] + worksheet.append(row) + response = HttpResponse(content_type="application/vnd.ms-excel") + response["Content-Disposition"] = f'attachment; filename="data.xlsx"' + workbook.save(response) + return response + class ApplicationMonitoring(serializers.Serializer): workspace_id = serializers.CharField(required=False, label=_('Workspace ID')) user_id = serializers.UUIDField(required=True, label=_("User ID")) diff --git a/apps/homepage/urls.py b/apps/homepage/urls.py index f8dca911243..1c5a51ec7fa 100644 --- a/apps/homepage/urls.py +++ b/apps/homepage/urls.py @@ -15,5 +15,8 @@ path("workspace//homepage/tool/aggregation",views.HomePageAPI.ToolAggregation.as_view()), path("workspace//homepage/model/aggregation",views.HomePageAPI.ModelAggregation.as_view()), path("workspace//homepage/tokens/aggregation",views.HomePageAPI.TokensAggregation.as_view()), - path("workspace//homepage/chat_record/aggregation",views.HomePageAPI.ChatRecordAggregation.as_view()) + path("workspace//homepage/chat_record/aggregation",views.HomePageAPI.ChatRecordAggregation.as_view()), + path("workspace//homepage/question_ranking/export",views.HomePageAPI.ApplicationQuestionRankingExport.as_view()), + path("workspace//homepage/tokens_ranking/export",views.HomePageAPI.ApplicationTokensRankingExport.as_view()), + path("workspace//homepage/user_tokens_ranking/export",views.HomePageAPI.UserTokensRankingExport.as_view()) ] diff --git a/apps/homepage/views/homepage.py b/apps/homepage/views/homepage.py index 04350e409ca..1dea5cf8d47 100644 --- a/apps/homepage/views/homepage.py +++ b/apps/homepage/views/homepage.py @@ -15,7 +15,7 @@ from common.auth import TokenAuth from homepage.api.home_page_api import ApplicationTokensRankingAPI, ApplicationQuestionRankingAPI, UserTokensRankingAPI, \ ApplicationAggregationAPI, KnowledgeAggregationAPI, ToolAggregationAPI, ModelAggregationAPI, \ - ApplicationMonitoringAPI, RankingBaseAPI, TokensAggregationAPI + ApplicationMonitoringAPI, RankingBaseAPI, TokensAggregationAPI, RankingBaseExportAPI from homepage.serializers.homepage import HomePageSerializer from django.utils.translation import gettext_lazy as _ @@ -67,6 +67,28 @@ def get(self, request: Request, workspace_id: str): 'end_time')}).aggregation( request.auth)) + class ApplicationTokensRankingExport(APIView): + authentication_classes = [TokenAuth] + + @extend_schema( + methods=["GET"], + description=_("Top applications by token consumption export"), + summary=_("Top applications by token consumption export"), + operation_id="homepage_application_tokens_ranking_export", + parameters=RankingBaseExportAPI.get_parameters(), + responses=RankingBaseExportAPI.get_response(), + tags=[_("Home page")], + ) + def get(self, request: Request, workspace_id: str): + return HomePageSerializer.ApplicationTokensRanking( + data={'user_id': request.user.id, 'workspace_id': workspace_id, + 'start_time': request.query_params.get( + 'start_time'), + 'end_time': request.query_params.get( + 'end_time'), + "name": request.query_params.get("name") + }).export(request.auth) + class ApplicationTokensRanking(APIView): authentication_classes = [TokenAuth] @@ -89,6 +111,28 @@ def get(self, request: Request, workspace_id: str, current_page: int, page_size: "name": request.query_params.get("name") }).ranking(request.auth, current_page, page_size)) + class ApplicationQuestionRankingExport(APIView): + authentication_classes = [TokenAuth] + + @extend_schema( + methods=["GET"], + description=_("Top applications by question count export"), + summary=_("Top applications by question count export"), + operation_id="homepage_application_question_ranking_export", + parameters=RankingBaseExportAPI.get_parameters(), + responses=RankingBaseExportAPI.get_response(), + tags=[_("Home page")], + ) + def get(self, request: Request, workspace_id: str): + return HomePageSerializer.ApplicationQuestionRanking( + data={'user_id': request.user.id, 'workspace_id': workspace_id, + 'start_time': request.query_params.get( + 'start_time'), + 'end_time': request.query_params.get( + 'end_time'), + "name": request.query_params.get("name") + }).export(request.auth) + class ApplicationQuestionRanking(APIView): authentication_classes = [TokenAuth] @@ -111,6 +155,27 @@ def get(self, request: Request, workspace_id: str, current_page: int, page_size: "name": request.query_params.get("name") }).ranking(request.auth, current_page, page_size)) + class UserTokensRankingExport(APIView): + authentication_classes = [TokenAuth] + + @extend_schema( + methods=["GET"], + description=_("Top users by token consumption export"), + summary=_("Top users by token consumption export"), + operation_id="homepage_user_tokens_ranking_export", + parameters=RankingBaseExportAPI.get_parameters(), + responses=RankingBaseExportAPI.get_response(), + tags=[_("Home page")], + ) + def get(self, request: Request, workspace_id: str): + return HomePageSerializer.ApplicationUserTokenRanking( + data={'user_id': request.user.id, 'workspace_id': workspace_id, + 'start_time': request.query_params.get( + 'start_time'), + 'end_time': request.query_params.get( + 'end_time'), + "name": request.query_params.get("name")}).export(request.auth) + class UserTokensRanking(APIView): authentication_classes = [TokenAuth]