diff --git a/apps/common/utils/common.py b/apps/common/utils/common.py
index 5c6e17b35bb..13c4e482e94 100644
--- a/apps/common/utils/common.py
+++ b/apps/common/utils/common.py
@@ -6,6 +6,7 @@
@date:2025/4/14 18:23
@desc:
"""
+import datetime
import hashlib
import io
import json
@@ -17,13 +18,15 @@
import uuid
from functools import reduce
from typing import List, Dict
-
+import pytz
from django.contrib.auth.hashers import check_password, make_password
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.db.models import QuerySet
from django.utils.translation import gettext as _
+from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
from pydub import AudioSegment
+from maxkb.settings import TIME_ZONE
from ..database_model_manage.database_model_manage import DatabaseModelManage
from ..exception.app_exception import AppApiException
@@ -168,7 +171,8 @@ def markdown_to_plain_text(md: str) -> str:
# 使用正则表达式去除所有 HTML 标签
text = re.sub(r'<[^>]+>', '', text)
# 先移除特定媒体标签(优先级高于通用HTML标签移除)
- text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>.*?(?:(?:audio|video)>)?', '', text, flags=re.DOTALL | re.IGNORECASE)
+ text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>.*?(?:(?:audio|video)>)?', '', text,
+ flags=re.DOTALL | re.IGNORECASE)
text = re.sub(r'
]*>', '', text) # 匹配图片标签
# 去除多余的空白字符(包括换行符、制表符等)
text = re.sub(r'\s+', ' ', text)
@@ -409,6 +413,7 @@ def is_valid_uuid(uuid_string):
except ValueError:
return False
+
def common_convert_value(_type, value):
if value is None:
return None
@@ -436,3 +441,15 @@ def common_convert_value(_type, value):
return v
raise Exception(_('type error'))
return value
+
+
+def reset_value(value):
+    """
+    Normalise a single cell value before it is written to a spreadsheet.
+
+    - ``str``: characters matching openpyxl's ``ILLEGAL_CHARACTERS_RE`` are
+      removed, and a value starting with ``=``, ``+``, ``-`` or ``@`` gets a
+      leading apostrophe so spreadsheet applications treat it as text
+      instead of evaluating it as a formula.
+    - ``datetime.datetime``: converted to the zone named by ``TIME_ZONE``.
+    - anything else is returned unchanged.
+
+    :param value: raw cell value
+    :return: the sanitised / converted value
+    """
+    if isinstance(value, str):
+        value = ILLEGAL_CHARACTERS_RE.sub('', value)
+        if value.startswith(('=', '+', '-', '@')):
+            value = "'" + value
+    elif isinstance(value, datetime.datetime):
+        # Let pytz resolve the offset that applies at this particular instant.
+        # The private ``_utcoffset`` attribute of an un-localised pytz zone
+        # holds the zone's first historical offset (LMT for most zones, e.g.
+        # +08:06 for Asia/Shanghai) and ignores DST, so it must not be used
+        # to build a fixed-offset timezone.
+        value = value.astimezone(pytz.timezone(TIME_ZONE))
+    return value
diff --git a/apps/homepage/api/home_page_api.py b/apps/homepage/api/home_page_api.py
index 261acf63720..8afba490fa1 100644
--- a/apps/homepage/api/home_page_api.py
+++ b/apps/homepage/api/home_page_api.py
@@ -105,6 +105,43 @@ def get_parameters():
]
+class RankingBaseExportAPI(APIMixin):
+    # OpenAPI parameter description used by the ranking export views.
+
+    @staticmethod
+    def get_request():
+        # No request schema is declared for the export endpoints.
+        return None
+
+    @staticmethod
+    def get_parameters():
+        # Parameters are listed in the order: workspace_id, start_time, name, end_time.
+        workspace_param = OpenApiParameter(
+            name="workspace_id",
+            type=OpenApiTypes.STR,
+            location=OpenApiParameter.PATH,
+            required=True,
+            description=_("Workspace ID"),
+        )
+        start_param = OpenApiParameter(
+            name="start_time",
+            description="start Time",
+            type=OpenApiTypes.STR,
+            required=True,
+        )
+        name_param = OpenApiParameter(
+            name="name",
+            description="Name",
+            type=OpenApiTypes.STR,
+            required=False,
+        )
+        end_param = OpenApiParameter(
+            name="end_time",
+            description="end Time",
+            type=OpenApiTypes.STR,
+            required=True,
+        )
+        return [workspace_param, start_param, name_param, end_param]
+
+
class ApplicationTokensRankingAPI(RankingBaseAPI):
@staticmethod
diff --git a/apps/homepage/serializers/homepage.py b/apps/homepage/serializers/homepage.py
index df4e8139ea2..26a2040ad35 100644
--- a/apps/homepage/serializers/homepage.py
+++ b/apps/homepage/serializers/homepage.py
@@ -10,12 +10,14 @@
import os
from typing import List, Dict
+import openpyxl
from django.db import models
from django.db.models import QuerySet, Count, Q, UUIDField, Sum, F, BigIntegerField, Value, ExpressionWrapper, \
- IntegerField, OuterRef, Subquery, JSONField
-from django.db.models.functions import Cast, Coalesce
+ IntegerField, Window
+from django.db.models.functions import Cast, Coalesce, RowNumber
+from django.http import HttpResponse
from django.utils import timezone
-from django.utils.translation import gettext_lazy as _
+from django.utils.translation import gettext_lazy as _, gettext
from rest_framework import serializers
from application.models import Application, ApplicationChatUserStats, Chat, ChatRecord
@@ -29,6 +31,10 @@
from system_manage.models import WorkspaceUserResourcePermission
from tools.models import Tool, ToolType
+# Permission values matched (via overlap) against a resource grant when the
+# caller has APPLICATION:READ.
+_PERM_WITH_ROLE = ["VIEW", "MANAGE", "ROLE"]
+# Permission values matched when the caller lacks APPLICATION:READ.
+_PERM_DEFAULT = ["VIEW", "MANAGE"]
+# Tokens of one chat record: message tokens plus answer tokens, reached
+# through the chatrecord relation.
+TOKEN_EXPR = F("chatrecord__message_tokens") + F("chatrecord__answer_tokens")
+
def hasPermission(auth, permission):
if 'USER' in auth.role_list:
@@ -169,89 +175,55 @@ class ApplicationUserTokenRanking(serializers.Serializer):
name = serializers.CharField(required=False, allow_null=True, allow_blank=True, label=_("User Name"))
end_time = serializers.DateField(format='%Y-%m-%d', label=_("End time"))
- def ranking(self, auth, current_page, page_size, with_valid=True):
- if with_valid:
- self.is_valid(raise_exception=True)
-
+ def get_queryset(self, auth):
workspace_id = self.validated_data.get("workspace_id")
user_id = self.validated_data.get("user_id")
start_time = get_format_time(self.data.get("start_time"))
end_time = get_format_time(self.data.get("end_time"))
name = self.data.get("name")
- base_queryset = Chat.objects.filter(
- is_deleted=False,
- chat_user_id__isnull=False,
- create_time__gte=start_time,
- create_time__lte=end_time
- ).exclude(
- chat_user_id=""
- )
- if name:
- base_queryset = base_queryset.filter(asker__username__contains=name)
- workspace_manage = is_workspace_manage(auth, workspace_id)
- if workspace_manage:
- base_queryset = base_queryset.filter(
- application__workspace_id=workspace_id
- )
- else:
- permission_list = (
- ["VIEW", "MANAGE", "ROLE"]
- if hasPermission(auth, "APPLICATION:READ")
- else ["VIEW", "MANAGE"]
- )
-
- application_id_queryset = QuerySet(WorkspaceUserResourcePermission).filter(
- workspace_id=workspace_id,
- user_id=user_id,
- auth_type="APPLICATION",
- permission_list__overlap=permission_list,
- ).annotate(
- target_uuid=Cast("target", output_field=UUIDField())
- ).values_list(
- "target_uuid",
- flat=True
+ # ---- 基础查询 ----
+ base_queryset = (
+ Chat.objects.filter(
+ is_deleted=False,
+ chat_user_id__isnull=False,
+ create_time__gte=start_time,
+ create_time__lte=end_time,
)
+ .exclude(chat_user_id="")
+ )
- base_queryset = base_queryset.filter(
- application_id__in=application_id_queryset
- )
+ if name:
+ base_queryset = base_queryset.filter(asker__username__contains=name)
- token_expr = ExpressionWrapper(
- F("chatrecord__message_tokens") + F("chatrecord__answer_tokens"),
- output_field=BigIntegerField()
+ # ---- 权限过滤 ----
+ base_queryset = self._apply_permission_filter(
+ base_queryset, auth, workspace_id, user_id
)
- latest_asker_queryset = base_queryset.filter(
- chat_user_id=OuterRef("chat_user_id"),
- chat_user_type=OuterRef("chat_user_type"),
- ).order_by(
- "-create_time"
- ).values(
- "asker"
- )[:1]
-
- queryset = base_queryset.values(
- "chat_user_id",
- "chat_user_type",
- ).annotate(
- total_tokens=Coalesce(
- Sum(token_expr),
- Value(0),
- output_field=BigIntegerField()
- ),
- chat_record_count=Count(
- "chatrecord__id",
- distinct=True
- ),
- asker=Subquery(
- latest_asker_queryset,
- output_field=JSONField()
+ # ---- 窗口函数:一次查询拿到每个用户最新的 asker ----
+ asker_map = self._build_asker_map(base_queryset)
+
+ # ---- 聚合统计 ----
+ queryset = (
+ base_queryset
+ .values("chat_user_id", "chat_user_type")
+ .annotate(
+ total_tokens=Coalesce(
+ Sum(TOKEN_EXPR),
+ Value(0),
+ output_field=BigIntegerField(),
+ ),
+ chat_record_count=Count("chatrecord__id", distinct=True),
)
- ).order_by(
- "-total_tokens"
+ .order_by("-total_tokens")
)
+ return queryset, asker_map
+ def ranking(self, auth, current_page, page_size, with_valid=True):
+ if with_valid:
+ self.is_valid(raise_exception=True)
+ queryset, asker_map = self.get_queryset(auth)
return page_search(
current_page,
page_size,
@@ -259,12 +231,87 @@ def ranking(self, auth, current_page, page_size, with_valid=True):
lambda item: {
"chat_user_id": item["chat_user_id"],
"chat_user_type": item["chat_user_type"],
- "asker": item["asker"],
+ "asker": asker_map.get(
+ (item["chat_user_id"], item["chat_user_type"])
+ ),
"total_tokens": item["total_tokens"],
"chat_record_count": item["chat_record_count"],
- }
+ },
)
+    def export(self, auth, with_valid=True):
+        """
+        Export the user token ranking as an xlsx attachment.
+
+        :param auth: authentication object passed through to ``get_queryset``
+        :param with_valid: validate the serializer input first when true
+        :return: ``HttpResponse`` carrying the workbook as ``data.xlsx``
+        """
+        # Imported locally so this module gains no new import-time dependency.
+        from common.utils.common import reset_value
+
+        if with_valid:
+            self.is_valid(raise_exception=True)
+        queryset, asker_map = self.get_queryset(auth)
+        workbook = openpyxl.Workbook(write_only=True)
+        worksheet = workbook.create_sheet(title='Sheet1')
+        worksheet.append([
+            gettext('ranking'),
+            gettext('User Name'),
+            gettext('Token consumption'),
+            gettext('number of questions'),
+        ])
+        for rank, item in enumerate(queryset, start=1):
+            asker = asker_map.get((item["chat_user_id"], item["chat_user_type"]))
+            # A missing or null asker must not abort the whole export.
+            username = asker.get('username') if isinstance(asker, dict) else None
+            worksheet.append([
+                rank,
+                # User names are user-controlled text: strip characters that
+                # openpyxl rejects and neutralise leading formula characters.
+                reset_value(username),
+                item['total_tokens'],
+                item['chat_record_count'],
+            ])
+        # NOTE(review): this MIME type is the legacy .xls one while the file
+        # name is .xlsx; kept unchanged here - confirm what clients expect.
+        response = HttpResponse(content_type="application/vnd.ms-excel")
+        response["Content-Disposition"] = 'attachment; filename="data.xlsx"'
+        workbook.save(response)
+        return response
+
+    def _apply_permission_filter(self, queryset, auth, workspace_id, user_id):
+        """
+        Narrow ``queryset`` to the applications visible to the caller.
+
+        When ``is_workspace_manage`` holds, every application of the workspace
+        is kept; otherwise only applications for which the user has a matching
+        resource permission entry are kept.
+        """
+        if not is_workspace_manage(auth, workspace_id):
+            if hasPermission(auth, "APPLICATION:READ"):
+                granted = _PERM_WITH_ROLE
+            else:
+                granted = _PERM_DEFAULT
+
+            permission_rows = QuerySet(WorkspaceUserResourcePermission).filter(
+                workspace_id=workspace_id,
+                user_id=user_id,
+                auth_type="APPLICATION",
+                permission_list__overlap=granted,
+            )
+            # ``target`` is cast to a UUID so it can be compared with application_id.
+            visible_ids = permission_rows.annotate(
+                target_uuid=Cast("target", output_field=UUIDField())
+            ).values_list("target_uuid", flat=True)
+            return queryset.filter(application_id__in=visible_ids)
+
+        return queryset.filter(application__workspace_id=workspace_id)
+
+    @staticmethod
+    def _build_asker_map(base_queryset):
+        """
+        Map every (chat_user_id, chat_user_type) pair to the ``asker`` of its
+        most recently created chat.
+
+        Rows are numbered per pair by descending ``create_time`` with a window
+        function and only row number 1 is kept, so a single query yields the
+        whole mapping.
+        """
+        row_number = Window(
+            expression=RowNumber(),
+            partition_by=[F("chat_user_id"), F("chat_user_type")],
+            order_by=F("create_time").desc(),
+        )
+        newest = base_queryset.annotate(_rn=row_number).filter(_rn=1)
+
+        asker_by_user = {}
+        for record in newest.values("chat_user_id", "chat_user_type", "asker"):
+            user_key = (record["chat_user_id"], record["chat_user_type"])
+            asker_by_user[user_key] = record["asker"]
+        return asker_by_user
+
class ApplicationQuestionRanking(serializers.Serializer):
workspace_id = serializers.CharField(required=False, label=_('Workspace ID'))
user_id = serializers.UUIDField(required=True, label=_("User ID"))
@@ -272,10 +319,7 @@ class ApplicationQuestionRanking(serializers.Serializer):
start_time = serializers.DateField(format='%Y-%m-%d', label=_("Start time"))
end_time = serializers.DateField(format='%Y-%m-%d', label=_("End time"))
- def ranking(self, auth, current_page, page_size, with_valid=True):
- if with_valid:
- self.is_valid(raise_exception=True)
-
+ def get_queryset(self, auth):
workspace_id = self.validated_data.get("workspace_id")
user_id = self.validated_data.get("user_id")
queryset = Application.objects.filter(workspace_id=workspace_id)
@@ -309,7 +353,7 @@ def ranking(self, auth, current_page, page_size, with_valid=True):
.values_list("target_uuid", flat=True)
)
- queryset = queryset.annotate(
+ return queryset.annotate(
# 问题数 / 对话轮次数量
chat_record_count_total=Coalesce(
Sum(
@@ -334,6 +378,10 @@ def ranking(self, auth, current_page, page_size, with_valid=True):
"-chat_record_count_total"
)
+ def ranking(self, auth, current_page, page_size, with_valid=True):
+ if with_valid:
+ self.is_valid(raise_exception=True)
+ queryset = self.get_queryset(auth)
return page_search(
current_page,
page_size,
@@ -346,6 +394,28 @@ def ranking(self, auth, current_page, page_size, with_valid=True):
},
)
+    def export(self, auth, with_valid=True):
+        """
+        Export the application question ranking as an xlsx attachment.
+
+        :param auth: authentication object passed through to ``get_queryset``
+        :param with_valid: validate the serializer input first when true
+        :return: ``HttpResponse`` carrying the workbook as ``data.xlsx``
+        """
+        # Imported locally so this module gains no new import-time dependency.
+        from common.utils.common import reset_value
+
+        if with_valid:
+            self.is_valid(raise_exception=True)
+        queryset = self.get_queryset(auth)
+        workbook = openpyxl.Workbook(write_only=True)
+        worksheet = workbook.create_sheet(title='Sheet1')
+        worksheet.append([
+            gettext('ranking'),
+            gettext('Application Name'),
+            gettext('number of questions'),
+            gettext('active users'),
+        ])
+        for rank, item in enumerate(queryset, start=1):
+            worksheet.append([
+                rank,
+                # Application names are user-controlled text: strip characters
+                # openpyxl rejects and neutralise leading formula characters.
+                reset_value(item.name),
+                item.chat_record_count_total,
+                item.chat_user_count,
+            ])
+        response = HttpResponse(content_type="application/vnd.ms-excel")
+        response["Content-Disposition"] = 'attachment; filename="data.xlsx"'
+        workbook.save(response)
+        return response
+
class ApplicationTokensRanking(serializers.Serializer):
workspace_id = serializers.CharField(required=False, label=_('Workspace ID'))
user_id = serializers.UUIDField(required=True, label=_("User ID"))
@@ -353,9 +423,7 @@ class ApplicationTokensRanking(serializers.Serializer):
start_time = serializers.DateField(format='%Y-%m-%d', label=_("Start time"))
end_time = serializers.DateField(format='%Y-%m-%d', label=_("End time"))
- def ranking(self, auth, current_page, page_size, with_valid=True):
- if with_valid:
- self.is_valid(raise_exception=True)
+ def get_queryset(self, auth):
start_time = get_format_time(self.data.get('start_time'))
end_time = get_format_time(self.data.get('end_time'))
name = self.data.get("name")
@@ -395,7 +463,7 @@ def ranking(self, auth, current_page, page_size, with_valid=True):
.values_list("target_uuid", flat=True)
)
- queryset = queryset.annotate(
+ return queryset.annotate(
total_tokens=Coalesce(
Sum(
token_expr,
@@ -411,6 +479,10 @@ def ranking(self, auth, current_page, page_size, with_valid=True):
)
).order_by("-total_tokens")
+ def ranking(self, auth, current_page, page_size, with_valid=True):
+ if with_valid:
+ self.is_valid(raise_exception=True)
+ queryset = self.get_queryset(auth)
return page_search(
current_page,
page_size,
@@ -423,6 +495,28 @@ def ranking(self, auth, current_page, page_size, with_valid=True):
}
)
+    def export(self, auth, with_valid=True):
+        """
+        Export the application token ranking as an xlsx attachment.
+
+        :param auth: authentication object passed through to ``get_queryset``
+        :param with_valid: validate the serializer input first when true
+        :return: ``HttpResponse`` carrying the workbook as ``data.xlsx``
+        """
+        # Imported locally so this module gains no new import-time dependency.
+        from common.utils.common import reset_value
+
+        if with_valid:
+            self.is_valid(raise_exception=True)
+        queryset = self.get_queryset(auth)
+        workbook = openpyxl.Workbook(write_only=True)
+        worksheet = workbook.create_sheet(title='Sheet1')
+        worksheet.append([
+            gettext('ranking'),
+            gettext('Application Name'),
+            gettext('Token consumption'),
+            gettext('number of questions'),
+        ])
+        for rank, item in enumerate(queryset, start=1):
+            worksheet.append([
+                rank,
+                # Application names are user-controlled text: strip characters
+                # openpyxl rejects and neutralise leading formula characters.
+                reset_value(item.name),
+                item.total_tokens,
+                item.chat_record_count_total,
+            ])
+        response = HttpResponse(content_type="application/vnd.ms-excel")
+        response["Content-Disposition"] = 'attachment; filename="data.xlsx"'
+        workbook.save(response)
+        return response
+
class ApplicationMonitoring(serializers.Serializer):
workspace_id = serializers.CharField(required=False, label=_('Workspace ID'))
user_id = serializers.UUIDField(required=True, label=_("User ID"))
diff --git a/apps/homepage/urls.py b/apps/homepage/urls.py
index f8dca911243..1c5a51ec7fa 100644
--- a/apps/homepage/urls.py
+++ b/apps/homepage/urls.py
@@ -15,5 +15,8 @@
path("workspace//homepage/tool/aggregation",views.HomePageAPI.ToolAggregation.as_view()),
path("workspace//homepage/model/aggregation",views.HomePageAPI.ModelAggregation.as_view()),
path("workspace//homepage/tokens/aggregation",views.HomePageAPI.TokensAggregation.as_view()),
- path("workspace//homepage/chat_record/aggregation",views.HomePageAPI.ChatRecordAggregation.as_view())
+ path("workspace//homepage/chat_record/aggregation",views.HomePageAPI.ChatRecordAggregation.as_view()),
+ path("workspace//homepage/question_ranking/export",views.HomePageAPI.ApplicationQuestionRankingExport.as_view()),
+ path("workspace//homepage/tokens_ranking/export",views.HomePageAPI.ApplicationTokensRankingExport.as_view()),
+ path("workspace//homepage/user_tokens_ranking/export",views.HomePageAPI.UserTokensRankingExport.as_view())
]
diff --git a/apps/homepage/views/homepage.py b/apps/homepage/views/homepage.py
index 04350e409ca..1dea5cf8d47 100644
--- a/apps/homepage/views/homepage.py
+++ b/apps/homepage/views/homepage.py
@@ -15,7 +15,7 @@
from common.auth import TokenAuth
from homepage.api.home_page_api import ApplicationTokensRankingAPI, ApplicationQuestionRankingAPI, UserTokensRankingAPI, \
ApplicationAggregationAPI, KnowledgeAggregationAPI, ToolAggregationAPI, ModelAggregationAPI, \
- ApplicationMonitoringAPI, RankingBaseAPI, TokensAggregationAPI
+ ApplicationMonitoringAPI, RankingBaseAPI, TokensAggregationAPI, RankingBaseExportAPI
from homepage.serializers.homepage import HomePageSerializer
from django.utils.translation import gettext_lazy as _
@@ -67,6 +67,28 @@ def get(self, request: Request, workspace_id: str):
'end_time')}).aggregation(
request.auth))
+    class ApplicationTokensRankingExport(APIView):
+        # GET view returning the export response of the application token ranking.
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            methods=["GET"],
+            description=_("Top applications by token consumption export"),
+            summary=_("Top applications by token consumption export"),
+            operation_id="homepage_application_tokens_ranking_export",
+            parameters=RankingBaseExportAPI.get_parameters(),
+            responses=RankingBaseExportAPI.get_response(),
+            tags=[_("Home page")],
+        )
+        def get(self, request: Request, workspace_id: str):
+            # Serializer input comes from the authenticated user, the URL and
+            # the query string; the serializer builds the download response.
+            query = request.query_params
+            serializer = HomePageSerializer.ApplicationTokensRanking(data={
+                'user_id': request.user.id,
+                'workspace_id': workspace_id,
+                'start_time': query.get('start_time'),
+                'end_time': query.get('end_time'),
+                "name": query.get("name"),
+            })
+            return serializer.export(request.auth)
+
class ApplicationTokensRanking(APIView):
authentication_classes = [TokenAuth]
@@ -89,6 +111,28 @@ def get(self, request: Request, workspace_id: str, current_page: int, page_size:
"name": request.query_params.get("name")
}).ranking(request.auth, current_page, page_size))
+    class ApplicationQuestionRankingExport(APIView):
+        # GET view returning the export response of the application question ranking.
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            methods=["GET"],
+            description=_("Top applications by question count export"),
+            summary=_("Top applications by question count export"),
+            operation_id="homepage_application_question_ranking_export",
+            parameters=RankingBaseExportAPI.get_parameters(),
+            responses=RankingBaseExportAPI.get_response(),
+            tags=[_("Home page")],
+        )
+        def get(self, request: Request, workspace_id: str):
+            # Serializer input comes from the authenticated user, the URL and
+            # the query string; the serializer builds the download response.
+            query = request.query_params
+            serializer = HomePageSerializer.ApplicationQuestionRanking(data={
+                'user_id': request.user.id,
+                'workspace_id': workspace_id,
+                'start_time': query.get('start_time'),
+                'end_time': query.get('end_time'),
+                "name": query.get("name"),
+            })
+            return serializer.export(request.auth)
+
class ApplicationQuestionRanking(APIView):
authentication_classes = [TokenAuth]
@@ -111,6 +155,27 @@ def get(self, request: Request, workspace_id: str, current_page: int, page_size:
"name": request.query_params.get("name")
}).ranking(request.auth, current_page, page_size))
+    class UserTokensRankingExport(APIView):
+        # GET view returning the export response of the user token ranking.
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            methods=["GET"],
+            description=_("Top users by token consumption export"),
+            summary=_("Top users by token consumption export"),
+            operation_id="homepage_user_tokens_ranking_export",
+            parameters=RankingBaseExportAPI.get_parameters(),
+            responses=RankingBaseExportAPI.get_response(),
+            tags=[_("Home page")],
+        )
+        def get(self, request: Request, workspace_id: str):
+            # Serializer input comes from the authenticated user, the URL and
+            # the query string; the serializer builds the download response.
+            query = request.query_params
+            serializer = HomePageSerializer.ApplicationUserTokenRanking(data={
+                'user_id': request.user.id,
+                'workspace_id': workspace_id,
+                'start_time': query.get('start_time'),
+                'end_time': query.get('end_time'),
+                "name": query.get("name"),
+            })
+            return serializer.export(request.auth)
+
class UserTokensRanking(APIView):
authentication_classes = [TokenAuth]