diff --git a/dashscope/aigc/video_synthesis.py b/dashscope/aigc/video_synthesis.py index 008faec..b2ab58a 100644 --- a/dashscope/aigc/video_synthesis.py +++ b/dashscope/aigc/video_synthesis.py @@ -3,12 +3,19 @@ from typing import Any, Dict, Union, List +from concurrent.futures import ThreadPoolExecutor, as_completed + from dashscope.api_entities.dashscope_response import ( DashScopeAPIResponse, VideoSynthesisResponse, ) from dashscope.client.base_api import BaseAsyncApi, BaseAsyncAioApi -from dashscope.common.constants import PROMPT, REFERENCE_VIDEO_URLS +from dashscope.common.constants import ( + PROMPT, + REFERENCE_VIDEO_URLS, + REFERENCE_URLS, + MEDIA_URLS, +) from dashscope.common.utils import _get_task_group_and_task from dashscope.utils.oss_utils import check_and_upload_local @@ -34,6 +41,16 @@ class Models: wanx_2_1_kf2v_plus = "wanx2.1-kf2v-plus" wanx_kf2v = "wanx-kf2v" + class MediaType: + FIRST_FRAME = "first_frame" + LAST_FRAME = "last_frame" + REFERENCE_IMAGE = "reference_image" + REFERENCE_VIDEO = "reference_video" + REFERENCE_VOICE = "reference_voice" + VIDEO = "video" + FIRST_CLIP = "first_clip" + DRIVING_AUDIO = "driving_audio" + @classmethod def call( # type: ignore[override] cls, @@ -46,6 +63,8 @@ def call( # type: ignore[override] img_url: str = None, audio_url: str = None, reference_video_urls: List[str] = None, + reference_urls: List[str] = None, + reference_url: str = None, reference_video_description: List[str] = None, api_key: str = None, extra_input: Dict = None, @@ -55,6 +74,7 @@ def call( # type: ignore[override] tail_frame: str = None, first_frame_url: str = None, last_frame_url: str = None, + media: List[Dict] = None, **kwargs, ) -> VideoSynthesisResponse: """Call video synthesis service and get result. @@ -68,13 +88,18 @@ def call( # type: ignore[override] img_url (str): The input image url, Generate the URL of the image referenced by the video. 
# pylint: disable=line-too-long audio_url (str): The input audio url reference_video_urls (List[str]): list of character reference video file urls uploaded by the user # pylint: disable=line-too-long + reference_urls (List[str]): list of character reference file urls uploaded by the user # pylint: disable=line-too-long + reference_url (str): reference file url uploaded by the user # pylint: disable=line-too-long reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported # pylint: disable=line-too-long api_key (str, optional): The api api_key. Defaults to None. workspace (str): The dashscope workspace id. extra_input (Dict): The extra input parameters. task (str): The task of api, ref doc. + head_frame (str): The URL of the first frame image for generating the video. + tail_frame (str): The URL of the last frame image for generating the video. first_frame_url (str): The URL of the first frame image for generating the video. last_frame_url (str): The URL of the last frame image for generating the video. + media (list): media file list **kwargs: size(str, `optional`): The output video size(width*height). 
duration( @@ -95,6 +120,8 @@ def call( # type: ignore[override] img_url=img_url, audio_url=audio_url, reference_video_urls=reference_video_urls, + reference_urls=reference_urls, + reference_url=reference_url, reference_video_description=reference_video_description, api_key=api_key, extend_prompt=extend_prompt, @@ -107,6 +134,7 @@ def call( # type: ignore[override] tail_frame=tail_frame, first_frame_url=first_frame_url, last_frame_url=last_frame_url, + media=media, **kwargs, ) @@ -119,6 +147,8 @@ def _get_input( # pylint: disable=too-many-branches img_url: str = None, audio_url: str = None, reference_video_urls: List[str] = None, + reference_urls: List[str] = None, + reference_url: str = None, reference_video_description: List[str] = None, # """@deprecated, use prompt_extend in parameters """ extend_prompt: bool = True, @@ -132,6 +162,7 @@ def _get_input( # pylint: disable=too-many-branches tail_frame: str = None, first_frame_url: str = None, last_frame_url: str = None, + media: List[Dict] = None, **kwargs, ): inputs = {PROMPT: prompt, "extend_prompt": extend_prompt} @@ -147,117 +178,138 @@ def _get_input( # pylint: disable=too-many-branches has_upload = False upload_certificate = None - if img_url is not None and img_url: - ( - is_upload, - res_img_url, - upload_certificate, - ) = check_and_upload_local( - model, - img_url, - api_key, - upload_certificate, # type: ignore[arg-type] - ) - if is_upload: - has_upload = True - inputs["img_url"] = res_img_url - - if audio_url is not None and audio_url: - ( - is_upload, - res_audio_url, - upload_certificate, - ) = check_and_upload_local( - model, - audio_url, - api_key, - upload_certificate, # type: ignore[arg-type] - ) - if is_upload: - has_upload = True - inputs["audio_url"] = res_audio_url - - if head_frame is not None and head_frame: - ( - is_upload, - res_head_frame, - upload_certificate, - ) = check_and_upload_local( - model, - head_frame, - api_key, - upload_certificate, # type: ignore[arg-type] - ) - if is_upload: 
- has_upload = True - inputs["head_frame"] = res_head_frame - - if tail_frame is not None and tail_frame: - ( - is_upload, - res_tail_frame, - upload_certificate, - ) = check_and_upload_local( - model, - tail_frame, - api_key, - upload_certificate, # type: ignore[arg-type] - ) - if is_upload: - has_upload = True - inputs["tail_frame"] = res_tail_frame - - if first_frame_url is not None and first_frame_url: - ( - is_upload, - res_first_frame_url, - upload_certificate, - ) = check_and_upload_local( - model, - first_frame_url, - api_key, - upload_certificate, # type: ignore[arg-type] - ) - if is_upload: - has_upload = True - inputs["first_frame_url"] = res_first_frame_url - - if last_frame_url is not None and last_frame_url: - ( - is_upload, - res_last_frame_url, - upload_certificate, - ) = check_and_upload_local( + tasks: List[Dict] = [] + + single_params = { + "img_url": img_url, + "audio_url": audio_url, + "head_frame": head_frame, + "tail_frame": tail_frame, + "first_frame_url": first_frame_url, + "last_frame_url": last_frame_url, + "reference_url": reference_url, + } + + for key, url in single_params.items(): + if url is not None and url: + tasks.append( + { + "type": "single", + "key": key, + "url": url, + }, + ) + + if reference_video_urls: + for idx, url in enumerate(reference_video_urls): + if url: + tasks.append( + { + "type": "list_ref_video", + "index": idx, + "url": url, + }, + ) + + if reference_urls: + for idx, url in enumerate(reference_urls): + if url: + tasks.append( + { + "type": "list_ref_file", + "index": idx, + "url": url, + }, + ) + + if media: + for i, m_file in enumerate(media): + if isinstance(m_file, dict): + if m_file.get("url"): + tasks.append( + { + "type": "media", + "index": i, + "field": "url", + "url": m_file["url"], + }, + ) + if m_file.get("reference_voice"): + tasks.append( + { + "type": "media", + "index": i, + "field": "reference_voice", + "url": m_file["reference_voice"], + }, + ) + + def upload_worker(task_item, current_cert): 
+ url = task_item["url"] + is_up, res_url, cert = check_and_upload_local( model, - last_frame_url, + url, api_key, - upload_certificate, # type: ignore[arg-type] + current_cert, ) - if is_upload: - has_upload = True - inputs["last_frame_url"] = res_last_frame_url - - if ( - reference_video_urls is not None - and reference_video_urls - and len(reference_video_urls) > 0 - ): - new_videos = [] - for video in reference_video_urls: - ( - is_upload, - new_video, - upload_certificate, - ) = check_and_upload_local( - model, - video, - api_key, - upload_certificate, # type: ignore[arg-type] + return task_item, is_up, res_url, cert + + results = [] + + with ThreadPoolExecutor(max_workers=10) as executor: + futures = [ + executor.submit(upload_worker, t, upload_certificate) + for t in tasks + ] + + for future in as_completed(futures): + task_item, is_up, res_url, cert = future.result() + + results.append( + { + "task": task_item, + "is_upload": is_up, + "new_url": res_url, + "cert": cert, + }, ) - if is_upload: - has_upload = True - new_videos.append(new_video) - inputs[REFERENCE_VIDEO_URLS] = new_videos + + for res in results: + if res["is_upload"]: + has_upload = True + + new_url = res["new_url"] + task_info = res["task"] + t_type = task_info["type"] + + if t_type == "single": + inputs[task_info["key"]] = new_url + + elif t_type == "list_ref_video": + if REFERENCE_VIDEO_URLS not in inputs: + inputs[REFERENCE_VIDEO_URLS] = ( + list(reference_video_urls) + if reference_video_urls + else [] + ) + + inputs[REFERENCE_VIDEO_URLS][task_info["index"]] = new_url + + elif t_type == "list_ref_file": + if REFERENCE_URLS not in inputs: + inputs[REFERENCE_URLS] = ( + list(reference_urls) if reference_urls else [] + ) + inputs[REFERENCE_URLS][task_info["index"]] = new_url + + elif t_type == "media": + if MEDIA_URLS not in inputs: + inputs[MEDIA_URLS] = media + + idx = task_info["index"] + field = task_info["field"] + if idx < len(inputs[MEDIA_URLS]): + inputs[MEDIA_URLS][idx][field] = 
new_url if extra_input is not None and extra_input: inputs = {**inputs, **extra_input} @@ -282,6 +334,8 @@ def async_call( # pylint: disable=arguments-renamed # type: ignore[override] # img_url: str = None, audio_url: str = None, reference_video_urls: List[str] = None, + reference_urls: List[str] = None, + reference_url: str = None, reference_video_description: List[str] = None, # """@deprecated, use prompt_extend in parameters """ extend_prompt: bool = True, @@ -295,6 +349,7 @@ def async_call( # pylint: disable=arguments-renamed # type: ignore[override] # tail_frame: str = None, first_frame_url: str = None, last_frame_url: str = None, + media: List[Dict] = None, **kwargs, ) -> VideoSynthesisResponse: """Create a video synthesis task, and return task information. @@ -308,13 +363,18 @@ def async_call( # pylint: disable=arguments-renamed # type: ignore[override] # img_url (str): The input image url, Generate the URL of the image referenced by the video. # pylint: disable=line-too-long audio_url (str): The input audio url. reference_video_urls (List[str]): list of character reference video file urls uploaded by the user # pylint: disable=line-too-long + reference_urls (List[str]): list of character reference file urls uploaded by the user # pylint: disable=line-too-long + reference_url (str): reference file url uploaded by the user # pylint: disable=line-too-long reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported # pylint: disable=line-too-long api_key (str, optional): The api api_key. Defaults to None. workspace (str): The dashscope workspace id. extra_input (Dict): The extra input parameters. task (str): The task of api, ref doc. + head_frame (str): The URL of the first frame image for generating the video. 
+ tail_frame (str): The URL of the last frame image for generating the video. first_frame_url (str): The URL of the first frame image for generating the video. last_frame_url (str): The URL of the last frame image for generating the video. + media (list): media file list **kwargs: size(str, `optional`): The output video size(width*height). duration( @@ -338,6 +398,8 @@ def async_call( # pylint: disable=arguments-renamed # type: ignore[override] # img_url, audio_url, reference_video_urls, + reference_urls, + reference_url, reference_video_description, extend_prompt, negative_prompt, @@ -350,6 +412,7 @@ def async_call( # pylint: disable=arguments-renamed # type: ignore[override] # tail_frame, first_frame_url, last_frame_url, + media, **kwargs, ) @@ -490,6 +553,8 @@ async def call( # type: ignore[override] # pylint: disable=arguments-renamed # img_url: str = None, audio_url: str = None, reference_video_urls: List[str] = None, + reference_urls: List[str] = None, + reference_url: str = None, reference_video_description: List[str] = None, # """@deprecated, use prompt_extend in parameters """ extend_prompt: bool = True, @@ -503,6 +568,7 @@ async def call( # type: ignore[override] # pylint: disable=arguments-renamed # tail_frame: str = None, first_frame_url: str = None, last_frame_url: str = None, + media: List[Dict] = None, **kwargs, ) -> VideoSynthesisResponse: """Call video synthesis service and get result. @@ -516,13 +582,18 @@ async def call( # type: ignore[override] # pylint: disable=arguments-renamed # img_url (str): The input image url, Generate the URL of the image referenced by the video. # pylint: disable=line-too-long audio_url (str): The input audio url. 
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user # pylint: disable=line-too-long + reference_urls (List[str]): list of character reference file urls uploaded by the user # pylint: disable=line-too-long + reference_url (str): reference file url uploaded by the user # pylint: disable=line-too-long reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported # pylint: disable=line-too-long api_key (str, optional): The api api_key. Defaults to None. workspace (str): The dashscope workspace id. extra_input (Dict): The extra input parameters. task (str): The task of api, ref doc. + head_frame (str): The URL of the first frame image for generating the video. + tail_frame (str): The URL of the last frame image for generating the video. first_frame_url (str): The URL of the first frame image for generating the video. last_frame_url (str): The URL of the last frame image for generating the video. + media (list): media file list **kwargs: size(str, `optional`): The output video size(width*height). 
duration( @@ -546,6 +617,8 @@ async def call( # type: ignore[override] # pylint: disable=arguments-renamed # img_url, audio_url, reference_video_urls, + reference_urls, + reference_url, reference_video_description, extend_prompt, negative_prompt, @@ -558,6 +631,7 @@ async def call( # type: ignore[override] # pylint: disable=arguments-renamed # tail_frame, first_frame_url, last_frame_url, + media, **kwargs, ) response = await super().call( @@ -582,6 +656,8 @@ async def async_call( # type: ignore[override] # pylint: disable=arguments-rena img_url: str = None, audio_url: str = None, reference_video_urls: List[str] = None, + reference_urls: List[str] = None, + reference_url: str = None, reference_video_description: List[str] = None, # """@deprecated, use prompt_extend in parameters """ extend_prompt: bool = True, @@ -595,6 +671,7 @@ async def async_call( # type: ignore[override] # pylint: disable=arguments-rena tail_frame: str = None, first_frame_url: str = None, last_frame_url: str = None, + media: List[Dict] = None, **kwargs, ) -> VideoSynthesisResponse: """Create a video synthesis task, and return task information. @@ -608,13 +685,18 @@ async def async_call( # type: ignore[override] # pylint: disable=arguments-rena img_url (str): The input image url, Generate the URL of the image referenced by the video. # pylint: disable=line-too-long audio_url (str): The input audio url. reference_video_urls (List[str]): list of character reference video file urls uploaded by the user # pylint: disable=line-too-long + reference_urls (List[str]): list of character reference file urls uploaded by the user # pylint: disable=line-too-long + reference_url (str): reference file url uploaded by the user # pylint: disable=line-too-long reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. 
If the quantity is different, an error will be reported # pylint: disable=line-too-long api_key (str, optional): The api api_key. Defaults to None. workspace (str): The dashscope workspace id. extra_input (Dict): The extra input parameters. task (str): The task of api, ref doc. + head_frame (str): The URL of the first frame image for generating the video. + tail_frame (str): The URL of the last frame image for generating the video. first_frame_url (str): The URL of the first frame image for generating the video. last_frame_url (str): The URL of the last frame image for generating the video. + media (list): media file list **kwargs: size(str, `optional`): The output video size(width*height). duration( @@ -640,6 +722,8 @@ async def async_call( # type: ignore[override] # pylint: disable=arguments-rena img_url, audio_url, reference_video_urls, + reference_urls, + reference_url, reference_video_description, extend_prompt, negative_prompt, @@ -652,6 +736,7 @@ async def async_call( # type: ignore[override] # pylint: disable=arguments-rena tail_frame, first_frame_url, last_frame_url, + media, **kwargs, ) diff --git a/dashscope/common/constants.py b/dashscope/common/constants.py index b8331f0..6fbecd9 100644 --- a/dashscope/common/constants.py +++ b/dashscope/common/constants.py @@ -29,6 +29,8 @@ CUSTOMIZED_MODEL_ID = "customized_model_id" IMAGES = "images" REFERENCE_VIDEO_URLS = "reference_video_urls" +REFERENCE_URLS = "reference_urls" +MEDIA_URLS = "media" TEXT_EMBEDDING_INPUT_KEY = "texts" SERVICE_503_MESSAGE = "Service temporarily unavailable, possibly overloaded or not ready." 
# noqa E501 # pylint: disable=line-too-long WEBSOCKET_ERROR_CODE = 44 diff --git a/samples/test_video_synthesis.py b/samples/test_video_synthesis.py index b9d6cb7..dc99778 100644 --- a/samples/test_video_synthesis.py +++ b/samples/test_video_synthesis.py @@ -5,7 +5,7 @@ prompt = "一只小猫在月光下奔跑" audio_url = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250925/ozwpvi/rap.mp3" -reference_video_urls = [ +reference_urls = [ "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/with_human_voice_11s.mov", ] api_key = os.getenv("DASHSCOPE_API_KEY") @@ -16,7 +16,7 @@ def simple_call(): rsp = VideoSynthesis.call( api_key=api_key, model="wan2.6-r2v", - reference_video_urls=reference_video_urls, + reference_urls=reference_urls, shot_type="multi", audio=True, watermark=True, @@ -31,5 +31,106 @@ def simple_call(): ) +def simple_call_t2v(): + print("----sync call, please wait a moment----") + rsp = VideoSynthesis.call( + api_key=api_key, + model="wan2.7-t2v", + audio=True, + watermark=True, + prompt=prompt, + ) + if rsp.status_code == HTTPStatus.OK: + print("response: %s" % rsp) + else: + print( + "sync_call Failed, status_code: %s, code: %s, message: %s" + % (rsp.status_code, rsp.code, rsp.message), + ) + +media_i2v = [ + { + "type": VideoSynthesis.MediaType.LAST_FRAME, + "url": "https://wanx.alicdn.com/material/20250318/last_frame.png" + }, + { + "type": VideoSynthesis.MediaType.FIRST_FRAME, + "url": "https://wanx.alicdn.com/material/20250318/first_frame.png" + }, + { + "url": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/audio/mp3_3s.mp3", + "type": VideoSynthesis.MediaType.DRIVING_AUDIO + } +] + +def simple_call_wan27_i2v(): + print("----sync call, please wait a moment----") + rsp = VideoSynthesis.call( + api_key=api_key, + model="wan2.7-i2v", + media=media_i2v, + audio=True, + watermark=True, + prompt=prompt, + ) + if rsp.status_code == HTTPStatus.OK: + print("response: %s" % rsp) + else: + print( + "sync_call Failed, 
status_code: %s, code: %s, message: %s" + % (rsp.status_code, rsp.code, rsp.message), + ) + +media_r2v = [ + { + "type": VideoSynthesis.MediaType.REFERENCE_IMAGE, + "url": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/image/res240_269.jpg" + }, + { + "type": VideoSynthesis.MediaType.REFERENCE_IMAGE, + "url": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/image/man_5K_7_7K_18_4M.JPG", + "reference_voice": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/audio/2s.wav" + }, + { + "type": VideoSynthesis.MediaType.REFERENCE_VIDEO, + "url": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/cast/100M.mov", + "reference_voice": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/audio/mp3_1s.mp3" + }, + { + "type": VideoSynthesis.MediaType.REFERENCE_VIDEO, + "url": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/cast/29_99s.mp4", + "reference_description": "这是一个身穿蓝衣的男子,他有着浓密的络腮胡" + }, + { + "type": VideoSynthesis.MediaType.REFERENCE_VIDEO, + "url": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/cast/cat_127.mp4", + "reference_voice": "https://test-data-center.oss-accelerate.aliyuncs.com/wanx/audio/wav_10s.wav", + "reference_description": "这是一只毛绒小猫,它正在对着镜头微笑" + } +] + +def simple_call_wan27_r2v(): + print("----sync call, please wait a moment----") + rsp = VideoSynthesis.call( + api_key=api_key, + model="wan2.7-r2v", + media=media_r2v, + audio=True, + watermark=True, + prompt=prompt, + ) + if rsp.status_code == HTTPStatus.OK: + print("response: %s" % rsp) + else: + print( + "sync_call Failed, status_code: %s, code: %s, message: %s" + % (rsp.status_code, rsp.code, rsp.message), + ) + + + if __name__ == "__main__": simple_call() + # simple_call_t2v() + # simple_call_wan27_i2v() + # simple_call_wan27_r2v()