Add support for API Nodes in ComfyUI. (#7726)

* Add Ideogram generate node.

* Add staging api.

* COMFY_API_NODE_NAME node property

* switch to boolean flag and use original node name for id

* add optional to type

* Add API_NODE and common error for missing auth token (#5)

* Add Minimax Video Generation + Async Task queue polling example (#6)

* [Minimax] Show video preview and embed workflow in output (#7)

* [API Nodes] Send empty request body instead of empty dictionary. (#8)

* Fixed: removed function from rebase.

* Add pydantic.

* Remove uv.lock

* Remove polling operations.

* Update stubs workflow.

* Remove polling comments.

* Update stubs.

* Use pydantic v2.

* Use pydantic v2.

* Add basic OpenAITextToImage node

* Add.

* convert image to tensor.

* Improve types.

* Ruff.

* Push tests.

* Handle multipart form data (see the sketch after this list).

- Don't set the Content-Type header manually for multipart/form-data requests
- Use the data field instead of JSON for form fields

* Change to api.comfy.org

* Handle error code 409.

* separate out nodes per openai model

* Update error message.

* fix wrong output type

* re-categorize nodes, remove ideogram (for now)

* oops, fix mappings

* fix ruff

* Update frontend to 1.17.9

* embargo lift: rename nodes

* remove unused autogenerated model code

* fix API type error and add b64 support for 4o

* fix ruff

* oops forgot mask scaling code

* Remove unused types.
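
The multipart change noted above ("Handle multipart form data") comes down to letting the HTTP client build the multipart body itself. Below is a minimal sketch with requests, shown without authentication; the endpoint URL and field names are illustrative only and are not taken from the ApiClient internals (which this diff does not show):

import io
import requests

# Form fields go in `data=` (not `json=`) and file parts in `files=`.
# Do not set the Content-Type header yourself: requests generates
# "multipart/form-data; boundary=..." automatically when `files=` is used.
png_bytes = io.BytesIO(b"<png bytes produced by the node>")  # placeholder bytes
png_bytes.name = "image.png"

resp = requests.post(
    "https://api.comfy.org/proxy/openai/images/edits",  # illustrative URL
    data={"model": "gpt-image-1", "prompt": "Give the rocketship rainbow coloring"},
    files={"image": png_bytes},
)
resp.raise_for_status()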

---------

Co-authored-by: bymyself <cbyrne@comfy.org>
Co-authored-by: Yoland Y <4950057+yoland68@users.noreply.github.com>
Co-authored-by: thot-experiment <thot@thiic.cc>
Authored by Robin Huang on 2025-04-23 12:38:34 -07:00; committed by GitHub.
parent 154f2911aa
commit dea1c7474a
8 changed files with 979 additions and 2 deletions

.github/workflows/update-api-stubs.yml

@@ -0,0 +1,47 @@
name: Generate Pydantic Stubs from api.comfy.org
on:
schedule:
- cron: '0 0 * * 1'
workflow_dispatch:
jobs:
generate-models:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install 'datamodel-code-generator[http]'
- name: Generate API models
run: |
datamodel-codegen --use-subclass-enum --url https://api.comfy.org/openapi --output comfy_api_nodes/apis --output-model-type pydantic_v2.BaseModel
- name: Check for changes
id: git-check
run: |
git diff --exit-code comfy_api_nodes/apis || echo "changes=true" >> $GITHUB_OUTPUT
- name: Create Pull Request
if: steps.git-check.outputs.changes == 'true'
uses: peter-evans/create-pull-request@v5
with:
commit-message: 'chore: update API models from OpenAPI spec'
title: 'Update API models from api.comfy.org'
body: |
This PR updates the API models based on the latest api.comfy.org OpenAPI specification.
Generated automatically by a GitHub workflow.
branch: update-api-stubs
delete-branch: true
base: main


@@ -0,0 +1,17 @@
# generated by datamodel-codegen:
# filename: https://api.comfy.org/openapi
# timestamp: 2025-04-23T15:56:33+00:00
from __future__ import annotations
from typing import Optional
from pydantic import BaseModel
from . import PixverseDto
class ResponseData(BaseModel):
ErrCode: Optional[int] = None
ErrMsg: Optional[str] = None
Resp: Optional[PixverseDto.V2OpenAPII2VResp] = None


@@ -0,0 +1,57 @@
# generated by datamodel-codegen:
# filename: https://api.comfy.org/openapi
# timestamp: 2025-04-23T15:56:33+00:00
from __future__ import annotations
from typing import Optional
from pydantic import BaseModel, Field, constr
class V2OpenAPII2VResp(BaseModel):
video_id: Optional[int] = Field(None, description='Video_id')
class V2OpenAPIT2VReq(BaseModel):
aspect_ratio: str = Field(
..., description='Aspect ratio (16:9, 4:3, 1:1, 3:4, 9:16)', examples=['16:9']
)
duration: int = Field(
...,
description='Video duration (5, 8 seconds, --model=v3.5 only allows 5,8; --quality=1080p does not support 8s)',
examples=[5],
)
model: str = Field(
..., description='Model version (only supports v3.5)', examples=['v3.5']
)
motion_mode: Optional[str] = Field(
'normal',
description='Motion mode (normal, fast, --fast only available when duration=5; --quality=1080p does not support fast)',
examples=['normal'],
)
negative_prompt: Optional[constr(max_length=2048)] = Field(
None, description='Negative prompt\n'
)
prompt: constr(max_length=2048) = Field(..., description='Prompt')
quality: str = Field(
...,
description='Video quality ("360p"(Turbo model), "540p", "720p", "1080p")',
examples=['540p'],
)
seed: Optional[int] = Field(None, description='Random seed, range: 0 - 2147483647')
style: Optional[str] = Field(
None,
description='Style (effective when model=v3.5, "anime", "3d_animation", "clay", "comic", "cyberpunk") Do not include style parameter unless needed',
examples=['anime'],
)
template_id: Optional[int] = Field(
None,
description='Template ID (template_id must be activated before use)',
examples=[302325299692608],
)
water_mark: Optional[bool] = Field(
False,
description='Watermark (true: add watermark, false: no watermark)',
examples=[False],
)
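
These generated classes are plain Pydantic v2 models, so an API node can construct and validate a request body directly. A minimal sketch using V2OpenAPIT2VReq above; the import path is assumed from the `from . import PixverseDto` line in the ResponseData stub earlier, and the field values are illustrative only:

from comfy_api_nodes.apis import PixverseDto

# Required fields are validated on construction; constr/conint constraints
# (e.g. prompt max_length=2048) are enforced by Pydantic v2.
req = PixverseDto.V2OpenAPIT2VReq(
    aspect_ratio="16:9",
    duration=5,
    model="v3.5",
    prompt="A rocket lifting off at dawn",
    quality="540p",
)

# Serialize for the request body, dropping unset optional fields.
body = req.model_dump_json(exclude_none=True)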


@@ -0,0 +1,422 @@
# generated by datamodel-codegen:
# filename: https://api.comfy.org/openapi
# timestamp: 2025-04-23T15:56:33+00:00
from __future__ import annotations
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import AnyUrl, BaseModel, Field, confloat, conint
class Customer(BaseModel):
createdAt: Optional[datetime] = Field(
None, description='The date and time the user was created'
)
email: Optional[str] = Field(None, description='The email address for this user')
id: str = Field(..., description='The firebase UID of the user')
name: Optional[str] = Field(None, description='The name for this user')
updatedAt: Optional[datetime] = Field(
None, description='The date and time the user was last updated'
)
class Error(BaseModel):
details: Optional[List[str]] = Field(
None,
description='Optional detailed information about the error or hints for resolving it.',
)
message: Optional[str] = Field(
None, description='A clear and concise description of the error.'
)
class ErrorResponse(BaseModel):
error: str
message: str
class ImageRequest(BaseModel):
aspect_ratio: Optional[str] = Field(
None,
description="Optional. The aspect ratio (e.g., 'ASPECT_16_9', 'ASPECT_1_1'). Cannot be used with resolution. Defaults to 'ASPECT_1_1' if unspecified.",
)
color_palette: Optional[Dict[str, Any]] = Field(
None, description='Optional. Color palette object. Only for V_2, V_2_TURBO.'
)
magic_prompt_option: Optional[str] = Field(
None, description="Optional. MagicPrompt usage ('AUTO', 'ON', 'OFF')."
)
model: str = Field(..., description="The model used (e.g., 'V_2', 'V_2A_TURBO')")
negative_prompt: Optional[str] = Field(
None,
description='Optional. Description of what to exclude. Only for V_1, V_1_TURBO, V_2, V_2_TURBO.',
)
num_images: Optional[conint(ge=1, le=8)] = Field(
1, description='Optional. Number of images to generate (1-8). Defaults to 1.'
)
prompt: str = Field(
..., description='Required. The prompt to use to generate the image.'
)
resolution: Optional[str] = Field(
None,
description="Optional. Resolution (e.g., 'RESOLUTION_1024_1024'). Only for model V_2. Cannot be used with aspect_ratio.",
)
seed: Optional[conint(ge=0, le=2147483647)] = Field(
None, description='Optional. A number between 0 and 2147483647.'
)
style_type: Optional[str] = Field(
None,
description="Optional. Style type ('AUTO', 'GENERAL', 'REALISTIC', 'DESIGN', 'RENDER_3D', 'ANIME'). Only for models V_2 and above.",
)
class Datum(BaseModel):
is_image_safe: Optional[bool] = Field(
None, description='Indicates whether the image is considered safe.'
)
prompt: Optional[str] = Field(
None, description='The prompt used to generate this image.'
)
resolution: Optional[str] = Field(
None, description="The resolution of the generated image (e.g., '1024x1024')."
)
seed: Optional[int] = Field(
None, description='The seed value used for this generation.'
)
style_type: Optional[str] = Field(
None,
description="The style type used for generation (e.g., 'REALISTIC', 'ANIME').",
)
url: Optional[str] = Field(None, description='URL to the generated image.')
class Code(Enum):
int_1100 = 1100
int_1101 = 1101
int_1102 = 1102
int_1103 = 1103
class Code1(Enum):
int_1000 = 1000
int_1001 = 1001
int_1002 = 1002
int_1003 = 1003
int_1004 = 1004
class AspectRatio(str, Enum):
field_16_9 = '16:9'
field_9_16 = '9:16'
field_1_1 = '1:1'
class Config(BaseModel):
horizontal: Optional[confloat(ge=-10.0, le=10.0)] = None
pan: Optional[confloat(ge=-10.0, le=10.0)] = None
roll: Optional[confloat(ge=-10.0, le=10.0)] = None
tilt: Optional[confloat(ge=-10.0, le=10.0)] = None
vertical: Optional[confloat(ge=-10.0, le=10.0)] = None
zoom: Optional[confloat(ge=-10.0, le=10.0)] = None
class Type(str, Enum):
simple = 'simple'
down_back = 'down_back'
forward_up = 'forward_up'
right_turn_forward = 'right_turn_forward'
left_turn_forward = 'left_turn_forward'
class CameraControl(BaseModel):
config: Optional[Config] = None
type: Optional[Type] = Field(None, description='Predefined camera movements type')
class Duration(str, Enum):
field_5 = 5
field_10 = 10
class Mode(str, Enum):
std = 'std'
pro = 'pro'
class TaskInfo(BaseModel):
external_task_id: Optional[str] = None
class Video(BaseModel):
duration: Optional[str] = Field(None, description='Total video duration')
id: Optional[str] = Field(None, description='Generated video ID')
url: Optional[AnyUrl] = Field(None, description='URL for generated video')
class TaskResult(BaseModel):
videos: Optional[List[Video]] = None
class TaskStatus(str, Enum):
submitted = 'submitted'
processing = 'processing'
succeed = 'succeed'
failed = 'failed'
class Data(BaseModel):
created_at: Optional[int] = Field(None, description='Task creation time')
task_id: Optional[str] = Field(None, description='Task ID')
task_info: Optional[TaskInfo] = None
task_result: Optional[TaskResult] = None
task_status: Optional[TaskStatus] = None
updated_at: Optional[int] = Field(None, description='Task update time')
class AspectRatio1(str, Enum):
field_16_9 = '16:9'
field_9_16 = '9:16'
field_1_1 = '1:1'
field_4_3 = '4:3'
field_3_4 = '3:4'
field_3_2 = '3:2'
field_2_3 = '2:3'
field_21_9 = '21:9'
class ImageReference(str, Enum):
subject = 'subject'
face = 'face'
class Image(BaseModel):
index: Optional[int] = Field(None, description='Image Number (0-9)')
url: Optional[AnyUrl] = Field(None, description='URL for generated image')
class TaskResult1(BaseModel):
images: Optional[List[Image]] = None
class Data1(BaseModel):
created_at: Optional[int] = Field(None, description='Task creation time')
task_id: Optional[str] = Field(None, description='Task ID')
task_result: Optional[TaskResult1] = None
task_status: Optional[TaskStatus] = None
task_status_msg: Optional[str] = Field(None, description='Task status information')
updated_at: Optional[int] = Field(None, description='Task update time')
class AspectRatio2(str, Enum):
field_16_9 = '16:9'
field_9_16 = '9:16'
field_1_1 = '1:1'
class CameraControl1(BaseModel):
config: Optional[Config] = None
type: Optional[Type] = Field(None, description='Predefined camera movements type')
class ModelName2(str, Enum):
kling_v1 = 'kling-v1'
kling_v1_6 = 'kling-v1-6'
class TaskResult2(BaseModel):
videos: Optional[List[Video]] = None
class Data2(BaseModel):
created_at: Optional[int] = Field(None, description='Task creation time')
task_id: Optional[str] = Field(None, description='Task ID')
task_info: Optional[TaskInfo] = None
task_result: Optional[TaskResult2] = None
task_status: Optional[TaskStatus] = None
updated_at: Optional[int] = Field(None, description='Task update time')
class Code2(Enum):
int_1200 = 1200
int_1201 = 1201
int_1202 = 1202
int_1203 = 1203
class ResourcePackType(str, Enum):
decreasing_total = 'decreasing_total'
constant_period = 'constant_period'
class Status(str, Enum):
toBeOnline = 'toBeOnline'
online = 'online'
expired = 'expired'
runOut = 'runOut'
class ResourcePackSubscribeInfo(BaseModel):
effective_time: Optional[int] = Field(
None, description='Effective time, Unix timestamp in ms'
)
invalid_time: Optional[int] = Field(
None, description='Expiration time, Unix timestamp in ms'
)
purchase_time: Optional[int] = Field(
None, description='Purchase time, Unix timestamp in ms'
)
remaining_quantity: Optional[float] = Field(
None, description='Remaining quantity (updated with a 12-hour delay)'
)
resource_pack_id: Optional[str] = Field(None, description='Resource package ID')
resource_pack_name: Optional[str] = Field(None, description='Resource package name')
resource_pack_type: Optional[ResourcePackType] = Field(
None,
description='Resource package type (decreasing_total=decreasing total, constant_period=constant periodicity)',
)
status: Optional[Status] = Field(None, description='Resource Package Status')
total_quantity: Optional[float] = Field(None, description='Total quantity')
class Background(str, Enum):
transparent = 'transparent'
opaque = 'opaque'
class Moderation(str, Enum):
low = 'low'
auto = 'auto'
class OutputFormat(str, Enum):
png = 'png'
webp = 'webp'
jpeg = 'jpeg'
class Quality(str, Enum):
low = 'low'
medium = 'medium'
high = 'high'
class OpenAIImageEditRequest(BaseModel):
background: Optional[str] = Field(
None, description='Background transparency', examples=['opaque']
)
model: str = Field(
..., description='The model to use for image editing', examples=['gpt-image-1']
)
moderation: Optional[Moderation] = Field(
None, description='Content moderation setting', examples=['auto']
)
n: Optional[int] = Field(
None, description='The number of images to generate', examples=[1]
)
output_compression: Optional[int] = Field(
None, description='Compression level for JPEG or WebP (0-100)', examples=[100]
)
output_format: Optional[OutputFormat] = Field(
None, description='Format of the output image', examples=['png']
)
prompt: str = Field(
...,
description='A text description of the desired edit',
examples=['Give the rocketship rainbow coloring'],
)
quality: Optional[str] = Field(
None, description='The quality of the edited image', examples=['low']
)
size: Optional[str] = Field(
None, description='Size of the output image', examples=['1024x1024']
)
user: Optional[str] = Field(
None,
description='A unique identifier for end-user monitoring',
examples=['user-1234'],
)
class Quality1(str, Enum):
low = 'low'
medium = 'medium'
high = 'high'
standard = 'standard'
hd = 'hd'
class ResponseFormat(str, Enum):
url = 'url'
b64_json = 'b64_json'
class Style(str, Enum):
vivid = 'vivid'
natural = 'natural'
class OpenAIImageGenerationRequest(BaseModel):
background: Optional[Background] = Field(
None, description='Background transparency', examples=['opaque']
)
model: Optional[str] = Field(
None, description='The model to use for image generation', examples=['dall-e-3']
)
moderation: Optional[Moderation] = Field(
None, description='Content moderation setting', examples=['auto']
)
n: Optional[int] = Field(
None,
description='The number of images to generate (1-10). Only 1 supported for dall-e-3.',
examples=[1],
)
output_compression: Optional[int] = Field(
None, description='Compression level for JPEG or WebP (0-100)', examples=[100]
)
output_format: Optional[OutputFormat] = Field(
None, description='Format of the output image', examples=['png']
)
prompt: str = Field(
...,
description='A text description of the desired image',
examples=['Draw a rocket in front of a blackhole in deep space'],
)
quality: Optional[Quality1] = Field(
None, description='The quality of the generated image', examples=['high']
)
response_format: Optional[ResponseFormat] = Field(
None, description='Response format of image data', examples=['b64_json']
)
size: Optional[str] = Field(
None,
description='Size of the image (e.g., 1024x1024, 1536x1024, auto)',
examples=['1024x1536'],
)
style: Optional[Style] = Field(
None, description='Style of the image (only for dall-e-3)', examples=['vivid']
)
user: Optional[str] = Field(
None,
description='A unique identifier for end-user monitoring',
examples=['user-1234'],
)
class Datum1(BaseModel):
b64_json: Optional[str] = Field(None, description='Base64 encoded image data')
revised_prompt: Optional[str] = Field(None, description='Revised prompt')
url: Optional[str] = Field(None, description='URL of the image')
class OpenAIImageGenerationResponse(BaseModel):
data: Optional[List[Datum1]] = None
class User(BaseModel):
email: Optional[str] = Field(None, description='The email address for this user.')
id: Optional[str] = Field(None, description='The unique id for this user.')
isAdmin: Optional[bool] = Field(
None, description='Indicates if the user has admin privileges.'
)
isApproved: Optional[bool] = Field(
None, description='Indicates if the user is approved.'
)
name: Optional[str] = Field(None, description='The name for this user.')


@@ -226,7 +226,7 @@ class ApiClient:
     def check_auth_token(self, auth_token):
         """Verify that an auth token is present."""
         if auth_token is None:
-            raise Exception("Please login first to use this node.")
+            raise Exception("Unauthorized: Please login first to use this node.")
         return auth_token


@@ -0,0 +1,425 @@
import io
from inspect import cleandoc
from comfy.utils import common_upscale
from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict
from comfy_api_nodes.apis import (
OpenAIImageGenerationRequest,
OpenAIImageEditRequest,
OpenAIImageGenerationResponse
)
from comfy_api_nodes.apis.client import ApiEndpoint, HttpMethod, SynchronousOperation
import numpy as np
from PIL import Image
import requests
import torch
import math
import base64
def downscale_input(image):
samples = image.movedim(-1,1)
#downscaling input images to roughly the same size as the outputs
total = int(1536 * 1024)
scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
if scale_by >= 1:
return image
width = round(samples.shape[3] * scale_by)
height = round(samples.shape[2] * scale_by)
s = common_upscale(samples, width, height, "lanczos", "disabled")
s = s.movedim(1,-1)
return s
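# Worked example of the scaling above: a 3072x2048 input gives
# scale_by = sqrt((1536*1024) / (3072*2048)) = 0.5, so it is resized to
# 1536x1024 (roughly the size of the API outputs) before upload.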
def validate_and_cast_response(response):
# validate raw JSON response
data = response.data
if not data or len(data) == 0:
raise Exception("No images returned from API endpoint")
# Get base64 image data
image_url = data[0].url
b64_data = data[0].b64_json
if not image_url and not b64_data:
raise Exception("No image was generated in the response")
if b64_data:
img_data = base64.b64decode(b64_data)
img = Image.open(io.BytesIO(img_data))
elif image_url:
img_response = requests.get(image_url)
if img_response.status_code != 200:
raise Exception("Failed to download the image")
img = Image.open(io.BytesIO(img_response.content))
img = img.convert("RGB") # Ensure RGB format
# Convert to numpy array, normalize to float32 between 0 and 1
img_array = np.array(img).astype(np.float32) / 255.0
# Convert to torch tensor and add batch dimension
return torch.from_numpy(img_array)[None,]
class OpenAIDalle2(ComfyNodeABC):
"""
Generates images synchronously via OpenAI's DALL·E 2 endpoint.
Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
so download or cache results if you need to keep them.
"""
def __init__(self):
pass
@classmethod
def INPUT_TYPES(cls) -> InputTypeDict:
return {
"required": {
"prompt": (IO.STRING, {
"multiline": True,
"default": "",
"tooltip": "Text prompt for DALL·E",
}),
},
"optional": {
"seed": (IO.INT, {
"default": 0,
"min": 0,
"max": 2**31-1,
"step": 1,
"display": "number",
"tooltip": "not implemented yet in backend",
}),
"size": (IO.COMBO, {
"options": ["256x256", "512x512", "1024x1024"],
"default": "1024x1024",
"tooltip": "Image size",
}),
"n": (IO.INT, {
"default": 1,
"min": 1,
"max": 8,
"step": 1,
"display": "number",
"tooltip": "How many images to generate",
}),
"image": (IO.IMAGE, {
"default": None,
"tooltip": "Optional reference image for image editing.",
}),
"mask": (IO.MASK, {
"default": None,
"tooltip": "Optional mask for inpainting (white areas will be replaced)",
}),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG"
}
}
RETURN_TYPES = (IO.IMAGE,)
FUNCTION = "api_call"
CATEGORY = "api node"
DESCRIPTION = cleandoc(__doc__ or "")
API_NODE = True
def api_call(self, prompt, seed=0, image=None, mask=None, n=1, size="1024x1024", auth_token=None):
model = "dall-e-2"
path = "/proxy/openai/images/generations"
request_class = OpenAIImageGenerationRequest
img_binary = None
if image is not None and mask is not None:
path = "/proxy/openai/images/edits"
request_class = OpenAIImageEditRequest
input_tensor = image.squeeze().cpu()
height, width, channels = input_tensor.shape
rgba_tensor = torch.ones(height, width, 4, device="cpu")
rgba_tensor[:, :, :channels] = input_tensor
if mask.shape[1:] != image.shape[1:-1]:
raise Exception("Mask and Image must be the same size")
rgba_tensor[:,:,3] = (1-mask.squeeze().cpu())
rgba_tensor = downscale_input(rgba_tensor.unsqueeze(0)).squeeze()
image_np = (rgba_tensor.numpy() * 255).astype(np.uint8)
img = Image.fromarray(image_np)
img_byte_arr = io.BytesIO()
img.save(img_byte_arr, format='PNG')
img_byte_arr.seek(0)
img_binary = img_byte_arr
img_binary.name = "image.png"
elif image is not None or mask is not None:
raise Exception("Dall-E 2 image editing requires an image AND a mask")
# Build the operation
operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=path,
method=HttpMethod.POST,
request_model=request_class,
response_model=OpenAIImageGenerationResponse
),
request=request_class(
model=model,
prompt=prompt,
n=n,
size=size,
seed=seed,
),
files={
"image": img_binary,
} if img_binary else None,
auth_token=auth_token
)
response = operation.execute()
img_tensor = validate_and_cast_response(response)
return (img_tensor,)
class OpenAIDalle3(ComfyNodeABC):
"""
Generates images synchronously via OpenAI's DALL·E 3 endpoint.
Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
so download or cache results if you need to keep them.
"""
def __init__(self):
pass
@classmethod
def INPUT_TYPES(cls) -> InputTypeDict:
return {
"required": {
"prompt": (IO.STRING, {
"multiline": True,
"default": "",
"tooltip": "Text prompt for DALL·E",
}),
},
"optional": {
"seed": (IO.INT, {
"default": 0,
"min": 0,
"max": 2**31-1,
"step": 1,
"display": "number",
"tooltip": "not implemented yet in backend",
}),
"quality" : (IO.COMBO, {
"options": ["standard","hd"],
"default": "standard",
"tooltip": "Image quality",
}),
"style": (IO.COMBO, {
"options": ["natural","vivid"],
"default": "natural",
"tooltip": "Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images.",
}),
"size": (IO.COMBO, {
"options": ["1024x1024", "1024x1792", "1792x1024"],
"default": "1024x1024",
"tooltip": "Image size",
}),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG"
}
}
RETURN_TYPES = (IO.IMAGE,)
FUNCTION = "api_call"
CATEGORY = "api node"
DESCRIPTION = cleandoc(__doc__ or "")
API_NODE = True
def api_call(self, prompt, seed=0, style="natural", quality="standard", size="1024x1024", auth_token=None):
model = "dall-e-3"
# build the operation
operation = SynchronousOperation(
endpoint=ApiEndpoint(
path="/proxy/openai/images/generations",
method=HttpMethod.POST,
request_model=OpenAIImageGenerationRequest,
response_model=OpenAIImageGenerationResponse
),
request=OpenAIImageGenerationRequest(
model=model,
prompt=prompt,
quality=quality,
size=size,
style=style,
seed=seed,
),
auth_token=auth_token
)
response = operation.execute()
img_tensor = validate_and_cast_response(response)
return (img_tensor,)
class OpenAIGPTImage1(ComfyNodeABC):
"""
Generates images synchronously via OpenAI's GPT Image 1 endpoint.
Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
so download or cache results if you need to keep them.
"""
def __init__(self):
pass
@classmethod
def INPUT_TYPES(cls) -> InputTypeDict:
return {
"required": {
"prompt": (IO.STRING, {
"multiline": True,
"default": "",
"tooltip": "Text prompt for GPT Image 1",
}),
},
"optional": {
"seed": (IO.INT, {
"default": 0,
"min": 0,
"max": 2**31-1,
"step": 1,
"display": "number",
"tooltip": "not implemented yet in backend",
}),
"quality": (IO.COMBO, {
"options": ["low","medium","high"],
"default": "low",
"tooltip": "Image quality, affects cost and generation time.",
}),
"background": (IO.COMBO, {
"options": ["opaque","transparent"],
"default": "opaque",
"tooltip": "Return image with or without background",
}),
"size": (IO.COMBO, {
"options": ["auto", "1024x1024", "1024x1536", "1536x1024"],
"default": "auto",
"tooltip": "Image size",
}),
"n": (IO.INT, {
"default": 1,
"min": 1,
"max": 8,
"step": 1,
"display": "number",
"tooltip": "How many images to generate",
}),
"image": (IO.IMAGE, {
"default": None,
"tooltip": "Optional reference image for image editing.",
}),
"mask": (IO.MASK, {
"default": None,
"tooltip": "Optional mask for inpainting (white areas will be replaced)",
}),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG"
}
}
RETURN_TYPES = (IO.IMAGE,)
FUNCTION = "api_call"
CATEGORY = "api node"
DESCRIPTION = cleandoc(__doc__ or "")
API_NODE = True
def api_call(self, prompt, seed=0, quality="low", background="opaque", image=None, mask=None, n=1, size="1024x1024", auth_token=None):
model = "gpt-image-1"
path = "/proxy/openai/images/generations"
request_class = OpenAIImageGenerationRequest
img_binary = None
mask_binary = None
if image is not None:
path = "/proxy/openai/images/edits"
request_class = OpenAIImageEditRequest
scaled_image = downscale_input(image).squeeze()
image_np = (scaled_image.numpy() * 255).astype(np.uint8)
img = Image.fromarray(image_np)
img_byte_arr = io.BytesIO()
img.save(img_byte_arr, format='PNG')
img_byte_arr.seek(0)
img_binary = img_byte_arr
img_binary.name = "image.png"
if mask is not None:
if image is None:
raise Exception("Cannot use a mask without an input image")
if mask.shape[1:] != image.shape[1:-1]:
raise Exception("Mask and Image must be the same size")
batch, height, width = mask.shape
rgba_mask = torch.zeros(height, width, 4, device="cpu")
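# Same convention as the DALL-E 2 mask above: alpha = 1 - mask makes the
# white (replace) areas fully transparent, which the edits endpoint regenerates.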
rgba_mask[:,:,3] = (1-mask.squeeze().cpu())
scaled_mask = downscale_input(rgba_mask.unsqueeze(0)).squeeze()
mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
mask_img = Image.fromarray(mask_np)
mask_img_byte_arr = io.BytesIO()
mask_img.save(mask_img_byte_arr, format='PNG')
mask_img_byte_arr.seek(0)
mask_binary = mask_img_byte_arr
mask_binary.name = "mask.png"
files = {}
if img_binary:
files["image"] = img_binary
if mask_binary:
files["mask"] = mask_binary
# Build the operation
operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=path,
method=HttpMethod.POST,
request_model=request_class,
response_model=OpenAIImageGenerationResponse
),
request=request_class(
model=model,
prompt=prompt,
quality=quality,
background=background,
n=n,
seed=seed,
size=size,
),
files=files if files else None,
auth_token=auth_token
)
response = operation.execute()
img_tensor = validate_and_cast_response(response)
return (img_tensor,)
# A dictionary that contains all nodes you want to export with their names
# NOTE: names should be globally unique
NODE_CLASS_MAPPINGS = {
"OpenAIDalle2": OpenAIDalle2,
"OpenAIDalle3": OpenAIDalle3,
"OpenAIGPTImage1": OpenAIGPTImage1,
}
# A dictionary that contains the friendly/humanly readable titles for the nodes
NODE_DISPLAY_NAME_MAPPINGS = {
"OpenAIDalle2": "OpenAI DALL·E 2",
"OpenAIDalle3": "OpenAI DALL·E 3",
"OpenAIGPTImage1": "OpenAI GPT Image 1",
}


@@ -2260,11 +2260,20 @@ def init_builtin_extra_nodes():
         "nodes_fresca.py",
     ]

+    api_nodes_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_api_nodes")
+    api_nodes_files = [
+        "nodes_api.py",
+    ]
+
     import_failed = []
     for node_file in extras_files:
         if not load_custom_node(os.path.join(extras_dir, node_file), module_parent="comfy_extras"):
             import_failed.append(node_file)

+    for node_file in api_nodes_files:
+        if not load_custom_node(os.path.join(api_nodes_dir, node_file), module_parent="comfy_api_nodes"):
+            import_failed.append(node_file)
+
     return import_failed


@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.16.9
+comfyui-frontend-package==1.17.9
 comfyui-workflow-templates==0.1.3
 torch
 torchsde