
a1111 example

main
Anindya Maiti, 3 weeks ago
commit 93da5c5544
Changed files:
  1. api-txt2img/README.md (16)
  2. api-txt2img/cat_0.png (BIN)
  3. api-txt2img/cat_1.png (BIN)
  4. api-txt2img/cat_2.png (BIN)
  5. api-txt2img/cat_3.png (BIN)
  6. api-txt2img/cat_4.png (BIN)
  7. api-txt2img/generate_cat_images.py (50)

api-txt2img/README.md (16 changes)

@@ -1,4 +1,4 @@
-# Image Generation Example with AUTOMATIC1111 API
+# Image Generation Example with AUTOMATIC1111 API (`sd3_medium` and `clip`)
 This example demonstrates how to use Python to generate images of a cat using the AUTOMATIC1111 Stable Diffusion API on a custom port. In this example, we'll generate **5 images** using a fixed prompt and save them locally as PNG files.
@@ -10,6 +10,20 @@ The provided Python script does the following:
 - Decodes the returned base64 image data.
 - Saves each image in the local directory as a PNG file.
+In addition to image generation, the example also shows how to use the server's CLIP Interrogator endpoint to generate captions from the created images.
+
+## Server Details
+For this example, the AUTOMATIC1111 server is running with the following network details:
+- **IP Address:** `172.30.200.3`
+- **Custom Port:** `35000`
+
+You can use these details to interact with the server for:
+- **Image Generation:** Sending requests to the `/sdapi/v1/txt2img` endpoint.
+- **CLIP Interrogation:** Sending requests to the `/sdapi/v1/interrogate` endpoint.
+
+This means that if you want to use the server to generate images or to obtain image captions via CLIP interrogation, you should point your API calls to `http://172.30.200.3:35000`.
+
 ## Prerequisites
 Before running the script, ensure you have:
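The Server Details section added in the README points both endpoints at `http://172.30.200.3:35000`. As a rough, self-contained illustration (not part of this commit), a single txt2img call against that address could look like the sketch below; the payload keys mirror those used in `generate_cat_images.py`, and the base64 handling of the returned `images` list follows the same convention.

```python
# Minimal sketch, assuming the server details from the README above and the
# payload/response shape used by generate_cat_images.py in this commit.
import base64
import io

import requests
from PIL import Image

BASE_URL = "http://172.30.200.3:35000"

payload = {
    "prompt": "a cat",
    "steps": 20,
    "cfg_scale": 7,
    "width": 1024,
    "height": 1024,
}

resp = requests.post(f"{BASE_URL}/sdapi/v1/txt2img", json=payload, timeout=300)
resp.raise_for_status()

# The API returns base64-encoded PNGs in the "images" list.
for idx, img_data in enumerate(resp.json().get("images", [])):
    image = Image.open(io.BytesIO(base64.b64decode(img_data.split(",", 1)[-1])))
    image.save(f"example_{idx}.png")
```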

BIN  api-txt2img/cat_0.png: binary file not shown (added, 654 KiB)

BIN  api-txt2img/cat_1.png: binary file not shown (added, 1.4 MiB)

BIN  api-txt2img/cat_2.png: binary file not shown (added, 771 KiB)

BIN  api-txt2img/cat_3.png: binary file not shown (added, 675 KiB)

BIN  api-txt2img/cat_4.png: binary file not shown (added, 760 KiB)

api-txt2img/generate_cat_images.py (50 changes)

@@ -4,7 +4,8 @@ import io
 import base64
 from PIL import Image
 
-def generate_cat_images(ip, port, model_params, prompt="a cat", num_images=5):
+def generate_cat_images(ip, port, model_params, prompt="cat", num_images=5):
+    filenames = []
     for i in range(num_images):
         # Generate a random seed for each image
         seed = random.randint(0, 4294967295)
@@ -13,7 +14,6 @@ def generate_cat_images(ip, port, model_params, prompt="a cat", num_images=5):
         payload = {
             "prompt": prompt,
             "negative_prompt": "",  # Modify if needed
-            "sd_model_checkpoint": model_params["model"],
             "steps": model_params["steps"],
             "cfg_scale": model_params["cfg_scale"],
             "width": model_params["width"],
@@ -33,31 +33,59 @@ def generate_cat_images(ip, port, model_params, prompt="a cat", num_images=5):
             r = response.json()
 
             # Process returned images (the API returns a list in r['images'])
-            for img_data in r.get('images', []):
+            for img_data in r['images']:
                 # Remove any header if present (e.g., "data:image/png;base64,")
-                img_base64 = img_data.split(",", 1)[-1]
-                image = Image.open(io.BytesIO(base64.b64decode(img_base64)))
+                image = Image.open(io.BytesIO(base64.b64decode(img_data.split(",", 1)[-1])))
 
                 # Save the image with a unique filename
                 filename = f"cat_{i}.png"
                 image.save(filename)
                 print(f"Saved {filename}")
+                filenames.append(filename)
         except requests.exceptions.Timeout:
             print(f"Timeout occurred while generating image {i}")
         except Exception as e:
             print(f"An error occurred on iteration {i}: {e}")
+    return filenames
+
+def interrogate_images(ip, port, image_files):
+    for filename in image_files:
+        try:
+            # Open the image file and encode it in base64
+            with open(filename, "rb") as f:
+                img_bytes = f.read()
+            img_base64 = base64.b64encode(img_bytes).decode("utf-8")
+            payload = {"image": f"data:image/png;base64,{img_base64}", "model": "clip"}
+
+            # Send a POST request to the CLIP Interrogator API endpoint on the A1111 server
+            response = requests.post(
+                url=f"http://{ip}:{port}/sdapi/v1/interrogate",
+                json=payload,
+                timeout=30
+            )
+            response.raise_for_status()
+            data = response.json()
+            caption = data.get("caption", "No caption returned")
+            print(f"Image {filename} caption: {caption}")
+        except Exception as e:
+            print(f"An error occurred during interrogation of {filename}: {e}")
 
 if __name__ == "__main__":
     ip = "172.30.200.3"
     port = 35000  # Custom port for the API
 
-    # Define your model parameters (update 'model' to your actual checkpoint name)
+    # Define your model parameters (update as needed)
     model_params = {
-        "model": "sd3.5_checkpoint.ckpt",  # Replace with your actual model checkpoint file
         "steps": 20,
-        "cfg_scale": 7.0,
-        "width": 512,
-        "height": 512,
+        "cfg_scale": 7,
+        "width": 1024,   # Model specific, don't change
+        "height": 1024,  # Model specific, don't change
         "sampler": "Euler"  # Adjust based on your setup
     }
-    generate_cat_images(ip, port, model_params, prompt="a cat", num_images=5)
+    # Generate cat images and get the list of filenames
+    image_files = generate_cat_images(ip, port, model_params, prompt="cat, cute, perfect, anime", num_images=5)
+    # Call the CLIP Interrogator function from main using the same ip and port
+    interrogate_images(ip, port, image_files)
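For reference (not part of this commit), the CLIP interrogation step can also be exercised on its own. The sketch below assumes the same request and response shape as `interrogate_images` above: a base64 data URI in the `image` field and a caption returned under a `caption` key.

```python
# Standalone sketch: send one of the saved images to the CLIP Interrogator
# endpoint used by interrogate_images. Field names are taken from that function.
import base64

import requests

BASE_URL = "http://172.30.200.3:35000"

with open("cat_0.png", "rb") as f:
    data_uri = "data:image/png;base64," + base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    f"{BASE_URL}/sdapi/v1/interrogate",
    json={"image": data_uri, "model": "clip"},
    timeout=30,
)
resp.raise_for_status()
print(resp.json().get("caption", "No caption returned"))
```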
