diff --git a/api-txt2img/README.md b/api-txt2img/README.md index 676c44c..ccc65e7 100644 --- a/api-txt2img/README.md +++ b/api-txt2img/README.md @@ -1,4 +1,4 @@ -# Image Generation Example with AUTOMATIC1111 API +# Image Generation Example with AUTOMATIC1111 API (`sd3_medium` and `clip`) This example demonstrates how to use Python to generate images of a cat using the AUTOMATIC1111 Stable Diffusion API on a custom port. In this example, we'll generate **5 images** using a fixed prompt and save them locally as PNG files. @@ -10,6 +10,20 @@ The provided Python script does the following: - Decodes the returned base64 image data. - Saves each image in the local directory as a PNG file. +In addition to image generation, the example also shows how to use the server's CLIP Interrogator endpoint to generate captions from the created images. + +## Server Details + +For this example, the AUTOMATIC1111 server is reachable at the following network address: +- **IP Address:** `172.30.200.3` +- **Custom Port:** `35000` + +You can use these details to interact with the server for: +- **Image Generation:** Sending requests to the `/sdapi/v1/txt2img` endpoint. +- **CLIP Interrogation:** Sending requests to the `/sdapi/v1/interrogate` endpoint. + +This means if you want to use the server for generating images or obtaining image captions via CLIP interrogation, you should point your API calls to `http://172.30.200.3:35000`. 
+ ## Prerequisites Before running the script, ensure you have: diff --git a/api-txt2img/cat_0.png b/api-txt2img/cat_0.png new file mode 100644 index 0000000..b485f28 Binary files /dev/null and b/api-txt2img/cat_0.png differ diff --git a/api-txt2img/cat_1.png b/api-txt2img/cat_1.png new file mode 100644 index 0000000..d99fe65 Binary files /dev/null and b/api-txt2img/cat_1.png differ diff --git a/api-txt2img/cat_2.png b/api-txt2img/cat_2.png new file mode 100644 index 0000000..5ab7122 Binary files /dev/null and b/api-txt2img/cat_2.png differ diff --git a/api-txt2img/cat_3.png b/api-txt2img/cat_3.png new file mode 100644 index 0000000..b4d7f4c Binary files /dev/null and b/api-txt2img/cat_3.png differ diff --git a/api-txt2img/cat_4.png b/api-txt2img/cat_4.png new file mode 100644 index 0000000..ddb7044 Binary files /dev/null and b/api-txt2img/cat_4.png differ diff --git a/api-txt2img/generate_cat_images.py b/api-txt2img/generate_cat_images.py index f09aeb2..4a6d1b2 100644 --- a/api-txt2img/generate_cat_images.py +++ b/api-txt2img/generate_cat_images.py @@ -4,7 +4,8 @@ import io import base64 from PIL import Image -def generate_cat_images(ip, port, model_params, prompt="a cat", num_images=5): +def generate_cat_images(ip, port, model_params, prompt="cat", num_images=5): + filenames = [] for i in range(num_images): # Generate a random seed for each image seed = random.randint(0, 4294967295) @@ -13,7 +14,6 @@ def generate_cat_images(ip, port, model_params, prompt="a cat", num_images=5): payload = { "prompt": prompt, "negative_prompt": "", # Modify if needed - "sd_model_checkpoint": model_params["model"], "steps": model_params["steps"], "cfg_scale": model_params["cfg_scale"], "width": model_params["width"], @@ -33,31 +33,59 @@ def generate_cat_images(ip, port, model_params, prompt="a cat", num_images=5): r = response.json() # Process returned images (the API returns a list in r['images']) - for img_data in r.get('images', []): + for img_data in r['images']: # Remove any 
header if present (e.g., "data:image/png;base64,") - img_base64 = img_data.split(",", 1)[-1] - image = Image.open(io.BytesIO(base64.b64decode(img_base64))) + image = Image.open(io.BytesIO(base64.b64decode(img_data.split(",",1)[0]))) # Save the image with a unique filename filename = f"cat_{i}.png" image.save(filename) print(f"Saved {filename}") + filenames.append(filename) except requests.exceptions.Timeout: print(f"Timeout occurred while generating image {i}") except Exception as e: print(f"An error occurred on iteration {i}: {e}") + return filenames + +def interrogate_images(ip, port, image_files): + import requests, base64 + for filename in image_files: + try: + # Open the image file and encode it in base64 + with open(filename, "rb") as f: + img_bytes = f.read() + img_base64 = base64.b64encode(img_bytes).decode("utf-8") + payload = {"image": f"data:image/png;base64,{img_base64}", "model": "clip"} + + # Send a POST request to the CLIP Interrogator API endpoint on the A1111 server + response = requests.post( + url=f"http://{ip}:{port}/sdapi/v1/interrogate", + json=payload, + timeout=30 + ) + response.raise_for_status() + data = response.json() + + caption = data.get("caption", "No caption returned") + print(f"Image {filename} caption: {caption}") + except Exception as e: + print(f"An error occurred during interrogation of {filename}: {e}") if __name__ == "__main__": ip = "172.30.200.3" port = 35000 # Custom port for the API - # Define your model parameters (update 'model' to your actual checkpoint name) + # Define your model parameters (update as needed) model_params = { - "model": "sd3.5_checkpoint.ckpt", # Replace with your actual model checkpoint file "steps": 20, - "cfg_scale": 7.0, - "width": 512, - "height": 512, + "cfg_scale": 7, + "width": 1024, # Model specific, don't change + "height": 1024, # Model specific, don't change "sampler": "Euler" # Adjust based on your setup } - generate_cat_images(ip, port, model_params, prompt="a cat", num_images=5) + # 
Generate cat images and get the list of filenames + image_files = generate_cat_images(ip, port, model_params, prompt="cat, cute, perfect, anime", num_images=5) + + # Call the CLIP Interrogator function from main using the same ip and port + interrogate_images(ip, port, image_files)