{ "cells": [ { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import json\n", "import random" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "<>:1: SyntaxWarning: invalid escape sequence '\\c'\n", "<>:3: SyntaxWarning: invalid escape sequence '\\d'\n", "<>:1: SyntaxWarning: invalid escape sequence '\\c'\n", "<>:3: SyntaxWarning: invalid escape sequence '\\d'\n", "C:\\Users\\rajst\\AppData\\Local\\Temp\\ipykernel_11856\\1444736939.py:1: SyntaxWarning: invalid escape sequence '\\c'\n", " image_data=pd.read_csv(\"data_set_formation\\custom_prompts_df.csv\")\n", "C:\\Users\\rajst\\AppData\\Local\\Temp\\ipykernel_11856\\1444736939.py:3: SyntaxWarning: invalid escape sequence '\\d'\n", " with open(\"data_set_formation\\data.json\") as read:\n" ] } ], "source": [ "image_data=pd.read_csv(\"data_set_formation\\custom_prompts_df.csv\")\n", "\n", "with open(\"data_set_formation\\data.json\") as read:\n", " text_data=json.load(read)\n", "# prompt_data=" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
promptimage_file
0painting of King Henry VIII carrying an umbrellaimages/0/custom_0_0.png
1Fox Mulder and a chinchilla walking down a roa...images/0/custom_1_0.png
2photo of a gas burner by a soft pretzelimages/0/custom_2_0.png
3photo of Shyster standing street lights on at ...images/0/custom_3_0.png
4cute young man eating a plant over a fence in ...images/0/custom_5_0.png
.........
99995photo of a natural kite at Westminster Abbeyimages/102/custom_102419_0.png
99996smooth rum with a clock in the style of a digi...images/102/custom_102420_0.png
99997a lovable elephant by the Gamla Stan, Stockholmimages/102/custom_102421_0.png
99998photo of Courtney Love with a hot dogimages/102/custom_102422_0.png
99999Maniac jumping on a skateboard near a fenceimages/102/custom_102423_0.png
\n", "

100000 rows × 2 columns

\n", "
" ], "text/plain": [ " prompt \\\n", "0 painting of King Henry VIII carrying an umbrella \n", "1 Fox Mulder and a chinchilla walking down a roa... \n", "2 photo of a gas burner by a soft pretzel \n", "3 photo of Shyster standing street lights on at ... \n", "4 cute young man eating a plant over a fence in ... \n", "... ... \n", "99995 photo of a natural kite at Westminster Abbey \n", "99996 smooth rum with a clock in the style of a digi... \n", "99997 a lovable elephant by the Gamla Stan, Stockholm \n", "99998 photo of Courtney Love with a hot dog \n", "99999 Maniac jumping on a skateboard near a fence \n", "\n", " image_file \n", "0 images/0/custom_0_0.png \n", "1 images/0/custom_1_0.png \n", "2 images/0/custom_2_0.png \n", "3 images/0/custom_3_0.png \n", "4 images/0/custom_5_0.png \n", "... ... \n", "99995 images/102/custom_102419_0.png \n", "99996 images/102/custom_102420_0.png \n", "99997 images/102/custom_102421_0.png \n", "99998 images/102/custom_102422_0.png \n", "99999 images/102/custom_102423_0.png \n", "\n", "[100000 rows x 2 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "image_data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "data_dict={\"prompt\":[],\"label\":[]}" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "queries = [\n", " # General Descriptions\n", " \"Generate a beautiful sunset over the ocean.\",\n", " \"Create a futuristic cityscape at night.\",\n", " \"Show a cozy cabin in the middle of a snowy forest.\",\n", " \"Draw a tropical beach with palm trees and clear blue water.\",\n", " \"Design a medieval castle on a hilltop.\",\n", " \n", " # Character-Focused Queries\n", " \"Generate a young woman with long red hair in a fantasy setting.\",\n", " \"Create a warrior in futuristic armor holding a glowing sword.\",\n", " \"Draw a friendly robot helping people in a park.\",\n", " \"Design a wise old wizard with a long beard and staff.\",\n", " \"Illustrate a child playing with a puppy in a garden.\",\n", " \n", " # Animal and Nature Queries\n", " \"Show a majestic tiger in a dense jungle.\",\n", " \"Create a flock of birds flying over a mountain range.\",\n", " \"Draw a koi fish pond with colorful fish.\",\n", " \"Generate a close-up of a butterfly on a flower.\",\n", " \"Illustrate a desert landscape with cacti and a setting sun.\",\n", " \n", " # Architectural and Object Queries\n", " \"Design a futuristic spaceship hovering above Earth.\",\n", " \"Create a vintage car driving on a country road.\",\n", " \"Draw a small café on a busy European street.\",\n", " \"Generate a treehouse in the middle of a forest.\",\n", " \"Show a steampunk-style clock tower.\",\n", " \n", " # Abstract or Conceptual Queries\n", " \"Create an image representing the concept of time.\",\n", " \"Design a surreal landscape with floating islands.\",\n", " \"Generate an artwork of colors blending like a rainbow.\",\n", " \"Illustrate the feeling of calmness in visual form.\",\n", " \"Show a dreamlike city made of crystal.\",\n", " \n", " # Cultural or Historical Themes\n", " \"Illustrate an ancient Egyptian pyramid under the stars.\",\n", " \"Show a samurai in traditional armor standing in a bamboo forest.\",\n", " \"Draw a Viking ship sailing through a storm.\",\n", " \"Create an Indian temple with intricate carvings.\",\n", " \"Generate a Renaissance-style painting of a feast.\",\n", " \n", " # Event or Scene Queries\n", " \"Show a birthday party with balloons and a cake.\",\n", " \"Create an image of people camping under the stars.\",\n", " \"Draw a bustling market in a small village.\",\n", " \"Illustrate a concert with a crowd and colorful lights.\",\n", " \"Generate an image of a wedding ceremony by the beach.\",\n", " \n", " # Seasonal and Holiday Themes\n", " \"Show a Christmas scene with a decorated tree and snow.\",\n", " \"Generate a spooky Halloween setting with pumpkins and ghosts.\",\n", " \"Create a spring meadow full of flowers and butterflies.\",\n", " \"Draw an autumn forest with falling leaves.\",\n", " \"Illustrate a New Year celebration with fireworks.\"\n", "]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "for i in queries:\n", " data_dict['prompt'].append(i.lower())\n", " data_dict['label'].append(\"image\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "counter=0\n", "detail_list=[\"painting\",\"image\",\"photo\",\"frame\",\"picture\",\"potrait\",\"pic\",\"snapshot\"]\n", "for i in image_data['prompt']:\n", " if any([paint_key in i for paint_key in [\"painting\",\"image\",\"photo\",\"frame\",\"picture\",\"potrait\",\"pic\",\"snapshot\"]]):\n", " data_dict['prompt'].append(i.lower().replace(random.choice(detail_list),\"image\"))\n", " data_dict['label'].append(\"image\")\n", " counter+=1\n", " if counter==20000:\n", " break\n", "counter=0\n", "for j in text_data[:20000]:\n", " data_dict['prompt'].append(j['note'].lower())\n", " data_dict['label'].append(\"text\")\n", " counter+=1\n", " if counter==15000:\n", " break" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "counter=0\n", "for z in text_data[15000:]:\n", " if any([paint_key in z['note'] for paint_key in [\"painting\",\"image\",\"photo\",\"frame\",\"picture\",\"potrait\",\"pic\",\"snapshot\"]]):\n", " data_dict['prompt'].append(z['note'].lower())\n", " data_dict['label'].append(\"text\")\n", " counter+=1\n", " if counter==5000:\n", " break" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
promptlabel
0generate a beautiful sunset over the ocean.image
1create a futuristic cityscape at night.image
2show a cozy cabin in the middle of a snowy for...image
3draw a tropical beach with palm trees and clea...image
4design a medieval castle on a hilltop.image
.........
40035i was watching a documentary and it spoke of s...text
40036should i buy a dslr or a new phone for photogr...text
40037okay, i see. so it depends on how serious i am...text
40038it is just to take photos of my familytext
40039is there any topical treatment i can apply to ...text
\n", "

40040 rows × 2 columns

\n", "
" ], "text/plain": [ " prompt label\n", "0 generate a beautiful sunset over the ocean. image\n", "1 create a futuristic cityscape at night. image\n", "2 show a cozy cabin in the middle of a snowy for... image\n", "3 draw a tropical beach with palm trees and clea... image\n", "4 design a medieval castle on a hilltop. image\n", "... ... ...\n", "40035 i was watching a documentary and it spoke of s... text\n", "40036 should i buy a dslr or a new phone for photogr... text\n", "40037 okay, i see. so it depends on how serious i am... text\n", "40038 it is just to take photos of my family text\n", "40039 is there any topical treatment i can apply to ... text\n", "\n", "[40040 rows x 2 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(data_dict)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import os\n", "folder_path = 'formatted_data/'\n", "\n", "# Get the list of all files in the folder\n", "file_names = os.listdir(folder_path)\n", "max_file_name=max([int(i.split(\"_\")[-1][:-4]) for i in file_names])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Confussing prompts" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# from langchain_community.llms import Ollama\n", "# llm = Ollama(model=\"llava:34b \",num_ctx=10000)\n", "# enhancement=\"I need to train a model to distinguish between text and images. Please create a list of challenging prompts where the model needs to decide whether to generate text or identify an image.\"\n", "# prompt = enhancement\n", "# # result = llm.invoke(prompt)\n", "# value=llm.invoke(prompt)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# print(str(value))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "pd.DataFrame(data_dict).to_csv(\"formatted_data/data_\"+str(max_file_name+1)+\".csv\",index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 2 }