Spaces:

warhawkmonk
/

mutimodal

Running

File size: 17,625 Bytes

973f016

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import random\n",
    "import re\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the two raw sources used to build the classifier dataset.\n",
    "# Forward slashes are accepted on every OS and avoid the invalid escape sequences\n",
    "# (SyntaxWarning on Python 3.12) that the backslash-separated literals produced.\n",
    "image_data = pd.read_csv(\"data_set_formation/custom_prompts_df.csv\")\n",
    "\n",
    "# Decode explicitly as UTF-8 instead of relying on the platform default encoding.\n",
    "with open(\"data_set_formation/data.json\", encoding=\"utf-8\") as json_file:\n",
    "    text_data = json.load(json_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>prompt</th>\n",
       "      <th>image_file</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>painting of King Henry VIII carrying an umbrella</td>\n",
       "      <td>images/0/custom_0_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Fox Mulder and a chinchilla walking down a roa...</td>\n",
       "      <td>images/0/custom_1_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>photo of a gas burner by a soft pretzel</td>\n",
       "      <td>images/0/custom_2_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>photo of Shyster standing street lights on at ...</td>\n",
       "      <td>images/0/custom_3_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>cute young man eating a plant over a fence in ...</td>\n",
       "      <td>images/0/custom_5_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99995</th>\n",
       "      <td>photo of a natural kite at Westminster Abbey</td>\n",
       "      <td>images/102/custom_102419_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99996</th>\n",
       "      <td>smooth rum with a clock in the style of a digi...</td>\n",
       "      <td>images/102/custom_102420_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99997</th>\n",
       "      <td>a lovable elephant by the Gamla Stan, Stockholm</td>\n",
       "      <td>images/102/custom_102421_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99998</th>\n",
       "      <td>photo of Courtney Love with a hot dog</td>\n",
       "      <td>images/102/custom_102422_0.png</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99999</th>\n",
       "      <td>Maniac jumping on a skateboard near a fence</td>\n",
       "      <td>images/102/custom_102423_0.png</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  prompt  \\\n",
       "0       painting of King Henry VIII carrying an umbrella   \n",
       "1      Fox Mulder and a chinchilla walking down a roa...   \n",
       "2                photo of a gas burner by a soft pretzel   \n",
       "3      photo of Shyster standing street lights on at ...   \n",
       "4      cute young man eating a plant over a fence in ...   \n",
       "...                                                  ...   \n",
       "99995       photo of a natural kite at Westminster Abbey   \n",
       "99996  smooth rum with a clock in the style of a digi...   \n",
       "99997    a lovable elephant by the Gamla Stan, Stockholm   \n",
       "99998              photo of Courtney Love with a hot dog   \n",
       "99999        Maniac jumping on a skateboard near a fence   \n",
       "\n",
       "                           image_file  \n",
       "0             images/0/custom_0_0.png  \n",
       "1             images/0/custom_1_0.png  \n",
       "2             images/0/custom_2_0.png  \n",
       "3             images/0/custom_3_0.png  \n",
       "4             images/0/custom_5_0.png  \n",
       "...                               ...  \n",
       "99995  images/102/custom_102419_0.png  \n",
       "99996  images/102/custom_102420_0.png  \n",
       "99997  images/102/custom_102421_0.png  \n",
       "99998  images/102/custom_102422_0.png  \n",
       "99999  images/102/custom_102423_0.png  \n",
       "\n",
       "[100000 rows x 2 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "image_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column-oriented accumulator: two independent lists, one per output column.\n",
    "data_dict = {column: [] for column in (\"prompt\", \"label\")}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "queries = [\n",
    "    # General Descriptions\n",
    "    \"Generate a beautiful sunset over the ocean.\",\n",
    "    \"Create a futuristic cityscape at night.\",\n",
    "    \"Show a cozy cabin in the middle of a snowy forest.\",\n",
    "    \"Draw a tropical beach with palm trees and clear blue water.\",\n",
    "    \"Design a medieval castle on a hilltop.\",\n",
    "    \n",
    "    # Character-Focused Queries\n",
    "    \"Generate a young woman with long red hair in a fantasy setting.\",\n",
    "    \"Create a warrior in futuristic armor holding a glowing sword.\",\n",
    "    \"Draw a friendly robot helping people in a park.\",\n",
    "    \"Design a wise old wizard with a long beard and staff.\",\n",
    "    \"Illustrate a child playing with a puppy in a garden.\",\n",
    "    \n",
    "    # Animal and Nature Queries\n",
    "    \"Show a majestic tiger in a dense jungle.\",\n",
    "    \"Create a flock of birds flying over a mountain range.\",\n",
    "    \"Draw a koi fish pond with colorful fish.\",\n",
    "    \"Generate a close-up of a butterfly on a flower.\",\n",
    "    \"Illustrate a desert landscape with cacti and a setting sun.\",\n",
    "    \n",
    "    # Architectural and Object Queries\n",
    "    \"Design a futuristic spaceship hovering above Earth.\",\n",
    "    \"Create a vintage car driving on a country road.\",\n",
    "    \"Draw a small café on a busy European street.\",\n",
    "    \"Generate a treehouse in the middle of a forest.\",\n",
    "    \"Show a steampunk-style clock tower.\",\n",
    "    \n",
    "    # Abstract or Conceptual Queries\n",
    "    \"Create an image representing the concept of time.\",\n",
    "    \"Design a surreal landscape with floating islands.\",\n",
    "    \"Generate an artwork of colors blending like a rainbow.\",\n",
    "    \"Illustrate the feeling of calmness in visual form.\",\n",
    "    \"Show a dreamlike city made of crystal.\",\n",
    "    \n",
    "    # Cultural or Historical Themes\n",
    "    \"Illustrate an ancient Egyptian pyramid under the stars.\",\n",
    "    \"Show a samurai in traditional armor standing in a bamboo forest.\",\n",
    "    \"Draw a Viking ship sailing through a storm.\",\n",
    "    \"Create an Indian temple with intricate carvings.\",\n",
    "    \"Generate a Renaissance-style painting of a feast.\",\n",
    "    \n",
    "    # Event or Scene Queries\n",
    "    \"Show a birthday party with balloons and a cake.\",\n",
    "    \"Create an image of people camping under the stars.\",\n",
    "    \"Draw a bustling market in a small village.\",\n",
    "    \"Illustrate a concert with a crowd and colorful lights.\",\n",
    "    \"Generate an image of a wedding ceremony by the beach.\",\n",
    "    \n",
    "    # Seasonal and Holiday Themes\n",
    "    \"Show a Christmas scene with a decorated tree and snow.\",\n",
    "    \"Generate a spooky Halloween setting with pumpkins and ghosts.\",\n",
    "    \"Create a spring meadow full of flowers and butterflies.\",\n",
    "    \"Draw an autumn forest with falling leaves.\",\n",
    "    \"Illustrate a New Year celebration with fireworks.\"\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add every hand-written query, lower-cased, with the \"image\" label.\n",
    "data_dict['prompt'].extend(query.lower() for query in queries)\n",
    "data_dict['label'].extend([\"image\"] * len(queries))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keywords that mark a prompt as mentioning an image. \"portrait\" was previously\n",
    "# misspelled \"potrait\", so it could never match.\n",
    "detail_list = [\"painting\", \"image\", \"photo\", \"frame\", \"picture\", \"portrait\", \"pic\", \"snapshot\"]\n",
    "\n",
    "IMAGE_PROMPT_LIMIT = 20000  # image-labelled prompts taken from image_data\n",
    "TEXT_NOTE_LIMIT = 15000     # text-labelled notes taken from the head of text_data\n",
    "\n",
    "# Private, seeded generator so the exported dataset is identical on every run.\n",
    "keyword_rng = random.Random(42)\n",
    "\n",
    "counter = 0\n",
    "for raw_prompt in image_data['prompt']:\n",
    "    # Match on the lower-cased text, i.e. the same form that is stored below.\n",
    "    prompt = raw_prompt.lower()\n",
    "    if any(keyword in prompt for keyword in detail_list):\n",
    "        # NOTE(review): the keyword to swap is drawn at random, so a prompt that does\n",
    "        # not contain the drawn word is stored unchanged -- confirm that this partial\n",
    "        # normalisation is intended.\n",
    "        drawn = keyword_rng.choice(detail_list)\n",
    "        # Whole-word swap (an optional plural \"s\" is kept) so that e.g. \"picture\" is\n",
    "        # not mangled into \"imageture\" when \"pic\" is drawn.\n",
    "        swapped = re.sub(rf\"\\b{re.escape(drawn)}(s?)\\b\", r\"image\\1\", prompt)\n",
    "        data_dict['prompt'].append(swapped)\n",
    "        data_dict['label'].append(\"image\")\n",
    "        counter += 1\n",
    "        if counter == IMAGE_PROMPT_LIMIT:\n",
    "            break\n",
    "\n",
    "# The first TEXT_NOTE_LIMIT notes are added unfiltered with the \"text\" label.\n",
    "for entry in text_data[:TEXT_NOTE_LIMIT]:\n",
    "    data_dict['prompt'].append(entry['note'].lower())\n",
    "    data_dict['label'].append(\"text\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hard negatives: notes that mention an image-related keyword but keep the \"text\" label.\n",
    "HARD_NEGATIVE_LIMIT = 5000\n",
    "\n",
    "counter = 0\n",
    "# Skip the first 15000 notes; the previous cell already added those unfiltered.\n",
    "for entry in text_data[15000:]:\n",
    "    # Test the lower-cased note, i.e. exactly the text that gets stored.\n",
    "    note = entry['note'].lower()\n",
    "    # detail_list is the keyword list defined in the previous cell.\n",
    "    if any(keyword in note for keyword in detail_list):\n",
    "        data_dict['prompt'].append(note)\n",
    "        data_dict['label'].append(\"text\")\n",
    "        counter += 1\n",
    "        if counter == HARD_NEGATIVE_LIMIT:\n",
    "            break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>prompt</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>generate a beautiful sunset over the ocean.</td>\n",
       "      <td>image</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>create a futuristic cityscape at night.</td>\n",
       "      <td>image</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>show a cozy cabin in the middle of a snowy for...</td>\n",
       "      <td>image</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>draw a tropical beach with palm trees and clea...</td>\n",
       "      <td>image</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>design a medieval castle on a hilltop.</td>\n",
       "      <td>image</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40035</th>\n",
       "      <td>i was watching a documentary and it spoke of s...</td>\n",
       "      <td>text</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40036</th>\n",
       "      <td>should i buy a dslr or a new phone for photogr...</td>\n",
       "      <td>text</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40037</th>\n",
       "      <td>okay, i see. so it depends on how serious i am...</td>\n",
       "      <td>text</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40038</th>\n",
       "      <td>it is just to take photos of my family</td>\n",
       "      <td>text</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40039</th>\n",
       "      <td>is there any topical treatment i can apply to ...</td>\n",
       "      <td>text</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40040 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  prompt  label\n",
       "0            generate a beautiful sunset over the ocean.  image\n",
       "1                create a futuristic cityscape at night.  image\n",
       "2      show a cozy cabin in the middle of a snowy for...  image\n",
       "3      draw a tropical beach with palm trees and clea...  image\n",
       "4                 design a medieval castle on a hilltop.  image\n",
       "...                                                  ...    ...\n",
       "40035  i was watching a documentary and it spoke of s...   text\n",
       "40036  should i buy a dslr or a new phone for photogr...   text\n",
       "40037  okay, i see. so it depends on how serious i am...   text\n",
       "40038             it is just to take photos of my family   text\n",
       "40039  is there any topical treatment i can apply to ...   text\n",
       "\n",
       "[40040 rows x 2 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(data_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "folder_path = 'formatted_data/'\n",
    "\n",
    "# Create the folder on first use so that listing it cannot fail on a fresh checkout.\n",
    "os.makedirs(folder_path, exist_ok=True)\n",
    "\n",
    "\n",
    "def _file_index(file_name):\n",
    "    \"\"\"Return the trailing integer of a '.csv' name such as 'data_12.csv', else None.\n",
    "\n",
    "    The integer is the text after the last '_' in the name without its extension.\n",
    "    Names with another extension or a non-numeric suffix (e.g. '.ipynb_checkpoints')\n",
    "    yield None instead of raising.\n",
    "    \"\"\"\n",
    "    stem, extension = os.path.splitext(file_name)\n",
    "    suffix = stem.split(\"_\")[-1]\n",
    "    if extension != \".csv\" or not suffix.isdecimal():\n",
    "        return None\n",
    "    return int(suffix)\n",
    "\n",
    "\n",
    "# Get the list of all files in the folder\n",
    "file_names = os.listdir(folder_path)\n",
    "file_indices = [index for index in map(_file_index, file_names) if index is not None]\n",
    "\n",
    "# Highest index already in use; 0 when no numbered CSV exists yet.\n",
    "max_file_name = max(file_indices, default=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Confusing prompts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from langchain_community.llms import Ollama\n",
    "# llm = Ollama(model=\"llava:34b \",num_ctx=10000)\n",
    "# enhancement=\"I need to train a model to distinguish between text and images. Please create a list of challenging prompts where the model needs to decide whether to generate text or identify an image.\"\n",
    "# prompt = enhancement\n",
    "# # result = llm.invoke(prompt)\n",
    "# value=llm.invoke(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# print(str(value))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the assembled dataset to the next free index inside formatted_data/.\n",
    "output_path = f\"formatted_data/data_{max_file_name + 1}.csv\"\n",
    "dataset = pd.DataFrame(data_dict)\n",
    "dataset.to_csv(output_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}