wznmickey committed on
Commit
cf7c183
1 Parent(s): 781a167
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+magicPromt/Index_stemmed/index.json filter=lfs diff=lfs merge=lfs -text
+Index_stemmed/index.json filter=lfs diff=lfs merge=lfs -text
+Index_stemmed/document_metadata.json filter=lfs diff=lfs merge=lfs -text
+data/data.csv.zip filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+__pycache__/
Dockerfile ADDED
@@ -0,0 +1,11 @@
+FROM python:3.9
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY . .
+
+CMD ["python", "app.py"]
Index_stemmed/document_metadata.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49f0a52432ead8b4b1aeba7df2c48632f7f4578e82d7433a8ac6d8adda1c5239
+size 9044788
Index_stemmed/index.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f89061059c002998d57aaf565439cbb9e424bb074db41a4f035930e7ee53492
+size 36154176
Index_stemmed/statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: purple
 colorTo: purple
 sdk: docker
 pinned: false
+app_port: 7860
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,137 @@
+from flask import Flask, redirect, render_template, request, url_for
+from document_preprocessor import *
+from indexing import *
+from ranker import *
+import pandas as pd
+from image2text import *
+app = Flask(__name__)
+
+
+def initialize_all():
+    MAIN_INDEX_STEMMED = "Index_stemmed"
+    STOPWORD_PATH = "data/stopwords.txt"
+
+    # stopwords
+    stopwords = set()
+    with open(STOPWORD_PATH, "r", encoding="utf-8") as file:
+        for stopword in file:
+            stopwords.add(stopword.strip())
+
+    # index
+    main_index = BasicInvertedIndex()
+    main_index.load(MAIN_INDEX_STEMMED)
+
+    # processor
+    preprocessor = RegexTokenizer(token_regex=r"[\w\.-]+", stemming=True)
+
+    bm25 = BM25(main_index)
+    pipeline = Ranker(main_index, preprocessor, stopwords, bm25)
+    return pipeline
+
+
+def get_results_all(ranker, query, top_n, args=None):
+    DATASET_CSV_PATH = "data/data.csv.zip"
+    results = ranker.query(query)
+    docids = [result[0] for result in results]
+    df = pd.read_csv(DATASET_CSV_PATH)
+    if args is None:
+        df_results = df.iloc[docids]
+    else:
+        df_results = df.iloc[docids]
+        for arg in args:
+            if arg:
+                arg = arg.split(",")
+                prompt_filter = ""
+                for tag in arg:
+                    prompt_filter += (
+                        r'df_results["prompt"].str.contains(fr"\b'
+                        + tag
+                        + r'\b", regex=True, case=False) | '
+                    )
+                df_results = df_results[eval(prompt_filter[:-3])]
+    prompts = df_results["prompt"].tolist()[:top_n]
+    urls = df_results["pic_url"].tolist()[:top_n]
+    return prompts, urls
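Note: get_results_all builds the tag filter as a source-code string and hands it to eval. Below is a minimal eval-free sketch of the same filtering; the "prompt" column and the comma-separated tag strings come from this file, while the function name filter_by_tags and everything else is an illustrative assumption rather than part of the commit.

```python
import re
import pandas as pd

def filter_by_tags(df_results, args):
    """Apply each non-empty comma-separated tag group as an OR of word-boundary matches."""
    if not args:
        return df_results
    for arg in args:
        if not arg:
            continue
        # Start from an all-False mask and OR in one match per tag.
        mask = pd.Series(False, index=df_results.index)
        for tag in arg.split(","):
            if not tag:
                continue
            pattern = r"\b" + re.escape(tag) + r"\b"
            mask |= df_results["prompt"].str.contains(pattern, case=False, regex=True, na=False)
        df_results = df_results[mask]
    return df_results
```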
+
+
+engine = initialize_all()
+
+
+@app.route("/")
+def home():
+    query = "A mountain in spring"
+    print(query)
+    prompts, urls = get_results_all(engine, query, 200)
+    result = list(zip(prompts, urls))
+    return render_template("index.html", result=result)
+
+
+@app.route("/search", methods=["POST", "GET"])
+def search():
+    if request.method == "POST":
+        query = request.form.get("query")
+        if not query:
+            query = "A mountain in spring with white cloud"
+        style = request.form.get("style")
+        scene = request.form.get("scene")
+        medium = request.form.get("medium")
+        light = request.form.get("light")
+        quality = request.form.get("quality")
+        print(query)
+        print(style)
+        print(scene)
+        print(medium)
+        print(light)
+        print(quality)
+        args = [style, scene, medium, light, quality]
+        prompts, urls = get_results_all(engine, query, 200, args)
+        result = list(zip(prompts, urls))
+        return render_template(
+            "search.html",
+            result=result,
+            query=query,
+            style=style,
+            scene=scene,
+            medium=medium,
+            light=light,
+            quality=quality,
+        )
+    return redirect(url_for("home"))
+
+@app.route("/search_picture", methods=["POST", "GET"])
+def search_picture():
+    if request.method == "POST":
+        query = request.files['img']
+        query = image2textData(query)
+        if not query:
+            query = "A mountain in spring with white cloud"
+        style = request.form.get("style")
+        scene = request.form.get("scene")
+        medium = request.form.get("medium")
+        light = request.form.get("light")
+        quality = request.form.get("quality")
+        print(query)
+        print(style)
+        print(scene)
+        print(medium)
+        print(light)
+        print(quality)
+        args = [style, scene, medium, light, quality]
+        prompts, urls = get_results_all(engine, query, 200, args)
+        result = list(zip(prompts, urls))
+        return render_template(
+            "search.html",
+            result=result,
+            query=query,
+            style=style,
+            scene=scene,
+            medium=medium,
+            light=light,
+            quality=quality,
+        )
+    return redirect(url_for("home"))
+
+
+if __name__ == "__main__":
+    # engine = initialize_all()
+    app.run(debug=True)
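Note: the README declares app_port: 7860 while the Dockerfile starts the server with python app.py, so the __main__ block above runs Flask with its defaults (127.0.0.1:5000). A sketch of a main block that matches the declared port; the host/port values are taken from the README and common Hugging Face Spaces setups, not from this commit:

```python
if __name__ == "__main__":
    # Listen on all interfaces at the port declared in README.md (app_port: 7860).
    app.run(host="0.0.0.0", port=7860)
```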
data/data.csv.zip ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db6059cbd15bd103448593d5e9aa2dc2186c8999eb81ea86e69d45ea1fb2d2d0
+size 15907777
data/stopwords.txt ADDED
@@ -0,0 +1,544 @@
1
+ a
2
+ able
3
+ about
4
+ above
5
+ according
6
+ accordingly
7
+ across
8
+ actually
9
+ after
10
+ afterwards
11
+ again
12
+ against
13
+ ain't
14
+ all
15
+ allow
16
+ allows
17
+ almost
18
+ alone
19
+ along
20
+ already
21
+ also
22
+ although
23
+ always
24
+ am
25
+ among
26
+ amongst
27
+ an
28
+ and
29
+ another
30
+ any
31
+ anybody
32
+ anyhow
33
+ anyone
34
+ anything
35
+ anyway
36
+ anyways
37
+ anywhere
38
+ apart
39
+ appear
40
+ appreciate
41
+ appropriate
42
+ are
43
+ aren't
44
+ around
45
+ as
46
+ aside
47
+ ask
48
+ asking
49
+ associated
50
+ at
51
+ available
52
+ away
53
+ awfully
54
+ be
55
+ became
56
+ because
57
+ become
58
+ becomes
59
+ becoming
60
+ been
61
+ before
62
+ beforehand
63
+ behind
64
+ being
65
+ believe
66
+ below
67
+ beside
68
+ besides
69
+ best
70
+ better
71
+ between
72
+ beyond
73
+ both
74
+ brief
75
+ but
76
+ by
77
+ c'mon
78
+ c's
79
+ came
80
+ can
81
+ can't
82
+ cannot
83
+ cant
84
+ cause
85
+ causes
86
+ certain
87
+ certainly
88
+ changes
89
+ clearly
90
+ co
91
+ com
92
+ come
93
+ comes
94
+ concerning
95
+ consequently
96
+ consider
97
+ considering
98
+ contain
99
+ containing
100
+ contains
101
+ corresponding
102
+ could
103
+ couldn't
104
+ course
105
+ currently
106
+ definitely
107
+ described
108
+ despite
109
+ did
110
+ didn't
111
+ different
112
+ do
113
+ does
114
+ doesn't
115
+ doing
116
+ don't
117
+ done
118
+ down
119
+ downwards
120
+ during
121
+ each
122
+ edu
123
+ eg
124
+ eight
125
+ either
126
+ else
127
+ elsewhere
128
+ enough
129
+ entirely
130
+ especially
131
+ et
132
+ etc
133
+ even
134
+ ever
135
+ every
136
+ everybody
137
+ everyone
138
+ everything
139
+ everywhere
140
+ ex
141
+ exactly
142
+ example
143
+ except
144
+ far
145
+ few
146
+ fifth
147
+ first
148
+ five
149
+ followed
150
+ following
151
+ follows
152
+ for
153
+ former
154
+ formerly
155
+ forth
156
+ four
157
+ from
158
+ further
159
+ furthermore
160
+ get
161
+ gets
162
+ getting
163
+ given
164
+ gives
165
+ go
166
+ goes
167
+ going
168
+ gone
169
+ got
170
+ gotten
171
+ greetings
172
+ had
173
+ hadn't
174
+ happens
175
+ hardly
176
+ has
177
+ hasn't
178
+ have
179
+ haven't
180
+ having
181
+ he
182
+ he's
183
+ hello
184
+ help
185
+ hence
186
+ her
187
+ here
188
+ here's
189
+ hereafter
190
+ hereby
191
+ herein
192
+ hereupon
193
+ hers
194
+ herself
195
+ hi
196
+ him
197
+ himself
198
+ his
199
+ hither
200
+ hopefully
201
+ how
202
+ howbeit
203
+ however
204
+ i'd
205
+ i'll
206
+ i'm
207
+ i've
208
+ ie
209
+ if
210
+ ignored
211
+ immediate
212
+ in
213
+ inasmuch
214
+ inc
215
+ indeed
216
+ indicate
217
+ indicated
218
+ indicates
219
+ inner
220
+ insofar
221
+ instead
222
+ into
223
+ inward
224
+ is
225
+ isn't
226
+ it
227
+ it'd
228
+ it'll
229
+ it's
230
+ its
231
+ itself
232
+ just
233
+ keep
234
+ keeps
235
+ kept
236
+ know
237
+ knows
238
+ known
239
+ last
240
+ lately
241
+ later
242
+ latter
243
+ latterly
244
+ least
245
+ less
246
+ lest
247
+ let
248
+ let's
249
+ like
250
+ liked
251
+ likely
252
+ little
253
+ look
254
+ looking
255
+ looks
256
+ ltd
257
+ mainly
258
+ many
259
+ may
260
+ maybe
261
+ me
262
+ mean
263
+ meanwhile
264
+ merely
265
+ might
266
+ more
267
+ moreover
268
+ most
269
+ mostly
270
+ much
271
+ must
272
+ my
273
+ myself
274
+ name
275
+ namely
276
+ nd
277
+ near
278
+ nearly
279
+ necessary
280
+ need
281
+ needs
282
+ neither
283
+ never
284
+ nevertheless
285
+ new
286
+ next
287
+ nine
288
+ no
289
+ nobody
290
+ non
291
+ none
292
+ noone
293
+ nor
294
+ normally
295
+ not
296
+ nothing
297
+ novel
298
+ now
299
+ nowhere
300
+ obviously
301
+ of
302
+ off
303
+ often
304
+ oh
305
+ ok
306
+ okay
307
+ old
308
+ on
309
+ once
310
+ one
311
+ ones
312
+ only
313
+ onto
314
+ or
315
+ other
316
+ others
317
+ otherwise
318
+ ought
319
+ our
320
+ ours
321
+ ourselves
322
+ out
323
+ outside
324
+ over
325
+ overall
326
+ own
327
+ particular
328
+ particularly
329
+ per
330
+ perhaps
331
+ placed
332
+ please
333
+ plus
334
+ possible
335
+ presumably
336
+ probably
337
+ provides
338
+ que
339
+ quite
340
+ qv
341
+ rather
342
+ rd
343
+ re
344
+ really
345
+ reasonably
346
+ regarding
347
+ regardless
348
+ regards
349
+ relatively
350
+ respectively
351
+ right
352
+ said
353
+ same
354
+ saw
355
+ say
356
+ saying
357
+ says
358
+ second
359
+ secondly
360
+ see
361
+ seeing
362
+ seem
363
+ seemed
364
+ seeming
365
+ seems
366
+ seen
367
+ self
368
+ selves
369
+ sensible
370
+ sent
371
+ serious
372
+ seriously
373
+ seven
374
+ several
375
+ shall
376
+ she
377
+ should
378
+ shouldn't
379
+ since
380
+ six
381
+ so
382
+ some
383
+ somebody
384
+ somehow
385
+ someone
386
+ something
387
+ sometime
388
+ sometimes
389
+ somewhat
390
+ somewhere
391
+ soon
392
+ sorry
393
+ specified
394
+ specify
395
+ specifying
396
+ still
397
+ sub
398
+ such
399
+ sup
400
+ sure
401
+ t's
402
+ take
403
+ taken
404
+ tell
405
+ tends
406
+ th
407
+ than
408
+ thank
409
+ thanks
410
+ thanx
411
+ that
412
+ that's
413
+ thats
414
+ the
415
+ their
416
+ theirs
417
+ them
418
+ themselves
419
+ then
420
+ thence
421
+ there
422
+ there's
423
+ thereafter
424
+ thereby
425
+ therefore
426
+ therein
427
+ theres
428
+ thereupon
429
+ these
430
+ they
431
+ they'd
432
+ they'll
433
+ they're
434
+ they've
435
+ think
436
+ third
437
+ this
438
+ thorough
439
+ thoroughly
440
+ those
441
+ though
442
+ three
443
+ through
444
+ throughout
445
+ thru
446
+ thus
447
+ to
448
+ together
449
+ too
450
+ took
451
+ toward
452
+ towards
453
+ tried
454
+ tries
455
+ truly
456
+ try
457
+ trying
458
+ twice
459
+ two
460
+ un
461
+ under
462
+ unfortunately
463
+ unless
464
+ unlikely
465
+ until
466
+ unto
467
+ up
468
+ upon
469
+ us
470
+ use
471
+ used
472
+ useful
473
+ uses
474
+ using
475
+ usually
476
+ value
477
+ various
478
+ very
479
+ via
480
+ viz
481
+ vs
482
+ want
483
+ wants
484
+ was
485
+ wasn't
486
+ way
487
+ we
488
+ we'd
489
+ we'll
490
+ we're
491
+ we've
492
+ welcome
493
+ well
494
+ went
495
+ were
496
+ weren't
497
+ what
498
+ what's
499
+ whatever
500
+ when
501
+ whence
502
+ whenever
503
+ where
504
+ where's
505
+ whereafter
506
+ whereas
507
+ whereby
508
+ wherein
509
+ whereupon
510
+ wherever
511
+ whether
512
+ which
513
+ while
514
+ whither
515
+ who
516
+ who's
517
+ whoever
518
+ whole
519
+ whom
520
+ whose
521
+ why
522
+ will
523
+ willing
524
+ wish
525
+ with
526
+ within
527
+ without
528
+ won't
529
+ wonder
530
+ would
531
+ would
532
+ wouldn't
533
+ yes
534
+ yet
535
+ you
536
+ you'd
537
+ you'll
538
+ you're
539
+ you've
540
+ your
541
+ yours
542
+ yourself
543
+ yourselves
544
+ zero
document_preprocessor.py ADDED
@@ -0,0 +1,111 @@
1
+ from nltk.tokenize import RegexpTokenizer
2
+ from nltk.stem import PorterStemmer
3
+
4
+ class Tokenizer:
5
+ def __init__(
6
+ self,
7
+ lowercase: bool = True,
8
+ multiword_expressions: list[str] = None,
9
+ stemming: bool = False,
10
+ ) -> None:
11
+ """
12
+ A generic class for objects that turn strings into sequences of tokens.
13
+ A tokenizer can support different preprocessing options or use different methods
14
+ for determining word breaks.
15
+
16
+ Args:
17
+ lowercase: Whether to lowercase all the tokens
18
+ multiword_expressions: A list of strings that should be recognized as single tokens
19
+ If set to 'None' no multi-word expression matching is performed.
20
+ No need to perform/implement multi-word expression recognition for HW3.
21
+ """
22
+ # TODO: Save arguments that are needed as fields of this class
23
+ self.lowercase = lowercase
24
+ self.multiword_expressions = multiword_expressions
25
+ self.stemming = stemming
26
+
27
+ def find_and_replace_mwes(self, input_tokens: list[str]) -> list[str]:
28
+ """
29
+ IGNORE THIS PART; NO NEED TO IMPLEMENT THIS SINCE NO MULTI-WORD EXPRESSION PROCESSING IS TO BE USED.
30
+ For the given sequence of tokens, finds any recognized multi-word expressions in the sequence
31
+ and replaces that subsequence with a single token containing the multi-word expression.
32
+
33
+ Args:
34
+ input_tokens: A list of tokens
35
+
36
+ Returns:
37
+ A list of tokens containing processed multi-word expressions
38
+ """
39
+ # NOTE: You shouldn't implement this in homework
40
+ raise NotImplementedError("MWE is not supported")
41
+
42
+ def postprocess(self, input_tokens: list[str]) -> list[str]:
43
+ """
44
+ Performs any set of optional operations to modify the tokenized list of words such as
45
+ lower-casing and stemming and returns the modified list of tokens.
46
+
47
+ Args:
48
+ input_tokens: A list of tokens
49
+
50
+ Returns:
51
+ A list of tokens processed by lower-casing and stemming depending on the given condition
52
+ """
53
+ # TODO: Add support for lower-casing
54
+ if self.lowercase:
55
+ input_tokens = [token.lower() for token in input_tokens]
56
+ if self.stemming:
57
+ ps = PorterStemmer()
58
+ input_tokens = [ps.stem(token) for token in input_tokens]
59
+ return input_tokens
60
+
61
+ def tokenize(self, text: str) -> list[str]:
62
+ """
63
+ Splits a string into a list of tokens and performs all required postprocessing steps.
64
+
65
+ Args:
66
+ text: An input text you want to tokenize
67
+
68
+ Returns:
69
+ A list of tokens
70
+ """
71
+ raise NotImplementedError(
72
+ "tokenize() is not implemented in the base class; please use a subclass"
73
+ )
74
+
75
+
76
+ class RegexTokenizer(Tokenizer):
77
+ def __init__(
78
+ self,
79
+ token_regex: str,
80
+ lowercase: bool = True,
81
+ multiword_expressions: list[str] = None,
82
+ stemming: bool = False,
83
+ ) -> None:
84
+ """
85
+ Uses NLTK's RegexpTokenizer to tokenize a given string.
86
+
87
+ Args:
88
+ token_regex: Use the following default regular expression pattern: '\\w+'
89
+ lowercase: Whether to lowercase all the tokens
90
+ multiword_expressions: A list of strings that should be recognized as single tokens
91
+ If set to 'None' no multi-word expression matching is performed.
92
+ No need to perform/implement multi-word expression recognition for HW3; you can ignore this.
93
+ """
94
+ super().__init__(lowercase, multiword_expressions, stemming)
95
+ # TODO: Save a new argument that is needed as a field of this class
96
+ # TODO: Initialize the NLTK's RegexpTokenizer
97
+ self.tokenizer = RegexpTokenizer(token_regex)
98
+
99
+ def tokenize(self, text: str) -> list[str]:
100
+ """Uses NLTK's RegexTokenizer and a regular expression pattern to tokenize a string.
101
+
102
+ Args:
103
+ text: An input text you want to tokenize
104
+
105
+ Returns:
106
+ A list of tokens
107
+ """
108
+ # TODO: Tokenize the given text and perform postprocessing on the list of tokens
109
+ # using the postprocess function
110
+ words = self.tokenizer.tokenize(text)
111
+ return self.postprocess(words)
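Note: a short usage sketch of RegexTokenizer with the same settings app.py passes to it (the sample text is made up):

```python
from document_preprocessor import RegexTokenizer

tokenizer = RegexTokenizer(token_regex=r"[\w\.-]+", stemming=True)
# Splits on the regex, lower-cases, then applies the Porter stemmer,
# e.g. "Mountains" -> "mountain", "painting" -> "paint".
print(tokenizer.tokenize("Snowy Mountains, oil painting, 4k"))
```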
image2text.py ADDED
@@ -0,0 +1,18 @@
+import requests
+
+API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
+headers = {"Authorization": f"Bearer KEY"}
+
+def image2textFilename(filename):
+    with open(filename, "rb") as f:
+        data = f.read()
+    response = requests.post(API_URL, headers=headers, data=data)
+    return response.json()[0]["generated_text"]
+def image2textData(data):
+    print(data)
+    response = requests.post(API_URL, headers=headers, data=data)
+    return response.json()[0]["generated_text"]
+
+# output = image2textFilename("/home/wznmickey/Pictures/20230130-153108.jpeg")
+
+# print(output)
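Note: the Authorization header above carries a placeholder token. A sketch that reads the token from an environment variable instead; HF_API_TOKEN is a hypothetical secret name, not part of this commit:

```python
import os
import requests

API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
# Set HF_API_TOKEN as a Space secret or in the shell before starting the app.
headers = {"Authorization": f"Bearer {os.environ.get('HF_API_TOKEN', '')}"}

def image2textData(data):
    # Same call as above, with an explicit error if the Inference API rejects the request.
    response = requests.post(API_URL, headers=headers, data=data)
    response.raise_for_status()
    return response.json()[0]["generated_text"]
```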
indexing.py ADDED
@@ -0,0 +1,444 @@
1
+ from enum import Enum
2
+ import json
3
+ import os
4
+ from tqdm import tqdm
5
+ from collections import Counter, defaultdict
6
+ from document_preprocessor import Tokenizer
7
+ import gzip
8
+
9
+
10
+ class IndexType(Enum):
11
+ # The three types of index currently supported are InvertedIndex, PositionalIndex and OnDiskInvertedIndex
12
+ InvertedIndex = 'BasicInvertedIndex'
13
+ # NOTE: You don't need to support the following three
14
+ PositionalIndex = 'PositionalIndex'
15
+ OnDiskInvertedIndex = 'OnDiskInvertedIndex'
16
+ SampleIndex = 'SampleIndex'
17
+
18
+
19
+ class InvertedIndex:
20
+ def __init__(self) -> None:
21
+ """
22
+ The base interface representing the data structure for all index classes.
23
+ The functions are meant to be implemented in the actual index classes and not as part of this interface.
24
+ """
25
+ self.statistics = defaultdict(Counter) # Central statistics of the index
26
+ self.index = {} # Index
27
+ self.document_metadata = {} # Metadata like length, number of unique tokens of the documents
28
+
29
+ # NOTE: The following functions have to be implemented in the three inherited classes and not in this class
30
+
31
+ def remove_doc(self, docid: int) -> None:
32
+ """
33
+ Removes a document from the index and updates the index's metadata on the basis of this
34
+ document's deletion.
35
+
36
+ Args:
37
+ docid: The id of the document
38
+ """
39
+ # TODO: Implement this to remove a document from the entire index and statistics
40
+ raise NotImplementedError
41
+
42
+ def add_doc(self, docid: int, tokens: list[str]) -> None:
43
+ """
44
+ Adds a document to the index and updates the index's metadata on the basis of this
45
+ document's addition (e.g., collection size, average document length).
46
+
47
+ Args:
48
+ docid: The id of the document
49
+ tokens: The tokens of the document
50
+ Tokens that should not be indexed will have been replaced with None in this list.
51
+ The length of the list should be equal to the number of tokens prior to any token removal.
52
+ """
53
+ # TODO: Implement this to add documents to the index
54
+ raise NotImplementedError
55
+
56
+ def get_postings(self, term: str) -> list[tuple[int, int]]:
57
+ """
58
+ Returns the list of postings, which contains (at least) all the documents that have that term.
59
+ In most implementation, this information is represented as list of tuples where each tuple
60
+ contains the docid and the term's frequency in that document.
61
+
62
+ Args:
63
+ term: The term to be searched for
64
+
65
+ Returns:
66
+ A list of tuples containing a document id for a document
67
+ that had that search term and an int value indicating the term's frequency in
68
+ the document
69
+ """
70
+ # TODO: Implement this to fetch a term's postings from the index
71
+ raise NotImplementedError
72
+
73
+ def get_doc_metadata(self, doc_id: int) -> dict[str, int]:
74
+ """
75
+ For the given document id, returns a dictionary with metadata about that document.
76
+ Metadata should include keys such as the following:
77
+ "unique_tokens": How many unique tokens are in the document (among those not-filtered)
78
+ "length": how long the document is in terms of tokens (including those filtered)
79
+
80
+ Args:
81
+ docid: The id of the document
82
+
83
+ Returns:
84
+ A dictionary with metadata about the document
85
+ """
86
+ # TODO: Implement to fetch a particular document stored in metadata
87
+ raise NotImplementedError
88
+
89
+ def get_term_metadata(self, term: str) -> dict[str, int]:
90
+ """
91
+ For the given term, returns a dictionary with metadata about that term in the index.
92
+ Metadata should include keys such as the following:
93
+ "count": How many times this term appeared in the corpus as a whole
94
+
95
+ Args:
96
+ term: The term to be searched for
97
+
98
+ Returns:
99
+ A dictionary with metadata about the term in the index
100
+ """
101
+ # TODO: Implement to fetch a particular term stored in metadata
102
+ raise NotImplementedError
103
+
104
+ def get_statistics(self) -> dict[str, int]:
105
+ """
106
+ Returns a dictionary mapping statistical properties (named as strings) about the index to their values.
107
+ Keys should include at least the following:
108
+ "unique_token_count": how many unique terms are in the index
109
+ "total_token_count": how many total tokens are indexed including filterd tokens),
110
+ i.e., the sum of the lengths of all documents
111
+ "stored_total_token_count": how many total tokens are indexed excluding filterd tokens
112
+ "number_of_documents": the number of documents indexed
113
+ "mean_document_length": the mean number of tokens in a document (including filter tokens)
114
+
115
+ Returns:
116
+ A dictionary mapping statistical properties (named as strings) about the index to their values
117
+ """
118
+ # TODO: Calculate statistics like 'unique_token_count', 'total_token_count',
119
+ # 'number_of_documents', 'mean_document_length' and any other relevant central statistic
120
+ raise NotImplementedError
121
+
122
+ def save(self, index_directory_name: str) -> None:
123
+ """
124
+ Saves the state of this index to the provided directory.
125
+ The save state should include the inverted index as well as
126
+ any metadata need to load this index back from disk.
127
+
128
+ Args:
129
+ index_directory_name: The name of the directory where the index will be saved
130
+ """
131
+ # TODO: Save the index files to disk
132
+ raise NotImplementedError
133
+
134
+ def load(self, index_directory_name: str) -> None:
135
+ """
136
+ Loads the inverted index and any associated metadata from files located in the directory.
137
+ This method will only be called after save() has been called, so the directory should
138
+ match the filenames used in save().
139
+
140
+ Args:
141
+ index_directory_name: The name of the directory that contains the index
142
+ """
143
+ # TODO: Load the index files from disk to a Python object
144
+ raise NotImplementedError
145
+
146
+
147
+ class BasicInvertedIndex(InvertedIndex):
148
+ def __init__(self) -> None:
149
+ """
150
+ An inverted index implementation where everything is kept in memory
151
+ """
152
+ super().__init__()
153
+ self.statistics['index_type'] = 'BasicInvertedIndex'
154
+ # For example, you can initialize the index and statistics here:
155
+ # self.statistics['docmap'] = {}
156
+ # self.index = defaultdict(list)
157
+ # self.doc_id = 0
158
+ self.statistics['term_metadata'] = defaultdict(list)
159
+ self.statistics['unique_token_count'] = 0
160
+ self.statistics['total_token_count'] = 0
161
+ self.statistics['stored_total_token_count'] = 0
162
+ self.statistics['number_of_documents'] = 0
163
+ self.statistics['mean_document_length'] = 0
164
+ self.index = defaultdict(list)
165
+
166
+ # TODO: Implement all the functions mentioned in the interface
167
+ # This is the typical inverted index where each term keeps track of documents and the term count per document
168
+ def remove_doc(self, docid: int) -> None:
169
+ # TODO implement this to remove a document from the entire index and statistics
170
+ for token in self.index:
171
+ for i, (doc, count) in enumerate(self.index[token]):
172
+ if doc == docid:
173
+ self.index[token].pop(i)
174
+ self.statistics['stored_total_token_count'] -= count
175
+ self.statistics['term_metadata'][token][0] -= 1
176
+ self.statistics['term_metadata'][token][1] -= count
177
+ break
178
+ self.statistics['total_token_count'] -= self.document_metadata[docid]['length']
179
+ self.statistics['number_of_documents'] -= 1
180
+ del self.document_metadata[docid]
181
+ self.get_statistics()
182
+
183
+
184
+ def add_doc(self, docid: int, tokens: list[str]) -> None:
185
+ '''
186
+ Adds a document to the index and updates the index's metadata on the basis of this
187
+ document's addition (e.g., collection size, average document length, etc.)
188
+
189
+ Arguments:
190
+ docid [int]: the identifier of the document
191
+
192
+ tokens list[str]: the tokens of the document. Tokens that should not be indexed will have
193
+ been replaced with None in this list. The length of the list should be equal to the number
194
+ of tokens prior to any token removal.
195
+ '''
196
+ # TODO implement this to add documents to the index
197
+ if not tokens:
198
+ return
199
+
200
+ token_counts = Counter(tokens)
201
+ for token in token_counts:
202
+ if token is None:
203
+ continue
204
+ self.index[token].append((docid, token_counts[token]))
205
+ self.statistics['stored_total_token_count'] += token_counts[token]
206
+ if token in self.statistics['term_metadata']:
207
+ self.statistics['term_metadata'][token][0] += 1
208
+ self.statistics['term_metadata'][token][1] += token_counts[token]
209
+ else:
210
+ self.statistics['term_metadata'][token] = [1, token_counts[token]]
211
+
212
+ self.document_metadata[docid] = {'unique_tokens': len(token_counts), 'length': len(tokens)}
213
+ self.statistics['total_token_count'] += len(tokens)
214
+ self.statistics['number_of_documents'] += 1
215
+
216
+
217
+ def get_postings(self, term: str) -> list[tuple[int, int]]:
218
+ '''
219
+ Returns the list of postings, which contains (at least) all the documents that have that term.
220
+ In most implementations this information is represented as a list of tuples where each tuple
221
+ contains the docid and the term's frequency in that document.
222
+
223
+ Arguments:
224
+ term [str]: the term to be searched for
225
+
226
+ Returns:
227
+ list[tuple[int,str]] : A list of tuples containing a document id for a document
228
+ that had that search term and an int value indicating the term's frequency in
229
+ the document.
230
+ '''
231
+ # TODO implement this to fetch a term's postings from the index
232
+ if term not in self.index:
233
+ return []
234
+ return self.index[term]
235
+
236
+ def get_doc_metadata(self, doc_id: int) -> dict[str, int]:
237
+ '''
238
+ For the given document id, returns a dictionary with metadata about that document. Metadata
239
+ should include keys such as the following:
240
+ "unique_tokens": How many unique tokens are in the document (among those not-filtered)
241
+ "length": how long the document is in terms of tokens (including those filtered)
242
+ '''
243
+ # TODO implement to fetch a particular documents stored metadata
244
+ if doc_id in self.document_metadata:
245
+ return self.document_metadata[doc_id]
246
+ return {'unique_tokens': 0, 'length': 0}
247
+
248
+ def get_term_metadata(self, term: str) -> dict[str, int]:
249
+ '''
250
+ For the given term, returns a dictionary with metadata about that term in the index. Metadata
251
+ should include keys such as the following:
252
+ "count": How many times this term appeared in the corpus as a whole.
253
+ '''
254
+ # TODO implement to fetch a particular terms stored metadata
255
+ if term not in self.statistics['term_metadata']:
256
+ return {'document_count': 0, 'count': 0}
257
+ document_count = self.statistics['term_metadata'][term][0]
258
+ term_frequency = self.statistics['term_metadata'][term][1]
259
+ return {'document_count': document_count, 'count': term_frequency}
260
+
261
+ def get_statistics(self) -> dict[str, int]:
262
+ '''
263
+ Returns a dictionary mapping statistical properties (named as strings) about the index to their values.
264
+ Keys should include at least the following:
265
+
266
+ "unique_token_count": how many unique terms are in the index
267
+ "total_token_count": how many total tokens are indexed including filterd tokens),
268
+ i.e., the sum of the lengths of all documents
269
+ "stored_total_token_count": how many total tokens are indexed excluding filterd tokens
270
+ "number_of_documents": the number of documents indexed
271
+ "mean_document_length": the mean number of tokens in a document (including filter tokens)
272
+ '''
273
+ # TODO calculate statistics like 'unique_token_count', 'total_token_count',
274
+ # 'number_of_documents', 'mean_document_length' and any other relevant central statistic.
275
+ self.statistics['unique_token_count'] = len(self.index)
276
+ self.statistics['mean_document_length'] = self.statistics['total_token_count']/self.statistics['number_of_documents'] if self.statistics['number_of_documents'] else 0
277
+ return {'unique_token_count': self.statistics['unique_token_count'],
278
+ 'total_token_count': self.statistics['total_token_count'],
279
+ 'stored_total_token_count': self.statistics['stored_total_token_count'],
280
+ 'number_of_documents': self.statistics['number_of_documents'],
281
+ 'mean_document_length': self.statistics['mean_document_length']}
282
+
283
+ # NOTE: changes in this method for HW2
284
+ def save(self, index_directory_name) -> None:
285
+ '''
286
+ Saves the state of this index to the provided directory. The save state should include the
287
+ inverted index as well as any meta data need to load this index back from disk
288
+ '''
289
+ # TODO save the index files to disk
290
+ if not os.path.exists(index_directory_name):
291
+ os.mkdir(index_directory_name)
292
+ with open(index_directory_name+'/'+'index.json', 'w', encoding='utf-8') as f:
293
+ json.dump(self.index, f)
294
+ with open(index_directory_name+'/'+'document_metadata.json', 'w', encoding='utf-8') as f:
295
+ json.dump(self.document_metadata, f)
296
+ with open(index_directory_name+'/'+'statistics.json', 'w', encoding='utf-8') as f:
297
+ json.dump(self.statistics, f)
298
+ print('Index saved!')
299
+
300
+ # NOTE: changes in this method for HW2
301
+ def load(self, index_directory_name) -> None:
302
+ '''
303
+ Loads the inverted index and any associated metadata from files located in the directory.
304
+ This method will only be called after save() has been called, so the directory should
305
+ match the filenames used in save()
306
+ '''
307
+ # TODO load the index files from disk to a Python object
308
+ with open(index_directory_name+'/'+'index.json', 'r', encoding='utf-8') as f:
309
+ self.index = json.load(f)
310
+ with open(index_directory_name+'/'+'document_metadata.json', 'r', encoding='utf-8') as f:
311
+ document_metadata = json.load(f)
312
+ self.document_metadata = {int(k): v for k, v in document_metadata.items()}
313
+ with open(index_directory_name+'/'+'statistics.json', 'r', encoding='utf-8') as f:
314
+ self.statistics = json.load(f)
315
+
316
+
317
+ class Indexer:
318
+ """
319
+ The Indexer class is responsible for creating the index used by the search/ranking algorithm.
320
+ """
321
+ @staticmethod
322
+ def create_index(index_type: IndexType, dataset_path: str,
323
+ document_preprocessor: Tokenizer, stopwords: set[str],
324
+ minimum_word_frequency: int, text_key="prompt",
325
+ max_docs: int = -1, doc_augment_dict: dict[int, list[str]] | None = None) -> InvertedIndex:
326
+ """
327
+ Creates an inverted index.
328
+
329
+ Args:
330
+ index_type: This parameter tells you which type of index to create, e.g., BasicInvertedIndex
331
+ dataset_path: The file path to your dataset
332
+ document_preprocessor: A class which has a 'tokenize' function which would read each document's text
333
+ and return a list of valid tokens
334
+ stopwords: The set of stopwords to remove during preprocessing or 'None' if no stopword filtering is to be done
335
+ minimum_word_frequency: An optional configuration which sets the minimum word frequency of a particular token to be indexed
336
+ If the token does not appear in the document at least for the set frequency, it will not be indexed.
337
+ Setting a value of 0 will completely ignore the parameter.
338
+ text_key: The key in the JSON to use for loading the text
339
+ max_docs: The maximum number of documents to index
340
+ Documents are processed in the order they are seen.
341
+ doc_augment_dict: An optional argument; This is a dict created from the doc2query.csv where the keys are
342
+ the document id and the values are the list of queries for a particular document.
343
+
344
+ Returns:
345
+ An inverted index
346
+ """
347
+ # TODO (HW3): This function now has an optional argument doc_augment_dict; check README
348
+
349
+ # HINT: Think of what to do when doc_augment_dict exists, how can you deal with the extra information?
350
+ # How can you use that information with the tokens?
351
+ # If doc_augment_dict doesn't exist, it's the same as before, tokenizing just the document text
352
+
353
+ # TODO: Implement this class properly. This is responsible for going through the documents
354
+ # one by one and inserting them into the index after tokenizing the document
355
+
356
+ # TODO: Figure out what type of InvertedIndex to create.
357
+ # For HW3, only the BasicInvertedIndex is required to be supported
358
+
359
+ # TODO: If minimum word frequencies are specified, process the collection to get the
360
+ # word frequencies
361
+
362
+ # NOTE: Make sure to support both .jsonl.gz and .jsonl as input
363
+
364
+ # TODO: Figure out which set of words to not index because they are stopwords or
365
+ # have too low of a frequency
366
+
367
+ # HINT: This homework should work fine on a laptop with 8GB of memory but if you need,
368
+ # you can delete some unused objects here to free up some space
369
+
370
+ # TODO: Read the collection and process/index each document.
371
+ # Only index the terms that are not stopwords and have high-enough frequency
372
+
373
+ index = None
374
+ if index_type == IndexType.InvertedIndex:
375
+ index = BasicInvertedIndex()
376
+ else:
377
+ raise NameError
378
+
379
+
380
+ filtered_tokens = set()
381
+ if minimum_word_frequency:
382
+ word_frequency = Counter()
383
+ if dataset_path.endswith('jsonl'):
384
+ with open(dataset_path, 'r', encoding='utf-8') as f:
385
+ for i, line in tqdm(enumerate(f)):
386
+ if max_docs > 0 and i >= max_docs:
387
+ break
388
+ docid = json.loads(line)['docid']
389
+ doc = json.loads(line)[text_key]
390
+ if doc_augment_dict and docid in doc_augment_dict:
391
+ doc = ' '.join([doc] + doc_augment_dict[docid])
392
+ tokens = document_preprocessor.tokenize(doc)
393
+ word_frequency.update(tokens)
394
+ elif dataset_path.endswith('jsonl.gz'):
395
+ with gzip.open(dataset_path, 'rt', encoding='utf-8') as f:
396
+ for i, line in tqdm(enumerate(f)):
397
+ if max_docs > 0 and i >= max_docs:
398
+ break
399
+ docid = json.loads(line)['docid']
400
+ doc = json.loads(line)[text_key]
401
+ if doc_augment_dict and docid in doc_augment_dict:
402
+ doc = ' '.join([doc] + doc_augment_dict[docid])
403
+ tokens = document_preprocessor.tokenize(doc)
404
+ word_frequency.update(tokens)
405
+ else:
406
+ raise TypeError('Dataset type not supported')
407
+ for word in word_frequency:
408
+ if word_frequency[word] < minimum_word_frequency:
409
+ filtered_tokens.add(word)
410
+
411
+ if stopwords:
412
+ filtered_tokens |= stopwords
413
+
414
+ if dataset_path.endswith('jsonl'):
415
+ with open(dataset_path, 'r', encoding='utf-8') as f:
416
+ for i, line in tqdm(enumerate(f)):
417
+ if max_docs > 0 and i >= max_docs:
418
+ break
419
+ docid = json.loads(line)['docid']
420
+ doc = json.loads(line)[text_key]
421
+ if doc_augment_dict and docid in doc_augment_dict:
422
+ doc = ' '.join([doc] + doc_augment_dict[docid])
423
+ tokens = document_preprocessor.tokenize(doc)
424
+ for j, token in enumerate(tokens):
425
+ if token in filtered_tokens:
426
+ tokens[j] = None
427
+ index.add_doc(docid, tokens)
428
+ elif dataset_path.endswith('jsonl.gz'):
429
+ with gzip.open(dataset_path, 'rt', encoding='utf-8') as f:
430
+ for i, line in tqdm(enumerate(f)):
431
+ if max_docs > 0 and i >= max_docs:
432
+ break
433
+ docid = json.loads(line)['docid']
434
+ doc = json.loads(line)[text_key]
435
+ if doc_augment_dict and docid in doc_augment_dict:
436
+ doc = ' '.join([doc] + doc_augment_dict[docid])
437
+ tokens = document_preprocessor.tokenize(doc)
438
+ for j, token in enumerate(tokens):
439
+ if token in filtered_tokens:
440
+ tokens[j] = None
441
+ index.add_doc(docid, tokens)
442
+ index.get_statistics()
443
+
444
+ return index
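Note: a sketch of how an index directory like the Index_stemmed/ files in this commit could be built and saved with Indexer.create_index; the dataset path and minimum_word_frequency are assumptions, while the tokenizer settings mirror initialize_all in app.py:

```python
from document_preprocessor import RegexTokenizer
from indexing import Indexer, IndexType

# Stopwords file from this repo, one word per line.
with open("data/stopwords.txt", "r", encoding="utf-8") as f:
    stopwords = {line.strip() for line in f}

tokenizer = RegexTokenizer(token_regex=r"[\w\.-]+", stemming=True)

# "data/prompts.jsonl" is a hypothetical path; create_index expects .jsonl or .jsonl.gz
# records containing "docid" and the text_key field ("prompt" by default).
index = Indexer.create_index(
    index_type=IndexType.InvertedIndex,
    dataset_path="data/prompts.jsonl",
    document_preprocessor=tokenizer,
    stopwords=stopwords,
    minimum_word_frequency=2,  # assumed threshold; 0 disables the frequency filter
)
index.save("Index_stemmed")  # writes index.json, document_metadata.json, statistics.json
```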
ranker.py ADDED
@@ -0,0 +1,259 @@
1
+ """
2
+ This is the template for implementing the rankers for your search engine.
3
+ You will be implementing WordCountCosineSimilarity, DirichletLM, TF-IDF, BM25, Pivoted Normalization, and your own ranker.
4
+ """
5
+ from collections import Counter, defaultdict
6
+ from indexing import InvertedIndex
7
+ import math
8
+
9
+
10
+ class Ranker:
11
+ """
12
+ The ranker class is responsible for generating a list of documents for a given query, ordered by their scores
13
+ using a particular relevance function (e.g., BM25).
14
+ A Ranker can be configured with any RelevanceScorer.
15
+ """
16
+ # TODO: Implement this class properly; this is responsible for returning a list of sorted relevant documents
17
+ def __init__(self, index: InvertedIndex, document_preprocessor, stopwords: set[str], scorer: 'RelevanceScorer') -> None:
18
+ """
19
+ Initializes the state of the Ranker object.
20
+
21
+ TODO (HW3): Previous homeworks had you passing the class of the scorer to this function
22
+ This has been changed as it created a lot of confusion.
23
+ You should now pass an instantiated RelevanceScorer to this function.
24
+
25
+ Args:
26
+ index: An inverted index
27
+ document_preprocessor: The DocumentPreprocessor to use for turning strings into tokens
28
+ stopwords: The set of stopwords to use or None if no stopword filtering is to be done
29
+ scorer: The RelevanceScorer object
30
+ """
31
+ self.index = index
32
+ self.tokenize = document_preprocessor.tokenize
33
+ self.scorer = scorer
34
+ self.stopwords = stopwords
35
+
36
+ def query(self, query: str) -> list[tuple[int, float]]:
37
+ """
38
+ Searches the collection for relevant documents to the query and
39
+ returns a list of documents ordered by their relevance (most relevant first).
40
+
41
+ Args:
42
+ query: The query to search for
43
+
44
+ Returns:
45
+ A list of dictionary objects with keys "docid" and "score" where docid is
46
+ a particular document in the collection and score is that document's relevance
47
+
48
+ TODO (HW3): We are standardizing the query output of Ranker to match with L2RRanker.query and VectorRanker.query
49
+ The query function should return a sorted list of tuples where each tuple has the first element as the document ID
50
+ and the second element as the score of the document after the ranking process.
51
+ """
52
+ # TODO: Tokenize the query and remove stopwords, if needed
53
+ tokens = self.tokenize(query)
54
+ query_parts = [token for token in tokens if token not in self.stopwords] if self.stopwords else tokens
55
+
56
+ # TODO: Fetch a list of possible documents from the index and create a mapping from
57
+ # a document ID to a dictionary of the counts of the query terms in that document.
58
+ # You will pass the dictionary to the RelevanceScorer as input.
59
+ doc_word_counts = defaultdict(Counter)
60
+ query_word_counts = Counter(query_parts)
61
+ for term in query_word_counts:
62
+ postings = self.index.get_postings(term)
63
+ for posting in postings:
64
+ doc_word_counts[posting[0]][term] = posting[1]
65
+
66
+ # TODO: Rank the documents using a RelevanceScorer (like BM25 from below classes)
67
+ results = []
68
+ for docid in doc_word_counts:
69
+ res = self.scorer.score(docid, doc_word_counts[docid], query_word_counts)
70
+ if res:
71
+ results.append((docid, res))
72
+
73
+ # TODO: Return the **sorted** results as format [{docid: 100, score: 0.5}, {docid: 10, score: 0.2}]
74
+ results.sort(key=lambda x: x[1], reverse=True)
75
+ return results
76
+
77
+
78
+ class RelevanceScorer:
79
+ """
80
+ This is the base interface for all the relevance scoring algorithms.
81
+ It will take a document and attempt to assign a score to it.
82
+ """
83
+ # TODO: Implement the functions in the child classes (WordCountCosineSimilarity, DirichletLM, BM25,
84
+ # PivotedNormalization, TF_IDF) and not in this one
85
+
86
+ def __init__(self, index: InvertedIndex, parameters) -> None:
87
+ raise NotImplementedError
88
+
89
+ def score(self, docid: int, doc_word_counts: dict[str, int], query_word_counts: dict[str, int]) -> float:
90
+ """
91
+ Returns a score for how relevant the document is to the provided query.
92
+
93
+ Args:
94
+ docid: The ID of the document
95
+ doc_word_counts: A dictionary containing all words in the document and their frequencies.
96
+ Words that have been filtered will be None.
97
+ query_word_counts: A dictionary containing all words in the query and their frequencies.
98
+ Words that have been filtered will be None.
99
+
100
+ Returns:
101
+ A score for how relevant the document is (Higher scores are more relevant.)
102
+
103
+ """
104
+ raise NotImplementedError
105
+
106
+
107
+ # TODO (HW1): Implement unnormalized cosine similarity on word count vectors
108
+ class WordCountCosineSimilarity(RelevanceScorer):
109
+ def __init__(self, index: InvertedIndex, parameters={}) -> None:
110
+ self.index = index
111
+ self.parameters = parameters
112
+
113
+ def score(self, docid: int, doc_word_counts: dict[str, int], query_word_counts: dict[str, int]) -> float:
114
+ # 1. Find the dot product of the word count vector of the document and the word count vector of the query
115
+
116
+ # 2. Return the score
117
+ cwq = query_word_counts
118
+ score = 0
119
+ flag = 0
120
+ for word in cwq:
121
+ if word in doc_word_counts:
122
+ flag = 1
123
+ score += cwq[word] * doc_word_counts[word]
124
+ if not flag:
125
+ return None
126
+ return score
127
+
128
+
129
+ # TODO (HW1): Implement DirichletLM
130
+ class DirichletLM(RelevanceScorer):
131
+ def __init__(self, index: InvertedIndex, parameters={'mu': 2000}) -> None:
132
+ self.index = index
133
+ self.parameters = parameters
134
+ self.mu = parameters['mu']
135
+
136
+ def score(self, docid: int, doc_word_counts: dict[str, int], query_word_counts: dict[str, int]) -> float:
137
+ # 1. Get necessary information from index
138
+
139
+ # 2. Compute additional terms to use in algorithm
140
+
141
+ # 3. For all query_parts, compute score
142
+
143
+ # 4. Return the score
144
+ cwq = query_word_counts
145
+ q_len = sum(cwq.values())
146
+ flag = 0
147
+ score = 0
148
+
149
+ for term in cwq:
150
+ if term in doc_word_counts and docid in self.index.document_metadata:
151
+ flag = 1
152
+ pwc = self.index.get_term_metadata(term)['count']/self.index.statistics['total_token_count']
153
+ first_part = cwq[term]*math.log(1+doc_word_counts[term]/(self.mu*pwc))
154
+ score+=first_part
155
+ if docid in self.index.document_metadata:
156
+ second_part = q_len*math.log(self.mu/(self.mu+self.index.document_metadata[docid]['length']))
157
+ score+=second_part
158
+ if not flag:
159
+ return None
160
+ return score
161
+
162
+
163
+ # TODO (HW1): Implement BM25
164
+ class BM25(RelevanceScorer):
165
+ def __init__(self, index: InvertedIndex, parameters={'b': 0.75, 'k1': 1.2, 'k3': 8}) -> None:
166
+ self.index = index
167
+ self.b = parameters['b']
168
+ self.k1 = parameters['k1']
169
+ self.k3 = parameters['k3']
170
+
171
+ def score(self, docid: int, doc_word_counts: dict[str, int], query_word_counts: dict[str, int])-> float:
172
+ # 1. Get necessary information from index
173
+
174
+ # 2. Find the dot product of the word count vector of the document and the word count vector of the query
175
+
176
+ # 3. For all query parts, compute the TF and IDF to get a score
177
+
178
+ # 4. Return score
179
+ cwq = query_word_counts
180
+ info = self.index.statistics # statistics
181
+ avg_dl = info['mean_document_length']
182
+ N = info['number_of_documents']
183
+ score = 0
184
+ flag = 0
185
+ for term in cwq:
186
+ if term in doc_word_counts and docid in self.index.document_metadata:
187
+ flag = 1
188
+ third_part = cwq[term]*(self.k3+1)/(self.k3+cwq[term])
189
+ first_part = math.log((N+0.5-self.index.get_term_metadata(term)['document_count'])\
190
+ /(self.index.get_term_metadata(term)['document_count']+0.5))
191
+ ctd = doc_word_counts[term]
192
+ second_part = ((self.k1+1)*ctd)\
193
+ /(self.k1*(1-self.b+self.b*self.index.document_metadata[docid]['length']/avg_dl)+ctd)
194
+ score+=first_part*second_part*third_part
195
+ if not flag:
196
+ return None
197
+ return score
198
+
199
+
200
+ # TODO (HW1): Implement Pivoted Normalization
201
+ class PivotedNormalization(RelevanceScorer):
202
+ def __init__(self, index: InvertedIndex, parameters={'b': 0.2}) -> None:
203
+ self.index = index
204
+ self.b = parameters['b']
205
+
206
+ def score(self, docid: int, doc_word_counts: dict[str, int], query_word_counts: dict[str, int]) -> float:
207
+ # 1. Get necessary information from index
208
+
209
+ # 2. Compute additional terms to use in algorithm
210
+
211
+ # 3. For all query parts, compute the TF, IDF, and QTF values to get a score
212
+
213
+ # 4. Return the score
214
+ cwq = query_word_counts
215
+ info = self.index.statistics # statistics
216
+ avg_dl = info['mean_document_length']
217
+ N = info['number_of_documents']
218
+ score = 0
219
+ flag = 0
220
+ for term in cwq:
221
+ if term in doc_word_counts and docid in self.index.document_metadata:
222
+ flag = 1
223
+ first_part = cwq[term]
224
+ third_part = math.log((N+1)/self.index.get_term_metadata(term)['document_count'])
225
+ second_part = (1+math.log(1+math.log(doc_word_counts[term])))\
226
+ /(1-self.b+self.b*self.index.document_metadata[docid]['length']/avg_dl)
227
+ # print(first_part, second_part, third_part)
228
+ score+=first_part*second_part*third_part
229
+ if not flag:
230
+ return None
231
+ return score
232
+
233
+
234
+ # TODO (HW1): Implement TF-IDF
235
+ class TF_IDF(RelevanceScorer):
236
+ def __init__(self, index: InvertedIndex, parameters={}) -> None:
237
+ self.index = index
238
+ self.parameters = parameters
239
+
240
+ def score(self, docid: int, doc_word_counts: dict[str, int], query_word_counts: dict[str, int]) -> float:
241
+ # 1. Get necessary information from index
242
+
243
+ # 2. Compute additional terms to use in algorithm
244
+
245
+ # 3. For all query parts, compute the TF, IDF, and QTF values to get a score
246
+
247
+ # 4. Return the score
248
+ cwq = query_word_counts
249
+ doc_total = self.index.statistics['number_of_documents'] # statistics
250
+ score = 0
251
+ flag = 0
252
+ for term in cwq:
253
+ if term in doc_word_counts:
254
+ flag = 1
255
+ score += math.log(doc_word_counts[term]+1)*\
256
+ (1+math.log(doc_total/(self.index.get_term_metadata(term)['document_count'])))*cwq[term]
257
+ if not flag:
258
+ return None
259
+ return score
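Note: for reference, the quantity BM25.score computes above can be written as follows, where c(t,d) and c(t,q) are the term counts in the document and the query, df_t is the term's document frequency, N the number of documents, |d| the document length, avgdl the mean document length, and the defaults are k1 = 1.2, b = 0.75, k3 = 8:

```latex
\mathrm{BM25}(q, d) = \sum_{t \in q \cap d}
    \ln\!\frac{N - df_t + 0.5}{df_t + 0.5}
    \cdot \frac{(k_1 + 1)\, c(t, d)}{k_1\bigl(1 - b + b\,\tfrac{|d|}{avgdl}\bigr) + c(t, d)}
    \cdot \frac{(k_3 + 1)\, c(t, q)}{k_3 + c(t, q)}
```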
requirements.txt ADDED
@@ -0,0 +1,17 @@
+blinker==1.7.0
+click==8.1.7
+Flask==3.0.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+joblib==1.3.2
+MarkupSafe==2.1.3
+nltk==3.8.1
+numpy==1.26.2
+pandas==2.1.4
+python-dateutil==2.8.2
+pytz==2023.3.post1
+regex==2023.10.3
+six==1.16.0
+tqdm==4.66.1
+tzdata==2023.3
+Werkzeug==3.0.1
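Note: image2text.py imports requests, which is not in the list above, so the import chain from app.py will likely fail inside the python:3.9 image as pinned. A minimal addition, with the version pin left as an assumption:

```
requests  # used by image2text.py; pin as needed, e.g. requests==2.31.0
```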
static/css/style_index.css ADDED
@@ -0,0 +1,38 @@
1
+ #search-header{
2
+ margin-top: 15%;
3
+ margin-bottom: 15%;
4
+ display: block;
5
+ }
6
+
7
+ #introduce-header {
8
+ /* margin-top: 10%; */
9
+ /* margin-bottom: 5%; */
10
+ margin: 5% auto;
11
+ }
12
+
13
+ #introduce-header p{
14
+ font-family: "Special Elite", "SF Pro Display", "SF Pro Icons","Helvetica Neue", Helvetica, Arial, sans-serif;
15
+ margin-top: 3%;
16
+ }
17
+
18
+ .results-wrapper{
19
+ position: relative;
20
+ animation: moveRightToLeft 1000s linear infinite;
21
+ display: grid;
22
+ grid-template-columns: repeat(100, 30%);
23
+ grid-template-rows: 1fr;
24
+ grid-auto-flow: row;
25
+ }
26
+
27
+ @keyframes moveRightToLeft {
28
+ 0% {
29
+ left: 1000%;
30
+ }
31
+ 100% {
32
+ left: -1000%;
33
+ }
34
+ }
35
+
36
+ .results-wrapper:hover{
37
+ animation-play-state: paused;
38
+ }
static/css/style_search.css ADDED
@@ -0,0 +1,384 @@
1
+ html {
2
+ background-color: #fbfbfb;
3
+ }
4
+ body {
5
+ background-color: #fbfbfb;
6
+ font-family: "SF Pro Display", "SF Pro Icons","Helvetica Neue", Helvetica, Arial, sans-serif;
7
+ font-weight: 600;
8
+ line-height: 1.1;
9
+ /* position: relative; */
10
+ }
11
+
12
+ nav {
13
+ background-color: rgba(255, 255, 255, 0.8);
14
+ backdrop-filter: saturate(180%) blur(20px);
15
+ width: 100%;
16
+ /* position: sticky; */
17
+ position: fixed;
18
+ top: 0;
19
+ left: 0;
20
+ z-index: 100;
21
+ padding: 15px 0px;
22
+ border-bottom: rgb(222, 222, 222) 1px solid;
23
+ }
24
+
25
+ main{
26
+ padding: 1% 5%;
27
+ background-color: #fbfbfb;
28
+ }
29
+
30
+ #nav-title{
31
+ font-size: 21px;
32
+ margin-left: 50px;
33
+ font-family: Silkscreen, "Special Elite", "SF Pro Display", "SF Pro Icons","Helvetica Neue", Helvetica, Arial, sans-serif;
34
+ }
35
+
36
+ #nav-title a{
37
+ text-decoration: none;
38
+ color: inherit;
39
+ }
40
+
41
+ #nav-title a:hover{
42
+ color: #565564;
43
+ }
44
+
45
+
46
+ h1{
47
+ text-align: center;
48
+ font-weight: 600;
49
+ font-family: "DM Serif Display", Silkscreen, "Special Elite", "SF Pro Display", "SF Pro Icons","Helvetica Neue", Helvetica, Arial, sans-serif;;
50
+ }
51
+
52
+ #search-header{
53
+ text-align: center;
54
+ width: 100%;
55
+ /* margin-top: 5%; */
56
+ /* margin-bottom: 5%; */
57
+ margin: 5% auto;
58
+ align-items: center;
59
+ animation: fadeIn;
60
+ animation-duration: 1s;
61
+ }
62
+
63
+ #search-bar{
64
+ width: 100%;
65
+ /* display: flex; */
66
+ /* box-shadow: 2px 4px 12px rgba(0 0 0/0.08); */
67
+ border-radius: 30px;
68
+ /* align-items: center; */
69
+ /* background-color: white; */
70
+ margin: 0 auto;
71
+ }
72
+
73
+ /* #search-bar:hover{
74
+ box-shadow: 8px 8px 16px rgba(0 0 0/0.1);
75
+ } */
76
+
77
+ #input-wrapper{
78
+ display: flex;
79
+ align-items: center;
80
+ background-color: white;
81
+ border-radius: 30px;
82
+ box-shadow: 2px 4px 12px rgba(0 0 0/0.08);
83
+ width: 50vw;
84
+ }
85
+
86
+ #input-wrapper:hover{
87
+ box-shadow: 8px 8px 16px rgba(0 0 0/0.1);
88
+ }
89
+
90
+ #icon-magnifying-glass{
91
+ margin-left: 3%;
92
+ }
93
+
94
+ #input-wrapper-filter{
95
+ display: flex;
96
+ align-items: center;
97
+ justify-content: center;
98
+ margin: 5% 0;
99
+ }
100
+
101
+ #icon-filter:hover{
102
+ opacity: 0.6;
103
+ }
104
+
105
+ #search-header{
106
+ display: flex;
107
+ }
108
+
109
+ #search-header form{
110
+ width: 100%;
111
+ /* border-radius: 30px; */
112
+ }
113
+
114
+ #search-header input{
115
+ width: 100%;
116
+ height: 50px;
117
+ border: none;
118
+ /* box-shadow: 2px 4px 12px rgba(0 0 0/0.08); */
119
+ border-radius: 0 30px 30px 0;
120
+ padding: 25px 25px 25px 15px;
121
+ background-color: white;
122
+ }
123
+
124
+ #search-header input:focus{
125
+ outline: none;
126
+ }
127
+
128
+ #icon-filter{
129
+ margin-left: 3%;
130
+ cursor: pointer;
131
+ }
132
+
133
+ .multiselect-single-wrapper{
134
+ display: flex;
135
+ align-items: center;
136
+ width: 95%;
137
+ /* flex: 0 0 20vw; */
138
+ /* margin: 10px 5%; */
139
+ /* z-index: 0; */
140
+ }
141
+
142
+ .multiselect-single-wrapper:nth-child(1){
143
+ grid-column: 1 / span 3;
144
+ }
145
+
146
+ .multiselect-single-wrapper:nth-child(2){
147
+ grid-column: 4 / span 3;
148
+
149
+ }
150
+
151
+ .multiselect-single-wrapper:nth-child(3){
152
+ grid-column: 1 / span 3;
153
+ }
154
+
155
+ .multiselect-single-wrapper:nth-child(4){
156
+ grid-column: 4 / span 3;
157
+ }
158
+
159
+ .multiselect-single-wrapper:nth-child(5){
160
+ grid-column: 2 / -2;
161
+ }
162
+
163
+ .multiselect-single-wrapper label{
164
+ margin: 0 5%;
165
+ }
166
+
167
+ .multiselect-single-wrapper>div:first-of-type{
168
+ width: 100%;
169
+ }
170
+
171
+ .ui.multiple.selection.dropdown{
172
+ border-radius: 20px;
173
+ border: none;
174
+ background-color: white;
175
+ box-shadow: 2px 4px 12px rgba(0 0 0/0.08);
176
+ padding: 5px 15px;
177
+ width: 100%;
178
+ }
179
+
180
+ .ui.multiple.selection.dropdown:hover{
181
+ box-shadow: 8px 8px 16px rgba(0 0 0/0.1);
182
+ }
183
+
184
+ .ui.selection.active.dropdown .menu{
185
+ border: none;
186
+ }
187
+
188
+ .ui.label.transition.visible{
189
+ border: none;
190
+ background-color: rgba(0,0,0,.87);
191
+ color: whitesmoke;
192
+ border-radius: 30px;
193
+ box-shadow: 2px 4px 12px rgba(0 0 0/0.08);
194
+ }
195
+
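+ /* Filter panel: collapsed by default via max-height: 0 + overflow: hidden; it expands when #search-bar gains the .open class (rule further below). */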
196
+ #multiselect-wrapper{
197
+ display: grid;
198
+ grid-template-columns: repeat(6, 1fr);
199
+ row-gap: 25px;
200
+ width: 95%;
201
+ justify-content: space-evenly;
202
+ justify-items: center;
203
+ padding: 0;
204
+ overflow: hidden;
205
+ max-height: 0;
206
+ /* display: none; */
207
+ transition: max-height 0.3s linear, padding 0.3s linear;
208
+ /* animation: fadeOutUp; */
209
+ /* animation-duration: 0.7s; */
210
+ }
211
+
212
+ #search-bar.open #multiselect-wrapper{
213
+ /* display: grid;
214
+ grid-template-columns: repeat(6, 1fr);
215
+ row-gap: 25px;
216
+ width: 100%;
217
+ justify-content: space-evenly;
218
+ justify-items: center; */
219
+ padding: 2% 0% 2% 0%;
220
+ overflow: visible;
221
+ max-height: 500px;
222
+ transition: max-height 0.3s linear, padding 0.3s linear;
223
+ animation: fadeInDown;
224
+ animation-duration: 0.3s;
225
+ /* z-index: 2; */
226
+ }
227
+
228
+
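+ /* Search results are laid out as a three-column card grid. */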
229
+ .results-wrapper {
230
+ display: grid;
231
+ grid-template-columns: repeat(3, 1fr);
232
+ /* grid-template-rows: 400px; */
233
+ grid-auto-flow: dense;
234
+ grid-gap: 25px;
235
+ /* grid-row-gap: 20px; */
236
+ /* margin-top: 50px; */
237
+ justify-content: center;
238
+ justify-items: center;
239
+ /* z-index: 0; */
240
+ /* background-color: #fbfbfb; */
241
+ }
242
+
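+ /* Each result card stacks the image and the prompt-text overlay in the same grid area; the "Find Similar" / "Copy" row sits below them. */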
243
+ .single-result-card {
244
+ display: grid;
245
+ grid-template-columns: 1fr;
246
+ grid-template-rows: 2fr 1fr;
247
+ background-color: white;
248
+ border-radius: 30px;
249
+ box-shadow: 2px 4px 12px rgba(0 0 0/0.08);
250
+ width: 80%;
251
+ box-sizing: border-box;
252
+ transition: transform 0.5s;
253
+ height: 30vw;
254
+ }
255
+
256
+ .single-result-card:hover{
257
+ transform: translateY(-10px);
258
+ }
259
+
260
+ .single-result-card img{
261
+ grid-row: 1 / span 2;
262
+ grid-column: 1 / span 1;
263
+ width: 100%;
264
+ height: 100%;
265
+ /* max-height: 400px; */
266
+ border-radius: 30px 30px 0 0;
267
+ object-fit: cover;
268
+ }
269
+
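+ /* The prompt text overlays the card image and is only revealed on hover (see the :hover rules below). */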
270
+ .prompt-text-wrapper{
271
+ grid-row: 1 / span 2;
272
+ grid-column: 1 / span 1;
273
+ /* z-index: 1; */
274
+ display: flex;
275
+ align-items: flex-end;
276
+ justify-content: center;
277
+ text-align: center;
278
+ /* font-style: italic; */
279
+ height: 100%;
280
+ /* max-height: 30px; */
281
+ overflow: hidden;
282
+ text-overflow: clip;
283
+ /* color: #75757d; */
284
+ font-weight: 400;
285
+ font-family: "SF Pro Text", "SF Pro Icons", "Helvetica Neue",Helvetica, Arial, sans-serif;
286
+ color: whitesmoke;
287
+ /* background-color: rgba(50, 50, 50, 0.7); */
288
+ /* border-radius: 30px 30px 0 0; */
289
+ border-radius: 30px 30px 0 0;
290
+
291
+ }
292
+
293
+ .prompt-text-wrapper:hover{
294
+ background: linear-gradient(to top, rgba(50, 50, 50, 0.5) 5%, transparent);
295
+ /* background-color: rgba(50, 50, 50, 0.7); */
296
+ /* cursor: pointer; */
297
+ }
298
+
299
+ /* .prompt-text-wrapper.copied{
300
+ background-color: rgba(50, 50, 50, 0.7);
301
+ transition: background-color 0.1s ease;
302
+ } */
303
+
304
+
305
+ .prompt-text-wrapper p {
306
+ display: none;
307
+ }
308
+
309
+ .prompt-text-wrapper:hover p{
310
+ display: block;
311
+ padding: 15px;
312
+ /* padding-bottom: 15px; */
313
+ /* padding: 15px 15px 60px 15px; */
314
+ /* height: auto; */
315
+ max-height: 30%;
316
+ /* overflow: hidden; */
317
+ text-overflow: ellipsis;
318
+ /* background-color: rgba(50, 50, 50, 0.7); */
319
+ border-radius: 30px 30px 0 0;
320
+ /* transition: height 0.5s ease; */
321
+ width: 100%;
322
+ margin: 0;
323
+ margin-bottom: 2%;
324
+ }
325
+
326
+ /* color see here:
327
+ https://colorhunt.co/palette/ff90bcffc0d9f9f9e08acdd7
328
+ */
329
+
330
+ .single-result-card form{
331
+ width: 100%;
332
+ display: flex;
333
+ grid-row: 4 / span 1;
334
+ text-align: center;
335
+ }
336
+
337
+ .single-result-card input[type="submit"]{
338
+ border-radius: 0 0 0 30px;
339
+ background-color: #FF90BC;
340
+ color: white;
341
+ font-weight: 600;
342
+ padding: 15px;
343
+ display: inline-block;
344
+ box-sizing: border-box;
345
+ width: 50%;
346
+ margin: 0;
347
+ border: none;
348
+ flex-direction: row;
349
+ }
350
+
351
+ .single-result-card input[type="submit"]:hover{
352
+ opacity: 0.8;
353
+ }
354
+
355
+ .pick-this{
356
+ /* display: inline-block; */
357
+ display: flex;
358
+ flex-direction: row;
359
+ padding: 15px;
360
+ align-items: center;
361
+ color: white;
362
+ background-color: #8ACDD7;
363
+ border-radius: 0 0 30px 0;
364
+ box-sizing: border-box;
365
+ width: 50%;
366
+ cursor: pointer;
367
+ margin: 0;
368
+ user-select: none;
369
+ justify-content: space-evenly;
370
+ transition: background-color 0.1s ease;
371
+ }
372
+
373
+ .pick-this:hover{
374
+ background-color: rgba(138, 205, 215, 0.8);
375
+ }
376
+
377
+ .pick-this.copied{
378
+ background-color: rgb(30, 48, 80);
379
+ transition: background-color 0.1s ease;
380
+ }
381
+
382
+ #no-results{
383
+ grid-column: 1 / -1;
384
+ }
static/js/script.js ADDED
@@ -0,0 +1,83 @@
1
+ // delete images on error
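+ // if a result image fails to load (e.g. 404), remove its parent element so the broken card disappears from the grid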
2
+ const images = document.querySelectorAll("img");
3
+
4
+ images.forEach(function (image) {
5
+ image.addEventListener("error", function () {
6
+ console.error("Image load error:", image.src);
7
+ console.log("404 image:", image.src);
8
+ const parentElement = image.parentElement;
9
+ if (parentElement) {
10
+ console.log("parentElement:", parentElement);
11
+ parentElement.remove();
12
+ console.log("image removed");
13
+ }
14
+ });
15
+ });
16
+
17
+ // multiselect dropdown
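+ // any change to a filter dropdown immediately re-submits the search form (#myForm), so results refresh without a separate submit button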
18
+ const myForm = document.getElementById("myForm");
19
+
20
+ $(".ui.multiple.selection.dropdown").dropdown({
21
+ action: "activate",
22
+ onChange: function (value, text, $selectedItem) {
23
+ console.log(value);
24
+ myForm.submit();
25
+ },
26
+ });
27
+
28
+ // switch filter dropdown on
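+ // clicking the filter icon toggles the .open class on #search-bar; the CSS max-height transition then expands or collapses #multiselect-wrapper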
29
+ const filterSwitch = document.getElementById("icon-filter");
30
+ const filterContent = document.getElementById("multiselect-wrapper");
31
+ if (filterSwitch) {
32
+ filterSwitch.addEventListener("click", function () {
33
+ const wrapper = this.closest("#search-bar");
34
+ wrapper.classList.toggle("open");
35
+ });
36
+ }
37
+
38
+ // copy to clipboard
39
+ const copyButtons = document.querySelectorAll(".pick-this");
40
+ copyButtons.forEach(function (button) {
41
+ button.addEventListener("click", function () {
42
+ console.log("copy button clicked");
43
+ const text =
44
+ this.parentNode.parentNode.querySelector(
45
+ ".prompt-text-p"
46
+ ).textContent;
47
+ navigator.clipboard.writeText(text).then(
48
+ () => {
49
+ console.log("Copied to clipboard:", text);
50
+ this.classList.add("copied");
51
+ setTimeout(() => {
52
+ this.classList.remove("copied");
53
+ }, 200);
54
+ },
55
+ function (err) {
56
+ console.error("Async: Could not copy text: ", err);
57
+ }
58
+ );
59
+ });
60
+ });
61
+
62
+ // const copyButtons_2 = document.querySelectorAll(".prompt-text-wrapper");
63
+ // copyButtons_2.forEach(function (button) {
64
+ // button.addEventListener("click", function () {
65
+ // console.log("copy button clicked");
66
+ // const text =
67
+ // this.parentNode.parentNode.querySelector(
68
+ // ".prompt-text-p"
69
+ // ).textContent;
70
+ // navigator.clipboard.writeText(text).then(
71
+ // () => {
72
+ // console.log("Copied to clipboard:", text);
73
+ // this.classList.add("copied");
74
+ // setTimeout(() => {
75
+ // this.classList.remove("copied");
76
+ // }, 200);
77
+ // },
78
+ // function (err) {
79
+ // console.error("Async: Could not copy text: ", err);
80
+ // }
81
+ // );
82
+ // });
83
+ // });
static/src/copy-regular.svg ADDED
static/src/filter.svg ADDED
static/src/magnifying-glass-solid.svg ADDED
static/src/wand-magic-sparkles-solid.svg ADDED
templates/base_head.html ADDED
@@ -0,0 +1,80 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+
7
+ <link
8
+ href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css"
9
+ rel="stylesheet"
10
+ integrity="sha384-GLhlTQ8iRABdZLl6O3oVMWSktQOp6b7In1Zl3/Jr59b6EGGoI1aFkw7cmDA6j6gD"
11
+ crossorigin="anonymous"
12
+ />
13
+ <script
14
+ src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"
15
+ integrity="sha384-/mhDoLbDldZc3qpsJHpLogda//BVZbgYuw6kof4u2FrCedxOtgRZDTHgHUhOCVim"
16
+ crossorigin="anonymous"
17
+ ></script>
18
+
19
+ <link
20
+ rel="stylesheet"
21
+ href="https://fonts.googleapis.com/css?family=Special+Elite%7CSilkscreen%7CDM+Serif+Display"
22
+ />
23
+ <link
24
+ rel="stylesheet"
25
+ href="https://cdn.jsdelivr.net/npm/[email protected]/dist/semantic.min.css"
26
+ />
27
+ <link
28
+ rel="stylesheet"
29
+ href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/4.1.1/animate.min.css"
30
+ />
31
+ <link
32
+ rel="icon"
33
+ href="static/src/wand-magic-sparkles-solid.svg"
34
+ type="image/x-icon"
35
+ />
36
+
37
+ <!-- SEO -->
38
+ <link rel="canonical" href="magic-prompt.net" />
39
+ <meta name="robots" content="index,follow" />
40
+ <meta name="googlebot" content="index,follow" />
41
+ <meta name="revisit-after" content="1 days" />
42
+
43
+ <meta property="og:type" content="website" />
44
+ <meta
45
+ property="og:title"
46
+ content="Magic Prompt - Explore the Best AI Image Generating Prompts"
47
+ />
48
+ <meta property="og:url" content="magic-prompt.net" />
49
+ <meta
50
+ property="og:site_name"
51
+ content="Magic Prompt - Explore the Best AI Image Generating Prompts"
52
+ />
53
+ <meta property="og:locale" content="en_US" />
54
+ <meta property="article:author" content="[email protected]" />
55
+ <meta name="twitter:card" content="summary" />
56
+ <meta
57
+ name="description"
58
+ content="Explore diverse AI image prompts on
59
+ our site. Search by style and preferences for personalized inspiration.
60
+ Find similar prompts to further tailor your creative journey! 在我们的网站上,您可以探索多样的AI图像生成提示。支持按风格搜索,根据喜好查找类似提示,为您的创意之旅定制个性化的灵感。"
61
+ />
62
+ <!-- baidu -->
63
+ <meta name="baidu-site-verification" content="codeva-2ptCyPDL5z" />
64
+
65
+ <!-- Google tag (gtag.js) -->
66
+ <script src="https://www.googletagmanager.com/gtag/js?id=G-FNS66H6NJY"></script>
67
+ <script>
68
+ window.dataLayer = window.dataLayer || [];
69
+ function gtag() {
70
+ dataLayer.push(arguments);
71
+ }
72
+ gtag("js", new Date());
73
+
74
+ gtag("config", "G-FNS66H6NJY");
75
+ </script>
76
+ <title>
77
+ Magic Prompt - Explore the best AI Image Generating Prompts
78
+ </title>
79
+ </head>
80
+ </html>
templates/index.html ADDED
@@ -0,0 +1,97 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ {% include "base_head.html"%}
4
+
5
+ <head>
6
+ <link
7
+ rel="stylesheet"
8
+ href="static/css/style_search.css"
9
+ type="text/css"
10
+ />
11
+ <link
12
+ rel="stylesheet"
13
+ href="static/css/style_index.css"
14
+ type="text/css"
15
+ />
16
+ </head>
17
+
18
+ <body>
19
+ <nav>
20
+ <div id="nav-title"><a href="/">Magic Prompt</a></div>
21
+ </nav>
22
+
23
+ <main>
24
+ <div id="search-header">
25
+ <div id="introduce-header">
26
+ <h1>Explore the world's best AIGC prompts.</h1>
27
+ <p>Prefer. Search again. No end.</p>
28
+ </div>
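+ <!-- Image-based search: the chosen file is POSTed to /search_picture -->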
29
+ <form action="/search_picture" method="post" enctype="multipart/form-data"><input type="file" id="img" name="img"><button type="submit">submit</button></form>
30
+
31
+
32
+ <div id="search-bar">
33
+ <form action="/search" method="POST" id="myForm">
34
+ <div id="input-wrapper-filter">
35
+ <div id="input-wrapper">
36
+ <img
37
+ src="static/src/magnifying-glass-solid.svg"
38
+ alt="search icon"
39
+ width="15"
40
+ height="15"
41
+ id="icon-magnifying-glass"
42
+ />
43
+ <input
44
+ type="text"
45
+ name="query"
46
+ placeholder="A mountain in spring"
47
+ value="{{query}}"
48
+ />
49
+ </div>
50
+ </div>
51
+ <!-- <input type="submit" value="submit" /> -->
52
+ </form>
53
+ </div>
54
+ </div>
55
+
56
+ <div class="results-wrapper">
57
+ {% for value in result %}
58
+ <div class="single-result-card">
59
+ <img src="{{ value[1] }}" alt="Image" />
60
+ <div class="prompt-text-wrapper">
61
+ <p class="prompt-text-p">{{ value[0] }}</p>
62
+ </div>
63
+ <form action="/search" method="POST">
64
+ <input
65
+ type="hidden"
66
+ name="query"
67
+ value="{{ value[0] }}"
68
+ />
69
+ <!-- <input type="hidden" name="query" value="{{ value[2] }}" /> -->
70
+ <!-- <input type="hidden" name="title" value="{{ value[0] }}" /> -->
71
+ <input type="submit" value="Find Similar" />
72
+ <div class="pick-this">
73
+ <span>Copy</span>
74
+ <svg
75
+ xmlns="http://www.w3.org/2000/svg"
76
+ height="16"
77
+ width="14"
78
+ viewBox="0 0 448 512"
79
+ class="icon-copy"
80
+ >
81
+ <path
82
+ d="M208 0H332.1c12.7 0 24.9 5.1 33.9 14.1l67.9 67.9c9 9 14.1 21.2 14.1 33.9V336c0 26.5-21.5 48-48 48H208c-26.5 0-48-21.5-48-48V48c0-26.5 21.5-48 48-48zM48 128h80v64H64V448H256V416h64v48c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V176c0-26.5 21.5-48 48-48z"
83
+ fill="white"
84
+ />
85
+ </svg>
86
+ </div>
87
+ </form>
88
+ </div>
89
+ {% endfor %}
90
+ </div>
91
+ </main>
92
+ <script src="https://cdn.bootcss.com/jquery/3.4.1/jquery.js"></script>
93
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/semantic.min.js"></script>
94
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.11/clipboard.min.js"></script>
95
+ <script src="static/js/script.js"></script>
96
+ </body>
97
+ </html>
templates/search.html ADDED
@@ -0,0 +1,857 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+
5
+ {% include "base_head.html"%}
6
+
7
+ <head>
8
+ <link
9
+ rel="stylesheet"
10
+ href="static/css/style_search.css"
11
+ type="text/css"
12
+ />
13
+ </head>
14
+
15
+ <body>
16
+ <nav>
17
+ <div id="nav-title"><a href="/">Magic Prompt</a></div>
18
+ </nav>
19
+
20
+ <main>
21
+ <div id="search-header">
22
+ {% if style or scene or medium or light or quality %}
23
+ <div id="search-bar" class="open">
24
+ {% else %}
25
+ <div id="search-bar">
26
+ {% endif %}
27
+ <form action="/search" method="POST" id="myForm">
28
+ <div id="input-wrapper-filter">
29
+ <div id="input-wrapper">
30
+ <img
31
+ src="static/src/magnifying-glass-solid.svg"
32
+ alt="search icon"
33
+ width="15"
34
+ height="15"
35
+ id="icon-magnifying-glass"
36
+ />
37
+ <input
38
+ type="text"
39
+ name="query"
40
+ placeholder="A mountain in spring"
41
+ value="{{query}}"
42
+ />
43
+ </div>
44
+ <img
45
+ src="static/src/filter.svg"
46
+ alt="search icon"
47
+ width="15"
48
+ height="15"
49
+ id="icon-filter"
50
+ />
51
+ </div>
52
+ <!-- <input type="submit" value="submit" /> -->
53
+
54
+ <div id="multiselect-wrapper">
55
+ <!-- multiselect for style-->
56
+ <div class="multiselect-single-wrapper">
57
+ <label for="style">Style</label>
58
+ <div id="multiselect-style">
59
+ <div
60
+ class="ui multiple selection dropdown"
61
+ id="style-select"
62
+ >
63
+ {% if style %}
64
+ <input
65
+ name="style"
66
+ type="hidden"
67
+ value="{{ style }}"
68
+ />
69
+ <i class="dropdown icon"></i>
70
+ {% for s in style.split(',') %}
71
+ <div
72
+ class="ui label transition visible"
73
+ data-value="{{ s }}"
74
+ style="
75
+ display: inline-block !important;
76
+ "
77
+ >
78
+ {{ s }}<i class="delete icon"></i>
79
+ </div>
80
+ {% endfor %} {% else %}
81
+ <input
82
+ name="style"
83
+ type="hidden"
84
+ value=""
85
+ />
86
+ <i class="dropdown icon"></i>
87
+ {% endif %}
88
+ <div class="default text">
89
+ Choose a style.
90
+ </div>
91
+ <div class="menu">
92
+ <div
93
+ class="item"
94
+ data-value="comics"
95
+ >
96
+ Comics
97
+ </div>
98
+ <div
99
+ class="item"
100
+ data-value="dreamworks"
101
+ >
102
+ Dreamworks
103
+ </div>
104
+ <div
105
+ class="item"
106
+ data-value="pixar"
107
+ >
108
+ Pixar
109
+ </div>
110
+ <div
111
+ class="item"
112
+ data-value="realism"
113
+ >
114
+ Realism
115
+ </div>
116
+ <div class="item" data-value="90s">
117
+ 90s
118
+ </div>
119
+ <div
120
+ class="item"
121
+ data-value="bauhaus"
122
+ >
123
+ Bauhaus
124
+ </div>
125
+ <div
126
+ class="item"
127
+ data-value="country"
128
+ >
129
+ Country
130
+ </div>
131
+ <div
132
+ class="item"
133
+ data-value="steampunk"
134
+ >
135
+ Steampunk
136
+ </div>
137
+ <div
138
+ class="item"
139
+ data-value="gothic"
140
+ >
141
+ Gothic
142
+ </div>
143
+ <div
144
+ class="item"
145
+ data-value="baroque"
146
+ >
147
+ Baroque
148
+ </div>
149
+ <div
150
+ class="item"
151
+ data-value="fauvism"
152
+ >
153
+ Fauvism
154
+ </div>
155
+ <div
156
+ class="item"
157
+ data-value="surrealism"
158
+ >
159
+ Surrealism
160
+ </div>
161
+ <div
162
+ class="item"
163
+ data-value="optical"
164
+ >
165
+ Optical
166
+ </div>
167
+ <div
168
+ class="item"
169
+ data-value="pokemon"
170
+ >
171
+ Pokemon
172
+ </div>
173
+ <div class="item" data-value="botw">
174
+ BotW
175
+ </div>
176
+ <div
177
+ class="item"
178
+ data-value="ghibli"
179
+ >
180
+ Ghibli
181
+ </div>
182
+ <div
183
+ class="item"
184
+ data-value="miyazaki"
185
+ >
186
+ Miyazaki
187
+ </div>
188
+ <div
189
+ class="item"
190
+ data-value="doodle"
191
+ >
192
+ Doodle
193
+ </div>
194
+ <div
195
+ class="item"
196
+ data-value="concept"
197
+ >
198
+ Concept
199
+ </div>
200
+ <div
201
+ class="item"
202
+ data-value="portrait"
203
+ >
204
+ Portrait
205
+ </div>
206
+ <div
207
+ class="item"
208
+ data-value="sketching"
209
+ >
210
+ Sketching
211
+ </div>
212
+ <div
213
+ class="item"
214
+ data-value="cinematic"
215
+ >
216
+ Cinematic
217
+ </div>
218
+ <div
219
+ class="item"
220
+ data-value="hyperrealism"
221
+ >
222
+ Hyperrealism
223
+ </div>
224
+ <div
225
+ class="item"
226
+ data-value="sophisticated"
227
+ >
228
+ Sophisticated
229
+ </div>
230
+ <div class="item" data-value="3d">
231
+ 3D
232
+ </div>
233
+ <div
234
+ class="item"
235
+ data-value="voynich-manuscript"
236
+ >
237
+ Voynich Manuscript
238
+ </div>
239
+ <div
240
+ class="item"
241
+ data-value="partial-anatomy"
242
+ >
243
+ Partial Anatomy
244
+ </div>
245
+ <div
246
+ class="item"
247
+ data-value="quilted"
248
+ >
249
+ Quilted
250
+ </div>
251
+ <div
252
+ class="item"
253
+ data-value="digital"
254
+ >
255
+ Digital
256
+ </div>
257
+ <div
258
+ class="item"
259
+ data-value="video"
260
+ >
261
+ Video
262
+ </div>
263
+ <div class="item" data-value="game">
264
+ Game
265
+ </div>
266
+ <div
267
+ class="item"
268
+ data-value="photo"
269
+ >
270
+ Photo
271
+ </div>
272
+ <div
273
+ class="item"
274
+ data-value="cartoon"
275
+ >
276
+ Cartoon
277
+ </div>
278
+ <div
279
+ class="item"
280
+ data-value="painting"
281
+ >
282
+ Painting
283
+ </div>
284
+ <div
285
+ class="item"
286
+ data-value="chinese"
287
+ >
288
+ Chinese
289
+ </div>
290
+ <div
291
+ class="item"
292
+ data-value="cyberpunk"
293
+ >
294
+ Cyberpunk
295
+ </div>
296
+ <div
297
+ class="item"
298
+ data-value="fashion"
299
+ >
300
+ Fashion
301
+ </div>
302
+ <div
303
+ class="item"
304
+ data-value="design"
305
+ >
306
+ Design
307
+ </div>
308
+ <div
309
+ class="item"
310
+ data-value="modern"
311
+ >
312
+ Modern
313
+ </div>
314
+ </div>
315
+ </div>
316
+ </div>
317
+ </div>
318
+
319
+ <!-- multiselect for medium-->
320
+ <div class="multiselect-single-wrapper">
321
+ <label for="medium">Medium</label>
322
+ <div id="multiselect-medium">
323
+ <div
324
+ class="ui multiple selection dropdown"
325
+ id="medium-select"
326
+ >
327
+ {% if medium %}
328
+ <input
329
+ name="medium"
330
+ type="hidden"
331
+ value="{{ medium }}"
332
+ />
333
+ <i class="dropdown icon"></i>
334
+ {% for m in medium.split(',') %}
335
+ <div
336
+ class="ui label transition visible"
337
+ data-value="{{ m }}"
338
+ style="
339
+ display: inline-block !important;
340
+ "
341
+ >
342
+ {{ m }}<i class="delete icon"></i>
343
+ </div>
344
+ {% endfor %} {% else %}
345
+ <input
346
+ name="medium"
347
+ type="hidden"
348
+ value=""
349
+ />
350
+ <i class="dropdown icon"></i>
351
+ {% endif %}
352
+ <div class="default text">
353
+ Choose a medium.
354
+ </div>
355
+ <div class="menu">
356
+ <div
357
+ class="item"
358
+ data-value="illustrations"
359
+ >
360
+ Illustrations
361
+ </div>
362
+ <div
363
+ class="item"
364
+ data-value="vector"
365
+ >
366
+ Vector
367
+ </div>
368
+ <div class="item" data-value="oil">
369
+ Oil
370
+ </div>
371
+ <div
372
+ class="item"
373
+ data-value="photography"
374
+ >
375
+ Photography
376
+ </div>
377
+ <div
378
+ class="item"
379
+ data-value="watercolor"
380
+ >
381
+ Watercolor
382
+ </div>
383
+ <div
384
+ class="item"
385
+ data-value="sketch"
386
+ >
387
+ Sketch
388
+ </div>
389
+ <div class="item" data-value="ink">
390
+ Ink
391
+ </div>
392
+ <div
393
+ class="item"
394
+ data-value="sculpture"
395
+ >
396
+ Sculpture
397
+ </div>
398
+ <div
399
+ class="item"
400
+ data-value="manuscript"
401
+ >
402
+ Manuscript
403
+ </div>
404
+ <div
405
+ class="item"
406
+ data-value="painting"
407
+ >
408
+ Painting
409
+ </div>
410
+ <div
411
+ class="item"
412
+ data-value="drawing"
413
+ >
414
+ Drawing
415
+ </div>
416
+ </div>
417
+ </div>
418
+ </div>
419
+ </div>
420
+
421
+ <!-- multiselect for light-->
422
+ <div class="multiselect-single-wrapper">
423
+ <label for="light">Light</label>
424
+ <div id="multiselect-light">
425
+ <div
426
+ class="ui multiple selection dropdown"
427
+ id="light-select"
428
+ >
429
+ {% if light %}
430
+ <input
431
+ name="light"
432
+ type="hidden"
433
+ value="{{ light }}"
434
+ />
435
+ <i class="dropdown icon"></i>
436
+ {% for l in light.split(',') %}
437
+ <div
438
+ class="ui label transition visible"
439
+ data-value="{{ l }}"
440
+ style="
441
+ display: inline-block !important;
442
+ "
443
+ >
444
+ {{ l }}<i class="delete icon"></i>
445
+ </div>
446
+ {% endfor %} {% else %}
447
+ <input
448
+ name="light"
449
+ type="hidden"
450
+ value=""
451
+ />
452
+ <i class="dropdown icon"></i>
453
+ {% endif %}
454
+ <div class="default text">
455
+ Choose a light.
456
+ </div>
457
+ <div class="menu">
458
+ <div
459
+ class="item"
460
+ data-value="moody"
461
+ >
462
+ Moody
463
+ </div>
464
+ <div
465
+ class="item"
466
+ data-value="studio"
467
+ >
468
+ Studio
469
+ </div>
470
+ <div class="item" data-value="cove">
471
+ Cove
472
+ </div>
473
+ <div class="item" data-value="soft">
474
+ Soft
475
+ </div>
476
+ <div class="item" data-value="hard">
477
+ Hard
478
+ </div>
479
+ <div
480
+ class="item"
481
+ data-value="volumetric"
482
+ >
483
+ Volumetric
484
+ </div>
485
+ <div
486
+ class="item"
487
+ data-value="low-key"
488
+ >
489
+ Low-key
490
+ </div>
491
+ <div
492
+ class="item"
493
+ data-value="high-key"
494
+ >
495
+ High-key
496
+ </div>
497
+ <div class="item" data-value="epic">
498
+ Epic
499
+ </div>
500
+ <div
501
+ class="item"
502
+ data-value="rembrandt"
503
+ >
504
+ Rembrandt
505
+ </div>
506
+ <div
507
+ class="item"
508
+ data-value="contre-jour"
509
+ >
510
+ Contre-jour
511
+ </div>
512
+ <div
513
+ class="item"
514
+ data-value="veiling-flare"
515
+ >
516
+ Veiling Flare
517
+ </div>
518
+ <div
519
+ class="item"
520
+ data-value="crepuscular"
521
+ >
522
+ Crepuscular
523
+ </div>
524
+ <div
525
+ class="item"
526
+ data-value="rays-of-shimmering"
527
+ >
528
+ Rays of Shimmering
529
+ </div>
530
+ <div
531
+ class="item"
532
+ data-value="godrays"
533
+ >
534
+ Godrays
535
+ </div>
536
+ </div>
537
+ </div>
538
+ </div>
539
+ </div>
540
+
541
+ <!-- multiselect for quality-->
542
+ <div class="multiselect-single-wrapper">
543
+ <label for="quality">Quality</label>
544
+ <div id="multiselect-quality">
545
+ <div
546
+ class="ui multiple selection dropdown"
547
+ id="quality-select"
548
+ >
549
+ {% if quality %}
550
+ <input
551
+ name="quality"
552
+ type="hidden"
553
+ value="{{ quality }}"
554
+ />
555
+ <i class="dropdown icon"></i>
556
+ {% for q in quality.split(',') %}
557
+ <div
558
+ class="ui label transition visible"
559
+ data-value="{{ q }}"
560
+ style="
561
+ display: inline-block !important;
562
+ "
563
+ >
564
+ {{ q }}<i class="delete icon"></i>
565
+ </div>
566
+ {% endfor %} {% else %}
567
+ <input
568
+ name="quality"
569
+ type="hidden"
570
+ value=""
571
+ />
572
+ <i class="dropdown icon"></i>
573
+ {% endif %}
574
+ <div class="default text">
575
+ Choose a quality.
576
+ </div>
577
+ <div class="menu">
578
+ <div class="item" data-value="16k">
579
+ 16K
580
+ </div>
581
+ <div class="item" data-value="8k">
582
+ 8K
583
+ </div>
584
+ <div class="item" data-value="4k">
585
+ 4K
586
+ </div>
587
+ <div class="item" data-value="2k">
588
+ 2K
589
+ </div>
590
+ <div
591
+ class="item"
592
+ data-value="high-resolution"
593
+ >
594
+ High Resolution
595
+ </div>
596
+ <div
597
+ class="item"
598
+ data-value="low-resolution"
599
+ >
600
+ Low Resolution
601
+ </div>
602
+ <div
603
+ class="item"
604
+ data-value="high-quality"
605
+ >
606
+ High Quality
607
+ </div>
608
+ <div
609
+ class="item"
610
+ data-value="low-quality"
611
+ >
612
+ Low Quality
613
+ </div>
614
+ </div>
615
+ </div>
616
+ </div>
617
+ </div>
618
+
619
+ <!-- multiselect for scene-->
620
+ <div class="multiselect-single-wrapper">
621
+ <label for="scene">Scene</label>
622
+ <div id="multiselect-scene">
623
+ <div
624
+ class="ui multiple selection dropdown"
625
+ id="scene-select"
626
+ >
627
+ {% if scene %}
628
+ <input
629
+ name="scene"
630
+ type="hidden"
631
+ value="{{ scene }}"
632
+ />
633
+ <i class="dropdown icon"></i>
634
+ {% for s in scene.split(',') %}
635
+ <div
636
+ class="ui label transition visible"
637
+ data-value="{{ s }}"
638
+ style="
639
+ display: inline-block !important;
640
+ "
641
+ >
642
+ {{ s }}<i class="delete icon"></i>
643
+ </div>
644
+ {% endfor %} {% else %}
645
+ <input
646
+ name="scene"
647
+ type="hidden"
648
+ value=""
649
+ />
650
+ <i class="dropdown icon"></i>
651
+ {% endif %}
652
+ <div class="default text">
653
+ Choose a scene.
654
+ </div>
655
+ <div class="menu">
656
+ <div
657
+ class="item"
658
+ data-value="utopia"
659
+ >
660
+ Utopia
661
+ </div>
662
+ <div
663
+ class="item"
664
+ data-value="fantasy"
665
+ >
666
+ Fantasy
667
+ </div>
668
+ <div
669
+ class="item"
670
+ data-value="whimsically"
671
+ >
672
+ Whimsically
673
+ </div>
674
+ <div
675
+ class="item"
676
+ data-value="deserted"
677
+ >
678
+ Deserted
679
+ </div>
680
+ <div class="item" data-value="city">
681
+ City
682
+ </div>
683
+ <div
684
+ class="item"
685
+ data-value="street"
686
+ >
687
+ Street
688
+ </div>
689
+ <div
690
+ class="item"
691
+ data-value="universe"
692
+ >
693
+ Universe
694
+ </div>
695
+ <div
696
+ class="item"
697
+ data-value="morning"
698
+ >
699
+ Morning
700
+ </div>
701
+ <div
702
+ class="item"
703
+ data-value="sunlight"
704
+ >
705
+ Sunlight
706
+ </div>
707
+ <div
708
+ class="item"
709
+ data-value="galaxy"
710
+ >
711
+ Galaxy
712
+ </div>
713
+ <div
714
+ class="item"
715
+ data-value="dungeon"
716
+ >
717
+ Dungeon
718
+ </div>
719
+ <div
720
+ class="item"
721
+ data-value="nebula"
722
+ >
723
+ Nebula
724
+ </div>
725
+ <div
726
+ class="item"
727
+ data-value="garden"
728
+ >
729
+ Garden
730
+ </div>
731
+ <div
732
+ class="item"
733
+ data-value="babylon"
734
+ >
735
+ Babylon
736
+ </div>
737
+ <div
738
+ class="item"
739
+ data-value="meadow"
740
+ >
741
+ Meadow
742
+ </div>
743
+ <div
744
+ class="item"
745
+ data-value="nature"
746
+ >
747
+ Nature
748
+ </div>
749
+ <div
750
+ class="item"
751
+ data-value="apocalyptic"
752
+ >
753
+ Apocalyptic
754
+ </div>
755
+ <div
756
+ class="item"
757
+ data-value="castle"
758
+ >
759
+ Castle
760
+ </div>
761
+ <div
762
+ class="item"
763
+ data-value="forest"
764
+ >
765
+ Forest
766
+ </div>
767
+ <div
768
+ class="item"
769
+ data-value="ruins"
770
+ >
771
+ Ruins
772
+ </div>
773
+ <div
774
+ class="item"
775
+ data-value="classroom"
776
+ >
777
+ Classroom
778
+ </div>
779
+ <div class="item" data-value="city">
780
+ City
781
+ </div>
782
+ <div class="item" data-value="rain">
783
+ Rain
784
+ </div>
785
+ <div class="item" data-value="sky">
786
+ Sky
787
+ </div>
788
+ <div
789
+ class="item"
790
+ data-value="geography"
791
+ >
792
+ Geography
793
+ </div>
794
+ <div
795
+ class="item"
796
+ data-value="architectural"
797
+ >
798
+ Architectural
799
+ </div>
800
+ </div>
801
+ </div>
802
+ </div>
803
+ </div>
804
+ </div>
805
+ </form>
806
+ </div>
807
+
808
+ <!-- <h1>Discover better prompts about "{{query}}"</h1> -->
809
+ </div>
810
+
811
+ <div class="results-wrapper">
812
+ {% if not result %}
813
+ <p id="no-results">
814
+ Sorry, no results. Try adjusting your selected filters to
815
+ see different results.
816
+ </p>
817
+ {% endif %} {% for value in result %}
818
+ <div class="single-result-card">
819
+ <img src="{{ value[1] }}" alt="Image" />
820
+ <div class="prompt-text-wrapper">
821
+ <p class="prompt-text-p">{{ value[0] }}</p>
822
+ </div>
823
+ <form action="/search" method="POST">
824
+ <input
825
+ type="hidden"
826
+ name="query"
827
+ value="{{ value[0] }}"
828
+ />
829
+ <!-- <input type="hidden" name="query" value="{{ value[2] }}" /> -->
830
+ <!-- <input type="hidden" name="title" value="{{ value[0] }}" /> -->
831
+ <input type="submit" value="Find Similar" />
832
+ <div class="pick-this">
833
+ <span>Copy</span>
834
+ <svg
835
+ xmlns="http://www.w3.org/2000/svg"
836
+ height="16"
837
+ width="14"
838
+ viewBox="0 0 448 512"
839
+ class="icon-copy"
840
+ >
841
+ <path
842
+ d="M208 0H332.1c12.7 0 24.9 5.1 33.9 14.1l67.9 67.9c9 9 14.1 21.2 14.1 33.9V336c0 26.5-21.5 48-48 48H208c-26.5 0-48-21.5-48-48V48c0-26.5 21.5-48 48-48zM48 128h80v64H64V448H256V416h64v48c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V176c0-26.5 21.5-48 48-48z"
843
+ fill="white"
844
+ />
845
+ </svg>
846
+ </div>
847
+ </form>
848
+ </div>
849
+ {% endfor %}
850
+ </div>
851
+ </main>
852
+ <script src="https://cdn.bootcss.com/jquery/3.4.1/jquery.js"></script>
853
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/semantic.min.js"></script>
854
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.11/clipboard.min.js"></script>
855
+ <script src="static/js/script.js"></script>
856
+ </body>
857
+ </html>