sartajbhuvaji committed on
Commit
8dba7d9
1 Parent(s): 50e8cce

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +37 -0
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - sartajbhuvaji/gutenberg
5
+ base_model:
6
+ - google-bert/bert-base-uncased
7
+ pipeline_tag: text-classification
8
+ tags:
9
+ - classification
10
+ ---
11
+
12
+ ```python
13
+ from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
14
+ from datasets import load_dataset
15
+ from transformers import pipeline
16
+ import pandas as pd
17
+
18
+ model = BertForSequenceClassification.from_pretrained("sartajbhuvaji/gutenberg-bert-base-uncased")
19
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
20
+
21
+ # Create a text classification pipeline
22
+ classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, device='cuda')
23
+
24
+ # Test the pipeline
25
+ result = classifier("This is a great book!")
26
+ print(result) #[{'label': 'LABEL_8', 'score': 0.2576160430908203}]
27
+
28
+ # Test the pipeline on a document
29
+ dataset = load_dataset("sartajbhuvaji/gutenberg", split="100")
30
+ df = dataset.to_pandas()
31
+
32
+ doc_id = 1
33
+ doc_text = df.loc[df['DocID'] == doc_id, 'Text'].values[0]
34
+
35
+ result = classifier(doc_text[:512]) # Truncate to the first 512 characters (not tokens)
36
+ print(result) # [{'label': 'LABEL_2', 'score': 0.28877997398376465}]
37
+ ```