admin committed on
Commit
bb29cc0
·
1 Parent(s): 7967533

update, add api modules

Browse files
ja.README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: book-thumb-gen
3
+ emoji: 📙
4
+ colorFrom: gray
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 4.32.1
8
+ app_file: src/app/run.py
9
+ pinned: true
10
+ license: mit
11
+ ---
12
+
13
+ # Shoei API WebUI
14
+
15
+ この改良版には以下が含まれます:
16
+
17
+ 1. **コード構造の改善**: クラスと関数が明確に定義され、メソッドが論理的に分離されています。
18
+
19
+ 2. **例外処理**: `get_thumbnail` メソッドにエラー処理を追加しました。
20
+
21
+ 3. **非同期サポート**: API 呼び出しの非同期処理を改善しました。
22
+
23
+ 4. **テスト**: `pytest` を使用して基本的なテストを追加しました。
24
+
25
+ 5. **動的 HTML 生成**: 本のサムネイルと詳細の動的 HTML 生成を改善しました。
26
+
27
+ ## 開始
28
+
29
+ ```sh
30
+ python src/app/run.py
31
+ ```
32
+
33
+ `pytest` を使用してテストを実行します:
34
+
35
+ ```sh
36
+ pytest test/shoei_api.py
37
+ ```
38
+
39
+ 必要な依存関係を必ずインストールしてください:
40
+
41
+ ```sh
42
+ pip install gradio aiohttp pytest
43
+ ```
44
+
45
+ ### 国立国会図書館 API
46
+ 国立国会図書館サーチで提供されているAPIは主に次の5つです。
47
+
48
+ - 検索用API(SRU): 書名や著者、ISBNなど様々なメタデータから検索できます。GETパラメータで問い合わせるとXMLで情報を返します。
49
+ - 検索用API(OpenSearch;XML):
50
+ - 検索用API(OpenURL;HTML):
51
+ - ハーベスト用API(OAI-PMH): サービス間でのメタデータを交換するためのプロトコル用API
52
+ - 書影API: 国会図書館サーチが所持している書影の画像を取得できます。ISBNをGETパラメータにつけて問い合わせると画像データを返す。
53
+
54
+ ### OpenBD API
55
+
56
+ ### Calil API
57
+ - [calil.jp/api/dashboard/](https://calil.jp/api/dashboard/?register=true)
58
+
59
+ ## 関連
60
+ - https://www.ndl.go.jp/jp/service/rssemag.html
61
+ - https://www.hanmoto.com/permission-for-use/free-to-use
requirements.txt CHANGED
@@ -1,4 +1,7 @@
1
  aiohttp==3.8.4
 
2
  gradio==4.36.1
3
  isbnlib==3.10.14
4
- pytest==7.3.1
 
 
 
1
  aiohttp==3.8.4
2
+ beautifulsoup4==4.12.3
3
  gradio==4.36.1
4
  isbnlib==3.10.14
5
+ pandas==1.5.3
6
+ python-dotenv==1.0.1
7
+ Requests==2.32.3
src/app/booklog-api.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+
6
class BooklogAPI:
    """Small Booklog client: fetches a JSON book list and scrapes item pages."""

    # Seconds to wait for Booklog before giving up. Without an explicit
    # timeout ``requests`` may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self, api_key):
        """Store the API key and the base URL of the JSON endpoint.

        Args:
            api_key: Value later interpolated into the item-page URL by
                ``get_book_details``.
        """
        self.base_url = "https://api.booklog.jp/v2/json"
        self.api_key = api_key

    def get_books(self, user_id, month):
        """Return the books of ``user_id`` whose ``read_date`` starts with ``month``.

        Args:
            user_id: Path segment appended to the base URL.
            month: Prefix compared against the first seven characters of each
                book's ``read_date`` (e.g. ``"2024-05"``).

        Returns:
            A list of matching book dicts, or ``None`` when the HTTP status
            is not 200.
        """
        endpoint = f"{self.base_url}/{user_id}?count=10000"
        response = requests.get(endpoint, timeout=self.REQUEST_TIMEOUT)

        if response.status_code != 200:
            print(f"Failed to fetch books. Status code: {response.status_code}")
            return None

        # Keep only the books read in the requested month.
        return self._filter_by_month(response.json(), month)

    @staticmethod
    def _filter_by_month(books, month):
        """Filter ``books`` down to entries whose ``read_date`` begins with ``month``."""
        if isinstance(books, dict):
            # NOTE(review): presumably the payload may wrap the list under a
            # "books" key -- confirm against a real response.
            books = books.get('books', [])

        matched = []
        for book in books or []:
            if not isinstance(book, dict):
                continue
            read_date = book.get('read_date')
            # Entries with no (or a non-string) read date cannot match a month.
            if isinstance(read_date, str) and read_date[:7] == month:
                matched.append(book)
        return matched

    def get_book_details(self, isbn):
        """Scrape the registration date and the first item link for ``isbn``.

        Returns:
            A ``(register_date, amazon_link)`` tuple. Either element is
            ``None`` when the corresponding page element is not found.
        """
        # NOTE(review): ``self.api_key`` is used as the user segment of the
        # URL -- confirm this is intended rather than a user id.
        page = requests.get(
            f"https://booklog.jp/users/{self.api_key}/archives/1/{isbn}",
            timeout=self.REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(page.content, "html.parser")

        date_area = soup.find(class_='read-day-status-area')
        date_span = date_area.find('span') if date_area is not None else None
        register_date = date_span.text if date_span is not None else None

        info_area = soup.find(class_='itemInfoElm')
        anchor = info_area.find('a') if info_area is not None else None
        amazon_link = anchor.get('href') if anchor is not None else None

        return register_date, amazon_link
30
+
31
def booklog_app(month):
    """Collect title, author, registration date and link for books read in ``month``.

    Reads ``BOOKLOG_API_KEY`` and ``BOOKLOG_USER_ID`` from the environment.

    Returns:
        A list of detail dicts, or a message string when no books are found.
    """
    # Create the Booklog client from the configured API key.
    client = BooklogAPI(os.getenv("BOOKLOG_API_KEY", None))

    # Fetch the configured user's books read in the requested month.
    monthly_books = client.get_books(os.getenv("BOOKLOG_USER_ID", None), month)

    if not monthly_books:
        return "No books found for the selected month."

    details = []
    for entry in monthly_books:
        registered_on, link = client.get_book_details(entry['isbn'])
        details.append({
            'title': entry['title'],
            'author': entry['author'],
            'register_date': registered_on,
            'amazon_link': link,
        })
    return details
56
+
57
if __name__ == '__main__':
    # Build the single-textbox interface and start serving it.
    gr.Interface(
        fn=booklog_app,
        inputs="text",
        outputs="table",
        title="Booklog Book Details",
    ).launch()
src/app/calil-api.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import requests
4
+ import gradio as gr
5
+ from dotenv import load_dotenv
6
+ from pandas import json_normalize
7
+
8
+
9
+ load_dotenv()
10
+
11
+ readme = '''
12
+ 個別の本ページへのリンク:
13
+ https://calil.jp/book/{ISBN10}
14
+ 図書館ページへのリンク:
15
+ https://calil.jp/library/{libid}/{name}
16
+ 図書館ページへのリンク2(libkeyとシステムIDから飛ぶ場合):
17
+ https://calil.jp/library/search?s={systemid}&k={Libkey}
18
+ '''
19
+
20
class CalilAPI:
    """Thin client for the Calil library and holdings-check endpoints."""

    # Seconds to wait for a response; without a timeout ``requests`` can
    # block forever and hang the UI callback.
    REQUEST_TIMEOUT = 10

    def __init__(self, app_key):
        """Remember the application key and the API base URL."""
        self.app_key = app_key
        self.base_url = "https://api.calil.jp"

    def _endpoint_url(self, endpoint):
        """Join ``base_url`` and ``endpoint`` with exactly one ``/`` between them."""
        return f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"

    def _get_json(self, endpoint, params):
        """GET ``endpoint`` and decode the JSON body.

        Returns:
            The decoded JSON on HTTP 200, or ``{"error": "Invalid JSON response"}``
            when the body cannot be decoded.

        Raises:
            requests.exceptions.HTTPError: via ``raise_for_status`` for error
                statuses.
        """
        response = requests.get(
            self._endpoint_url(endpoint),
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            # Raises for 4xx/5xx; any other non-200 status falls through
            # and the method returns None, as before.
            response.raise_for_status()
            return None
        try:
            return response.json()
        except requests.exceptions.JSONDecodeError:
            return {"error": "Invalid JSON response"}

    def search_library(self, pref, city):
        """Query the ``library`` endpoint for libraries in ``pref`` / ``city``."""
        params = {
            'appkey': self.app_key,
            'pref': pref,
            'city': city,
            'systemid': None,  # e.g. Aomori_Pref
            'geocode': None,   # e.g. 136.7163027,35.390516
            'format': 'json',  # or xml
            'callback': '',
            'limit': None,
        }
        return self._get_json('library', params)

    def search_book(self, isbn, systemid):
        """Query the ``check`` endpoint for ``isbn`` in library system ``systemid``."""
        params = {
            'appkey': self.app_key,
            'isbn': isbn,
            'systemid': systemid,
            'format': 'json',
            'callback': 'no',
        }
        return self._get_json('check', params)
64
+
65
class CalilOpenURLAPI(CalilAPI):
    """Placeholder client for the Calil OpenURL endpoint."""

    def __init__(self, app_key):
        """Initialise the base client, then record the OpenURL path."""
        # The base initialiser sets ``app_key`` and ``base_url``; skipping it
        # left instances without either attribute.
        super().__init__(app_key)
        self.endp = '/openurl'

    def search(self, query):
        """Not implemented yet: builds the parameter skeleton and returns ``None``."""
        # TODO: fill these from ``query`` and issue the request.
        params = {
            'rft.btitle': None,
            'rft.title': None,
            'rft.au': None,
            'rft.aufirst': None,
            'rft.aulast': None,
            'rft.pub': None,
        }
78
+
79
class Parser:
    """Stateless helpers that normalise ISBNs and flatten API payloads."""

    @staticmethod
    def handle_isbn(isbn):
        """Return ``isbn`` without hyphens and without surrounding whitespace."""
        return "".join(isbn.split("-")).strip()

    @staticmethod
    def parse_library_data(data):
        """Reduce each library record to name, address, systemid and libkey.

        Returns the ``"error"`` value instead when ``data`` contains that key.
        """
        if "error" in data:
            return data["error"]
        return [
            {
                'name': entry.get('formal'),
                'address': entry.get('address'),
                'systemid': entry.get('systemid'),
                'libkey': entry.get('libkey'),
            }
            for entry in data
        ]

    @staticmethod
    def parse_book_data(data):
        """List ``libkey``/``status`` pairs for every system whose status is ``OK``.

        Returns the ``"error"`` value instead when ``data`` contains that key.
        """
        if "error" in data:
            return data["error"]
        return [
            {'libkey': key, 'status': availability}
            for system_info in data.values()
            if isinstance(system_info, dict) and system_info.get('status') == 'OK'
            for key, availability in system_info['libkey'].items()
        ]
111
+
112
def search_library_ui(pref, city):
    """UI callback: look up libraries for ``pref`` / ``city`` and return the raw result.

    The application key is read from the ``CALIL_APP_KEY`` environment variable.
    """
    client = CalilAPI(os.getenv("CALIL_APP_KEY"))
    # The response is handed back unparsed, whether it is data or an error payload.
    return client.search_library(pref, city)
123
+
124
def search_book_ui(isbn, systemid):
    """UI callback: check holdings of ``isbn`` in ``systemid`` and return the raw result.

    The application key is read from the ``CALIL_APP_KEY`` environment variable.
    """
    client = CalilAPI(os.getenv("CALIL_APP_KEY"))
    # Strip hyphens and surrounding whitespace before querying.
    normalized_isbn = Parser.handle_isbn(isbn)
    # The response is handed back unparsed, whether it is data or an error payload.
    return client.search_book(normalized_isbn, systemid)
136
+
137
def create_ui():
    """Build the two-tab Gradio app (library search and holdings search).

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# 図書館データベース検索 & 蔵書検索")

        with gr.Tab("図書館検索"):
            with gr.Row():
                with gr.Column():
                    pref_input = gr.Textbox(label="都道府県")
                    city_input = gr.Textbox(label="市区町村")
                    library_search_btn = gr.Button("検索")

                    gr.Examples([['青森県', '青森市']], [pref_input, city_input])

                with gr.Column(elem_classes='max-lines'):
                    library_result = gr.JSON()
            library_search_btn.click(
                fn=search_library_ui,
                inputs=[pref_input, city_input],
                outputs=library_result,
            )

        with gr.Tab("蔵書検索"):
            with gr.Row():
                with gr.Column():
                    isbn_input = gr.Textbox(label="ISBN")
                    systemid_input = gr.Textbox(label="System ID")
                    book_search_btn = gr.Button("検索")

                    # Each example row needs one value per input component;
                    # the second row used to be wrapped in an extra list.
                    gr.Examples(
                        [
                            ['4834000826', 'Aomori_Pref'],
                            ['4834000826', 'Tokyo_Setagaya'],
                        ],
                        [isbn_input, systemid_input],
                    )

                with gr.Column(elem_classes='max-lines'):
                    book_result = gr.JSON()
            book_search_btn.click(
                fn=search_book_ui,
                inputs=[isbn_input, systemid_input],
                outputs=book_result,
            )

    return demo
170
+
171
if __name__ == "__main__":
    # Build the Blocks app, then start serving it.
    demo = create_ui()
    demo.launch()
src/app/ndl-api.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
class SruAPI:
    """Endpoint and request-parameter skeleton for the NDL Search SRU API."""

    def __init__(self):
        """Set the endpoint URL and an empty parameter template."""
        self.endp = 'https://ndlsearch.ndl.go.jp/api/sru/'
        self.params = {
            'operation': '',       # e.g. searchRetrieve
            'query': '',           # CQL query string
            'startRecord': '',     # e.g. 1
            # The SRU parameter name is plural ("maximumRecords").
            'maximumRecords': '',  # e.g. 200
            'recordPacking': '',   # string or xml
            'recordSchema': '',    # dc, dcndl
        }
13
+
14
class OpensearchAPI:
    """Endpoint and request-parameter skeleton for the NDL Search OpenSearch API."""

    def __init__(self):
        """Set the endpoint URL and an all-empty parameter template."""
        self.endp = 'https://ndlsearch.ndl.go.jp/api/opensearch/'
        # Example values: cnt -> 200, idx -> 1.
        self.params = dict.fromkeys(('title', 'creator', 'from', 'cnt', 'idx'), '')
24
+
25
class OpenURLAPI:
    """Placeholder for an OpenURL client; the endpoint is not filled in yet."""

    def __init__(self) -> None:
        """Start with an empty endpoint string."""
        self.endp: str = ''
28
+
29
class OAIPMH_API:
    """Placeholder for an OAI-PMH client; only a stub endpoint is stored."""

    def __init__(self) -> None:
        """Start with ``'/'`` as the endpoint."""
        self.endp: str = '/'
32
+
33
class ShoeiAPI:
    """Downloads cover thumbnails from NDL Search and renders them into an HTML table."""

    # Placeholder cell used for a missing thumbnail or to pad the last row.
    _EMPTY_CELL = '<td align="center" valign="top" width="33.33%"></td>'

    def __init__(self):
        """Set the thumbnail endpoint prefix."""
        self.endp = 'https://ndlsearch.ndl.go.jp/thumbnail/'

    async def get_thumbnail(self, isbn, temp_dir):
        """Download the thumbnail for ``isbn`` into ``temp_dir``.

        Returns:
            The path of the written ``<isbn>.jpg`` file, or ``None`` when the
            response status is not 200 or an ``aiohttp.ClientError`` occurs.
        """
        # Imported locally because this module has no top-level imports.
        import os

        import aiofiles
        import aiohttp

        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(f'{self.endp}{isbn}') as response:
                    if response.status != 200:
                        return None
                    image_data = await response.read()
                    image_path = os.path.join(temp_dir, f'{isbn}.jpg')
                    async with aiofiles.open(image_path, 'wb') as image_file:
                        await image_file.write(image_data)
                    return image_path
            except aiohttp.ClientError as e:
                print(f'Failed to retrieve thumbnail for {isbn}: {e}')
                return None

    @staticmethod
    def _render_cell(book, thumbnail):
        """Return one table cell for ``book``, HTML-escaping every interpolated value."""
        import html

        # Titles and ISBNs come from user-supplied data, so escape them
        # before they are placed into markup or attribute values.
        isbn = html.escape(str(book['isbn']), quote=True)
        title = html.escape(str(book['title']), quote=True)
        src = html.escape(str(thumbnail), quote=True)
        # NOTE(review): the unmatched </a> after </sub> is kept from the
        # original markup -- confirm whether it can be dropped.
        return f"""
        <td align="center" valign="top" width="33.33%">
        <img src="{src}" style="width:100px;">
        <br /><sub><b>{isbn}</b></sub></a><br /><description>{title}</description><a href="https://booklog.jp/item/1/{isbn}" >🔗</a>
        </td>
        """

    async def export_html(self, book_data, template_path, temp_dir):
        """Render ``book_data`` as rows of three thumbnail cells inside a template.

        Args:
            book_data: Sequence of dicts with ``isbn`` and ``title`` keys.
            template_path: File containing ``$timestamp`` and ``$rows`` placeholders.
            temp_dir: Directory where the downloaded thumbnails are stored.

        Returns:
            The template text with both placeholders substituted.
        """
        from datetime import datetime

        timestamp = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
        parts = []
        for i in range(0, len(book_data), 3):
            parts.append('<tr>')
            for j in range(3):
                if i + j < len(book_data):
                    book = book_data[i + j]
                    thumbnail = await self.get_thumbnail(book['isbn'], temp_dir)
                    if thumbnail:
                        parts.append(self._render_cell(book, thumbnail))
                    else:
                        parts.append(self._EMPTY_CELL)
                else:
                    # Pad the final row so that every row has three cells.
                    parts.append(self._EMPTY_CELL)
            parts.append('</tr>')
        rows = ''.join(parts)

        # Read explicitly as UTF-8 rather than the platform default encoding.
        with open(template_path, 'r', encoding='utf-8') as file:
            html_template = file.read()

        return html_template.replace('$timestamp', timestamp).replace('$rows', rows)

    async def create_zip(self, temp_dir, output_zip):
        """Write every file under ``temp_dir`` into ``output_zip`` with relative names."""
        import os
        import zipfile

        with zipfile.ZipFile(output_zip, 'w') as zipf:
            for root, _, files in os.walk(temp_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, temp_dir))
src/app/openbd-api.py ADDED
File without changes
src/app/run.py CHANGED
@@ -10,61 +10,6 @@ import gradio as gr
10
  from datetime import datetime
11
  from isbnlib import to_isbn10
12
 
13
- class ShoeiAPI:
14
- def __init__(self):
15
- self.endp = 'https://ndlsearch.ndl.go.jp/thumbnail/'
16
-
17
- async def get_thumbnail(self, isbn, temp_dir):
18
- async with aiohttp.ClientSession() as session:
19
- try:
20
- async with session.get(f'{self.endp}{isbn}') as response:
21
- if response.status == 200:
22
- image_data = await response.read()
23
- image_path = os.path.join(temp_dir, f'{isbn}.jpg')
24
- async with aiofiles.open(image_path, 'wb') as image_file:
25
- await image_file.write(image_data)
26
- return image_path
27
- else:
28
- return None
29
- except aiohttp.ClientError as e:
30
- print(f'Failed to retrieve thumbnail for {isbn}: {e}')
31
- return None
32
-
33
- async def export_html(self, book_data, template_path, temp_dir):
34
- timestamp = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
35
- rows = ''
36
- for i in range(0, len(book_data), 3):
37
- rows += '<tr>'
38
- for j in range(3):
39
- if i + j < len(book_data):
40
- book = book_data[i + j]
41
- thumbnail = await self.get_thumbnail(book['isbn'], temp_dir)
42
- if thumbnail:
43
- rows += f"""
44
- <td align="center" valign="top" width="33.33%">
45
- <img src="{thumbnail}" style="width:100px;">
46
- <br /><sub><b>{book['isbn']}</b></sub></a><br /><description>{book['title']}</description><a href="https://booklog.jp/item/1/{book['isbn']}" >🔗</a>
47
- </td>
48
- """
49
- else:
50
- rows += '<td align="center" valign="top" width="33.33%"></td>'
51
- else:
52
- rows += '<td align="center" valign="top" width="33.33%"></td>'
53
- rows += '</tr>'
54
-
55
- with open(template_path, 'r') as file:
56
- html_template = file.read()
57
-
58
- html_content = html_template.replace('$timestamp', timestamp).replace('$rows', rows)
59
- return html_content
60
-
61
- async def create_zip(self, temp_dir, output_zip):
62
- with zipfile.ZipFile(output_zip, 'w') as zipf:
63
- for root, _, files in os.walk(temp_dir):
64
- for file in files:
65
- file_path = os.path.join(root, file)
66
- zipf.write(file_path, os.path.relpath(file_path, temp_dir))
67
-
68
 
69
  class Parser:
70
  def __init__(self):
 
10
  from datetime import datetime
11
  from isbnlib import to_isbn10
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  class Parser:
15
  def __init__(self):
src/app/util.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
class Interface:
    """Namespace of static helpers for temp directories, zip archives and CSV input."""

    @staticmethod
    def get_tempdir():
        """Create a fresh temporary directory.

        Returns:
            ``(timestamp, temp_dir)``: the current Unix time in whole seconds
            and the path of the new directory.
        """
        # Imported locally because this module has no top-level imports.
        import tempfile
        import time

        timestamp = int(time.time())
        temp_dir = tempfile.mkdtemp()
        return timestamp, temp_dir

    @staticmethod
    def create_zip(filelist, tmp_fname, passwd=None):
        """Zip the files and directories in ``filelist`` into ``tmp_fname``.

        Archive names are relative to the directory of the first entry.

        Args:
            filelist: Paths of files and/or directories to add.
            tmp_fname: Destination archive path.
            passwd: When truthy, the archive is wrapped in a second archive
                named ``<tmp_fname>.zip`` and the inner file is removed.

        Returns:
            The absolute path of the resulting archive, or ``None`` when
            ``filelist`` is empty.

        Raises:
            RuntimeError: if anything fails while building the archive.
        """
        import os
        import zipfile

        if not filelist:
            return None
        try:
            zip_name = os.path.abspath(tmp_fname)
            base_dir = os.path.dirname(filelist[0])
            with zipfile.ZipFile(zip_name, "w", compression=zipfile.ZIP_DEFLATED) as f:
                for file in filelist:
                    if os.path.isfile(file):
                        f.write(file, os.path.relpath(file, base_dir))
                    elif os.path.isdir(file):
                        for root, _dirs, files in os.walk(file):
                            for filename in files:
                                filepath = os.path.join(root, filename)
                                f.write(filepath, os.path.relpath(filepath, base_dir))
            if not passwd:
                return zip_name

            # NOTE(review): the standard-library zipfile module cannot create
            # encrypted archives; setpassword() only sets the default password
            # for extraction. The wrapped archive is therefore NOT password
            # protected. Behaviour is kept as-is pending a decision.
            zip_name_encrypted = zip_name + ".zip"
            with zipfile.ZipFile(zip_name_encrypted, "w", compression=zipfile.ZIP_DEFLATED) as f:
                f.setpassword(passwd)
                f.write(zip_name, os.path.basename(zip_name))
            os.remove(zip_name)
            return zip_name_encrypted
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Failed to create zip file: {str(e)}") from e

    @staticmethod
    def read_csv(csv_file, fieldnames=None, encoding='utf-8'):
        """Read ``csv_file`` with ``csv.DictReader``.

        Returns:
            ``{"items": rows}`` where ``rows`` is the list of row dicts.
        """
        import csv

        with open(csv_file, 'r', newline='', encoding=encoding) as csvfile:
            feeds = list(csv.DictReader(csvfile, fieldnames=fieldnames))
        return {"items": feeds}