from collections import Counter
from html import escape
from textwrap import dedent
from urllib.parse import urlparse

import streamlit as st
from bs4 import BeautifulSoup
def _domain_of(url):
    """Return the network location of *url*, or '' when it cannot be parsed."""
    try:
        return urlparse(url).netloc
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. an unclosed IPv6
        # bracket); keep the bookmark and file it under the empty domain.
        return ''


def clean_bookmarks(html_content):
    """Group the links of a bookmark file by domain and render them twice.

    Args:
        html_content: Text of an HTML bookmark file.

    Returns:
        A ``(cleaned_html, cleaned_markdown)`` tuple of strings. Domains are
        ordered by number of links, most first (ties keep first-seen order);
        links within a domain are sorted by anchor text. ``<a>`` tags that
        have no ``href`` attribute are skipped.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Map each domain to its (url, anchor text) pairs, in document order.
    domain_url_dict = {}
    for link in soup.find_all('a'):
        url = link.get('href')
        if url is None:
            # Named anchors carry no target; urlparse(None) would yield a
            # bytes netloc and a "(None)" link in the output.
            continue
        domain_url_dict.setdefault(_domain_of(url), []).append(
            (url, link.text.strip())
        )

    # The size of each group is the domain's link count; sorted() is stable,
    # so equally frequent domains stay in first-seen order.
    sorted_domains = sorted(
        domain_url_dict.items(),
        key=lambda item: len(item[1]),
        reverse=True,
    )

    # NOTE(review): the tag names emitted below are a reconstruction; confirm
    # they match the markup this tool is expected to produce.
    html_parts = ['<html><body>\n']
    markdown_parts = []
    for domain, url_anchors in sorted_domains:
        # Titles and URLs come from an uploaded file, so escape them before
        # placing them in HTML.
        html_parts.append(f'<h2>{escape(domain)}</h2>\n')
        markdown_parts.append(f'## {domain}\n')
        for url, anchor_name in sorted(url_anchors, key=lambda pair: pair[1]):
            html_parts.append(
                f'<a href="{escape(url)}">{escape(anchor_name)}</a><br>\n'
            )
            markdown_parts.append(f'[{anchor_name}]({url})\n')
        html_parts.append('<br>\n')
        markdown_parts.append('\n')
    html_parts.append('</body></html>')

    return ''.join(html_parts), ''.join(markdown_parts)
def Instructions():
    """Render the export / curate / import walkthrough as Markdown on the page."""
    # NOTE(review): the tag names shown in the "Curate" steps were restored
    # from the layout of Chrome's bookmark export; confirm the wording.
    # Blank lines separate the sections so each heading renders as its own
    # paragraph instead of continuing the preceding list item.
    instructions = dedent('''
        To export your Google Chrome bookmarks, including those on the bookmark bar, and curate the list, follow these steps:

        **Export bookmarks:**

        1. Open Google Chrome and click on the three-dot menu icon in the top-right corner.
        2. Go to "Bookmarks" > "Bookmark manager" or press Ctrl+Shift+O (Windows) or Cmd+Option+B (Mac).
        3. In the Bookmark Manager, click on the three-dot menu icon and select "Export bookmarks."
        4. Choose a location to save the HTML file containing your bookmarks and click "Save."

        **Curate the bookmarks:**

        1. Open the exported HTML file in a text editor like Notepad++ (Windows) or TextEdit (Mac).
        2. Locate the section containing your bookmarks. It will be enclosed within `<DL><p>` tags.
        3. Find the bookmark bar section, which is usually labeled with `<H3 ...>Bookmarks bar</H3>`.
        4. Delete any unwanted bookmarks by removing the entire `<DT><A ...>...</A>` line corresponding to that bookmark.
        5. Organize the remaining bookmarks by moving the `<DT><A ...>...</A>` lines within the bookmark bar section.
        6. Save the edited HTML file.

        **Import the curated bookmarks:**

        1. In Google Chrome, open the Bookmark Manager again.
        2. Click on the three-dot menu icon and select "Import bookmarks."
        3. Choose the edited HTML file you saved after curating it and click "Open."
        4. Your curated bookmarks will now be imported into Chrome, replacing the previous set of bookmarks.

        By following these steps, you can export your Google Chrome bookmarks, curate the list by removing unwanted bookmarks and organizing the remaining ones, and then import the curated list back into Chrome. This process allows you to keep your bookmark bar clean and organized with the bookmarks you use daily.
        ''')
    st.markdown(instructions)
def main():
    """Build the page: title, instructions, upload widget and cleaned output.

    Once a file is uploaded, its links are regrouped with
    ``clean_bookmarks`` and shown as HTML source, Markdown source and
    rendered Markdown, with a button to download the HTML.
    """
    st.title('Bookmark File Cleaner')
    Instructions()

    uploaded_file = st.file_uploader('Choose an HTML bookmark file', type=['html'])
    if uploaded_file is None:
        return

    # Substitute undecodable bytes rather than aborting the whole run on a
    # single stray byte in the upload.
    html_content = uploaded_file.read().decode('utf-8', errors='replace')
    cleaned_html, cleaned_markdown = clean_bookmarks(html_content)

    st.subheader('Cleaned Bookmarks')
    st.text_area('Output HTML', value=cleaned_html, height=400)
    st.text_area('Output Markdown', value=cleaned_markdown, height=400)

    output_file = 'cleaned_bookmarks.html'
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent non-ASCII bookmark titles.
    # NOTE(review): the download button below is fed the string directly and
    # does not read this file; confirm the on-disk copy is still wanted.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(cleaned_html)

    st.download_button(
        'Download Cleaned Bookmarks',
        cleaned_html,
        file_name=output_file,
        mime='text/html',
    )
    st.markdown(cleaned_markdown)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()