Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import re
from urllib.parse import quote_plus
from xml.etree import ElementTree

import streamlit as st
6 |
+
# Function to create a search URL for Wikipedia based on dataset name
def create_search_url_wikipedia(dataset_name):
    """Build an English-Wikipedia search-redirect URL for *dataset_name*.

    The previous version hand-escaped only spaces, a mojibake-corrupted
    en dash, and '&'. ``quote_plus`` percent-encodes every reserved
    character correctly (spaces become '+', an en dash becomes
    %E2%80%93). '&' is still spelled out as 'and' first so it reads
    naturally in the search query instead of terminating the parameter.

    Returns:
        str: a URL that redirects to the Wikipedia search results.
    """
    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
    return base_url + quote_plus(dataset_name.replace('&', 'and'))
10 |
+
|
11 |
+
# Function to scan current directory for XML files and extract URLs
|
12 |
+
def scan_xml_for_urls():
|
13 |
+
urls = []
|
14 |
+
for file in os.listdir('.'):
|
15 |
+
if file.endswith('.xml'):
|
16 |
+
try:
|
17 |
+
tree = ElementTree.parse(file)
|
18 |
+
root = tree.getroot()
|
19 |
+
# Assuming that URLs might be within 'url' tags for simplicity
|
20 |
+
for url in root.iter('url'):
|
21 |
+
urls.append(url.text)
|
22 |
+
except ElementTree.ParseError:
|
23 |
+
st.error(f"Error parsing {file}")
|
24 |
+
return urls
|
25 |
+
|
26 |
+
# Main application
def main():
    """Render the FOIA open-data page: an intro, a dataset/Wikipedia-link
    table, and any URLs discovered in local XML files."""
    st.title("Freedom of Information Act (FOIA) π and Open Data π")

    # Short description of what FOIA is and why these datasets matter.
    st.markdown("""
    The Freedom of Information Act (FOIA) πΊπΈ is a law that keeps citizens in the know about their government. By allowing full or partial disclosure of previously unreleased information and documents controlled by the United States government, FOIA strengthens the principle of transparency and accountability. Datasets created or used by federal programs, and thus made publicly available under FOIA, are invaluable resources for researchers, developers, and the curious minds alike! π΅οΈββοΈπβ¨
    """)

    # Datasets released under FOIA; each row links to a Wikipedia search.
    dataset_names = (
        "Provider Taxonomy",
        "Consumer Complaint Database",
        "National Bridge Inventory",
        "Medicare Provider Utilization and Payment Data",
        "College Scorecard",
        "Toxic Release Inventory",
        "Veterans Data",
        "Public Access to Court Electronic Records (PACER)",
    )

    st.markdown("## FOIA Datasets and Their Wikipedia URLs π")
    st.markdown("| Dataset | Wikipedia URL |")
    st.markdown("| ------- | ------------- |")
    for name in dataset_names:
        st.markdown(f"| {name} | [Link]({create_search_url_wikipedia(name)}) |")

    # Surface any URLs found in XML files sitting next to the app.
    st.markdown("## Detected URLs in Local XML Files ππ")
    detected = scan_xml_for_urls()
    if not detected:
        st.markdown("No XML files with URLs found in the current directory.")
    else:
        for link in detected:
            st.markdown(f"- [URL]({link})")
62 |
+
|
63 |
+
# Run the main application only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|