awacke1 committed on
Commit
24eca53
·
verified ·
1 Parent(s): 363b9ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -49
app.py CHANGED
@@ -1,65 +1,53 @@
1
  import streamlit as st
2
  import os
3
- import re
4
- from xml.etree import ElementTree
5
 
6
- # Function to create a search URL for Wikipedia based on dataset name
7
- def create_search_url_wikipedia(dataset_name):
8
  base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
9
- return base_url + dataset_name.replace(' ', '+').replace('–', '%E2%80%93').replace('&', 'and')
10
 
11
- # Function to scan current directory for XML files and extract URLs
12
- def scan_xml_for_urls():
13
- urls = []
14
- for file in os.listdir('.'):
15
- if file.endswith('.xml'):
16
- try:
17
- tree = ElementTree.parse(file)
18
- root = tree.getroot()
19
- # Assuming that URLs might be within 'url' tags for simplicity
20
- for url in root.iter('url'):
21
- urls.append(url.text)
22
- except ElementTree.ParseError:
23
- st.error(f"Error parsing {file}")
24
- return urls
25
 
26
- # Main application
27
- def main():
28
- st.title("Freedom of Information Act (FOIA) πŸ“œ and Open Data 🌍")
29
-
30
- # Description of FOIA
31
- st.markdown("""
32
- The Freedom of Information Act (FOIA) πŸ‡ΊπŸ‡Έ is a law that keeps citizens in the know about their government. By allowing full or partial disclosure of previously unreleased information and documents controlled by the United States government, FOIA strengthens the principle of transparency and accountability. Datasets created or used by federal programs, and thus made publicly available under FOIA, are invaluable resources for researchers, developers, and the curious minds alike! πŸ•΅οΈβ€β™‚οΈπŸ”βœ¨
33
  """)
34
-
35
- # List of datasets under FOIA with guessed Wikipedia URLs
36
  datasets = [
37
  "Provider Taxonomy",
38
  "Consumer Complaint Database",
39
- "National Bridge Inventory",
40
  "Medicare Provider Utilization and Payment Data",
41
- "College Scorecard",
 
 
42
  "Toxic Release Inventory",
43
- "Veterans Data",
44
- "Public Access to Court Electronic Records (PACER)"
45
  ]
46
-
47
- st.markdown("## FOIA Datasets and Their Wikipedia URLs 🌐")
48
- st.markdown("| Dataset | Wikipedia URL |")
49
- st.markdown("| ------- | ------------- |")
50
  for dataset in datasets:
51
- url = create_search_url_wikipedia(dataset)
52
- st.markdown(f"| {dataset} | [Link]({url}) |")
53
-
54
- # Scan for XML files and display URLs
55
- st.markdown("## Detected URLs in Local XML Files πŸ“πŸ”—")
56
- urls = scan_xml_for_urls()
57
- if urls:
58
- for url in urls:
59
- st.markdown(f"- [URL]({url})")
60
- else:
61
- st.markdown("No XML files with URLs found in the current directory.")
62
 
63
- # Run the main application
64
  if __name__ == "__main__":
65
- main()
 
1
  import streamlit as st
2
  import os
3
+ import xml.etree.ElementTree as ET
 
4
 
5
+ # Function to create search URL on Wikipedia
6
+ def create_search_url_wikipedia(search_query):
7
  base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
8
+ return base_url + search_query.replace(' ', '+').replace('–', '%E2%80%93').replace('&', 'and')
9
 
10
+ # Function to scan for XML files and generate Wikipedia links for organizations
11
+ def scan_for_xml_files_and_generate_links():
12
+ xml_files = [f for f in os.listdir('.') if f.endswith('.xml')]
13
+ for xml_file in xml_files:
14
+ tree = ET.parse(xml_file)
15
+ root = tree.getroot()
16
+ # Assuming the XML structure provided is consistent across files
17
+ for org in root.findall(".//nc:Organization", namespaces={'nc': 'http://niem.gov/niem/niem-core/2.0'}):
18
+ short_name = org.find("nc:OrganizationAbbreviationText", namespaces={'nc': 'http://niem.gov/niem/niem-core/2.0'}).text
19
+ long_name = org.find("nc:OrganizationName", namespaces={'nc': 'http://niem.gov/niem/niem-core/2.0'}).text
20
+ st.markdown(f"- **{short_name}**: [Wikipedia]({create_search_url_wikipedia(short_name)})")
21
+ st.markdown(f"- **{long_name}**: [Wikipedia]({create_search_url_wikipedia(long_name)})")
 
 
22
 
23
+ # Streamlit UI
24
+ def app():
25
+ st.title("Freedom of Information Act (FOIA) Open Data πŸŒπŸ“Š")
26
+ st.write("""
27
+ The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. πŸŽ‰βœ¨
28
+
29
+ Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. πŸ“šπŸ”
30
  """)
31
+
32
+ # Example datasets under FOIA
33
  datasets = [
34
  "Provider Taxonomy",
35
  "Consumer Complaint Database",
 
36
  "Medicare Provider Utilization and Payment Data",
37
+ "Global Terrorism Database",
38
+ "National Nutrient Database",
39
+ "Patent Grant Full Text Data",
40
  "Toxic Release Inventory",
41
+ "Residential Energy Consumption Survey",
 
42
  ]
43
+
44
+ # Displaying the datasets table
45
+ st.markdown("### FOIA Datasets and Wikipedia URLs")
 
46
  for dataset in datasets:
47
+ st.markdown(f"- **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")
48
+
49
+ st.markdown("### Organizations in Found XML Files")
50
+ scan_for_xml_files_and_generate_links()
 
 
 
 
 
 
 
51
 
 
52
  if __name__ == "__main__":
53
+ app()