Spaces:
Sleeping
Sleeping
| import json | |
| import streamlit as st | |
| from google.oauth2 import service_account | |
| from google.cloud import language_v1 | |
| import urllib.parse | |
| import urllib.request | |
| import pandas as pd | |
| # Function to render a Google Search link for a Knowledge Graph entity ID (no API request is made) | |
def query_knowledge_graph(entity_id):
    """Render an "Open in Google Search" link for a Knowledge Graph entity.

    Despite its name, this function performs no API request: it builds a
    Google Search URL that carries ``entity_id`` in the ``kgmid`` query
    parameter and shows it as a Markdown link on the Streamlit page.

    Args:
        entity_id: The entity's machine ID (callers in this file pass the
            ``mid`` metadata value, e.g. one containing ``/m/`` or ``/g/``).

    Returns:
        None. Any exception raised while rendering is reported on the page
        instead of being propagated.
    """
    try:
        # Percent-encode the ID so characters such as ")", "&", "#" or spaces
        # can neither terminate the Markdown link early nor inject extra
        # query parameters. "/" is left as-is to keep the mid readable.
        encoded_id = urllib.parse.quote(str(entity_id), safe="/")
        google_search_link = f"https://www.google.com/search?kgmid={encoded_id}"
        # A plain Markdown link needs no raw HTML, so unsafe_allow_html is
        # intentionally not enabled.
        st.markdown(f"[Open in Google Search]({google_search_link})")
    except Exception as e:
        st.write(f"An error occurred: {e}")
| # Function to count entities with 'mid' that contains '/g/' or '/m/' in their metadata | |
def count_entities(entities):
    """Return the number of entities whose metadata holds a Google-style mid.

    An entity is counted when its ``metadata`` mapping has a ``'mid'`` key
    and that value contains either ``'/g/'`` or ``'/m/'``.
    """

    def _has_google_mid(meta):
        # Same predicate as the per-entity check: key present and the value
        # contains one of the two mid prefixes.
        return 'mid' in meta and any(
            marker in meta['mid'] for marker in ('/g/', '/m/')
        )

    return sum(1 for item in entities if _has_google_mid(item.metadata))
| # Function to serialize entity metadata | |
def serialize_entity_metadata(metadata):
    """Return a plain ``dict`` copy of ``metadata`` with each value passed through ``str``.

    Keys are kept unchanged; only the values are converted.
    """
    serialized = {}
    for key, value in metadata.items():
        serialized[key] = str(value)
    return serialized
| # Function to export all entities, including those without metadata | |
def export_entities(entities):
    """Offer every analyzed entity for download as CSV and as JSON.

    Each entity becomes one record with its name, type name, salience score,
    stringified metadata (an empty dict when the entity has none) and the
    text of each mention. When there are no entities, a notice is written
    to the page and no download buttons are rendered.
    """

    def _as_record(item):
        # Entities without metadata are still exported, with an empty dict.
        meta = serialize_entity_metadata(item.metadata) if item.metadata else {}
        return {
            "Name": item.name,
            "Type": language_v1.Entity.Type(item.type_).name,
            "Salience Score": item.salience,
            "Metadata": meta,
            "Mentions": [mention.text.content for mention in item.mentions],
        }

    records = [_as_record(item) for item in entities]

    if len(records) == 0:
        st.write("No entities found to export.")
        return

    # CSV download, built through a DataFrame.
    csv_payload = pd.DataFrame(records).to_csv(index=False)
    st.download_button(
        label="Export Entities as CSV",
        data=csv_payload,
        file_name="entities.csv",
        mime="text/csv",
    )

    # JSON download, serialized straight from the record list.
    json_payload = json.dumps(records, indent=2)
    st.download_button(
        label="Export Entities as JSON",
        data=json_payload,
        file_name="entities.json",
        mime="application/json",
    )
| # Sidebar content | |
# Short description followed by a numbered usage guide.
_sidebar = st.sidebar
_sidebar.title("About This Tool")
_sidebar.markdown("This tool leverages Google's NLP technology for entity analysis.")
_sidebar.markdown("### Step-by-Step Guide")
_sidebar.markdown("""
1. **Open the Tool**: Navigate to the URL where the tool is hosted.
2. **User Input**: Enter the text you want to analyze.
3. **Analyze**: Click the 'Analyze' button.
4. **View Results**: See the identified entities and their details.
5. **Export Entities**: Export the entities as JSON or CSV.
""")

# Header and intro
st.title("Google Cloud NLP Entity Analyzer")
for _intro_line in (
    "This tool analyzes text to identify entities such as people, locations, organizations, and events.",
    "Entity salience scores are always relative to the analyzed text.",
):
    st.write(_intro_line)
def sample_analyze_entities(text_content):
    """Analyze ``text_content`` with Google Cloud NLP and render the entities.

    Service-account credentials are read from the ``google_nlp`` Streamlit
    secret (parsed as JSON). The text is submitted as English plain text
    with UTF-8 encoding. The page then shows a headline with the entity
    counts, one section per entity (name, type, salience, metadata with a
    Google Search link when a ``/g/`` or ``/m/`` mid is present, mentions),
    and finally the CSV/JSON export buttons.

    Args:
        text_content: The text to analyze.
    """
    key_info = json.loads(st.secrets["google_nlp"])
    creds = service_account.Credentials.from_service_account_info(
        key_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    nlp_client = language_v1.LanguageServiceClient(credentials=creds)

    request = {
        "document": {
            "content": text_content,
            "type_": language_v1.Document.Type.PLAIN_TEXT,
            "language": "en",
        },
        "encoding_type": language_v1.EncodingType.UTF8,
    }
    response = nlp_client.analyze_entities(request=request)

    found = response.entities
    total = len(found)

    # Entities whose metadata carries a '/g/' or '/m/' mid.
    google_count = count_entities(found)
    if google_count == 0:
        headline = f"# We found {total} entities - but found no Google Entities"
    elif google_count == 1:
        headline = f"# We found {total} entities - and found 1 Google Entity"
    else:
        headline = f"# We found {total} entities - and found {google_count} Google Entities"
    st.markdown(headline)
    st.write("---")

    for position, entity in enumerate(found, start=1):
        st.write(f"Entity {position} of {total}")
        st.write(f"Name: {entity.name}")
        st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
        st.write(f"Salience Score: {entity.salience}")

        meta = entity.metadata
        if meta:
            st.write("Metadata:")
            st.write(meta)
            if 'mid' in meta and ('/g/' in meta['mid'] or '/m/' in meta['mid']):
                query_knowledge_graph(meta['mid'])

        mentions = entity.mentions
        if mentions:
            how_many = len(mentions)
            suffix = "" if how_many <= 1 else "s"
            st.write(f"Mentions: {how_many} mention{suffix}")
            st.write("Raw Array:")
            st.write(mentions)

        # Separator after each entity section.
        st.write("---")

    # Add the export functionality
    export_entities(found)
| # User input for text analysis | |
user_input = st.text_area("Enter text to analyze")
# Run the analysis only when the button was clicked and the box is non-empty.
analyze_clicked = st.button("Analyze")
if analyze_clicked and user_input:
    sample_analyze_entities(user_input)