Spaces:
Running
Running
| """ | |
| Basic similarity search example. Used in the original txtai demo. | |
| """ | |
| import os | |
| import streamlit as st | |
| from txtai.embeddings import Embeddings | |
class Application:
    """
    Main application.

    Holds a txtai Embeddings model and renders a Streamlit page that shows
    the data row that best matches a user-supplied query.
    """

    def __init__(self):
        """
        Creates a new application.
        """

        # Create embeddings model, backed by sentence-transformers & transformers
        self.embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

    @staticmethod
    def _rows(text):
        """
        Splits text area content into candidate rows.

        Blank and whitespace-only lines are dropped so they are never scored
        or returned as the best match. Surrounding whitespace (including a
        trailing carriage return from CRLF input) is removed from each row.

        Args:
            text: raw text with one candidate row per line

        Returns:
            list of non-empty, stripped rows in their original order
        """

        stripped = (line.strip() for line in text.split("\n"))
        return [line for line in stripped if line]

    def run(self):
        """
        Runs a Streamlit application.
        """

        st.title("Similarity Search")
        st.markdown("This application runs a basic similarity search that identifies the best matching row for a query.")

        # Default rows, shown in an editable text area (one row per line)
        data = [
            "US tops 5 million confirmed virus cases",
            "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
            "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
            "The National Park Service warns against sacrificing slower friends in a bear attack",
            "Maine man wins $1M from $25 lottery ticket",
            "Make huge profits without work, earn up to $100,000 a day",
        ]

        data = st.text_area("Data", value="\n".join(data))
        query = st.text_input("Query")

        rows = self._rows(data)

        # A whitespace-only query carries no search intent, treat it as empty
        query = query.strip() if query else query
        if not query:
            return

        # Without candidate rows there is nothing to rank, and indexing the
        # similarity results below would fail
        if not rows:
            st.warning("Enter at least one row of data to search.")
            return

        # Get index of the row that best matches the query
        uid = self.embeddings.similarity(query, rows)[0][0]
        st.write(rows[uid])
def create():
    """
    Builds the Streamlit application.

    NOTE(review): nothing in this function caches the instance. Every call
    constructs a new Application, and with it a new Embeddings model. Confirm
    whether a Streamlit caching decorator was intended here.

    Returns:
        Application
    """

    application = Application()
    return application
if __name__ == "__main__":
    # Set before the application (and its embeddings model) is created below.
    # NOTE(review): presumably turns off tokenizers library parallelism - confirm.
    os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

    # Build the application, then render the page
    app = create()
    app.run()