Commit
·
3e482dc
1
Parent(s):
08f7b1b
Create recomendation
Browse files- recomendation +95 -0
recomendation
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from vectorization import spotify_data
|
| 5 |
+
import json
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from gradio.components import Textbox
|
| 8 |
+
from ast import literal_eval
|
| 9 |
+
# Load the song dataset once, at import time. The code below reads its
# 'song', 'artist' and 'song_vector' columns.
spotify_data_processed = pd.read_csv('dataset_modificado.csv')
|
| 10 |
+
|
| 11 |
+
def convert_string_to_array(str_vector):
    """Convert a serialized vector into a NumPy array of floats.

    The expected text form is a run of whitespace-separated numbers,
    optionally wrapped in square brackets and spread over several lines.

    Args:
        str_vector: Either a NumPy array, which is returned unchanged, or a
            string holding the serialized vector.

    Returns:
        The parsed one-dimensional array. When the input is neither an array
        nor a string (for instance the float NaN that stands in for an empty
        CSV cell, or None), or when a token cannot be parsed as a float, the
        problem is printed and a zero vector of length 100 is returned.
    """
    # If str_vector is already a NumPy array, hand it back untouched.
    if isinstance(str_vector, np.ndarray):
        return str_vector

    # Non-string values have no .replace(); without this guard a single
    # missing cell would raise AttributeError and abort a column-wide apply.
    if not isinstance(str_vector, str):
        print("Error:", "expected a string vector, got {}".format(type(str_vector).__name__))
        return np.zeros((100,))

    try:
        cleaned_str = (
            str_vector.replace('[', '')
            .replace(']', '')
            .replace('\n', ' ')
            .replace('\r', '')
            .strip()
        )
        return np.array([float(item) for item in cleaned_str.split()])
    except ValueError as e:
        print("Error:", e)
        return np.zeros((100,))
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Replace every entry of the 'song_vector' column with the value returned by
# convert_string_to_array.
spotify_data_processed['song_vector'] = spotify_data_processed['song_vector'].apply(convert_string_to_array)


# Apply the function to the first rows to see the results.
# NOTE(review): the column was converted on the statement above and
# convert_string_to_array returns NumPy arrays unchanged, so this second pass
# looks like a debugging aid only; the print runs on every import.
sample_data = spotify_data_processed['song_vector'].head()
converted_vectors = sample_data.apply(convert_string_to_array)
print(converted_vectors)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def recommend_song(song_name, artist_name, spotify_data_processed, top_n=4):
    """Return the songs whose vectors are closest to the requested song.

    Args:
        song_name: Exact value to match in the 'song' column.
        artist_name: Exact value to match in the 'artist' column.
        spotify_data_processed: DataFrame with 'song', 'artist' and
            'song_vector' columns.
        top_n: Maximum number of recommendations to return.

    Returns:
        A DataFrame with the 'song' and 'artist' columns of the most similar
        rows, best match first. If no row matches the requested song and
        artist, a one-row DataFrame with a single 'Error' column is returned.
    """
    # Locate the requested song; the mask is reused below to keep every row
    # of that same song out of the recommendations.
    is_query = (
        (spotify_data_processed['song'] == song_name)
        & (spotify_data_processed['artist'] == artist_name)
    ).to_numpy()

    # Check that the song exists in the dataset.
    if not is_query.any():
        return pd.DataFrame({"Error": ["Canción no encontrada en la base de datos."]})

    # Vector of the first matching row.
    song_vec = spotify_data_processed['song_vector'].iloc[int(np.flatnonzero(is_query)[0])]

    # Make sure song_vec is a NumPy array.
    if isinstance(song_vec, str):
        song_vec = convert_string_to_array(song_vec)

    all_song_vectors = np.array(spotify_data_processed['song_vector'].tolist())

    # Similarity of the requested song against every row.
    similarities = cosine_similarity([song_vec], all_song_vectors)[0]

    # Rank best-first, then drop the query rows explicitly. Skipping "the
    # first ranked entry" is not reliable: when several rows tie for the top
    # score the query song is not guaranteed to be ranked first, so it could
    # be recommended to itself while a genuine neighbour was discarded.
    ranked = np.argsort(similarities)[::-1]
    top_indices = ranked[~is_query[ranked]][:top_n]

    # Names and artists of the most similar songs.
    return spotify_data_processed.iloc[top_indices][['song', 'artist']]
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _format_recommendations(recommendations_df, count=4):
    """Render a recommendations frame as exactly ``count`` display strings.

    Returns None when the argument is not a non-empty DataFrame carrying the
    'song' and 'artist' columns, so the caller can decide on a fallback.
    """
    usable = (
        isinstance(recommendations_df, pd.DataFrame)
        and not recommendations_df.empty
        and {'song', 'artist'}.issubset(recommendations_df.columns)
    )
    if not usable:
        return None

    formatted = [
        "{} by {}".format(song, artist)
        for song, artist in recommendations_df[['song', 'artist']].values.tolist()
    ]
    # Pad with empty strings when there are fewer than `count` entries.
    formatted.extend([""] * (count - len(formatted)))
    return formatted[:count]


def recommend_song_interface(song_name, artist_name):
    """Return four display strings with recommendations for a song.

    Args:
        song_name: Song title forwarded to recommend_song.
        artist_name: Artist name forwarded to recommend_song.

    Returns:
        A list of exactly four strings of the form "<song> by <artist>",
        padded with empty strings. When the requested song yields no usable
        recommendations, a random row of the dataset is used as the seed
        instead; if that also yields nothing, four empty strings are returned.
    """
    blanks = [""] * 4

    formatted = _format_recommendations(
        recommend_song(song_name, artist_name, spotify_data_processed)
    )
    if formatted is not None:
        return formatted

    # Nothing to sample from: DataFrame.sample() raises on an empty frame.
    if spotify_data_processed.empty:
        return blanks

    # Fall back to a random row of the whole dataset as the seed song.
    random_song = spotify_data_processed.sample()
    random_song_name = random_song['song'].iloc[0]
    random_artist_name = random_song['artist'].iloc[0]

    # The seed can still fail to match (e.g. a row with missing fields), in
    # which case recommend_song returns its 'Error' frame; the helper checks
    # the columns instead of indexing them blindly.
    formatted = _format_recommendations(
        recommend_song(random_song_name, random_artist_name, spotify_data_processed)
    )
    return formatted if formatted is not None else blanks
|
| 92 |
+
|
| 93 |
+
# Example usage.
# Make sure spotify_data_processed is a valid pandas DataFrame with the 'song' and 'artist' columns.
# NOTE(review): this call runs at import time with placeholder arguments;
# presumably no row matches them, so it would exercise the random fallback.
recommendations = recommend_song_interface("song_name", "artist_name")
|