o
    RTh!                     @   sv  d dl Zd dlmZmZ d dlmZ d dlm	Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZmZ d dlZd dlmZ d d	lmZ eed
ddeddd;dedee  fddZ!eed
ddedd	d;dedee  fddZ"eed
ddedd	d<dee deee   fddZ#eed
ddedd	d<dee deee   fddZ$dd Z%dd Z&	 d=d!ee  d"eee   dee fd#d$Z'dej(fd%d&Z)	'd>d"eee   dej(fd(d)Z*	'd>d"eee   dej(fd*d+Z+			,	-	.d?d/ej(d0eee  d1eee  fd2d3Z,			,	-	4	.d@d/ej(d0eee  d1eee  d5ed6ed7ed8e-fd9d:Z.dS )A    N)ListOptional)spatial)PCA)TSNE)average_precision_scoreprecision_recall_curve)retrystop_after_attemptwait_random_exponential)numpy)pandas      )minmax   )waitstoptext-similarity-davinci-001textreturnc                 K   s2   |  dd} tjjd| g|d|d d d S N
 inputenginedatar   	embedding )replaceopenai	Embeddingcreater   r   kwargsr    r    Z/home/air/segue/gemini/backup/venv/lib/python3.10/site-packages/openai/embeddings_utils.pyget_embedding   s   &r(   c                    s:   |  dd} tjjd| g|d|I d H d d d S r   )r!   r"   r#   acreater%   r    r    r'   aget_embedding   s
   &r*   text-similarity-babbage-001list_of_textc                 K   sJ   t | dks
J ddd | D } tjjd| |d|j}dd |D S )N   .The batch size should not be larger than 2048.c                 S      g | ]}| d dqS r   r   r!   .0r   r    r    r'   
<listcomp>.       z"get_embeddings.<locals>.<listcomp>r   c                 S      g | ]}|d  qS r   r    r3   dr    r    r'   r4   1       r    )lenr"   r#   r$   r   r,   r   r&   r   r    r    r'   get_embeddings'   s   r=   c                    sR   t | dksJ ddd | D } tjjd| |d|I d H j}dd |D S )Nr-   r.   c                 S   r/   r0   r1   r2   r    r    r'   r4   ;   r5   z#aget_embeddings.<locals>.<listcomp>r   c                 S   r6   r7   r    r8   r    r    r'   r4   >   r:   r    )r;   r"   r#   r)   r   r<   r    r    r'   aget_embeddings4   s
    r>   c                 C   s$   t | |t j| t j|  S )N)npdotlinalgnorm)abr    r    r'   cosine_similarityA   s   $rE   c                    sb  t  }tj fddt|D ddj}t }t }t }t|D ]-}	t|dd|	f | dd|	f \||	< ||	< }
t|dd|	f | dd|	f ||	< q#t| |  \}}}
t|| dd}t	t
|d|  tjd	d
 tjdddd}g }g }|D ]4}tdd}|| d| |  }tj||dk ||dk ddd\}tjd|d|d d fd q|| |d tj||ddd\}|| |d| t|D ]!}	tj||	 ||	 dd\}|| |d |	 ||	  qt }|jdd  td!d"g td!d#g td$ td% t| d& t|| dS )'a!  
    Precision-Recall plotting for a multiclass problem. It plots average precision-recall, per class precision recall and reference f1 contours.

    Code slightly modified, but heavily based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html
    c                    s   g | ]} | kqS r    r    )r3   i
class_listy_true_untransformedr    r'   r4   O   r5   z4plot_multiclass_precision_recall.<locals>.<listcomp>r   )axisNmicro)averagez5 - Average precision score over all classes: {0:0.2f})	   
   )figsizeg?g?   )numg{Gz?   r   gray)coloralphazf1={0:0.1f}g?-   g{Gz?)xyziso-f1 curvesgold)rT   lwz+average Precision-recall (auprc = {0:0.2f}))rY   z3Precision-recall for class `{0}` (auprc = {1:0.2f})g      ?)bottomg        g      ?g?Recall	Precisionz': Precision-Recall curve for each class)r;   pdconcatrangevaluesdictr   r   ravelprintstrformatpltfigurer?   linspaceplotannotateappendgcfsubplots_adjustxlimylimxlabelylabeltitlelegend)y_scorerI   rH   classifier_name	n_classesy_true	precisionrecallaverage_precisionrF   _precision_microrecall_microaverage_precision_microf_scoreslineslabelsf_scorexylfigr    rG   r'    plot_multiclass_precision_recallE   sl   0(
$"





r   cosinequery_embedding
embeddingsc                    s8   t jjt jjt jjt jjd fdd|D }|S )zHReturn the distances between a query embedding and a list of embeddings.)r   L1L2Linfc                    s   g | ]	}  |qS r    r    )r3   r   distance_metricdistance_metricsr   r    r'   r4      s    z-distances_from_embeddings.<locals>.<listcomp>)r   distancer   	cityblock	euclidean	chebyshev)r   r   r   	distancesr    r   r'   distances_from_embeddings   s   r   c                 C   s
   t | S )zGReturn a list of indices of nearest neighbors from a list of distances.)r?   argsort)r   r    r    r'   +indices_of_nearest_neighbors_from_distances   s   
r   rR   c                 C   s   t |d}t| }||S )z2Return the PCA components of a list of embeddings.)n_components)r   r?   arrayfit_transform)r   r   pcaarray_of_embeddingsr    r    r'   pca_components_from_embeddings   s   


r   c                 K   sN   d|  vr
d|d< d|  vrd|d< tdd|i|}t| }||S )z1Returns t-SNE components of a list of embeddings.initr   learning_rateautor   Nr    )keysr   r?   r   r   )r   r   r&   tsner   r    r    r'   tsne_components_from_embeddings   s   

r   Component 0Component 1   
componentsr   stringsc           
      K   s   dd | D }t || dddf || dddf d|r|n|d|r)dd |D n|i}tj|f|||r7dnd|r<dnd|rBdgndd	|jt|d
d}	|	S )z7Return an interactive 2D chart of embedding components.c                 S      g | ]}d qS  r    r3   r{   r    r    r'   r4          z)chart_from_components.<locals>.<listcomp>Nr   r   labelstringc                 S       g | ]}d  tj|ddqS z<br>   )widthjointrwrapr3   r   r    r    r'   r4           )r   r   rT   symbol
hover_datasizemarker)r]   	DataFramepxscatterupdate_tracesra   )
r   r   r   x_titley_title	mark_sizer&   
empty_listr   chartr    r    r'   chart_from_components   s2   




	r   Compontent 2r   r   z_titler   c                 K   s   dd | D }t || dddf || dddf || dddf d|r&|n|d|r1d	d |D n|i}	tj|	f||||r@dnd|rEdnd|rKdgndd
|jt|dd}
|
S )z7Return an interactive 3D chart of embedding components.c                 S   r   r   r    r   r    r    r'   r4      r   z,chart_from_components_3D.<locals>.<listcomp>Nr   r   rR   r   r   c                 S   r   r   r   r   r    r    r'   r4      r   )r   r   zrT   r   r   r   r   )r]   r   r   
scatter_3dr   ra   )r   r   r   r   r   r   r   r&   r   r   r   r    r    r'   chart_from_components_3D   s6   


	
r   )r   )r+   )r   )rR   )NNr   r   r   )NNr   r   r   r   )/textwrapr   typingr   r   matplotlib.pyplotpyplotrf   plotly.expressexpressr   scipyr   sklearn.decompositionr   sklearn.manifoldr   sklearn.metricsr   r   tenacityr	   r
   r   r"   openai.datalib.numpy_helperr   r?   openai.datalib.pandas_helperr   r]   rd   floatr(   r*   r=   r>   rE   r   r   ndarrayr   r   r   r   intr   r    r    r    r'   <module>   s    

I









#

