U
    0J<b                    @   s  d Z dZdZe ZdZddlmZmZm	Z	m
Z
mZmZmZ ddlmZmZ ddlmZ ejZddlZddlZddlZddlZddlZddlZddlZddlZddlZddlZddlZddlm Z  ddl!Z"ddl#Z#dd	l$m%Z% dd
l&m'Z( ddl)m*Z* ej+e, Z-ddlmZmZm	Z	m
Z
mZmZmZ ddlmZmZ dd Z.dddZ/dd Z0dddZ1G dd dZ2dd Z3dd Z4dd Z5dd  Z6dd!d"Z7d#d$ Z8dd&d'Z9d(d) Z:dd*d+Z;d,d- Z<d.d/ Z=G d0d1 d1Z>d2d3 Z?G d4d5 d5Z@d6d7 ZAdd9d:ZBd;d< ZCe ZdZejDEd=d>fd?d@ZFddAdBZGdCdD ZHedE dFfedG< edH dIfedJ< ddKdLZIdMdN ZJdOdP ZKddQdRZLdSdT ZMG dUdV dVejZNe-jdW dXfe-jdY< dZd[ ZOdd]d^ZPG d_d` d`ZQdadb ZRddddeZSdfdg ZTdhdi ZUG djdk dkZVddmdnZWG dodp dpZXdqdr ZYdsdt ZZdudv Z[ddwdxZ\G dydz dzejZ]e-jd{ d|fe-jd}< d~d Z^dd Z_dddZ`dd Zadd Zbdd ZcdddZdG dd dejZeG dd dejZfG dd dejZgG dd de-jeZhG dd de	jijjZkdd ZldddZmdd ZndddZodd ZpG dd dejZqG dd dejZrG dd de-jfZsG dd dejZtdd Zudd ZvG dd dejZwG dd dejZxG dd dejZyG dd dejZzG dd de-jeZ{dd Z|dddZ}dd Z~e-jd dfe-jd< dddƄZdddɄZddd˄ZG dd̈́ d̓Zddτ Zddф Ze-jfddӄZe-jfddՄZe-J e-jfddׄZe-jd dfe-jd< dd܄ Zddބ Zdd Zdd ZdddZdd ZdddZdddZdd Zdd Zdd ZdddZe-jd dfe-jd< dddZG dd de	jjZdd Ze-jd d fe-jd< dҐddZdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddggZddd	d
dddddddddddddddddgZdd Zdd Zd d! ZG d"d# d#e	jjZd$d% Ze-jd& d'fe-jd(< d)d* Zd+d, Zd-d. Zd/d0 Ze-jd1 d2fe-jd3< e-jd4 d5fe-jd6< d7d8 Zd9d: Zd;d< ZG d=d> d>Zd?d@ ZdAdB ZdCdD Ze-jdE dFfe-jdG< e-jdH dIfe-jdJ< e-jdK dLfe-jdM< e-jdN dOfe-jdP< G dQdR dRZdӐdSdTZG dUdV dVejZG dWdX dXejZG dYdZ dZejZG d[d\ d\ejZd]e-jd^< d_d` Zdadb Zdcdd Zdedf Zdgdh Zdidj ZG dkdl dle	jjZdmdn Zdodp Zdqe-jdr< dsdt ZdԐdvdwZdxdy Zdze-jd{< d|d} ZG d~d de	jjZdՐddZdd Zdd Zde-jd< dd ZÐd֐ddZĐdאddZŐdؐddZe-J dfddZG dd de	jijȃZG dd de	jijȃZʐdd Zːdd Z̐dِddZe-jd dfe-jd< G dd de	jjZΐdd Zϐdd Ze-jd dfe-jd< dd Zѐdd ZejZejZejZejZejZejZejZejZejZejZejZejZejZejjZejjZejZejZejZejZejZejZejZdd Zdd Zdd Zdd Zdd Zdd ZdS (  TFz,http://d2l-data.s3-accelerate.amazonaws.com/    )autogradcontextgluonimageinitnpnpx)nnrnn)
transformsN)defaultdict)display)pyplot)backend_inlinec                   C   s   t d dS )zWUse the svg format to display a plot in Jupyter.

    Defined in :numref:`sec_calculus`svgN)r   Zset_matplotlib_formats r   r   8/home/d2l-worker/workspace/d2l-en-release/./d2l/mxnet.pyuse_svg_display*   s    r   g      @      @c                 C   s   t   | tjjd< dS )zJSet the figure size for matplotlib.

    Defined in :numref:`sec_calculus`zfigure.figsizeN)r   d2lpltZrcParamsfigsizer   r   r   set_figsize0   s    r   c                 C   sV   |  | | | | | | | | | | | |rJ| | |   dS )zCSet the axes for matplotlib.

    Defined in :numref:`sec_calculus`N)
set_xlabel
set_ylabelZ
set_xscaleZ
set_yscaleZset_xlimZset_ylimlegendgrid)axesxlabelylabelxlimylimxscaleyscaler   r   r   r   set_axes7   s    






r&   linear-zm--zg-.zr:c              	   C   s   |dkrg }t |
 |r|ntj }dd }|| r<| g} |dkrZg gt|  |  } }n||rh|g}t| t|kr| t| } |  t| ||	D ].\}}}t|r|||| q||| qt|||||||| dS )z8Plot data points.

    Defined in :numref:`sec_calculus`Nc                 S   s.   t | dr| jdkp,t| to,t | d d S )Nndim   r   __len__)hasattrr*   
isinstancelistXr   r   r   has_one_axisR   s    zplot.<locals>.has_one_axis)	r   r   r   gcalenclazipplotr&   )r1   Yr    r!   r   r"   r#   r$   r%   fmtsr   r   r2   xyfmtr   r   r   r7   E   s&    r7   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )TimerzRecord multiple running times.c                 C   s   g | _ |   dS )z(Defined in :numref:`subsec_linear_model`N)timesstartselfr   r   r   __init__h   s    zTimer.__init__c                 C   s   t   | _dS )zStart the timer.N)timetikr@   r   r   r   r?   m   s    zTimer.startc                 C   s    | j t | j  | j d S )z-Stop the timer and record the time in a list.)r>   appendrC   rD   r@   r   r   r   stopq   s    z
Timer.stopc                 C   s   t | jt| j S )zReturn the average time.)sumr>   r4   r@   r   r   r   avgv   s    z	Timer.avgc                 C   s
   t | jS )zReturn the sum of time.)rH   r>   r@   r   r   r   rH   z   s    z	Timer.sumc                 C   s   t | j  S )zReturn the accumulated time.)r   arrayr>   cumsumtolistr@   r   r   r   rK   ~   s    zTimer.cumsumN)
__name__
__module____qualname____doc__rB   r?   rG   rI   rH   rK   r   r   r   r   r=   f   s   r=   c                 C   sJ   t dd|t| f}t || | }|t dd|j7 }|t |dfS )zIGenerate y = Xw + b + noise.

    Defined in :numref:`sec_linear_scratch`r   r+   {Gz?rE   r+   )r   normalr4   matmulshapereshape)wbnum_examplesr1   r;   r   r   r   synthetic_data   s    rZ   c                 C   s   t | || S )zIThe linear regression model.

    Defined in :numref:`sec_linear_scratch`)r   rT   )r1   rW   rX   r   r   r   linreg   s    r[   c                 C   s   | t || j d d S )z:Squared loss.

    Defined in :numref:`sec_linear_scratch`   )r   rV   rU   )y_hatr;   r   r   r   squared_loss   s    r^   c                 C   s(   | D ]}|||j  |  |dd< qdS )zSMinibatch stochastic gradient descent.

    Defined in :numref:`sec_linear_scratch`N)grad)paramslr
batch_sizeparamr   r   r   sgd   s    rd   c                 C   s   t jj|  }t jj|||dS )zMConstruct a Gluon data iterator.

    Defined in :numref:`sec_linear_concise`shuffle)r   dataArrayDataset
DataLoader)data_arraysrb   is_traindatasetr   r   r   
load_array   s    rm   c              
      s*   ddddddddd	d
g
  fdd| D S )z]Return text labels for the Fashion-MNIST dataset.

    Defined in :numref:`sec_fashion_mnist`zt-shirtZtrouserZpulloverZdressZcoatZsandalZshirtZsneakerZbagz
ankle bootc                    s   g | ]} t | qS r   )int.0iZtext_labelsr   r   
<listcomp>   s     z,get_fashion_mnist_labels.<locals>.<listcomp>r   )labelsr   rr   r   get_fashion_mnist_labels   s    
    ru         ?c                 C   s   || || f}t jj|||d\}}| }tt|| D ]N\}\}	}
|	t |
 |	j	 
d |	j 
d |r<|	||  q<|S )zBPlot a list of images.

    Defined in :numref:`sec_fashion_mnist`r   F)r   r   subplotsflatten	enumerater6   imshownumpyr   Z	get_xaxisZset_visibleZ	get_yaxis	set_title)imgsnum_rowsnum_colstitlesscaler   _r   rq   aximgr   r   r   show_images   s    r   c                   C   s   t jdrdS dS )z`Use 4 processes to read the data except for Windows.

    Defined in :numref:`sec_fashion_mnist`winr      )sysplatform
startswithr   r   r   r   get_dataloader_workers   s    r   c                 C   s   t jj}|j g}|r,|d|j| |j|}|jdd	|}|jdd	|}t jj
|| dt dt jj
|| dt dfS )zlDownload the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_fashion_mnist`r   T)trainFrf   num_workers)r   rg   Zvisionr   ZToTensorinsertZResizeZComposeZFashionMNISTZtransform_firstri   r   )rb   resizerl   transZmnist_trainZ
mnist_testr   r   r   load_data_fashion_mnist   s    r   c                 C   sT   t | jdkr*| jd dkr*tj| dd} t| |j|k}ttt||jS )zXCompute the number of correct predictions.

    Defined in :numref:`sec_softmax_scratch`r+   axis)r4   rU   r   argmaxastypedtypefloat
reduce_sum)r]   r;   cmpr   r   r   accuracy   s    r   c                 C   sB   t d}|D ]$\}}|t| ||t| q|d |d  S )z\Compute the accuracy for a model on a dataset.

    Defined in :numref:`sec_softmax_scratch`r\   r   r+   )Accumulatoraddr   r   size)net	data_itermetricr1   r;   r   r   r   evaluate_accuracy   s    r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )r   z)For accumulating sums over `n` variables.c                 C   s   dg| | _ dS )(Defined in :numref:`sec_softmax_scratch`        Nrg   )rA   nr   r   r   rB      s    zAccumulator.__init__c                 G   s   dd t | j|D | _d S )Nc                 S   s   g | ]\}}|t | qS r   r   )rp   arX   r   r   r   rs      s     z#Accumulator.add.<locals>.<listcomp>)r6   rg   rA   argsr   r   r   r      s    zAccumulator.addc                 C   s   dgt | j | _d S )Nr   )r4   rg   r@   r   r   r   reset   s    zAccumulator.resetc                 C   s
   | j | S Nr   rA   idxr   r   r   __getitem__   s    zAccumulator.__getitem__N)rM   rN   rO   rP   rB   r   r   r   r   r   r   r   r      s
   r   c           	   
   C   s   t d}t|tjr|j}|D ]b\}}t  | |}|||}W 5 Q R X |  ||jd  |	t
| t|||j q|d |d  |d |d  fS )zdTrain a model within one epoch (defined in Chapter 3).

    Defined in :numref:`sec_softmax_scratch`   r   r\   r+   )r   r.   r   Trainerstepr   recordbackwardrU   r   r   rH   r   r   )	r   
train_iterlossupdaterr   r1   r;   r]   lr   r   r   train_epoch_ch3   s    
 r   c                   @   s"   e Zd ZdZdddZd	d
 ZdS )AnimatorzFor plotting data in animation.Nr'   r(   r+   r   c                    sz    dkrg  t   t jj|	|
|d\__|	|
 dkrDjg_ fdd_dd|  ___	dS )r   Nr   r+   c                
      s   t jd  S Nr   )r   r&   r   r   r   rA   r    r"   r$   r!   r#   r%   r   r   <lambda>  s          z#Animator.__init__.<locals>.<lambda>)
r   r   r   rw   figr   config_axesr1   r8   r9   )rA   r    r!   r   r"   r#   r$   r%   r9   nrowsncolsr   r   r   r   rB     s    
zAnimator.__init__c                 C   s  t |ds|g}t|}t |ds,|g| }| jsFdd t|D | _| js`dd t|D | _tt||D ]<\}\}}|d k	rn|d k	rn| j| | | j| | qn| jd 	  t| j| j| j
D ]\}}}| jd ||| q|   t| j tjdd d S )Nr,   c                 S   s   g | ]}g qS r   r   rp   r   r   r   r   rs   !  s     z Animator.add.<locals>.<listcomp>c                 S   s   g | ]}g qS r   r   r   r   r   r   rs   #  s     r   T)wait)r-   r4   r1   ranger8   ry   r6   rF   r   r5   r9   r7   r   r   r   Zclear_output)rA   r:   r;   r   rq   r   rX   r<   r   r   r   r     s&    


zAnimator.add)NNNNNr'   r'   r(   r+   r+   r   )rM   rN   rO   rP   rB   r   r   r   r   r   r     s               
r   c                 C   s   t dd|gddgdddgd}t|D ]2}t| |||}t| |}	||d ||	f  q&|\}
}|
d	k srt|
|dkr|d
kst||	dkr|	d
kst|	dS )zSTrain a model (defined in Chapter 3).

    Defined in :numref:`sec_softmax_scratch`epochr+   g333333?g?
train loss	train acctest accr    r"   r#   r         ?gffffff?N)r   r   r   r   r   AssertionError)r   r   	test_iterr   
num_epochsr   animatorr   Ztrain_metricstest_accZ
train_loss	train_accr   r   r   	train_ch3/  s    
r      c                 C   s|   |D ]\}} qqt |}t t j| |dd}dd t||D }t jt |d| |ddfd||d| d dS )	zTPredict labels (defined in Chapter 3).

    Defined in :numref:`sec_softmax_scratch`r+   r   c                 S   s   g | ]\}}|d  | qS )
r   )rp   truepredr   r   r   rs   F  s     zpredict_ch3.<locals>.<listcomp>r      )r   N)r   ru   r   r6   r   rV   )r   r   r   r1   r;   Ztruespredsr   r   r   r   predict_ch3>  s    
   
r   c                 C   sN   t d}|D ].\}}|| ||}|t |t | q|d |d  S )z`Evaluate the loss of a model on the given dataset.

    Defined in :numref:`sec_model_selection`r\   r   r+   )r   r   r   r   r   )r   r   r   r   r1   r;   r   r   r   r   evaluate_lossJ  s
    
r   z..rg   c           	   	   C   s   | t kst|  dt  dt |  \}}tj|dd tj||dd }tj|rt	 }t
|d"}|d}|s~q|| qnW 5 Q R X | |kr|S td	| d
| d tj|ddd}t
|d}||j W 5 Q R X |S )zmDownload a file inserted into DATA_HUB, return the local filename.

    Defined in :numref:`sec_kaggle_house`z does not exist in .Texist_ok/rE   rbi   zDownloading z from z...)streamverifywb)DATA_HUBr   osmakedirspathjoinsplitexistshashlibsha1openreadupdate	hexdigestprintrequestsgetwritecontent)	nameZ	cache_dirurlZ	sha1_hashfnamer   frg   rr   r   r   downloadW  s$    
r   c                 C   s|   t | }tj|}tj|\}}|dkr:t|d}n"|dkrPt|d}nds\t	d|
| |rxtj||S |S )zODownload and extract a zip/tar file.

    Defined in :numref:`sec_kaggle_house`z.zipr   )z.tarz.gzFz$Only zip/tar files can be extracted.)r   r   r   dirnamesplitextzipfileZipFiletarfiler   r   
extractallr   )r   Zfolderr   base_dirdata_dirextfpr   r   r   download_extracto  s    
r  c                  C   s   t D ]} t|  qdS )zNDownload all files in the DATA_HUB.

    Defined in :numref:`sec_kaggle_house`N)r   r   )r   r   r   r   download_all  s    r  zkaggle_house_pred_train.csvZ(585e9cc93e70b39160e7921475f9bcd7d31219ceZkaggle_house_trainzkaggle_house_pred_test.csvZ(fa19780a7b011d9b009e8bff8e99922a8ee2eb90Zkaggle_house_testc                 C   s"   t  | d krt | S t  S )zVReturn gpu(i) if exists, otherwise return cpu().

    Defined in :numref:`sec_use_gpu`r+   )r   num_gpusgpucpu)rq   r   r   r   try_gpu  s    r	  c                  C   s(   dd t t D } | r| S t gS )z]Return all available GPUs, or [cpu()] if no GPU exists.

    Defined in :numref:`sec_use_gpu`c                 S   s   g | ]}t |qS r   )r   r  ro   r   r   r   rs     s     z try_all_gpus.<locals>.<listcomp>)r   r   r  r  devicesr   r   r   try_all_gpus  s    r  c              	   C   s   |j \}}t| j d | d | j d | d f}t|j d D ]F}t|j d D ]2}t| ||| ||| f | |||f< qTqB|S )zFCompute 2D cross-correlation.

    Defined in :numref:`sec_conv_layer`r   r+   )rU   r   zerosr   r   )r1   KhrW   r8   rq   jr   r   r   corr2d  s    
*2r  c                 C   s|   |s t |   d  d }td}|D ]<\}}|||| }}|t| ||t	| q.|d |d  S )z^Compute the accuracy for a model on a dataset using a GPU.

    Defined in :numref:`sec_lenet`r   r\   r+   )
r/   collect_paramsvalueslist_ctxr   r   	as_in_ctxr   r   r   )r   r   devicer   r1   r;   r   r   r   evaluate_accuracy_gpu  s    
 r  c                 C   s  | j d|t d tj }t|  dd|i}tj	dd|gddd	gd
}t
 t| }	}
t|D ](}td}t|D ]\}\}}|	  |||| }}t  | |}|||}W 5 Q R X |  ||jd  || t|||jd  |	  |d |d  }|d |d  }|d |
d  dksR||
d kr|||d |
  ||df qt| |}||d dd|f qhtd|dd|dd|d t|d | |	  ddt|  dS )zTTrain a model with a GPU (defined in Chapter 6).

    Defined in :numref:`sec_lenet`T)force_reinitctxr   rd   learning_rater   r+   r   r   r   )r    r"   r   r   r   r\      Nloss .3f, train acc , test acc .1f examples/sec on )
initializer   Xavierr   r   SoftmaxCrossEntropyLossr   r  r   r   r=   r4   r   r   ry   r?   r  r   r   r   r   rU   r   rH   r   rG   r  r   str)r   r   r   r   ra   r  r   trainerr   timernum_batchesr   r   rq   r1   r;   r]   r   train_lr   r   r   r   r   	train_ch6  s@    

 

 "
 r*  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	ResidualzThe Residual block of ResNet.Fr+   c                    sj   t  jf | tj|dd|d| _tj|ddd| _|rLtj|d|d| _nd | _t | _t | _	d S )Nr   r+   )kernel_sizepaddingstrides)r,  r-  )r,  r.  )
superrB   r	   Conv2Dconv1conv2conv3	BatchNormbn1bn2)rA   num_channelsuse_1x1convr.  kwargs	__class__r   r   rB     s    


zResidual.__init__c                 C   sD   t | | |}| | |}| jr6| |}t || S r   )r   relur5  r1  r6  r2  r3  rA   r1   r8   r   r   r   forward  s
    
zResidual.forward)Fr+   rM   rN   rO   rP   rB   r>  __classcell__r   r   r:  r   r+    s   r+  ztimemachine.txtZ(090b5e7e70c295757f55df93cb0a180b9691891atime_machinec               	   C   s2   t tdd} |  }W 5 Q R X dd |D S )ziLoad the time machine dataset into a list of text lines.

    Defined in :numref:`sec_text_preprocessing`rA  r   c                 S   s"   g | ]}t d d|  qS )z
[^A-Za-z]+ )resubstriplowerrp   liner   r   r   rs     s     z%read_time_machine.<locals>.<listcomp>)r   r   r   	readlines)r   linesr   r   r   read_time_machine  s    rK  wordc                 C   s<   |dkrdd | D S |dkr,dd | D S t d|  dS )z`Split text lines into word or character tokens.

    Defined in :numref:`sec_text_preprocessing`rL  c                 S   s   g | ]}|  qS r   r   rG  r   r   r   rs     s     ztokenize.<locals>.<listcomp>charc                 S   s   g | ]}t |qS r   )r/   rG  r   r   r   rs      s     zERROR: unknown token type: N)r   )rJ  tokenr   r   r   tokenize  s
    rP  c                   @   sJ   e Zd ZdZdddZdd Zdd	 Zd
d Zedd Z	edd Z
dS )VocabzVocabulary for text.Nr   c                 C   s   |dkrg }|dkrg }t |}t| dd dd| _dg| | _dd t| jD | _| jD ]>\}}||k rt q|| jkr`| j| t| jd	 | j|< q`dS )
z+Defined in :numref:`sec_text_preprocessing`Nc                 S   s   | d S Nr+   r   r:   r   r   r   r         z Vocab.__init__.<locals>.<lambda>Tkeyreverse<unk>c                 S   s   i | ]\}}||qS r   r   rp   r   rO  r   r   r   
<dictcomp>  s    z"Vocab.__init__.<locals>.<dictcomp>r+   )	count_corpussorteditems_token_freqsidx_to_tokenry   token_to_idxrF   r4   )rA   tokensmin_freqreserved_tokenscounterrO  freqr   r   r   rB     s$    
zVocab.__init__c                 C   s
   t | jS r   r4   r_  r@   r   r   r   r,     s    zVocab.__len__c                    s0   t |ttfs j| jS  fdd|D S )Nc                    s   g | ]}  |qS r   )r   rp   rO  r@   r   r   rs   !  s     z%Vocab.__getitem__.<locals>.<listcomp>)r.   r/   tupler`  r   unk)rA   ra  r   r@   r   r     s    zVocab.__getitem__c                    s*   t |ttfs j| S  fdd|D S )Nc                    s   g | ]} j | qS r   r_  )rp   indexr@   r   r   rs   &  s     z#Vocab.to_tokens.<locals>.<listcomp>)r.   r/   rh  r_  )rA   indicesr   r@   r   	to_tokens#  s    
zVocab.to_tokensc                 C   s   dS r   r   r@   r   r   r   ri  (  s    z	Vocab.unkc                 C   s   | j S r   )r^  r@   r   r   r   token_freqs,  s    zVocab.token_freqs)Nr   N)rM   rN   rO   rP   rB   r,   r   rm  propertyri  rn  r   r   r   r   rQ    s   

rQ  c                 C   s2   t | dkst| d tr(dd | D } t| S )zICount token frequencies.

    Defined in :numref:`sec_text_preprocessing`r   c                 S   s   g | ]}|D ]}|qqS r   r   rp   rH  rO  r   r   r   rs   7  s       z count_corpus.<locals>.<listcomp>)r4   r.   r/   collectionsCounter)ra  r   r   r   r[  0  s    r[  rE   c                    sF   t  }t|d}t|  fdd|D }| dkr>|d|  }| fS )zuReturn token indices and the vocabulary of the time machine dataset.

    Defined in :numref:`sec_text_preprocessing`rN  c                    s   g | ]}|D ]} | qqS r   r   rp  vocabr   r   rs   C  s       z,load_corpus_time_machine.<locals>.<listcomp>r   N)rK  rP  rQ  )
max_tokensrJ  ra  corpusr   rs  r   load_corpus_time_machine:  s    
rw  c           
      #   s    t dd d  t d  }ttd| }t |  fdd|| }td|| |D ]N}||||  }fdd|D }fdd|D }	t|t|	fV  qldS )	zhGenerate a minibatch of subsequences using random sampling.

    Defined in :numref:`sec_language_model`r   r+   Nc                    s    | |   S r   r   )pos)rv  	num_stepsr   r   rg   X  s    z"seq_data_iter_random.<locals>.datac                    s   g | ]} |qS r   r   rp   r  r   r   r   rs   a  s     z(seq_data_iter_random.<locals>.<listcomp>c                    s   g | ]} |d  qS r+   r   rz  r   r   r   rs   b  s     )randomrandintr4   r/   r   rf   r   tensor)
rv  rb   ry  Znum_subseqsZinitial_indicesr(  rq   Zinitial_indices_per_batchr1   r8   r   )rv  rg   ry  r   seq_data_iter_randomH  s    
r  c                 c   s   t d|}t| | d | | }t| |||  }t| |d |d |  }||d||d }}|jd | }td|| |D ]>}|dd||| f }	|dd||| f }
|	|
fV  qdS )zpGenerate a minibatch of subsequences using sequential partitioning.

    Defined in :numref:`sec_language_model`r   r+   rE   N)r|  r}  r4   r   r~  rV   rU   r   )rv  rb   ry  offset
num_tokensZXsZYsr(  rq   r1   r8   r   r   r   seq_data_iter_sequentiale  s    r  c                   @   s    e Zd ZdZdd Zdd ZdS )SeqDataLoaderz"An iterator to load sequence data.c                 C   s:   |rt j| _nt j| _t |\| _| _|| | _| _dS )z'Defined in :numref:`sec_language_model`N)	r   r  data_iter_fnr  rw  rv  rt  rb   ry  )rA   rb   ry  use_random_iterru  r   r   r   rB   w  s
    
zSeqDataLoader.__init__c                 C   s   |  | j| j| jS r   )r  rv  rb   ry  r@   r   r   r   __iter__  s    zSeqDataLoader.__iter__N)rM   rN   rO   rP   rB   r  r   r   r   r   r  u  s   	r  '  c                 C   s   t | |||}||jfS )zpReturn the iterator and the vocabulary of the time machine dataset.

    Defined in :numref:`sec_language_model`)r  rt  )rb   ry  r  ru  r   r   r   r   load_data_time_machine  s       r  c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	RNNModelScratchz&An RNN Model implemented from scratch.c                 C   s.   || | _ | _||||| _|| | _| _dS )z$Defined in :numref:`sec_rnn_scratch`N)
vocab_sizenum_hiddensr`   
init_state
forward_fn)rA   r  r  r  
get_paramsr  r  r   r   r   rB     s    zRNNModelScratch.__init__c                 C   s    t |j| j}| ||| jS r   )r   one_hotTr  r  r`   rA   r1   stater   r   r   __call__  s    zRNNModelScratch.__call__c                 C   s   |  || j|S r   )r  r  )rA   rb   r  r   r   r   begin_state  s    zRNNModelScratch.begin_stateN)rM   rN   rO   rP   rB   r  r  r   r   r   r   r    s   r  c           	         s   |j d d}| d  g fdd}| dd D ]"}|| |\}}|  q6t|D ]0}|| |\}}t|jddd qbdfd	d
D S )zYGenerate new characters following the `prefix`.

    Defined in :numref:`sec_rnn_scratch`r+   rb   r  r   c                      s   t t jd g ddS )NrE   r  )r+   r+   )r   rV   r~  r   )r  outputsr   r   r     s    zpredict_ch8.<locals>.<lambda>Nr    c                    s   g | ]} j | qS r   rj  ro   rs  r   r   rs     s     zpredict_ch8.<locals>.<listcomp>)r  rF   r   rn   r   rV   r   )	prefixZ	num_predsr   rt  r  r  Z	get_inputr;   r   r   )r  r  rt  r   predict_ch8  s    r  c                 C   sr   t | tjr$dd |   D }n| j}ttdd |D }||krn|D ]}|j	dd  || 9  < qNdS )z<Clip the gradient.

    Defined in :numref:`sec_rnn_scratch`c                 S   s   g | ]}|  qS r   r   rp   pr   r   r   rs     s     z!grad_clipping.<locals>.<listcomp>c                 s   s   | ]}|j d   V  qdS )r\   N)r_   rH   r  r   r   r   	<genexpr>  s     z grad_clipping.<locals>.<genexpr>N)
r.   r   Blockr  r  r`   mathsqrtrH   r_   )r   thetar`   Znormrc   r   r   r   grad_clipping  s    r  c              
   C   s
  dt   }}t d}|D ]\}	}
|dks0|rF| j|	jd |d}n|D ]}|  qJ|
jd}|	||| }	}t	
 " | |	|\}}||| }W 5 Q R X |  t| d |dd ||t | t | qt|d |d  |d |  fS )z`Train a model within one epoch (defined in Chapter 8).

    Defined in :numref:`sec_rnn_scratch`Nr\   r   r  rE   r+   rb   )r   r=   r   r  rU   detachr  rV   r  r   r   meanr   r  r   r   r  exprG   )r   r   r   r   r  r  r  r'  r   r1   r8   sr;   r]   r   r   r   r   train_epoch_ch8  s"    




r  c                    s
  t j }tjdddgd|gd}tt jrhj dt	dd t 
 d	d
ifdd}	nfdd}	 fdd}
t|D ]<}t|||	 |\}}|d d dkr||d |g qtd|dd|ddt   t|
d t|
d dS )zOTrain a model (defined in Chapter 8).

    Defined in :numref:`sec_rnn_scratch`r   Z
perplexityr   
   )r    r!   r   r"   TrQ   )r  r  r   rd   r  c                    s
     | S r   )r   r  )r&  r   r   r     rT  ztrain_ch8.<locals>.<lambda>c                    s   t j | S r   )r   rd   r`   r  )ra   r   r   r   r     rT  c                    s   t | d S )N2   )r  )r  )r  r   rt  r   r   r     rT  r+   r   zperplexity r   ,  tokens/sec on ztime travellerZ	travellerN)r   r   r$  r   r   r.   r  r"  r   Normalr   r  r   r  r   r   r%  )r   r   rt  ra   r   r  r  r   r   r   Zpredictr   ZpplZspeedr   )r  ra   r   r&  rt  r   	train_ch8  s<    
 
      "r  c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )RNNModelz8The RNN model.

    Defined in :numref:`sec_rnn-concise`c                    s.   t t| jf | || _|| _t|| _d S r   )r/  r  rB   r
   r  r	   Densedense)rA   Z	rnn_layerr  r9  r:  r   r   rB     s    zRNNModel.__init__c                 C   s@   t |j| j}| ||\}}| |d|jd }||fS )NrE   )r   r  r  r  r
   r  rV   rU   )rA   inputsr  r1   r8   outputr   r   r   r>    s    zRNNModel.forwardc                 O   s   | j j||S r   )r
   r  )rA   r   r9  r   r   r   r    s    zRNNModel.begin_state)rM   rN   rO   rP   rB   r>  r  r@  r   r   r:  r   r    s   	r  zfra-eng.zipZ(94646ad1522d915e7b0f9296181140edcf86a4f5fra-engc               
   C   s>   t d} ttj| dd}| W  5 Q R  S Q R X dS )zRLoad the English-French dataset.

    Defined in :numref:`sec_machine_translation`r  zfra.txtr   N)r   r  r   r   r   r   r   )r  r   r   r   r   read_data_nmt  s    
r  c                    sB   dd   dd dd  fddtD }d|S )	zXPreprocess the English-French dataset.

    Defined in :numref:`sec_machine_translation`c                 S   s   | t dko|dkS )Nz,.!?rB  set)rN  Z	prev_charr   r   r   no_space  s    z preprocess_nmt.<locals>.no_spaceu    rB      c                    s6   g | ].\}}|d kr. ||d  r.d| n|qS )r   r+   rB  r   )rp   rq   rN  r  textr   r   rs     s   z"preprocess_nmt.<locals>.<listcomp>r  )replacerF  ry   r   )r  outr   r  r   preprocess_nmt  s    r  c                 C   sx   g g  }}t | dD ]V\}}|r0||kr0 qp|d}t|dkr||d d ||d d q||fS )zVTokenize the English-French dataset.

    Defined in :numref:`sec_machine_translation`r   	r\   r   rB  r+   )ry   r   r4   rF   )r  rY   sourcetargetrq   rH  partsr   r   r   tokenize_nmt  s    

r  c                 C   st   t   t jdd |D dd |D g\}}}t j| t j| |d jD ]}|d qTt j|  dS )z[Plot the histogram for list length pairs.

    Defined in :numref:`sec_machine_translation`c                 S   s   g | ]}t |qS r   r4   rp   r   r   r   r   rs   2  s     z+show_list_len_pair_hist.<locals>.<listcomp>r+   r   N)	r   r   r   histr    r!   patchesZ	set_hatchr   )r   r    r!   ZxlistZylistr   r  patchr   r   r   show_list_len_pair_hist,  s    
r  c                 C   s.   t | |kr| d| S | |g|t |    S )zLTruncate or pad sequences.

    Defined in :numref:`sec_machine_translation`Nr  )rH  ry  Zpadding_tokenr   r   r   truncate_pad9  s    r  c                    sd   fdd| D } fdd| D } t  fdd| D }t t |d kt jd}||fS )zrTransform text sequences of machine translation into minibatches.

    Defined in :numref:`subsec_mt_data_loading`c                    s   g | ]} | qS r   r   r  rs  r   r   rs   E  s     z#build_array_nmt.<locals>.<listcomp>c                    s   g | ]}| d  g qS )<eos>r   r  rs  r   r   rs   F  s     c                    s   g | ]}t | d  qS <pad>)r  r  ry  rt  r   r   rs   G  s     r  r+   )r   r~  r   r   int32)rJ  rt  ry  rJ   	valid_lenr   r  r   build_array_nmtA  s     r  X  c                 C   s   t t }t||\}}tj|ddddgd}tj|ddddgd}t|||\}}	t|||\}
}||	|
|f}t|| }|||fS )zuReturn the iterator and the vocabularies of the translation dataset.

    Defined in :numref:`subsec_mt_data_loading`r\   r  <bos>r  rb  rc  )r  r  r  r   rQ  r  rm   )rb   ry  rY   r  r  r  	src_vocab	tgt_vocabZ	src_arrayZsrc_valid_lenZ	tgt_arrayZtgt_valid_lenrj   r   r   r   r   load_data_nmtM  s    
r  c                       s(   e Zd ZdZ fddZdd Z  ZS )Encoderz@The base encoder interface for the encoder-decoder architecture.c                    s   t t| jf | d S r   )r/  r  rB   rA   r9  r:  r   r   rB   _  s    zEncoder.__init__c                 G   s   t d S r   NotImplementedError)rA   r1   r   r   r   r   r>  b  s    zEncoder.forwardr?  r   r   r:  r   r  ]  s   r  c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )DecoderznThe base decoder interface for the encoder-decoder architecture.

    Defined in :numref:`sec_encoder-decoder`c                    s   t t| jf | d S r   )r/  r  rB   r  r:  r   r   rB   i  s    zDecoder.__init__c                 G   s   t d S r   r  )rA   enc_outputsr   r   r   r   r  l  s    zDecoder.init_statec                 C   s   t d S r   r  r  r   r   r   r>  o  s    zDecoder.forward)rM   rN   rO   rP   rB   r  r>  r@  r   r   r:  r   r  e  s   r  c                       s(   e Zd ZdZ fddZdd Z  ZS )EncoderDecoderzbThe base class for the encoder-decoder architecture.

    Defined in :numref:`sec_encoder-decoder`c                    s"   t t| jf | || _|| _d S r   )r/  r  rB   encoderdecoder)rA   r  r  r9  r:  r   r   rB   v  s    zEncoderDecoder.__init__c                 G   s.   | j |f| }| jj|f| }| ||S r   )r  r  r  )rA   enc_Xdec_Xr   r  	dec_stater   r   r   r>  {  s    zEncoderDecoder.forwardr?  r   r   r:  r   r  r  s   r  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )Seq2SeqEncoderzXThe RNN encoder for sequence to sequence learning.

    Defined in :numref:`sec_seq2seq`r   c                    s6   t t| jf | t||| _tj|||d| _d S )N)dropout)r/  r  rB   r	   	Embedding	embeddingr
   ZGRU)rA   r  Z
embed_sizer  
num_layersr  r9  r:  r   r   rB     s    zSeq2SeqEncoder.__init__c                 G   sF   |  |}|dd}| jj|jd |jd}| ||\}}||fS )Nr   r+   r  )r  swapaxesr
   r  rU   r  )rA   r1   r   r  r  r   r   r   r>    s
    
zSeq2SeqEncoder.forward)r   r?  r   r   r:  r   r    s    r  c                       s    e Zd ZdZ fddZ  ZS )MaskedSoftmaxCELosszXThe softmax cross-entropy loss with masks.

    Defined in :numref:`sec_seq2seq_decoder`c                    s:   t jt |dd}tj||ddd}tt| |||S )NrE   r   Tr+   )r   expand_dims	ones_liker   sequence_maskr/  r  r>  )rA   r   labelr  weightsr:  r   r   r>    s    zMaskedSoftmaxCELoss.forward)rM   rN   rO   rP   r>  r@  r   r   r:  r   r    s   r  c                    s  | j t d d t|  dd|i}t }tjddd|gd}t	|D ]}	t
 }
td	}|D ]} fd
d|D \}}}}tj|d g|jd   ddd}t||ddddf gd}t " | |||\}}||||}W 5 Q R X |  t| d | }|| || | qh|	d d dkrL||	d |d |d  f qLtd|d |d  dd|d |
  ddt   dS )zUTrain a model for sequence to sequence.

    Defined in :numref:`sec_seq2seq_decoder`T)r  r  Zadamr  r   r   r  )r    r!   r"   r\   c                    s   g | ]}|  qS r   )r  rp   r:   r  r   r   rs     s    z!train_seq2seq.<locals>.<listcomp>r  r   r  rE   r+   Nr  r  r  r   r  )r"  r   r#  r   r   r  r  r   r   r   r=   r   r   rJ   rU   rV   concatr   r   r   r  rH   r   r   r   rG   r%  )r   r   ra   r   r  r  r&  r   r   r   r'  r   batchr1   ZX_valid_lenr8   ZY_valid_lenZbosZ	dec_inputZY_hatr   r   r  r   r  r   train_seq2seq  sD    

   

 r  c                 C   s   ||  d |d g }tjt|g|d}t|||d }tjtj||ddd}	| |	|}
| j	
|
|}tjtj|d g|ddd}g g  }}t|D ]f}| 	||\}}|jdd}|jddd	 }|r|| j	j ||d kr  q|| qd|||fS )
zPPredict for sequence to sequence.

    Defined in :numref:`sec_seq2seq_training`rB  r  r  r  r   r   r  r\   r  )rF  r   r   rJ   r4   r   r  r  r  r  r  r   r   squeezer   itemrF   attention_weightsr   rm  )r   Zsrc_sentencer  r  ry  r  Zsave_attention_weightsZ
src_tokensZenc_valid_lenr  r  r  r  Z
output_seqZattention_weight_seqr   r8   r   r   r   r   predict_seq2seq  s(    
r  c              	   C   s"  |  d| d }}t|t| }}ttdd||  }td|d D ]}dtt }	}
t|| d D ]&}|
d	||||    d7  < qrt|| d D ]L}|
d	||||   dkr|	d7 }	|
d	||||    d8  < q|t
|	|| d  t
d|9 }qN|S )z@Compute the BLEU.

    Defined in :numref:`sec_seq2seq_training`rB  r   r+   r   )r   r4   r  r  minr   rq  r   rn   r   pow)Zpred_seqZ	label_seqkZpred_tokensZlabel_tokensZlen_predZ	len_labelscorer   Znum_matchesZ
label_subsrq   r   r   r   bleu  s    $$&r  r   r   Redsc                 C   s   t   | jd | jd  }}t jj|||dddd\}}	tt|	| D ]v\}
\}}tt||D ]Z\}\}}|jt ||d}|
|d kr|	| |dkr|
| |rb|||  qbqH|j||	dd d	S )
zGShow heatmaps of matrices.

    Defined in :numref:`sec_attention-cues`r   r+   TF)r   ZsharexZshareyr  )cmapg333333?)r   shrinkN)r   r   rU   r   rw   ry   r6   rz   r{   r   r   r|   Zcolorbar)Zmatricesr    r!   r   r   r   r~   r   r   r   rq   Zrow_axesZrow_matricesr  r   matrixZpcmr   r   r   show_heatmaps  s"      


r  c                 C   sp   |dkrt | S | j}|jdkr2||d }n
|d}t j| d|d |dddd} t | |S dS )zyPerform softmax operation by masking elements on the last axis.

    Defined in :numref:`sec_attention-scoring-functions`Nr+   rE   Tg    .)valuer   )r   ZsoftmaxrU   r*   repeatrV   r  )r1   
valid_lensrU   r   r   r   masked_softmax	  s    


 r  c                       s(   e Zd ZdZ fddZdd Z  ZS )AdditiveAttentionzMAdditive attention.

    Defined in :numref:`sec_attention-scoring-functions`c                    sX   t t| jf | tj|ddd| _tj|ddd| _tjdddd| _t|| _	d S )NFuse_biasrx   r+   )
r/  r  rB   r	   r  W_kW_qw_vDropoutr  )rA   r  r  r9  r:  r   r   rB      s
    zAdditiveAttention.__init__c                 C   sp   |  || | }}tj|ddtj|dd }t|}tj| |dd}t||| _t	
| | j|S )Nr\   r   r+   rE   )r  r  r   r  tanhr  r  r  r  r   	batch_dotr  )rA   querieskeysr  r  featuresscoresr   r   r   r>  )  s     
zAdditiveAttention.forwardr?  r   r   r:  r   r    s   	r  c                       s*   e Zd ZdZ fddZdddZ  ZS )DotProductAttentionzQScaled dot product attention.

    Defined in :numref:`subsec_additive-attention`c                    s"   t t| jf | t|| _d S r   )r/  r  rB   r	   r  r  rA   r  r9  r:  r   r   rB   ?  s    zDotProductAttention.__init__Nc                 C   sD   |j d }tj||ddt| }t||| _t| | j|S )NrE   T)Ztranspose_b)rU   r   r  r  r  r  r  r  )rA   r  r  r  r  dr  r   r   r   r>  H  s    
zDotProductAttention.forward)Nr?  r   r   r:  r   r  ;  s   	r  c                       s,   e Zd ZdZ fddZedd Z  ZS )AttentionDecoderz[The base attention-based decoder interface.

    Defined in :numref:`sec_seq2seq_attention`c                    s   t t| jf | d S r   )r/  r  rB   r  r:  r   r   rB   S  s    zAttentionDecoder.__init__c                 C   s   t d S r   r  r@   r   r   r   r  V  s    z"AttentionDecoder.attention_weights)rM   rN   rO   rP   rB   ro  r  r@  r   r   r:  r   r  O  s   r  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )MultiHeadAttentionzGMulti-head attention.

    Defined in :numref:`sec_multihead-attention`Fc                    sp   t t| jf | || _t|| _tj||dd| _	tj||dd| _
tj||dd| _tj||dd| _d S )NFr	  )r/  r  rB   	num_headsr   r  	attentionr	   r  r  r  W_vW_o)rA   r  r  r  r
  r9  r:  r   r   rB   ^  s    zMultiHeadAttention.__init__c                 C   st   t | || j}t | || j}t | || j}|d k	rN|j| jdd}| ||||}t|| j}| |S )Nr   r   )	transpose_qkvr  r  r  r  r  r  transpose_outputr  )rA   r  r  r  r  r  Zoutput_concatr   r   r   r>  h  s    zMultiHeadAttention.forward)Fr?  r   r   r:  r   r  Z  s   
r  c                 C   sF   |  | jd | jd |d} | dddd} |  d| jd | jd S )zuTransposition for parallel computation of multiple attention heads.

    Defined in :numref:`sec_multihead-attention`r   r+   rE   r\   r   rV   rU   	transposer1   r  r   r   r   r    s    	r  c                 C   sF   |  d|| jd | jd } | dddd} |  | jd | jd dS )z[Reverse the operation of `transpose_qkv`.

    Defined in :numref:`sec_multihead-attention`rE   r+   r\   r   r   r   r"  r   r   r   r    s    r  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )PositionalEncodingzYPositional encoding.

    Defined in :numref:`sec_self-attention-and-positional-encoding`  c              	      s   t t|   t|| _td||f| _t	|
ddtdt	d|d|  }t|| jd d d d dd df< t|| jd d d d dd df< d S )Nr+   rE   r  r   r\   )r/  r#  rB   r	   r  r  r   r  ParangerV   r   powersincos)rA   r  r  max_lenr1   r:  r   r   rB     s     $zPositionalEncoding.__init__c                 C   s8   || j d d d |jd d d f |j }| |S rR  )r%  rU   r  r  r  rA   r1   r   r   r   r>    s    .zPositionalEncoding.forward)r$  r?  r   r   r:  r   r#    s   
r#  c                       s(   e Zd ZdZ fddZdd Z  ZS )PositionWiseFFNzLPositionwise feed-forward network.

    Defined in :numref:`sec_transformer`c                    s8   t t| jf | tj|ddd| _tj|dd| _d S NFr<  )rx   
activation)rx   )r/  r,  rB   r	   r  dense1dense2)rA   ffn_num_hiddensZffn_num_outputsr9  r:  r   r   rB     s
    zPositionWiseFFN.__init__c                 C   s   |  | |S r   )r0  r/  r+  r   r   r   r>    s    zPositionWiseFFN.forwardr?  r   r   r:  r   r,    s   r,  c                       s(   e Zd ZdZ fddZdd Z  ZS )AddNormz^Residual connection followed by layer normalization.

    Defined in :numref:`sec_transformer`c                    s,   t t| jf | t|| _t | _d S r   )r/  r2  rB   r	   r  r  	LayerNormlnr  r:  r   r   rB     s    zAddNorm.__init__c                 C   s   |  | || S r   )r4  r  r=  r   r   r   r>    s    zAddNorm.forwardr?  r   r   r:  r   r2    s   r2  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )EncoderBlockzDTransformer encoder block.

    Defined in :numref:`sec_transformer`Fc                    sH   t t| jf | t||||| _t|| _t||| _	t|| _
d S r   )r/  r5  rB   r   r  r  r2  addnorm1r,  ffnaddnorm2)rA   r  r1  r  r  r
  r9  r:  r   r   rB     s       
zEncoderBlock.__init__c              	   C   s*   |  || ||||}| || |S r   )r6  r  r8  r7  )rA   r1   r  r8   r   r   r   r>    s    zEncoderBlock.forward)Fr?  r   r   r:  r   r5    s    	r5  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )TransformerEncoderz>Transformer encoder.

    Defined in :numref:`sec_transformer`Fc           
   	      sh   t t| jf | || _t||| _t||| _	t
 | _t|D ]}	| jt||||| qFd S r   )r/  r9  rB   r  r	   r  r  r   r#  pos_encoding
Sequentialblksr   r   r5  )
rA   r  r  r1  r  r  r  r
  r9  r   r:  r   r   rB     s    

zTransformerEncoder.__init__c                 G   s`   |  | |t| j }d gt| j | _t| jD ]"\}}|||}|j	j	j| j|< q8|S r   )
r:  r  r  r  r  r4   r<  r  ry   r  )rA   r1   r  r   rq   blkr   r   r   r>    s    
zTransformerEncoder.forward)Fr?  r   r   r:  r   r9    s    r9  c                 C   s"   t j j| ||tddd d S )Nz->)Z
arrowstyle)xyxytextZ
arrowprops)r   r   r3   annotatedict)r  r>  r?  r   r   r   r@    s    r@     c           	      C   s   d\}}}}||fg}t |D ]F}|r@| |||||\}}}}n| ||||\}}}}|||f qtd|d  dt|ddt|d |S )zlOptimize a 2D objective function with a customized trainer.

    Defined in :numref:`subsec_gd-learningrate`)r   r   zepoch r+   z, x1: r   z, x2: )r   rF   r   r   )	r&  stepsZf_gradx1x2s1s2resultsrq   r   r   r   train_2d  s    
*rK  c                 C   s|   t   t jjt| dddi t t dddt ddd\}}t jj||| ||dd	 t jd
 t j	d dS )zdShow the trace of 2D variables during optimization.

    Defined in :numref:`subsec_gd-learningrate`-ocolorz#ff7f0eg            ?皙?g      z#1f77b4)colorsrF  rG  N)rL  )
r   r   r   r7   r6   meshgridr&  Zcontourr    r!   )r   rJ  rF  rG  r   r   r   show_trace_2d  s    rR  zairfoil_self_noise.datZ(76e5be1548fd8222e5074cf0faae75edff8cf93fairfoilr    c                 C   sv   t jtdt jdd}||jdd |jdd }tj|d|ddf |d|df f| dd	}||jd
 d
 fS )$Defined in :numref:`sec_minibatches`rS  r  )r   	delimiterr   r   NrE   Trk   r+   )	r   Z
genfromtxtr   r   float32r  stdrm   rU   )rb   r   rg   r   r   r   r   get_data_ch11  s     "  rZ  r\   c                    sT  t jjd|dfdt d       fddtj }}tjddd|gd	d
gd}dt  }	}
t	|D ]}|D ]\}}t
  |||| }W 5 Q R X |  |  g|| |	|jd 7 }	|	d dkr||
  ||	|jd  t| t|||f |
  q|qttd|jd d dd|
 dd |
 |jd fS )rU  rQ   r+   )r   r   c                    s   t |  S r   )r   r[   r0   rX   rW   r   r   r   ,  rT  ztrain_ch11.<locals>.<lambda>r   r   r   )\(?ffffff?r    r!   r"   r#      loss: rE   r  r  
 sec/epoch)r   r|  rS   r  Zattach_gradr   r^   r   r=   r   r   r   r  r   rU   rG   r   r4   r   r?   r   r8   rI   rK   )Z
trainer_fnstateshyperparamsr   Zfeature_dimr   r   r   r   r   r'  r   r1   r;   r   r   r[  r   
train_ch11$  s2    
 
(rd  c                 C   sD  t  }|t d |tjdd t|	 | |}tj
 }tjddd|gddgd	}dt  }}	t|D ]}
|D ]\}}t  ||||}W 5 Q R X |  ||jd  ||jd 7 }|d
 dkr~|	  |||jd  t| t|||f |	  q~qvtd|jd d dd|	 dd dS )rU  r+   rQ   )sigmar   r   r   r\  r]  r^  r_  r`  rE   r  r  ra  N)r	   r;  r   r  r"  r   r  r   r   r  r   ZL2Lossr   r   r=   r   r   r   r   r   rU   rG   r4   r   r?   r   r8   rI   )Ztr_namerc  r   r   r   r&  r   r   r   r'  r   r1   r;   r   r   r   r   train_concise_ch11@  s0    
 
rf  c                   @   s*   e Zd ZdZd
ddZdd Zdd Zd	S )	BenchmarkzFor measuring running time.Donec                 C   s
   || _ dS )z"Defined in :numref:`sec_hybridize`N)description)rA   ri  r   r   r   rB   [  s    zBenchmark.__init__c                 C   s   t  | _| S r   )r   r=   r'  r@   r   r   r   	__enter___  s    
zBenchmark.__enter__c                 G   s"   t | j d| j dd d S )Nz: z.4fz sec)r   ri  r'  rG   r   r   r   r   __exit__c  s    zBenchmark.__exit__N)rh  )rM   rN   rO   rP   rB   rj  rk  r   r   r   r   rg  Y  s   
rg  c                 C   s4   | j d |j d ksttj| |tj||fS )zPSplit `X` and `y` into multiple devices.

    Defined in :numref:`sec_multi_gpu`r   )rU   r   r   utilssplit_and_loadr1   r;   r  r   r   r   split_batchf  s    ro  c                 C   s~   ddd}t  }|t jdddddt  t d ||dd	d
d|dd	|dd	|dd	 |t  t |  |S )zTA slightly modified ResNet-18 model.

    Defined in :numref:`sec_multi_gpu_concise`Fc                 S   sN   t  }t|D ]8}|dkr8|s8|tj| ddd q|t|  q|S )Nr   Tr\   )r8  r.  )r	   r;  r   r   r   r+  )r7  Znum_residualsfirst_blockr=  rq   r   r   r   resnet_blockr  s      
zresnet18.<locals>.resnet_block@   r   r+   )r,  r.  r-  r<  r\   T)rp        i   )F)r	   r;  r   r0  r4  Z
ActivationZGlobalAvgPool2Dr  )num_classesrq  r   r   r   r   resnet18n  s    

 rv  c           
         s   t    d  }td}|D ]L\}}||||\}} fdd|D }	|tdd t|	|D |j	 q&|d |d  S )zrCompute the accuracy for a model on a dataset using multiple GPUs.

    Defined in :numref:`sec_multi_gpu_concise`r   r\   c                    s   g | ]} |qS r   r   rp   ZX_shardr   r   r   rs     s     z*evaluate_accuracy_gpus.<locals>.<listcomp>c                 s   s"   | ]\}}t t||V  qd S r   )r   r   r   rp   Z
pred_shardZy_shardr   r   r   r    s   z)evaluate_accuracy_gpus.<locals>.<genexpr>r+   )
r/   r  r  r  r   r   r   rH   r6   r   )
r   r   split_fr  r   r  rt   X_shardsy_shardspred_shardsr   rx  r   evaluate_accuracy_gpus  s    
 r~  c              	      s   ||||\}}t  0 fdd|D }	 fddt|	|D }
W 5 Q R X |
D ]}|  qR|j|jd dd tdd |
D }tdd	 t|	|D }||fS )
zqTrain for a minibatch with mutiple GPUs (defined in Chapter 13).

    Defined in :numref:`sec_image_augmentation`c                    s   g | ]} |qS r   r   rw  rx  r   r   rs     s     z$train_batch_ch13.<locals>.<listcomp>c                    s   g | ]\}} ||qS r   r   ry  r   r   r   rs     s     r   T)Zignore_stale_gradc                 S   s   g | ]}t | qS r   )r   rH   r  r   r   r   rs     s     c                 s   s   | ]\}}t ||V  qd S r   )r   r   ry  r   r   r   r    s   z#train_batch_ch13.<locals>.<genexpr>)r   r   r6   r   r   rU   rH   )r   r  rt   r   r&  r  rz  r{  r|  r}  lsr   Ztrain_loss_sumZtrain_acc_sumr   r   r   r   train_batch_ch13  s    


r  c              
   C   s~  t  t| }}	t jdd|gddgdddgd}
t|D ]}t d}t|D ]\}\}}|  t| ||||||\}}|	|||j
d |j |  |d |	d	  dks||	d krP|
	||d |	  |d |d
  |d |d  df qPt | ||}|
	|d dd|f q:td|d |d
  dd|d |d  dd|d t|d
 | |  ddt|  dS )ziTrain a model with mutiple GPUs (defined in Chapter 13).

    Defined in :numref:`sec_image_augmentation`r   r+   r   r   r   r   r   r   r  r\   r   Nr  r  r  r  r   r!  )r   r=   r4   r   r   r   ry   r?   r  r   rU   r   rG   r~  r   rH   r%  )r   r   r   r   r&  r   r  rz  r'  r(  r   r   r   rq   r  rt   r   accr   r   r   r   
train_ch13  s:    
       8r  z
hotdog.zipZ(fba480ffa8aa7e0febbb511d181409f899b9baa5Zhotdogc           	      C   s   | dddf | dddf | dddf | dddf f\}}}}|| d }|| d }|| }|| }t j||||fdd} | S )zeConvert from (upper-left, lower-right) to (center, width, height).

    Defined in :numref:`sec_bbox`Nr   r+   r\   r   rE   r   r   stack)	boxesrF  y1rG  y2cxcyrW   r  r   r   r   box_corner_to_center  s    Dr  c           	      C   s   | dddf | dddf | dddf | dddf f\}}}}|d|  }|d|  }|d|  }|d|  }t j||||fdd} | S )	zeConvert from (center, width, height) to (upper-left, lower-right).

    Defined in :numref:`sec_bbox`Nr   r+   r\   r   r   rE   r   r  )	r  r  r  rW   r  rF  r  rG  r  r   r   r   box_center_to_corner  s    Dr  c                 C   s<   t jj| d | d f| d | d  | d | d  d|ddS )zMConvert bounding box to matplotlib format.

    Defined in :numref:`sec_bbox`r   r+   r\   r   F)r>  widthheightfillZ	edgecolorZ	linewidth)r   r   Z	Rectangle)bboxrM  r   r   r   bbox_to_rect  s        r  c              	   C   s  | j dd \}}| jt|t|  }}}|| d }tj||d}	tj||d}
d\}}d| }d| }tj||d| | }tj||d| | }t||\}}|d|d }}t	|	t
|
d  |d t
|
dd  f| | }t	|	t
|
d  |d t
|
dd  f}tt| | ||fj|| dfd	 }tj||||gdd
j|dd
}|| }tj|dd
S )zhGenerate anchor boxes with different shapes centered on each pixel.

    Defined in :numref:`sec_anchor`rD  Nr+   r  )r   r   rN  rE   r   r\   r   )rU   r  r4   r   r~  r&  rQ  rV   r   concatenater  tiler  r  r  r  )rg   sizesratiosZ	in_heightZin_widthr  Z	num_sizesZ
num_ratiosZboxes_per_pixelZsize_tensorZratio_tensorZoffset_hZoffset_wZsteps_hZsteps_wZcenter_hZcenter_wZshift_xZshift_yrW   r  Zanchor_manipulationsZout_gridr  r   r   r   multibox_prior  sF    
 r  c           
      C   s   ddd}||}||dddddg}t |D ]\}}||t|  }tt||}| | |r.t||kr.|d	kr~d
nd	}	| j|jd |jd || ddd|	t|ddd q.dS )z9Show bounding boxes.

    Defined in :numref:`sec_anchor`Nc                 S   s&   | d kr|} nt | ttfs"| g} | S r   )r.   r/   rh  )objZdefault_valuesr   r   r   	make_list  s
    zshow_bboxes.<locals>.make_listrX   gr   mcrW   r  r   r+   center	   )Z	facecolorZlw)vahaZfontsizerM  r  )N)	ry   r4   r   r  r{   Z	add_patchr  r>  rA  )
r   Zbboxesrt   rP  r  rq   r  rM  rectZ
text_colorr   r   r   show_bboxes  s     

   
r  c           
      C   s   dd }|| }||}t | dddddf |ddddf }t | dddddf |ddddf }|| jdd}|dddddf |dddddf  }|dddf | | }	||	 S )zgCompute pairwise IoU across two lists of anchor or bounding boxes.

    Defined in :numref:`sec_anchor`c                 S   s@   | d d df | d d df  | d d df | d d df   S )Nr\   r   r   r+   r   )r  r   r   r   r   3  s   zbox_iou.<locals>.<lambda>Nr\   r   )r  r+   )r   maximumminimumclip)
Zboxes1Zboxes2Zbox_areaZareas1Zareas2Zinter_upperleftsZinter_lowerrightsZintersZinter_areasZunion_areasr   r   r   box_iou/  s    ..,r  r   c                 C   s   |j d | j d  }}t|| }tj|fdtj|d}tj|ddtj|dd }}	t|dkd }
|	|dk }|||
< t|fd}t|fd}t|D ]R}t|}|| 	d}|| 	d}|||< ||dd|f< |||ddf< q|S )	z`Assign closest ground-truth bounding boxes to anchor boxes.

    Defined in :numref:`sec_anchor`r   rE   r   r  r+   r   r   r  N)
rU   r  r   fullr  maxr   nonzeror   r   )Zground_truthanchorsr  iou_thresholdnum_anchorsZnum_gt_boxesZjaccardanchors_bbox_mapZmax_iousrl  Zanc_iZbox_jZcol_discardZrow_discardr   Zmax_idxZbox_idxZanc_idxr   r   r   assign_anchor_to_bboxC  s"    

r  ư>c              	   C   s   t | }t |}d|ddddf |ddddf   |ddddf  }dt ||ddddf |ddddf    }t j||gdd}|S )zXTransform for anchor box offsets.

    Defined in :numref:`subsec_labeling-anchor-boxes`r  Nr\   r  r+   r   )r   r  logr  )r  assigned_bbepsZc_ancZc_assigned_bbZ	offset_xyZ	offset_whr  r   r   r   offset_boxes^  s    

@6r  c                 C   sh  |j d | d }} g g g   }}}| j| j d  }}t|D ]}||ddddf }	t|	ddddf | |}
ttj|
dkdddd}t	j
|tj|d}t	j
|d	ftj|d}t|
dkd }|
| }|	|df dd ||< |	|ddf ||< t| || }||d ||d || q@t	|}t	|}t	|}|||fS )
zlLabel anchor boxes using ground-truth bounding boxes.

    Defined in :numref:`subsec_labeling-anchor-boxes`r   Nr+   rE   r   )r+   r   r  r  r   )rU   r  r  r   r  r   r  r  r   r   r  r  rX  r  r  rF   rV   r  )r  rt   rb   Zbatch_offsetZ
batch_maskZbatch_class_labelsr  r  rq   r  r  Z	bbox_maskZclass_labelsr  Zindices_trueZbb_idxr  Zbbox_offsetr   r   r   multibox_targeti  sB      


r  c                 C   s   t | }|ddddf |ddddf  d |ddddf  }t |ddddf d |ddddf  }t j||fdd}t |}|S )z{Predict bounding boxes based on anchor boxes with predicted offsets.

    Defined in :numref:`subsec_labeling-anchor-boxes`Nr\   r  r  r+   r   )r   r  r  r  r  )r  offset_predsZancZpred_bbox_xyZpred_bbox_whZ	pred_bboxZpredicted_bboxr   r   r   offset_inverse  s    
@2
r  c                 C   s   |  ddd }g }|jdkr|d }|| |jdkr>qt| |ddf dd| |dd ddf ddd}t||kd }||d  }qtj|tj| j	dS )zrSort confidence scores of predicted bounding boxes.

    Defined in :numref:`subsec_predicting-bounding-boxes-nms`NrE   r   r+   r   r  )
argsortr   rF   r  rV   r   r  rJ   r  r  )r  r  r  Bkeeprq   ZiouZindsr   r   r   nms  s    


 r  Q%z?c                 C   st  | j | jd  }}tj|dd}| jd | jd  }}g }	t|D ]$}
| |
 ||
 dd }}t|dd dt|dd d }}t||}t	|||}tj
|tj|d}t||f}tj|d	d
\}}||dk }t||f}d||< || d}|| ||  }}||k }d||< d||  ||< tjtj|ddtj|dd|fdd}|	| qBt|	S )ztPredict bounding boxes using non-maximum suppression.

    Defined in :numref:`subsec_predicting-bounding-boxes-nms`r   r   r+   r\   rE   r   Nr  T)Zreturn_countsrX  )r  rU   r   r  r   rV   r  r   r  r  r&  r  r   r  uniquer   r  rF   r  )Z	cls_probsr  r  Znms_thresholdZpos_thresholdr  rb   ru  r  r  rq   Zcls_probZoffset_predconfZclass_idZpredicted_bbr  Zall_idxcombinedr  countsZnon_keepZall_id_sortedZbelow_min_idxZ	pred_infor   r   r   multibox_detection  s8    *
r  zbanana-detection.zipZ(5de26c8fce5ccdea9f91267273464dc968d20d72banana-detectionc                 C   s   t d}tj|| rdndd}t|}|d}g g  }}| D ]>\}}|	t
tj|| rhdndd|  |	t| qH|tt|dd fS )	zkRead the banana detection dataset images and labels.

    Defined in :numref:`sec_object-detection-dataset`r  Zbananas_trainZbananas_valz	label.csvimg_nameimagesr+   rt  )r   r  r   r   r   pdread_csvZ	set_indexZiterrowsrF   r   imreadr/   r   r  rJ   )rk   r  Z	csv_fnamecsv_datar  targetsr  r  r   r   r   read_data_bananas  s$    
 


  r  c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	BananasDatasetzqA customized dataset to load the banana detection dataset.

    Defined in :numref:`sec_object-detection-dataset`c                 C   s6   t |\| _| _tdtt| j |r*dnd  d S )Nread z training examplesz validation examples)r  r  rt   r   r%  r4   )rA   rk   r   r   r   rB     s    zBananasDataset.__init__c                 C   s$   | j | dddd| j| fS )NrX  r\   r   r+   )r  r   r!  rt   r   r   r   r   r     s    zBananasDataset.__getitem__c                 C   s
   t | jS r   r4   r  r@   r   r   r   r,     s    zBananasDataset.__len__NrM   rN   rO   rP   rB   r   r,   r   r   r   r   r    s   r  c                 C   s4   t jjtdd| dd}t jtdd| }||fS )zYLoad the banana detection dataset.

    Defined in :numref:`sec_object-detection-dataset`TrW  re   F)r   rg   ri   r  )rb   r   Zval_iterr   r   r   load_data_bananas  s     r  zVOCtrainval_11-May-2012.tarZ(4e443f8a2eca6b1dac8a6c57641b67dd40621a49voc2012c           	      C   s   t j| dd|rdnd}t|d}|  }W 5 Q R X g g  }}t|D ]L\}}|t	t j| d| d |t	t j| d| d	 qN||fS )
zZRead all VOC feature and label images.

    Defined in :numref:`sec_semantic_segmentation`Z	ImageSetsZSegmentationz	train.txtzval.txtr   Z
JPEGImagesz.jpgZSegmentationClassz.png)
r   r   r   r   r   r   ry   rF   r   r  )	voc_dirrk   Z	txt_fnamer   r  r  rt   rq   r   r   r   r   read_voc_images  s$    

    
r  rs  rr     
backgroundZ	aeroplaneZbicycleZbirdZboatZbottleZbusZcarcatZchairZcowZdiningtableZdogZhorseZ	motorbikepersonzpotted plantZsheepZsofar   z
tv/monitorc                  C   sD   t d} ttD ],\}}|| |d d |d  d |d  < q| S )zoBuild the mapping from RGB to class indices for VOC labels.

    Defined in :numref:`sec_semantic_segmentation`i   r   rt  r+   r\   )r   r  ry   VOC_COLORMAP)colormap2labelrq   colormapr   r   r   voc_colormap2label  s    
r  c                 C   s^   |  tj} | dddddf d | dddddf  d | dddddf  }|| S )zlMap any RGB values in VOC labels to their class indices.

    Defined in :numref:`sec_semantic_segmentation`Nr   rt  r+   r\   )r   r   r  )r  r  r   r   r   r   voc_label_indices$  s
    2r  c                 C   s,   t | ||f\} }t j|f| }| |fS )z`Randomly crop both feature and label images.

    Defined in :numref:`sec_semantic_segmentation`)r   Zrandom_cropZ
fixed_crop)featurer  r  r  r  r   r   r   voc_rand_crop-  s    r  c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )VOCSegDatasetzaA customized dataset to load the VOC dataset.

    Defined in :numref:`sec_semantic_segmentation`c                    s   t dddg _t dddg _| _t||d\}} fdd	 |D  _ | _t	  _
td
tt j d  d S )Ng
ףp=
?gv/?gCl?gZd;O?gy&1?g?rW  c                    s   g | ]}  |qS r   )normalize_imagerp   r  r@   r   r   rs   >  s   z*VOCSegDataset.__init__.<locals>.<listcomp>r  	 examples)r   rJ   rgb_meanrgb_std	crop_sizer  filterr  rt   r  r  r   r%  r4   )rA   rk   r  r  r  rt   r   r@   r   rB   9  s    
zVOCSegDataset.__init__c                 C   s   | dd | j | j S )NrX     )r   r  r  )rA   r   r   r   r   r  D  s    zVOCSegDataset.normalize_imagec                    s    fdd|D S )Nc                    s8   g | ]0}|j d   jd  kr|j d  jd kr|qS r   r+   )rU   r  )rp   r   r@   r   r   rs   H  s    z(VOCSegDataset.filter.<locals>.<listcomp>r   )rA   r}   r   r@   r   r  G  s    zVOCSegDataset.filterc                 C   s<   t | j| | j| f| j \}}|dddt|| jfS )Nr\   r   r+   )r  r  rt   r  r!  r  r  )rA   r   r  r  r   r   r   r   L  s    

zVOCSegDataset.__getitem__c                 C   s
   t | jS r   r  r@   r   r   r   r,   R  s    zVOCSegDataset.__len__N)	rM   rN   rO   rP   rB   r  r  r   r,   r   r   r   r   r  5  s   r  c                 C   s`   t dtjdd}t  }tjjt	d||| dd|d}tjjt	d||| d|d}||fS )	z_Load the VOC semantic segmentation dataset.

    Defined in :numref:`sec_semantic_segmentation`r  Z	VOCdevkitZVOC2012Tdiscard)rf   
last_batchr   F)r  r   )
r   r  r   r   r   r   r   rg   ri   r  )rb   r  r  r   r   r   r   r   r   load_data_vocU  s&     
   
  r  zkaggle_cifar10_tiny.zipZ(2068874e4b9a9f0fb07ebe0ad2b29754449ccacdZcifar10_tinyc              	   C   sF   t | d}| dd }W 5 Q R X dd |D }tdd |D S )zcRead `fname` to return a filename to label dictionary.

    Defined in :numref:`sec_kaggle_cifar10`r   r+   Nc                 S   s   g | ]}|  d qS ),)rstripr   r  r   r   r   rs   n  s     z#read_csv_labels.<locals>.<listcomp>c                 s   s   | ]\}}||fV  qd S r   r   )rp   r   r  r   r   r   r  o  s     z"read_csv_labels.<locals>.<genexpr>)r   rI  rA  )r   r   rJ  ra  r   r   r   read_csv_labelsg  s    r  c                 C   s   t j|dd t| | dS )zQCopy a file into a target directory.

    Defined in :numref:`sec_kaggle_cifar10`Tr   N)r   r   shutilcopy)filename
target_dirr   r   r   copyfileq  s    r  c           	   	   C   s   t |  d d }tdt|| }i }ttj	
| dD ]}||dd  }tj	
| d|}t|tj	
| dd| ||ks|| |k rt|tj	
| dd| ||dd ||< qFt|tj	
| dd| qF|S )	zgSplit the validation set out of the original training set.

    Defined in :numref:`sec_kaggle_cifar10`rE   r+   r   r   r   train_valid_testZtrain_validvalid)rq  rr  r  most_commonr  r  floorr   listdirr   r   r   r  r   )	r  rt   Zvalid_ratior   Zn_valid_per_labelZlabel_countZ
train_filer  r   r   r   r   reorg_train_validx  s*       r  c              	   C   sB   t t j| dD ](}tt j| d|t j| ddd qdS )ziOrganize the testing set for data loading during prediction.

    Defined in :numref:`sec_kaggle_cifar10`testr  unknownN)r   r  r   r   r  )r  Z	test_filer   r   r   
reorg_test  s    r  zkaggle_dog_tiny.zipZ(0cb91d09b814ecdc07b50f31f8dcad3e81d6a86dZdog_tinyzptb.zipZ(319d85e578af0cdc590547f26231e4e31cdf1e42ptbc               	   C   sD   t d} ttj| d}| }W 5 Q R X dd |dD S )z[Load the PTB dataset into a list of text lines.

    Defined in :numref:`sec_word2vec_data`r  zptb.train.txtc                 S   s   g | ]}|  qS r   rM  rG  r   r   r   rs     s     zread_ptb.<locals>.<listcomp>r   )r   r  r   r   r   r   r   r   )r  r   raw_textr   r   r   read_ptb  s    
r  c                    sL   fdd| D } t |  t   fddfdd| D  fS )zKSubsample high-frequency words.

    Defined in :numref:`sec_word2vec_data`c                    s   g | ]} fd d|D qS )c                    s   g | ]} |  j kr|qS r   )ri  rg  rs  r   r   rs     s      (subsample.<locals>.<listcomp>.<listcomp>r   rG  rs  r   r   rs     s   zsubsample.<locals>.<listcomp>c                    s"   t ddtd |    k S )Nr   r+   g-C6?)r|  uniformr  r  rO  )rd  r  r   r   r    s    
zsubsample.<locals>.keepc                    s   g | ]} fd d|D qS )c                    s   g | ]} |r|qS r   r   rg  r  r   r   rs     s      r  r   rG  r  r   r   rs     s     )r   r[  rH   r  )	sentencesrt  r   )rd  r  r  rt  r   	subsample  s    

r  c              	      s   g g  }}| D ] t  dk r q| 7 }tt  D ]\}td|}tttd|| tt  |d | }|| | fdd|D  q4q||fS )z_Return center words and context words in skip-gram.

    Defined in :numref:`sec_word2vec_data`r\   r+   r   c                    s   g | ]} | qS r   r   )rp   r   rH  r   r   rs     s     z,get_centers_and_contexts.<locals>.<listcomp>)	r4   r   r|  r}  r/   r  r  removerF   )rv  max_window_sizecenterscontextsrq   Zwindow_sizerl  r   r  r   get_centers_and_contexts  s    

r
  c                   @   s    e Zd ZdZdd Zdd ZdS )RandomGeneratorz@Randomly draw among {1, ..., n} according to n sampling weights.c                 C   s.   t tdt|d | _|| _g | _d| _dS )z&Defined in :numref:`sec_word2vec_data`r+   r   N)r/   r   r4   
populationsampling_weights
candidatesrq   )rA   r  r   r   r   rB     s    zRandomGenerator.__init__c                 C   sJ   | j t| jkr,tj| j| jdd| _d| _ |  j d7  _ | j| j d  S )Nr  r  r   r+   )rq   r4   r  r|  choicesr  r  r@   r   r   r   draw  s      zRandomGenerator.drawN)rM   rN   rO   rP   rB   r  r   r   r   r   r    s   r  c           
         sx    fddt dtD }g t| }}| D ]B}g }t|t|| k rh| }	|	|kr8||	 q8|| q0|S )zTReturn noise words in negative sampling.

    Defined in :numref:`sec_word2vec_data`c                    s   g | ]}  | d  qS )g      ?)rm  ro   rd  rt  r   r   rs     s   z!get_negatives.<locals>.<listcomp>r+   )r   r4   r  r  rF   )
all_contextsrt  rd  r  r  all_negatives	generatorr	  Z	negativesnegr   r  r   get_negatives  s    r  c           
      C   s   t dd | D }g g g g f\}}}}| D ]\}}}t|t| }	||g7 }||| dg||	   g7 }|dg|	 dg||	   g7 }|dgt| dg|t|   g7 }q*tt|dt|t|t|fS )zpReturn a minibatch of examples for skip-gram with negative sampling.

    Defined in :numref:`sec_word2vec_data`c                 s   s$   | ]\}}}t |t | V  qd S r   r  )rp   r   r  r   r   r   r   r    s     zbatchify.<locals>.<genexpr>r   r+   rR   )r  r4   r   rV   r~  )
rg   r*  r  Zcontexts_negativesmasksrt   r  r   negativecur_lenr   r   r   batchify  s    
( r  c                    s   t  }tj|dd t| \}} fdd|D }t||\}}t| ||}	tj|||	}
tjj	|
| dt
t d}| fS )zrDownload the PTB dataset and then load it into memory.

    Defined in :numref:`subsec_word2vec-minibatch-loading`r  rb  c                    s   g | ]} | qS r   r   rG  rs  r   r   rs     s     z!load_data_ptb.<locals>.<listcomp>T)rf   Zbatchify_fnr   )r  r   rQ  r  r
  r  r   rg   rh   ri   r  r   )rb   r  Znum_noise_wordsr  Z
subsampledrd  rv  Zall_centersr  r  rl   r   r   rs  r   load_data_ptb  s6             r  zglove.6B.50d.zipZ(0b8703943ccdb6eb788e6f091b8946e82231bc4dzglove.6b.50dzglove.6B.100d.zipZ(cd43bfb07e44e6f27cbcc7bc9ae3d80284fdaf5azglove.6b.100dzglove.42B.300d.zipZ(b5116e234e9eb9076672cfeabf5469f3eec904fazglove.42b.300dzwiki.en.zipZ(c1816da3821ae9f43899be655002f6c723e91b88zwiki.enc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )TokenEmbeddingzToken Embedding.c                 C   s2   |  |\| _| _d| _dd t| jD | _dS )z!Defined in :numref:`sec_synonyms`r   c                 S   s   i | ]\}}||qS r   r   rY  r   r   r   rZ  *  s      z+TokenEmbedding.__init__.<locals>.<dictcomp>N)_load_embeddingr_  
idx_to_vecunknown_idxry   r`  )rA   embedding_namer   r   r   rB   %  s    zTokenEmbedding.__init__c           	   	   C   s   dgg  }}t |}ttj|dd^}|D ]R}| d}|d dd |dd  D  }}t|dkr0|	| |	| q0W 5 Q R X dgt|d  g| }|t 
|fS )	NrX  zvec.txtr   rB  r   c                 S   s   g | ]}t |qS r   r   )rp   elemr   r   r   rs   5  s     z2TokenEmbedding._load_embedding.<locals>.<listcomp>r+   )r   r  r   r   r   r   r  r   r4   rF   r~  )	rA   r"  r_  r   r  r   rH  elemsrO  r   r   r   r  -  s    
 
zTokenEmbedding._load_embeddingc                    s&    fdd|D } j t| }|S )Nc                    s   g | ]} j | jqS r   )r`  r   r!  rg  r@   r   r   rs   >  s   z.TokenEmbedding.__getitem__.<locals>.<listcomp>)r   r   r~  )rA   ra  rl  Zvecsr   r@   r   r   =  s
    
zTokenEmbedding.__getitem__c                 C   s
   t | jS r   rf  r@   r   r   r   r,   C  s    zTokenEmbedding.__len__N)rM   rN   rO   rP   rB   r  r   r,   r   r   r   r   r  #  s
   r  c                 C   sV   dg|  dg }dgt | d  }|dk	rN||dg 7 }|dgt |d  7 }||fS )z_Get tokens of the BERT input sequence and their segment IDs.

    Defined in :numref:`sec_bert`<cls><sep>r   r\   Nr+   r  )tokens_atokens_bra  segmentsr   r   r   get_tokens_and_segmentsF  s    r*  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )BERTEncoderz=BERT encoder.

    Defined in :numref:`subsec_bert_input_rep`r$  c           
   
      s|   t t| jf | t||| _td|| _t | _t	|D ]}	| j
t||||d q@| jjdd||fd| _d S )Nr\   Tpos_embeddingr+   )rU   )r/  r+  rB   r	   r  token_embeddingsegment_embeddingr;  r<  r   r   r   r5  r`   r   r,  )
rA   r  r  r1  r  r  r  r*  r9  r   r:  r   r   rB   V  s    

    zBERTEncoder.__init__c                 C   s^   |  || | }|| jj|jdd d d |jd d d f  }| jD ]}|||}qJ|S )Nr  r+   )r-  r.  r,  rg   r  rU   r<  )rA   ra  r)  r  r1   r=  r   r   r   r>  d  s
    0
zBERTEncoder.forward)r$  r?  r   r   r:  r   r+  R  s    r+  c                       s(   e Zd ZdZ fddZdd Z  ZS )MaskLMzWThe masked language model task of BERT.

    Defined in :numref:`subsec_bert_input_rep`c                    s^   t t| jf | t | _| jtj|ddd | jt  | jtj|dd d S r-  )	r/  r/  rB   r	   r;  mlpr   r  r3  )rA   r  r  r9  r:  r   r   rB   q  s    
zMaskLM.__init__c                 C   s`   |j d }|d}|j d }td|}t||}|||f }|||df}| |}|S )Nr+   rE   r   )rU   rV   r   r&  r  r0  )rA   r1   pred_positionsZnum_pred_positionsrb   Z	batch_idxZmasked_X	mlm_Y_hatr   r   r   r>  y  s    



zMaskLM.forwardr?  r   r   r:  r   r/  m  s   r/  c                       s(   e Zd ZdZ fddZdd Z  ZS )NextSentencePredzOThe next sentence prediction task of BERT.

    Defined in :numref:`subsec_mlm`c                    s"   t t| jf | td| _d S )Nr\   )r/  r3  rB   r	   r  r  r  r:  r   r   rB     s    zNextSentencePred.__init__c                 C   s
   |  |S r   )r  r+  r   r   r   r>    s    zNextSentencePred.forwardr?  r   r   r:  r   r3    s   r3  c                       s,   e Zd ZdZd fdd	Zd	ddZ  ZS )
	BERTModelz4The BERT model.

    Defined in :numref:`subsec_nsp`r$  c                    sL   t t|   t|||||||| _tj|dd| _t||| _	t
 | _d S )Nr  )r.  )r/  r4  rB   r+  r  r	   r  hiddenr/  mlmr3  nsp)rA   r  r  r1  r  r  r  r*  r:  r   r   rB     s       zBERTModel.__init__Nc              	   C   sT   |  |||}|d k	r$| ||}nd }| | |d d dd d f }|||fS r   )r  r6  r7  r5  )rA   ra  r)  r  r1  Z	encoded_Xr2  	nsp_Y_hatr   r   r   r>    s    "zBERTModel.forward)r$  )NNr?  r   r   r:  r   r4    s    	r4  )zHhttps://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zipZ(3c914d17d80b1459be871a5039ac23e752a53cbe
wikitext-2c              	   C   sH   t j| d}t|d}| }W 5 Q R X dd |D }t| |S )%Defined in :numref:`sec_bert-dataset`zwiki.train.tokensr   c                 S   s0   g | ](}t |d dkr|  d qS )z . r\   )r4   r   rE  rF  rG  r   r   r   rs     s    z_read_wiki.<locals>.<listcomp>)r   r   r   r   rI  r|  rf   )r  	file_namer   rJ  
paragraphsr   r   r   
_read_wiki  s    
r=  c                 C   s0   t   dk rd}nt t |}d}| ||fS )r:  r   TF)r|  choice)sentenceZnext_sentencer<  is_nextr   r   r   _get_next_sentence  s
    rA  c                 C   sv   g }t t| d D ]\}t| | | |d  |\}}}t|t| d |krPqt||\}	}
||	|
|f q|S )r:  r+   r   )r   r4   rA  r   r*  rF   )	paragraphr<  rt  r*  Znsp_data_from_paragraphrq   r'  r(  r@  ra  r)  r   r   r   _get_nsp_data_from_paragraph  s     
 
rC  c                 C   s   dd | D }g }t | |D ]f}t||kr4 qd}t   dk rJd}n"t   dk r`| | }nt |j}|||< ||| | f q ||fS )r:  c                 S   s   g | ]}|qS r   r   rg  r   r   r   rs     s     z'_replace_mlm_tokens.<locals>.<listcomp>Ng?<mask>r   )r|  rf   r4   r>  r_  rF   )ra  candidate_pred_positionsnum_mlm_predsrt  mlm_input_tokenspred_positions_and_labelsZmlm_pred_positionZmasked_tokenr   r   r   _replace_mlm_tokens  s"    


rI  c           
      C   s   g }t | D ]\}}|dkrq|| qtdtt| d }t| |||\}}t|dd d}dd |D }d	d |D }	|| |||	 fS )
,Defined in :numref:`subsec_prepare_mlm_data`)r%  r&  r+   333333?c                 S   s   | d S r   r   rS  r   r   r   r     rT  z+_get_mlm_data_from_tokens.<locals>.<lambda>rV  c                 S   s   g | ]}|d  qS r   r   rp   vr   r   r   rs     s     z-_get_mlm_data_from_tokens.<locals>.<listcomp>c                 S   s   g | ]}|d  qS r{  r   rN  r   r   r   rs     s     )ry   rF   r  roundr4   rI  r\  )
ra  rt  rE  rq   rO  rF  rG  rH  r1  Zmlm_pred_labelsr   r   r   _get_mlm_data_from_tokens  s$       rQ  c              	   C   sL  t |d }g g g   }}}g g g   }}}	g }
| D ]\}}}}}|tj||d g|t|   dd |tj|dg|t|   dd |tjt|dd |tj|dg|t|   dd |tjdgt| dg|t|   dd |	tj|dg|t|   dd |
t| q4||||||	|
fS )	rJ  rK  r  r  r   r   rX  rN  r   )rP  rF   r   rJ   r4   )examplesr*  rt  Zmax_num_mlm_predsall_token_idsall_segmentsr  all_pred_positionsall_mlm_weightsall_mlm_labels
nsp_labelsZ	token_idsr1  Zmlm_pred_label_idsr)  r@  r   r   r   _pad_bert_inputs	  sR    




  rZ  c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	_WikiTextDatasetrJ  c                    s   dd |D }dd |D }t j|dddddgd	 _g }|D ]}|t|| j| q> fd
d|D }t|| j\ _ _ _ _	 _
 _ _d S )Nc                 S   s   g | ]}t j|d dqS )rL  r  )r   rP  )rp   rB  r   r   r   rs   "	  s
    z-_WikiTextDataset.__init__.<locals>.<listcomp>c                 S   s   g | ]}|D ]}|qqS r   r   )rp   rB  r?  r   r   r   rs   $	  s     r  r  rD  r%  r&  r  c                    s&   g | ]\}}}t | j||f qS r   )rQ  rt  )rp   ra  r)  r@  r@   r   r   rs   .	  s   
)r   rQ  rt  extendrC  rZ  rT  rU  r  rV  rW  rX  rY  )rA   r<  r*  r  rS  rB  r   r@   r   rB   	  s@       
   
    z_WikiTextDataset.__init__c                 C   s<   | j | | j| | j| | j| | j| | j| | j| fS r   )rT  rU  r  rV  rW  rX  rY  r   r   r   r   r   7	  s      z_WikiTextDataset.__getitem__c                 C   s
   t | jS r   )r4   rT  r@   r   r   r   r,   =	  s    z_WikiTextDataset.__len__Nr  r   r   r   r   r[  	  s   r[  c                 C   sD   t  }t dd}t|}t||}tjj|| d|d}||jfS )zNLoad the WikiText-2 dataset.

    Defined in :numref:`subsec_prepare_mlm_data`r9  Tr   )	r   r   r  r=  r[  r   rg   ri   rt  )rb   r*  r   r  r<  	train_setr   r   r   r   load_data_wiki@	  s    
r^  c
                 C   s   g g g   }
}}t |||||||	D ]\}}}}}}}| |||d|\}}}||d|f|d|d}| | d  }|||}| }|
| || |||  t  q$|
||fS )z)Defined in :numref:`sec_bert-pretraining`rE   rR   g:0yE>)r6   rV   rH   r  rF   r   Zwaitall)r   r   r  Ztokens_X_shardsZsegments_X_shardsZvalid_lens_x_shardsZpred_positions_X_shardsZmlm_weights_X_shardsZmlm_Y_shardsZnsp_y_shardsZmlm_lsZnsp_lsr  Ztokens_X_shardZsegments_X_shardZvalid_lens_x_shardZpred_positions_X_shardZmlm_weights_X_shardZmlm_Y_shardZnsp_y_shardr   r2  r8  Zmlm_lZnsp_lr   r   r   _get_batch_loss_bertL	  sD            
 



r_  )z>http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gzZ(01ada507287d82875905620988597833ad4e0903aclImdbc           	      C   s   g g  }}dD ]}t j| |r"dnd|}t |D ]Z}tt j||d<}| ddd}|| ||dkr~d	nd
 W 5 Q R X q4q||fS )z_Read the IMDb review dataset text sequences and labels.

    Defined in :numref:`sec_sentiment`)rx  r  r   r  r   zutf-8r   r  rx  r+   r   )	r   r   r   r  r   r   decoder  rF   )	r  rk   rg   rt   r  Zfolder_namefiler   Zreviewr   r   r   	read_imdbn	  s    

$rc    c                    s   t dd}t|d}t|d}t j|d dd}t j|d dd}t j|ddt fd	d
|D }t fdd
|D }t ||d f| }	t j||d f| dd}
|	|
fS )zlReturn data iterators and the vocabulary of the IMDb review dataset.

    Defined in :numref:`sec_sentiment`r`  TFr   rL  r  r  r  c                    s"   g | ]}t |  d  qS r  r   r  rG  r  r   r   rs   	  s     z"load_data_imdb.<locals>.<listcomp>c                    s"   g | ]}t |  d  qS r  re  rG  r  r   r   rs   	  s     r+   rW  )r   r  rc  rP  rQ  r   rJ   rm   )rb   ry  r  
train_data	test_dataZtrain_tokensZtest_tokensZtrain_featuresZtest_featuresr   r   r   r  r   load_data_imdb}	  s"    

rh  c                 C   sD   t j||  t d}t j| |dddd}|dkr@dS dS )zUPredict the sentiment of a text sequence.

    Defined in :numref:`sec_sentiment_rnn`r  r+   rE   r   positiver  )r   rJ   r   r   r	  r   rV   )r   rt  sequencer  r   r   r   predict_sentiment	  s    rk  )z3https://nlp.stanford.edu/projects/snli/snli_1.0.zipZ(9fcde07509c7e87ec61c640c1b2753d9041758e4SNLIc              	      s   dd  ddddt j| |r$dnd}t|d	 }d
d | dd D }W 5 Q R X  fdd|D } fdd|D }fdd|D }|||fS )zRead the SNLI dataset into premises, hypotheses, and labels.

    Defined in :numref:`sec_natural-language-inference-and-dataset`c                 S   s2   t dd| } t dd| } t dd| } |  S )Nz\(r  z\)z\s{2,}rB  )rC  rD  rE  )r  r   r   r   extract_text	  s    zread_snli.<locals>.extract_textr   r+   r\   )
entailmentcontradictionneutralzsnli_1.0_train.txtzsnli_1.0_test.txtr   c                 S   s   g | ]}| d qS )r  rM  rp   rowr   r   r   rs   	  s     zread_snli.<locals>.<listcomp>Nc                    s$   g | ]}|d  kr |d qS r  r   rq  rm  	label_setr   r   rs   	  s      c                    s$   g | ]}|d  kr |d qS )r   r\   r   rq  rs  r   r   rs   	  s      c                    s$   g | ]}|d   kr |d   qS rM  r   rq  )rt  r   r   rs   	  s      )r   r   r   r   rI  )r  rk   r;  r   rowspremises
hypothesesrt   r   rs  r   	read_snli	  s    $rx  c                   @   s2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )SNLIDatasetzsA customized dataset to load the SNLI dataset.

    Defined in :numref:`sec_natural-language-inference-and-dataset`Nc                 C   s   || _ t|d }t|d }|d krDtj|| ddgd| _n|| _| || _| || _t	|d | _
tdtt| j d  d S )	Nr   r+   r  r  r  r\   r  r  )ry  r   rP  rQ  rt  _padrv  rw  r   rJ   rt   r   r%  r4   )rA   rl   ry  rt  Zall_premise_tokensZall_hypothesis_tokensr   r   r   rB   	  s    
 
zSNLIDataset.__init__c                    s   t  fdd|D S )Nc                    s(   g | ] }t  j|  j jd  qS r  )r   r  rt  ry  rG  r@   r   r   rs   	  s     z$SNLIDataset._pad.<locals>.<listcomp>)r   rJ   )rA   rJ  r   r@   r   rz  	  s    zSNLIDataset._padc                 C   s   | j | | j| f| j| fS r   )rv  rw  rt   r   r   r   r   r   	  s    zSNLIDataset.__getitem__c                 C   s
   t | jS r   )r4   rv  r@   r   r   r   r,   	  s    zSNLIDataset.__len__)N)rM   rN   rO   rP   rB   rz  r   r,   r   r   r   r   ry  	  s
   
ry  r  c           
      C   sr   t  }t d}t|d}t|d}t||}t|||j}tjj|| d|d}tjj|| d|d}	||	|jfS )zDownload the SNLI dataset and return data iterators and vocabulary.

    Defined in :numref:`sec_natural-language-inference-and-dataset`rl  TFr   )	r   r   r  rx  ry  rt  r   rg   ri   )
rb   ry  r   r  rf  rg  r]  test_setr   r   r   r   r   load_data_snli	  s    



r|  c                    s0   t t fdd| D  } | tjj| ddfS )zwSplit multi-input `X` and `y` into multiple devices.

    Defined in :numref:`sec_natural-language-inference-attention`c                    s   g | ]}t jj| d dqS FZ
even_splitr   rl  rm  r  r
  r   r   rs   	  s     z,split_batch_multi_inputs.<locals>.<listcomp>Fr~  )r/   r6   r   rl  rm  rn  r   r
  r   split_batch_multi_inputs	  s    
r  c                 C   sj   t j|| t d}t j|| t d}t j| |d|dgdd}|dkrZdS |dkrfdS dS )	zPredict the logical relationship between the premise and hypothesis.

    Defined in :numref:`sec_natural-language-inference-attention`r  )r+   rE   r+   r   r   rn  ro  rp  )r   rJ   r   r	  r   rV   )r   rt  ZpremiseZ
hypothesisr  r   r   r   predict_snli	  s    r  )z:https://files.grouplens.org/datasets/movielens/ml-100k.zipZ(cd4dcac4241c8a4ad7badc7ca635da8a69dddb83ml-100kc                  C   s\   t d} ddddg}tjtj| dd|dd	}|j j	d
 }|j
 j	d
 }|||fS )Nr  user_iditem_idrating	timestampzu.datar  python)namesenginer   )r   r  r  r  r   r   r   r  r  rU   r  )r  r  rg   	num_users	num_itemsr   r   r   read_data_ml100k	  s    
r  r|  rO  c                    s@  |dkri i g   }}}|   D ]h}|d |d |d |d f\}	}
}}||	g |	|
||f |	|ksz||	 d |k r |
||f||	< q td|d D ]}	|t||	 dd d	 qd
d | D   fdd|D }t|}t  nFdd t	j
ddt| d| k D }dd |D }| | | |  } | fS )z3Split the dataset in random mode or seq-aware mode.	seq-awarer+   r\   r   r   rE   c                 S   s   | d S )Nr   r   r  r   r   r   r   	
  rT  z#split_data_ml100k.<locals>.<lambda>rL  c                 S   s   g | ]\}}|f|qS r   r   )rp   rV  r  r   r   r   rs   

  s     z%split_data_ml100k.<locals>.<listcomp>c                    s   g | ]}| kr|qS r   r   )rp   r  rg  r   r   rs   
  s      c                 S   s   g | ]}|d krdndqS )r+   TFr   r  r   r   r   rs   
  s     r   c                 S   s   g | ]
}| qS r   r   r  r   r   r   rs   
  s     )
itertuples
setdefaultrF   r   r\  r\  r]  r  Z	DataFramer   r|  r   r4   )rg   r  r  
split_mode
test_ratioZtrain_itemsZ
test_itemsZ
train_listrH  urq   r  rC   rf  maskZneg_maskr   r  r   split_data_ml100k	  s.    $
  r  explicitc                 C   s   g g g   }}}|dkr&t ||fni }|  D ]}t|d d t|d d  }	}
|dkrlt|d nd}||	 ||
 || |dkr||	g |
 q2|||
|	f< q2||||fS )Nr  r+   r\   r   implicit)r   r  r  rn   rF   r  )rg   r  r  feedbackZusersr]  r  ZinterrH  Z
user_index
item_indexr  r   r   r   load_data_ml100k
  s    "


r  r  rt  c                 C   s   t  \}}}t|||| |\}}t||||\}	}
}}t||||\}}}}tjt|	t|
t|}tjt|t|t|}tjj|dd|d}tjj||d}||||fS )NTrolloverrf   r  rb   r  )	r  r  r  r   rg   rh   r   rJ   ri   )r  r  r  rb   rg   r  r  rf  rg  Ztrain_uZtrain_iZtrain_rr   Ztest_uZtest_iZtest_rr]  r{  r   r   r   r   r   split_and_load_ml100k$
  sR                     r  c                    s  t  }	t jdd|gddgddgd}
t|D ]}t dd	 }}t|D ]\}}|	  g }t|trp|n|g}|D ]}|	t
j|| qzt|dkr|dd
 n|}|d
 }t 4 fddt| D } fddt||D }W 5 Q R X dd |D  |tdd |D  t| 7 }||d jd  |||d jd |d j |	  qLt|dkr|||d |}n|||}||d  }|
|d ||f q.td|d |d  dd|d t|d | |	  ddt|  d S )Nr   r+   r   r\   r   z	test RMSEr   r   r   rE   c                    s   g | ]} | qS r   r   rp   trx  r   r   rs   I
  s     z'train_recsys_rating.<locals>.<listcomp>c                    s   g | ]\}} ||qS r   r   )rp   r  r  r  r   r   rs   J
  s     c                 S   s   g | ]}|  qS r   r   r  r   r   r   rs   K
  s     c                 S   s   g | ]}|  qS r   asnumpyr  r   r   r   rs   L
  s     Z	inter_mattrain loss r  z, test RMSE r   r!  )r   r=   r   r   r   ry   r?   r.   r/   rF   r   rl  rm  r4   r   r   r6   rH   r  r   rU   r   r   rG   r   r%  )r   r   r   r   r&  r   r  	evaluatorr9  r'  r   r   r   r   rq   r  
input_datarO  Z
train_featZtrain_labelr   r  Z	test_rmser)  r   r  r   train_recsys_rating8
  s>    
""
$r  c                       s&   e Zd Zd fdd	Zdd Z  ZS )BPRLossNr   c                    s    t t| jf d dd| d S Nr   )weight
batch_axis)r/  r  rB   rA   r  r  r9  r:  r   r   rB   ]
  s    zBPRLoss.__init__c                 C   s*   || }t jt t|ddd }|S )Nr   T)Zkeepdims)r   rH   r  r   Zsigmoid)rA   ri  r  	distancesr   r   r   r   r>  `
  s    zBPRLoss.forward)Nr   rM   rN   rO   rB   r>  r@  r   r   r:  r   r  \
  s   r  c                       s(   e Zd Zd fdd	Zd	ddZ  ZS )
HingeLossbRecNr   c                    s    t t| jf d dd| d S r  )r/  r  rB   r  r:  r   r   rB   f
  s    zHingeLossbRec.__init__r+   c                 C   s$   || }t t | | d}|S r   )r   rH   r  )rA   ri  r  marginr  r   r   r   r   r>  j
  s    zHingeLossbRec.forward)Nr   )r+   r  r   r   r:  r   r  e
  s   r  c                    st    fddt | d | D } fddt | D }t| d }t|dkrdd||d d   | nd}t||fS )Nc                    s$   g | ]\}}|t  kr||fqS r   r  rp   r   valtest_matrixr   r   rs   p
  s    zhit_and_auc.<locals>.<listcomp>c                    s$   g | ]\}}|t  kr||fqS r   r  r  r  r   r   rs   r
  s    r+   r   rN  )ry   r4   )Z
rankedlistr  r  Zhits_kZhits_allr  aucr   r  r   hit_and_auco
  s
    (r  c                    s  i i g g f\}}}	}
t dd t|D }t|D ]lt|t |t  }g g g g f\}}fdd|D  fdd|D  |tg |d k	r||d d f  |tg tj	j
tj	j| dddd}t|D ]6\}} fd	d|D }|fd
dt| D  qdd |D }tt|}t|dd dd|< dd | D |< t| | d}|	|d  |
|d  q2tt|	tt|
fS )Nc                 S   s   g | ]}|qS r   r   ro   r   r   r   rs   {
  s     z$evaluate_ranking.<locals>.<listcomp>c                    s   g | ]}  |qS r   rF   ro   )item_idsr   r   rs   
  s     c                    s   g | ]}  qS r   r  r   )r  user_idsr   r   rs   
  s     Fr  i   r  c                    s   g | ]}t jj| d dqS r}  r  rN  r
  r   r   rs   
  s   c                    s   g | ]}t  |  qS r   )r/   r  r  rx  r   r   rs   
  s     c                 S   s   g | ]}|D ]}|qqS r   r   )rp   Zsublistr  r   r   r   rs   
  s       c                 S   s   | d S rR  r   )r  r   r   r   r   
  rT  z"evaluate_ranking.<locals>.<lambda>TrU  c                 S   s   g | ]}|d  qS rM  r   )rp   r   r   r   r   rs   
  s     r  r   r+   )r  r   r/   rn   r\  r   rJ   rF   r   rg   ri   rh   ry   r6   r\  r  r  )r   Z
test_inputseqr  r  r  r  Zranked_listZranked_itemshit_rater  Z	all_itemsZ	neg_itemsr:   r  Ztest_data_iterrk  r  Zitem_scorestempr   )r  r  r   r  r  r   evaluate_rankingx
  s<    
  
r  r+   c                    s  t  dd  }}}t jdd|gddgddgd}t|D ]x}t dd }}t|D ]\}}g }|D ]}|tj	||	 qjt
 d fd	d
t|dd  D }fdd
t|dd |d f D } fdd
t||D }W 5 Q R X dd
 |D  |tdd
 |D  t|	 7 }||d jd  |||d jd |d j |  qXt
 D |d | dkr|
||||||	\}}||d ||f W 5 Q R X q:td|d |d  ddt|ddt|d t|d | |  ddt|	  d S )Nr   r   r+   ztest hit rateztest AUCr   r   r   c                    s   g | ]} | qS r   r   r  rx  r   r   rs   
  s     z!train_ranking.<locals>.<listcomp>rE   c                    s   g | ]} | qS r   r   r  rx  r   r   rs   
  s     rD  c                    s   g | ]\}} ||qS r   r   )rp   r  r   r  r   r   rs   
  s     c                 S   s   g | ]}|j d dqS )F)Zretain_graphr  r  r   r   r   rs   
  s     c                 S   s   g | ]}|  qS r   r  r  r   r   r   rs   
  s     r  r  z, test hit rate z, test AUC r\   r   r!  )r   r=   r   r   r   ry   rF   r   rl  rm  r   r   r6   rH   r  r4   r   rU   r   r   rG   Zpredict_moder   r   r%  )r   r   r   r   r&  Ztest_seq_iterr  r  r   r  r  r  Z	eval_stepr'  r  r  r   r   r   r   rq   r  r  rO  Zp_posZp_negr  r   r  r   train_ranking
  s@    
""

   4r  zctr.zipZ(e18327c48c8e8e5c23da714dd614e390d369843fZctrc                   @   s&   e Zd Zd
ddZdd Zdd	 ZdS )
CTRDatasetNr   "   c              	      s  |di   | _ | _| _tdd }|| | _| _tj| j tjd| _	t
|}|D ]}i }	|dd}
t|
| j d kr~qRtddg}d|t|
d < t|
d g|	d< td| j d D ]2}|| |
|   d7  < |	d	g |
|  q|	| j| j< | jd | _qRW 5 Q R X | jd krn| jd krn fd
d| D }dd | D | _dd | D | _| j D ] \}}t|d | j	|d < qxtdt| j	 d d | _d S )Nr   c                   S   s   t tS r   )r   rn   r   r   r   r   r   
  rT  z%CTRDataset.__init__.<locals>.<lambda>rR  r   r  r+   r;   r:   c                    s(   i | ] \}}| fd d|  D qS )c                    s   h | ]\}}| kr|qS r   r   )rp   featr  min_thresholdr   r   	<setcomp>
  s
      z1CTRDataset.__init__.<locals>.<dictcomp>.<setcomp>)r]  )rp   rq   Zcntr  r   r   rZ  
  s    z'CTRDataset.__init__.<locals>.<dictcomp>c                 S   s$   i | ]\}}|d d t |D qS )c                 S   s   i | ]\}}||qS r   r   )rp   r   Zfeat_vr   r   r   rZ  
  s      z2CTRDataset.__init__.<locals>.<dictcomp>.<dictcomp>)ry   rp   rq   Zfeat_valuesr   r   r   rZ  
  s    c                 S   s   i | ]\}}|t |qS r   r  r  r   r   r   rZ  
  s      rE   )r   )Z	NUM_FEATScountrg   r   feat_mapperdefaultsr   r  int64Z
field_dimsr   r  r   r4   rX  rn   r   r  rF   r]  rJ   rK   r  offsets)rA   Z	data_pathr  r  r  Znum_featZ	feat_cntsr   rH  instancer  r  rq   fmr   r  r   rB   
  s>    

zCTRDataset.__init__c                 C   s   | j S r   )r  r@   r   r   r   r,   
  s    zCTRDataset.__len__c                    s>   t  fddt j| d D }| j  j| d fS )Nc                    s0   g | ](\}} j |d   | j|d   qS r{  )r  r   r  )rp   rq   rO  r@   r   r   rs   
  s   z*CTRDataset.__getitem__.<locals>.<listcomp>r:   r;   )r   rJ   ry   rg   r  )rA   r   r  r   r@   r   r   
  s    zCTRDataset.__getitem__)NNr   r  )rM   rN   rO   rB   r,   r   r   r   r   r   r  
  s       
r  c              	   C   s   | j d }tj|f| jd}tj|f| jd}t : || }	||}
||
 }||	|||| d }W 5 Q R X |  |	| t
| S )z=Update discriminator.

    Defined in :numref:`sec_basic_gan`r   r  r\   )rU   r   onesr  r  r   r   r  r   r   r   rH   )r1   Znet_Dnet_Gr   Z	trainer_Drb   r  r  Zreal_Yfake_Xfake_YZloss_Dr   r   r   update_D
  s    

"
r  c           
   	   C   sh   | j d }tj|f| jd}t   || }||}|||}	W 5 Q R X |	  || t|		 S )z9Update generator.

    Defined in :numref:`sec_basic_gan`r   r  )
rU   r   r  r  r   r   r   r   r   rH   )
r  r  r  r   Z	trainer_Grb   r  r  r  Zloss_Gr   r   r   update_G
  s    


r  zpokemon.zipZ(c065c0e2593b8b161a2d7873e42418bf6a21106cZpokemonc                 C   s   | j S r   )r   r   r   r   r   r     rT  r   c                 C   s   | j S r   )r  r  r   r   r   r     rT  c                 O   s   | j ||S r   r  r:   r   r9  r   r   r   r     rT  c                 O   s   | j ||S r   )rV   r  r   r   r   r     rT  c                 O   s   | j ||S r   )Zas_in_contextr  r   r   r   r      rT  c                 O   s   | j ||S r   )rH   r  r   r   r   r   !  rT  c                 O   s   | j ||S r   )r   r  r   r   r   r   "  rT  c                 O   s   | j ||S r   )r   r  r   r   r   r   #  rT  )r   )NNNNNNr'   r'   r(   r   N)T)Nrv   )N)r   )N)r   )N)rL  )rE   )Fr  )F)N)r  )F)Nr  r  )rB  N)r  rT  )r\   )r\   )NN)r   )r  )r   r  )T)T)N)rd  )r  )r|  rO  )r  )r  r  rO  rt  )r+   )Z	USE_MXNETZUSE_PYTORCHZUSE_TENSORFLOWrA  r   ZDATA_URLZmxnetr   r   r   r   r   r   r   Zmxnet.gluonr	   r
   Zmxnet.gluon.data.visionr   r  Z	nn_Modulerq  r   r  r   r|  rC  r  r   r   rC   r   r   Zpandasr  r   ZIPythonr   Z
matplotlibr   r   Zmatplotlib_inliner   modulesrM   r   r   r   r&   r7   r=   rZ   r[   r^   rd   rm   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r	  r  r  r  r*  r+  rK  rP  rQ  r[  rw  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   ZSoftmaxCELossr  r  r  r  r  r  r  r  r  r  r  r  r#  r,  r2  r5  r9  r@  rK  rR  rZ  rd  rf  rg  ro  rv  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rg   ZDatasetr  r  r  r  ZVOC_CLASSESr  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r  r*  r+  r/  r3  r4  r=  rA  rC  rI  rQ  rZ  r[  r^  r_  rc  rh  rk  rx  ry  r|  r  r  r  r  r  r  r  ZLossr  r  r  r  r  r  r  r  r   r!  r  r  r&  rQ  r(  sinhr)  coshr  linspacer  r  rJ   r~  rS   randdotrT   r  rX  r  r  r  abseyer{   rV   tor   r   r   r   r   r   r   <module>   s  $
$          
!			)
%
,
  
	

!
  
(



 

'# 
#

(
   
 
     
  
              
	 

	






#
$
      $	
	!
(
