U
    0J<bE                    @   s*  e  Zd ZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ e	jZddlZddlZddlZddlZddlZddlZddlZddlZddlZddlZddlZddlmZ ddlZddl Z dd	l!m"Z" dd
l#m$Z% ddl&m'Z' ej(e) Z*ddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd Z+dddZ,dd Z-dddZ.G dd dZ/dd Z0dd Z1dd Z2dd  Z3dd"d#Z4d$d% Z5dd'd(Z6d)d* Z7dd+d,Z8d-d. Z9d/d0 Z:G d1d2 d2Z;d3d4 Z<G d5d6 d6Z=d7d8 Z>dd:d;Z?d<d= Z@e  Zd ZejABd>d?fd@dAZCddBdCZDdDdE ZEedF dGfedH< edI dJfedK< ddLdMZFdNdO ZGdPdQ ZHddRdSZIdTdU ZJG dVdW dWe	jZKe*jdX dYfe*jdZ< d[d\ ZLdd^d_ZMG d`da daZNdbdc ZOddedfZPdgdh ZQdidj ZRG dkdl dlZSddodpZTG dqdr drZUdsdt ZVdudv ZWdwdx ZXddydzZYG d{d| d|e	jZZe*jd} d~fe*jd< dd Z[dd Z\dddZ]dd Z^dd Z_dd Z`dddZaG dd de	jZbG dd de	jZcG dd de	jZdG dd de*jbZedddZfG dd de	jgZhdd ZidddZjdd ZkdddZldd ZmG dd de	jZnG dd de	jZoG dd de*jcZpG dd de	jZqdd Zrdd ZsG dd de	jZtG dd de	jZuG dd de	jZvG dd de	jZwG dd de*jbZxdd ZydddZzddÄ Z{e*jd dfe*jd< dddʄZ|ddd̈́Z}dddЄZ~G dd҄ d҃ZddԄ ZdddׄZddل Ze*G fddۄZe*jd dfe*jd< dd Zdd Zdd Zdd ZdddZdd ZdddZdddZdd Zdd Zdd ZdddZe*jd dfe*jd< dddZG dd  d ejjjZdd Ze*jd dfe*jd< dddZdddgdddgdddgdddgdddgdddgdddgdddgd	ddgd
ddgd	ddgd
ddgd	ddgd
ddgd	ddgd
ddgdd	dgdd	dgdd
dgdd
dgdd	dggZdddddddddddddddddddddgZd d! Zd"d# Zd$d% ZG d&d' d'ejjjZd(d) Ze*jd* d+fe*jd,< d-d. Zd/d0 Zd1d2 Zd3d4 Ze*jd5 d6fe*jd7< e*jd8 d9fe*jd:< d;d< Zd=d> Zd?d@ ZG dAdB dBZdCdD ZdEdF ZdGdH Ze*jdI dJfe*jdK< e*jdL dMfe*jdN< e*jdO dPfe*jdQ< e*jdR dSfe*jdT< G dUdV dVZddWdXZG dYdZ dZe	jZG d[d\ d\e	jZG d]d^ d^e	jZG d_d` d`e	jZdae*jdb< dcdd Zdedf Zdgdh Zdidj Zdkdl Zdmdn ZG dodp dpejjjZdqdr Zdsdt Zdue*jdv< dwdx Zddzd{Zd|d} Zd~e*jd< dd ZG dd dejjjZdddZdd Zdd Zdd Ze*jd dfe*jd< ejZejZejZejZejZejZejZejZejZejZejZejZejZejZejZejZejZejZejZejZejZejZ֐dd Zdd Zאdd Zؐdd Zِdd Zڐdd Zېdd Zܐdd ZdS (  z,http://d2l-data.s3-accelerate.amazonaws.com/    N)Image)nn)
functionaldata)
transforms)defaultdict)display)pyplot)backend_inlinec                   C   s   t d dS )zWUse the svg format to display a plot in Jupyter.

    Defined in :numref:`sec_calculus`svgN)r   set_matplotlib_formats r   r   8/home/d2l-worker/workspace/d2l-en-release/./d2l/torch.pyuse_svg_display1   s    r   g      @      @c                 C   s   t   | tjjd< dS )zJSet the figure size for matplotlib.

    Defined in :numref:`sec_calculus`zfigure.figsizeN)r   d2lpltrcParamsfigsizer   r   r   set_figsize7   s    r   c                 C   sV   |  | | | | | | | | | | | |rJ| | |   dS )zCSet the axes for matplotlib.

    Defined in :numref:`sec_calculus`N)
set_xlabel
set_ylabel
set_xscale
set_yscaleset_xlimset_ylimlegendgrid)axesxlabelylabelxlimylimxscaleyscaler   r   r   r   set_axes>   s    






r(   linear-zm--zg-.zr:c              	   C   s   |dkrg }t |
 |r|ntj }dd }|| r<| g} |dkrZg gt|  |  } }n||rh|g}t| t|kr| t| } |  t| ||	D ].\}}}t|r|||| q||| qt|||||||| dS )z8Plot data points.

    Defined in :numref:`sec_calculus`Nc                 S   s.   t | dr| jdkp,t| to,t | d d S )Nndim   r   __len__)hasattrr,   
isinstancelistXr   r   r   has_one_axisY   s    zplot.<locals>.has_one_axis)	r   r   r   gcalenclazipplotr(   )r3   Yr"   r#   r   r$   r%   r&   r'   fmtsr   r!   r4   xyfmtr   r   r   r9   L   s&    r9   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )TimerzRecord multiple running times.c                 C   s   g | _ |   dS )z(Defined in :numref:`subsec_linear_model`N)timesstartselfr   r   r   __init__o   s    zTimer.__init__c                 C   s   t   | _dS )zStart the timer.N)timetikrB   r   r   r   rA   t   s    zTimer.startc                 C   s    | j t | j  | j d S )z-Stop the timer and record the time in a list.)r@   appendrE   rF   rB   r   r   r   stopx   s    z
Timer.stopc                 C   s   t | jt| j S )zReturn the average time.)sumr@   r6   rB   r   r   r   avg}   s    z	Timer.avgc                 C   s
   t | jS )zReturn the sum of time.)rJ   r@   rB   r   r   r   rJ      s    z	Timer.sumc                 C   s   t | j  S )zReturn the accumulated time.)nparrayr@   cumsumtolistrB   r   r   r   rN      s    zTimer.cumsumN)
__name__
__module____qualname____doc__rD   rA   rI   rK   rJ   rN   r   r   r   r   r?   m   s   r?   c                 C   sJ   t dd|t| f}t || | }|t dd|j7 }|t |dfS )zIGenerate y = Xw + b + noise.

    Defined in :numref:`sec_linear_scratch`r   r-   {Gz?rG   r-   )r   normalr6   matmulshapereshape)wbnum_examplesr3   r=   r   r   r   synthetic_data   s    r]   c                 C   s   t | || S )zIThe linear regression model.

    Defined in :numref:`sec_linear_scratch`)r   rW   )r3   rZ   r[   r   r   r   linreg   s    r^   c                 C   s   | t || j d d S )z:Squared loss.

    Defined in :numref:`sec_linear_scratch`   )r   rY   rX   )y_hatr=   r   r   r   squared_loss   s    ra   c              	   C   s>   t  , | D ] }|||j | 8 }|j  qW 5 Q R X dS )zSMinibatch stochastic gradient descent.

    Defined in :numref:`sec_linear_scratch`N)torchno_gradgradZzero_)paramslr
batch_sizeparamr   r   r   sgd   s    
ri   Tc                 C   s   t j|  }t j|||dS )zOConstruct a PyTorch data iterator.

    Defined in :numref:`sec_linear_concise`shuffle)r   ZTensorDataset
DataLoader)data_arraysrg   is_traindatasetr   r   r   
load_array   s    
rp   c              
      s*   ddddddddd	d
g
  fdd| D S )z]Return text labels for the Fashion-MNIST dataset.

    Defined in :numref:`sec_fashion_mnist`zt-shirttrouserpulloverdresscoatsandalshirtsneakerbagz
ankle bootc                    s   g | ]} t | qS r   )int.0itext_labelsr   r   
<listcomp>   s     z,get_fashion_mnist_labels.<locals>.<listcomp>r   )labelsr   r}   r   get_fashion_mnist_labels   s    
    r         ?c                 C   s   || || f}t jj|||d\}}| }tt|| D ]b\}\}	}
t|
rb|	|
	  n
|	|
 |	j
 d |	j
 d |r<|	||  q<|S )zBPlot a list of images.

    Defined in :numref:`sec_fashion_mnist`r   F)r   r   subplotsflatten	enumerater8   rb   	is_tensorimshownumpyr!   	get_xaxisset_visible	get_yaxis	set_title)imgsnum_rowsnum_colstitlesscaler   _r!   r|   aximgr   r   r   show_images   s    

r   c                   C   s   dS )zMUse 4 processes to read the data.

    Defined in :numref:`sec_fashion_mnist`   r   r   r   r   r   get_dataloader_workers   s    r   c                 C   sz   t  g}|r |dt | t |}tjjdd|dd}tjjdd|dd}tj	|| dt
 dtj	|| dt
 dfS )zlDownload the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_fashion_mnist`r   z../dataT)roottrain	transformdownloadFrk   num_workers)r   ToTensorinsertResizeComposetorchvisiondatasetsFashionMNISTr   rl   r   )rg   resizetransmnist_train
mnist_testr   r   r   load_data_fashion_mnist   s.    

      

r   c                 C   sT   t | jdkr*| jd dkr*tj| dd} t| |j|k}ttt||jS )zXCompute the number of correct predictions.

    Defined in :numref:`sec_softmax_scratch`r-   axis)r6   rX   r   argmaxastypedtypefloat
reduce_sum)r`   r=   cmpr   r   r   accuracy   s    r   c              	   C   sl   t | tjjr|   td}t 0 |D ]$\}}|t| ||t	
| q,W 5 Q R X |d |d  S )z\Compute the accuracy for a model on a dataset.

    Defined in :numref:`sec_softmax_scratch`r_   r   r-   )r0   rb   r   ModuleevalAccumulatorrc   addr   r   size)net	data_itermetricr3   r=   r   r   r   evaluate_accuracy   s    
(r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )r   z)For accumulating sums over `n` variables.c                 C   s   dg| | _ dS )(Defined in :numref:`sec_softmax_scratch`        Nr   )rC   nr   r   r   rD      s    zAccumulator.__init__c                 G   s   dd t | j|D | _d S )Nc                 S   s   g | ]\}}|t | qS r   r   )r{   ar[   r   r   r   r      s     z#Accumulator.add.<locals>.<listcomp>)r8   r   rC   argsr   r   r   r      s    zAccumulator.addc                 C   s   dgt | j | _d S )Nr   )r6   r   rB   r   r   r   reset   s    zAccumulator.resetc                 C   s
   | j | S Nr   rC   idxr   r   r   __getitem__  s    zAccumulator.__getitem__N)rP   rQ   rR   rS   rD   r   r   r   r   r   r   r   r      s
   r   c           	      C   s   t | tjjr|   td}|D ]\}}| |}|||}t |tjjrh|  |	 
  |  n| 
  ||jd  |t| t|||  q"|d |d  |d |d  fS )zUThe training loop defined in Chapter 3.

    Defined in :numref:`sec_softmax_scratch`   r   r_   r-   )r0   rb   r   r   r   r   optim	Optimizer	zero_gradmeanbackwardsteprJ   rX   r   r   r   numel)	r   
train_iterlossupdaterr   r3   r=   r`   lr   r   r   train_epoch_ch3  s    

"r   c                   @   s"   e Zd ZdZdddZd	d
 ZdS )AnimatorzFor plotting data in animation.Nr)   r*   r-   r   c                    sz    dkrg  t   t jj|	|
|d\__|	|
 dkrDjg_ fdd_dd|  ___	dS )r   Nr   r-   c                
      s   t jd  S Nr   )r   r(   r!   r   r   rC   r"   r$   r&   r#   r%   r'   r   r   <lambda>/  s          z#Animator.__init__.<locals>.<lambda>)
r   r   r   r   figr!   config_axesr3   r:   r;   )rC   r"   r#   r   r$   r%   r&   r'   r;   nrowsncolsr   r   r   r   rD   "  s    
zAnimator.__init__c                 C   s  t |ds|g}t|}t |ds,|g| }| jsFdd t|D | _| js`dd t|D | _tt||D ]<\}\}}|d k	rn|d k	rn| j| | | j| | qn| jd 	  t| j| j| j
D ]\}}}| jd ||| q|   t| j tjdd d S )Nr.   c                 S   s   g | ]}g qS r   r   r{   r   r   r   r   r   ;  s     z Animator.add.<locals>.<listcomp>c                 S   s   g | ]}g qS r   r   r   r   r   r   r   =  s     r   T)wait)r/   r6   r3   ranger:   r   r8   rH   r!   r7   r;   r9   r   r	   r   clear_output)rC   r<   r=   r   r|   r   r[   r>   r   r   r   r   3  s&    


zAnimator.add)NNNNNr)   r)   r*   r-   r-   r   )rP   rQ   rR   rS   rD   r   r   r   r   r   r      s               
r   c                 C   s   t dd|gddgdddgd}t|D ]2}t| |||}t| |}	||d ||	f  q&|\}
}|
d	k srt|
|dkr|d
kst||	dkr|	d
kst|	dS )zSTrain a model (defined in Chapter 3).

    Defined in :numref:`sec_softmax_scratch`epochr-   g333333?g?
train loss	train acctest accr"   r$   r%   r         ?gffffff?N)r   r   r   r   r   AssertionError)r   r   	test_iterr   
num_epochsr   animatorr   train_metricstest_acc
train_loss	train_accr   r   r   	train_ch3I  s    
r      c                 C   s|   |D ]\}} qqt |}t t j| |dd}dd t||D }t jt |d| |ddfd||d| d dS )	zTPredict labels (defined in Chapter 3).

    Defined in :numref:`sec_softmax_scratch`r-   r   c                 S   s   g | ]\}}|d  | qS )
r   )r{   truepredr   r   r   r   `  s     zpredict_ch3.<locals>.<listcomp>r      )r   N)r   r   r   r8   r   rY   )r   r   r   r3   r=   truespredsr   r   r   r   predict_ch3X  s    
   
r   c                 C   s`   t d}|D ]@\}}| |}t ||j}|||}|t |t | q|d |d  S )z`Evaluate the loss of a model on the given dataset.

    Defined in :numref:`sec_model_selection`r_   r   r-   )r   r   rY   rX   r   r   r   )r   r   r   r   r3   r=   outr   r   r   r   evaluate_lossd  s    

r   z..r   c           	   	   C   s   | t kst|  dt  dt |  \}}tj|dd tj||dd }tj|rt	 }t
|d"}|d}|s~q|| qnW 5 Q R X | |kr|S td	| d
| d tj|ddd}t
|d}||j W 5 Q R X |S )zmDownload a file inserted into DATA_HUB, return the local filename.

    Defined in :numref:`sec_kaggle_house`z does not exist in .Texist_ok/rG   rbi   zDownloading z from z...)streamverifywb)DATA_HUBr   osmakedirspathjoinsplitexistshashlibsha1openreadupdate	hexdigestprintrequestsgetwritecontent)	name	cache_dirurl	sha1_hashfnamer
  fr   rr   r   r   r   s  s$    
r   c                 C   s|   t | }tj|}tj|\}}|dkr:t|d}n"|dkrPt|d}nds\t	d|
| |rxtj||S |S )zODownload and extract a zip/tar file.

    Defined in :numref:`sec_kaggle_house`z.zipr  )z.tarz.gzFz$Only zip/tar files can be extracted.)r   r  r  dirnamesplitextzipfileZipFiletarfiler  r   
extractallr  )r  folderr  base_dirdata_dirextfpr   r   r   download_extract  s    
r&  c                  C   s   t D ]} t|  qdS )zNDownload all files in the DATA_HUB.

    Defined in :numref:`sec_kaggle_house`N)r  r   )r  r   r   r   download_all  s    r'  zkaggle_house_pred_train.csv(585e9cc93e70b39160e7921475f9bcd7d31219cekaggle_house_trainzkaggle_house_pred_test.csv(fa19780a7b011d9b009e8bff8e99922a8ee2eb90kaggle_house_testc                 C   s,   t j | d kr"t d|  S t dS )zVReturn gpu(i) if exists, otherwise return cpu().

    Defined in :numref:`sec_use_gpu`r-   cuda:cpu)rb   cudadevice_countdevice)r|   r   r   r   try_gpu  s    r1  c                  C   s,   dd t tj D } | r | S tdgS )z^Return all available GPUs, or [cpu(),] if no GPU exists.

    Defined in :numref:`sec_use_gpu`c                 S   s   g | ]}t d | qS )r,  )rb   r0  rz   r   r   r   r     s   z try_all_gpus.<locals>.<listcomp>r-  )r   rb   r.  r/  r0  devicesr   r   r   try_all_gpus  s    r4  c              	   C   s   |j \}}t| j d | d | j d | d f}t|j d D ]F}t|j d D ]2}t| ||| ||| f | |||f< qTqB|S )zFCompute 2D cross-correlation.

    Defined in :numref:`sec_conv_layer`r   r-   )rX   r   zerosr   r   )r3   KhrZ   r:   r|   jr   r   r   corr2d  s    
*2r9  c              	      s   t | tjr*|    s*tt|  j t	d}t
 d |D ]X\}}t |trh fdd|D }n
| }| }|t| ||t| qBW 5 Q R X |d |d  S )z^Compute the accuracy for a model on a dataset using a GPU.

    Defined in :numref:`sec_lenet`r_   c                    s   g | ]}|  qS r   tor{   r<   r0  r   r   r     s     z)evaluate_accuracy_gpu.<locals>.<listcomp>r   r-   )r0   r   r   r   nextiter
parametersr0  r   r   rb   rc   r1   r;  r   r   r   )r   r   r0  r   r3   r=   r   r=  r   evaluate_accuracy_gpu  s    




*rA  c                 C   s  dd }|  | td| | | tjj|  |d}t }t	j
dd|gddd	gd
}	t	 t| }
}t|D ]6}t	d}|   t|D ]\}\}}|
  |  |||| }}| |}|||}|  |  t , |||jd  t	|||jd  W 5 Q R X |
  |d |d  }|d |d  }|d |d  dksn||d kr|	||d |  ||df qt| |}|	|d dd|f qvtd|dd|dd|d t|d | |
  ddt|  dS )zTTrain a model with a GPU (defined in Chapter 6).

    Defined in :numref:`sec_lenet`c                 S   s.   t | tjkst | tjkr*tj| j d S r   )typer   LinearConv2dinitxavier_uniform_weightmr   r   r   init_weights  s    ztrain_ch6.<locals>.init_weightsztraining onrf   r   r-   r   r   r   )r"   r$   r   r   r   r_      Nloss .3f, train acc , test acc .1f examples/sec on )applyr  r;  rb   r   SGDr@  r   CrossEntropyLossr   r   r?   r6   r   r   r   r   rA   r   r   r   rc   r   rX   r   rI   rA  rJ   str)r   r   r   r   rf   r0  rJ  	optimizerr   r   timernum_batchesr   r   r|   r3   r=   r`   r   train_lr   r   r   r   r   	train_ch6  sD    





0"
 r[  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	ResidualzThe Residual block of ResNet.Fr-   c                    sp   t    tj||dd|d| _tj||ddd| _|rNtj||d|d| _nd | _t|| _t|| _	d S )Nr   r-   )kernel_sizepaddingstride)r]  r^  )r]  r_  )
superrD   r   rD  conv1conv2conv3BatchNorm2dbn1bn2)rC   Zinput_channelsnum_channelsuse_1x1convstrides	__class__r   r   rD     s$    
    
zResidual.__init__c                 C   sH   t | | |}| | |}| jr6| |}||7 }t |S r   )Frelure  ra  rf  rb  rc  rC   r3   r:   r   r   r   forward  s    
zResidual.forward)Fr-   rP   rQ   rR   rS   rD   ro  __classcell__r   r   rj  r   r\    s
      r\  ztimemachine.txt(090b5e7e70c295757f55df93cb0a180b9691891atime_machinec               	   C   s2   t tdd} |  }W 5 Q R X dd |D S )ziLoad the time machine dataset into a list of text lines.

    Defined in :numref:`sec_text_preprocessing`rs  r  c                 S   s"   g | ]}t d d|  qS )z
[^A-Za-z]+ )resubstriplowerr{   liner   r   r   r   '  s     z%read_time_machine.<locals>.<listcomp>)r  r   r   	readlines)r  linesr   r   r   read_time_machine!  s    r}  wordc                 C   s<   |dkrdd | D S |dkr,dd | D S t d|  dS )z`Split text lines into word or character tokens.

    Defined in :numref:`sec_text_preprocessing`r~  c                 S   s   g | ]}|  qS r   r  ry  r   r   r   r   .  s     ztokenize.<locals>.<listcomp>charc                 S   s   g | ]}t |qS r   )r1   ry  r   r   r   r   0  s     zERROR: unknown token type: N)r  )r|  tokenr   r   r   tokenize)  s
    r  c                   @   sJ   e Zd ZdZdddZdd Zdd	 Zd
d Zedd Z	edd Z
dS )VocabzVocabulary for text.Nr   c                 C   s   |dkrg }|dkrg }t |}t| dd dd| _dg| | _dd t| jD | _| jD ]>\}}||k rt q|| jkr`| j| t| jd	 | j|< q`dS )
z+Defined in :numref:`sec_text_preprocessing`Nc                 S   s   | d S Nr-   r   r<   r   r   r   r   >      z Vocab.__init__.<locals>.<lambda>T)keyreverse<unk>c                 S   s   i | ]\}}||qS r   r   r{   r   r  r   r   r   
<dictcomp>B  s    z"Vocab.__init__.<locals>.<dictcomp>r-   )	count_corpussorteditems_token_freqsidx_to_tokenr   token_to_idxrH   r6   )rC   tokensmin_freqreserved_tokenscounterr  freqr   r   r   rD   6  s$    
zVocab.__init__c                 C   s
   t | jS r   r6   r  rB   r   r   r   r.   K  s    zVocab.__len__c                    s0   t |ttfs j| jS  fdd|D S )Nc                    s   g | ]}  |qS r   )r   r{   r  rB   r   r   r   Q  s     z%Vocab.__getitem__.<locals>.<listcomp>)r0   r1   tupler  r  unk)rC   r  r   rB   r   r   N  s    zVocab.__getitem__c                    s*   t |ttfs j| S  fdd|D S )Nc                    s   g | ]} j | qS r   r  )r{   indexrB   r   r   r   V  s     z#Vocab.to_tokens.<locals>.<listcomp>)r0   r1   r  r  )rC   indicesr   rB   r   	to_tokensS  s    
zVocab.to_tokensc                 C   s   dS r   r   rB   r   r   r   r  X  s    z	Vocab.unkc                 C   s   | j S r   )r  rB   r   r   r   token_freqs\  s    zVocab.token_freqs)Nr   N)rP   rQ   rR   rS   rD   r.   r   r  propertyr  r  r   r   r   r   r  4  s   

r  c                 C   s2   t | dkst| d tr(dd | D } t| S )zICount token frequencies.

    Defined in :numref:`sec_text_preprocessing`r   c                 S   s   g | ]}|D ]}|qqS r   r   r{   rz  r  r   r   r   r   g  s       z count_corpus.<locals>.<listcomp>)r6   r0   r1   collectionsCounter)r  r   r   r   r  `  s    r  rG   c                    sF   t  }t|d}t|  fdd|D }| dkr>|d|  }| fS )zuReturn token indices and the vocabulary of the time machine dataset.

    Defined in :numref:`sec_text_preprocessing`r  c                    s   g | ]}|D ]} | qqS r   r   r  vocabr   r   r   s  s       z,load_corpus_time_machine.<locals>.<listcomp>r   N)r}  r  r  )
max_tokensr|  r  corpusr   r  r   load_corpus_time_machinej  s    
r  c           
      #   s    t dd d  t d  }ttd| }t |  fdd|| }td|| |D ]N}||||  }fdd|D }fdd|D }	t|t|	fV  qldS )	zhGenerate a minibatch of subsequences using random sampling.

    Defined in :numref:`sec_language_model`r   r-   Nc                    s    | |   S r   r   )pos)r  	num_stepsr   r   r     s    z"seq_data_iter_random.<locals>.datac                    s   g | ]} |qS r   r   r{   r8  r   r   r   r     s     z(seq_data_iter_random.<locals>.<listcomp>c                    s   g | ]} |d  qS r-   r   r  r   r   r   r     s     )randomrandintr6   r1   r   rk   r   tensor)
r  rg   r  num_subseqsinitial_indicesrY  r|   initial_indices_per_batchr3   r:   r   )r  r   r  r   seq_data_iter_randomx  s    
r  c                 c   s   t d|}t| | d | | }t| |||  }t| |d |d |  }||d||d }}|jd | }td|| |D ]>}|dd||| f }	|dd||| f }
|	|
fV  qdS )zpGenerate a minibatch of subsequences using sequential partitioning.

    Defined in :numref:`sec_language_model`r   r-   rG   N)r  r  r6   r   r  rY   rX   r   )r  rg   r  offset
num_tokensXsYsrY  r|   r3   r:   r   r   r   seq_data_iter_sequential  s    r  c                   @   s    e Zd ZdZdd Zdd ZdS )SeqDataLoaderz"An iterator to load sequence data.c                 C   s:   |rt j| _nt j| _t |\| _| _|| | _| _dS )z'Defined in :numref:`sec_language_model`N)	r   r  data_iter_fnr  r  r  r  rg   r  )rC   rg   r  use_random_iterr  r   r   r   rD     s
    
zSeqDataLoader.__init__c                 C   s   |  | j| j| jS r   )r  r  rg   r  rB   r   r   r   __iter__  s    zSeqDataLoader.__iter__N)rP   rQ   rR   rS   rD   r  r   r   r   r   r    s   	r  F'  c                 C   s   t | |||}||jfS )zpReturn the iterator and the vocabulary of the time machine dataset.

    Defined in :numref:`sec_language_model`)r  r  )rg   r  r  r  r   r   r   r   load_data_time_machine  s       r  c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	RNNModelScratchz%A RNN Model implemented from scratch.c                 C   s.   || | _ | _||||| _|| | _| _dS )z$Defined in :numref:`sec_rnn_scratch`N)
vocab_sizenum_hiddensre   
init_state
forward_fn)rC   r  r  r0  
get_paramsr  r  r   r   r   rD     s    zRNNModelScratch.__init__c                 C   s(   t |j| jtj}| ||| jS r   )	rl  one_hotTr  rB  rb   float32r  re   rC   r3   stater   r   r   __call__  s    zRNNModelScratch.__call__c                 C   s   |  || j|S r   )r  r  )rC   rg   r0  r   r   r   begin_state  s    zRNNModelScratch.begin_stateN)rP   rQ   rR   rS   rD   r  r  r   r   r   r   r    s   r  c           	         s   |j d d}| d  g fdd}| dd D ]"}|| |\}}|  q6t|D ]0}|| |\}}t|jddd qbdfd	d
D S )zYGenerate new characters following the `prefix`.

    Defined in :numref:`sec_rnn_scratch`r-   rg   r0  r   c                      s   t t jd g ddS )NrG   r=  r-   r-   )r   rY   r  r   )r0  outputsr   r   r     s
    zpredict_ch8.<locals>.<lambda>Ndim c                    s   g | ]} j | qS r   r  rz   r  r   r   r     s     zpredict_ch8.<locals>.<listcomp>)r  rH   r   ry   r   rY   r  )	prefix	num_predsr   r  r0  r  	get_inputr=   r   r   )r0  r  r  r   predict_ch8  s    r  c                 C   sn   t | tjr dd |  D }n| j}ttdd |D }||krj|D ]}|jdd  || 9  < qJdS )z<Clip the gradient.

    Defined in :numref:`sec_rnn_scratch`c                 S   s   g | ]}|j r|qS r   requires_gradr{   pr   r   r   r     s      z!grad_clipping.<locals>.<listcomp>c                 s   s   | ]}t |jd  V  qdS )r_   N)rb   rJ   rd   r  r   r   r   	<genexpr>  s     z grad_clipping.<locals>.<genexpr>N)	r0   r   r   r@  re   rb   sqrtrJ   rd   )r   thetare   normrh   r   r   r   grad_clipping  s    r  c                 C   sN  dt   }}t d}|D ]\}	}
|dks2|rH| j|	jd |d}n2t| tjrht|tsh|	  n|D ]}|	  ql|
j
d}|	||| }	}| |	|\}}|||  }t|tjjr|  |  t| d |  n|  t| d |dd ||t | t | qt|d |d  |d |  fS )z^Train a net within one epoch (defined in Chapter 8).

    Defined in :numref:`sec_rnn_scratch`Nr_   r   r  rG   r-   rg   )r   r?   r   r  rX   r0   r   r   r  Zdetach_r  rY   r;  longr   rb   r   r   r   r   r  r   r   r   mathexprI   )r   r   r   r   r0  r  r  rX  r   r3   r:   sr=   r`   r   r   r   r   train_epoch_ch8  s.    






r  c                    s   t  }tjdddgd|gd}tt jr@tj	 }	nfdd}	 fdd}
t
|D ]H}t|||	 |\}}|d	 d d
krft|
d ||d	 |g qftd|dd|ddt   t|
d t|
d dS )zOTrain a model (defined in Chapter 8).

    Defined in :numref:`sec_rnn_scratch`r   
perplexityr   
   )r"   r#   r   r$   c                    s   t j | S r   )r   ri   re   r  )rf   r   r   r   r     r  ztrain_ch8.<locals>.<lambda>c                    s   t | d S )N2   )r  )r  )r0  r   r  r   r   r     r  r-   r   ztime travellerzperplexity rQ  ,  tokens/sec on 	travellerN)r   rU  r   r   r0   r   rb   r   rT  r@  r   r  r  r   rV  )r   r   r  rf   r   r0  r  r   r   r   predictr   pplspeedr   )r0  rf   r   r  r   	train_ch8  s0          "r  c                       s2   e Zd ZdZ fddZdd Zd	ddZ  ZS )
RNNModelz8The RNN model.

    Defined in :numref:`sec_rnn-concise`c                    sj   t t| jf | || _|| _| jj| _| jjsJd| _t	
| j| j| _nd| _t	
| jd | j| _d S )Nr-   r_   )r`  r  rD   rnnr  hidden_sizer  bidirectionalnum_directionsr   rC  r)   )rC   	rnn_layerr  kwargsrj  r   r   rD   +  s    
zRNNModel.__init__c                 C   sR   t |j | j}|tj}| ||\}}| 	|
d|jd f}||fS )NrG   )rl  r  r  r  r  r;  rb   r  r  r)   rY   rX   )rC   inputsr  r3   r:   outputr   r   r   ro  9  s
    zRNNModel.forwardr-   c                 C   sr   t | jtjs.tj| j| jj || jf|dS tj| j| jj || jf|dtj| j| jj || jf|dfS d S )Nr=  )	r0   r  r   LSTMrb   r5  r  
num_layersr  )rC   r0  rg   r   r   r   r  C  s,       zRNNModel.begin_state)r-   )rP   rQ   rR   rS   rD   ro  r  rq  r   r   rj  r   r  '  s   
r  zfra-eng.zip(94646ad1522d915e7b0f9296181140edcf86a4f5fra-engc               
   C   s>   t d} ttj| dd}| W  5 Q R  S Q R X dS )zRLoad the English-French dataset.

    Defined in :numref:`sec_machine_translation`r  zfra.txtr  N)r   r&  r  r  r  r  r  )r#  r  r   r   r   read_data_nmtU  s    
r  c                    sB   dd   dd dd  fddtD }d|S )	zXPreprocess the English-French dataset.

    Defined in :numref:`sec_machine_translation`c                 S   s   | t dko|dkS )Nz,.!?rt  )set)r  	prev_charr   r   r   no_spacea  s    z preprocess_nmt.<locals>.no_spaceu    rt      c                    s6   g | ].\}}|d kr. ||d  r.d| n|qS )r   r-   rt  r   )r{   r|   r  r  textr   r   r   h  s   z"preprocess_nmt.<locals>.<listcomp>r  )replacerx  r   r  )r  r   r   r  r   preprocess_nmt]  s    r  c                 C   sx   g g  }}t | dD ]V\}}|r0||kr0 qp|d}t|dkr||d d ||d d q||fS )zVTokenize the English-French dataset.

    Defined in :numref:`sec_machine_translation`r   	r_   r   rt  r-   )r   r  r6   rH   )r  r\   sourcetargetr|   rz  partsr   r   r   tokenize_nmtl  s    

r	  c                 C   st   t   t jdd |D dd |D g\}}}t j| t j| |d jD ]}|d qTt j|  dS )z[Plot the histogram for list length pairs.

    Defined in :numref:`sec_machine_translation`c                 S   s   g | ]}t |qS r   r6   r{   r   r   r   r   r     s     z+show_list_len_pair_hist.<locals>.<listcomp>r-   r   N)	r   r   r   histr"   r#   patches	set_hatchr   )r   r"   r#   xlistylistr   r  patchr   r   r   show_list_len_pair_histz  s    
r  c                 C   s.   t | |kr| d| S | |g|t |    S )zLTruncate or pad sequences.

    Defined in :numref:`sec_machine_translation`Nr
  )rz  r  padding_tokenr   r   r   truncate_pad  s    r  c                    sd   fdd| D } fdd| D } t  fdd| D }t t |d kt jd}||fS )zrTransform text sequences of machine translation into minibatches.

    Defined in :numref:`subsec_mt_data_loading`c                    s   g | ]} | qS r   r   r  r  r   r   r     s     z#build_array_nmt.<locals>.<listcomp>c                    s   g | ]}| d  g qS )<eos>r   r  r  r   r   r     s     c                    s   g | ]}t | d  qS <pad>)r  r  r  r  r   r   r     s     r  r-   )r   r  r   r   int32)r|  r  r  rM   	valid_lenr   r  r   build_array_nmt  s     r  X  c                 C   s   t t }t||\}}tj|ddddgd}tj|ddddgd}t|||\}}	t|||\}
}||	|
|f}t|| }|||fS )zuReturn the iterator and the vocabularies of the translation dataset.

    Defined in :numref:`subsec_mt_data_loading`r_   r  <bos>r  r  r  )r  r  r	  r   r  r  rp   )rg   r  r\   r  r  r  	src_vocab	tgt_vocab	src_arraysrc_valid_len	tgt_arraytgt_valid_lenrm   r   r   r   r   load_data_nmt  s    
r%  c                       s(   e Zd ZdZ fddZdd Z  ZS )Encoderz@The base encoder interface for the encoder-decoder architecture.c                    s   t t| jf | d S r   )r`  r&  rD   rC   r  rj  r   r   rD     s    zEncoder.__init__c                 G   s   t d S r   NotImplementedError)rC   r3   r   r   r   r   ro    s    zEncoder.forwardrp  r   r   rj  r   r&    s   r&  c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )DecoderznThe base decoder interface for the encoder-decoder architecture.

    Defined in :numref:`sec_encoder-decoder`c                    s   t t| jf | d S r   )r`  r*  rD   r'  rj  r   r   rD     s    zDecoder.__init__c                 G   s   t d S r   r(  )rC   enc_outputsr   r   r   r   r    s    zDecoder.init_statec                 C   s   t d S r   r(  r  r   r   r   ro    s    zDecoder.forward)rP   rQ   rR   rS   rD   r  ro  rq  r   r   rj  r   r*    s   r*  c                       s(   e Zd ZdZ fddZdd Z  ZS )EncoderDecoderzbThe base class for the encoder-decoder architecture.

    Defined in :numref:`sec_encoder-decoder`c                    s"   t t| jf | || _|| _d S r   )r`  r,  rD   encoderdecoder)rC   r-  r.  r  rj  r   r   rD     s    zEncoderDecoder.__init__c                 G   s.   | j |f| }| jj|f| }| ||S r   )r-  r.  r  )rC   enc_Xdec_Xr   r+  	dec_stater   r   r   ro    s    zEncoderDecoder.forwardrp  r   r   rj  r   r,    s   r,  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )Seq2SeqEncoderzXThe RNN encoder for sequence to sequence learning.

    Defined in :numref:`sec_seq2seq`r   c                    s8   t t| jf | t||| _tj||||d| _d S )N)dropout)r`  r2  rD   r   	Embedding	embeddingGRUr  )rC   r  
embed_sizer  r  r3  r  rj  r   r   rD     s
    
zSeq2SeqEncoder.__init__c                 G   s.   |  |}|ddd}| |\}}||fS )Nr-   r   r_   )r5  permuter  )rC   r3   r   r  r  r   r   r   ro    s    
zSeq2SeqEncoder.forward)r   rp  r   r   rj  r   r2    s    r2  c                 C   sH   |  d}tj|tj| jddddf |dddf k }|| | < | S )zSMask irrelevant entries in sequences.

    Defined in :numref:`sec_seq2seq_decoder`r-   r   r0  N)r   rb   aranger  r0  )r3   r  valuemaxlenmaskr   r   r   sequence_mask  s    



r>  c                       s    e Zd ZdZ fddZ  ZS )MaskedSoftmaxCELosszXThe softmax cross-entropy loss with masks.

    Defined in :numref:`sec_seq2seq_decoder`c                    sJ   t |}t||}d| _tt| |ddd|}|| jdd}|S )Nnoner   r_   r-   r  )	rb   	ones_liker>  	reductionr`  r?  ro  r8  r   )rC   r   labelr  weightsZunweighted_lossZweighted_lossrj  r   r   ro    s    


 zMaskedSoftmaxCELoss.forward)rP   rQ   rR   rS   ro  rq  r   r   rj  r   r?    s   r?  c                    s  dd }|  | |   tjj|  |d}t }|   tj	ddd|gd}	t
|D ]}
t }td}|D ]}|   fd	d
|D \}}}}tj|d g|jd   ddd}t||ddddf gd}| |||\}}||||}|   t| d | }|  t  || | W 5 Q R X qv|
d d dkrZ|	|
d |d |d  f qZtd|d |d  dd|d |  ddt   dS )zUTrain a model for sequence to sequence.

    Defined in :numref:`sec_seq2seq_decoder`c                 S   sT   t | tjkrtj| j t | tjkrP| jD ]}d|kr0tj| j|  q0d S )NrG  )	rB  r   rC  rE  rF  rG  r6  Z_flat_weights_names_parameters)rI  rh   r   r   r   xavier_init_weights  s    
z*train_seq2seq.<locals>.xavier_init_weightsrK  r   r   r  )r"   r#   r$   r_   c                    s   g | ]}|  qS r   r:  r<  r=  r   r   r     s     z!train_seq2seq.<locals>.<listcomp>r  r   r=  rG   r-   NrM  rN  r  rQ  r  )rS  r;  rb   r   Adamr@  r?  r   r   r   r   r?   r   r   r  rX   rY   concatrJ   r   r  r   rc   r   r  rI   rV  )r   r   rf   r   r   r0  rF  rW  r   r   r   rX  r   batchr3   X_valid_lenr:   Y_valid_lenbos	dec_inputY_hatr   r   r  r   r=  r   train_seq2seq  sB    


  
 rO  c                 C   s4  |    || d |d g }tjt|g|d}t|||d }tjtj|tj	|ddd}	| 
|	|}
| j|
|}tjtj|d gtj	|ddd}g g  }}t|D ]j}| ||\}}|jd	d}|jddtj }|r || jj ||d kr q || qd|||fS )
zPPredict for sequence to sequence.

    Defined in :numref:`sec_seq2seq_training`rt  r  r=  r  r9  r   r  r  r_   )r   rx  r  rb   r  r6   r   r  	unsqueezer  r-  r.  r  r   r   squeezerB  r  itemrH   attention_weightsr  r  )r   src_sentencer  r   r  r0  save_attention_weights
src_tokensenc_valid_lenr/  r+  r1  r0  
output_seqattention_weight_seqr   r:   r   r   r   r   predict_seq2seq'  s<       
rZ  c              	   C   s"  |  d| d }}t|t| }}ttdd||  }td|d D ]}dtt }	}
t|| d D ]&}|
d	||||    d7  < qrt|| d D ]L}|
d	||||   dkr|	d7 }	|
d	||||    d8  < q|t
|	|| d  t
d|9 }qN|S )z@Compute the BLEU.

    Defined in :numref:`sec_seq2seq_training`rt  r   r-   r   )r  r6   r  r  minr   r  r   ry   r  pow)pred_seq	label_seqkpred_tokenslabel_tokenslen_pred	len_labelscorer   num_matches
label_subsr|   r   r   r   bleuK  s    $$&rg  r   r   Redsc                 C   s   t   | jd | jd  }}t jj|||dddd\}}	tt|	| D ]v\}
\}}tt||D ]Z\}\}}|jt ||d}|
|d kr|	| |dkr|
| |rb|||  qbqH|j||	dd d	S )
zGShow heatmaps of matrices.

    Defined in :numref:`sec_attention-cues`r   r-   TF)r   sharexshareyrQ  )cmapg333333?)r   shrinkN)r   r   rX   r   r   r   r8   r   r   r   r   r   colorbar)matricesr"   r#   r   r   rl  r   r   r   r!   r|   row_axesrow_matricesr8  r   matrixpcmr   r   r   show_heatmaps]  s"      


rt  c                 C   s|   |dkrt jj| ddS | j}| dkr<t||d }n
|d}tj	| d|d |dd} t jj| |ddS dS )zyPerform softmax operation by masking elements on the last axis.

    Defined in :numref:`sec_attention-scoring-functions`NrG   r  r-   g    .)r;  )
r   r   softmaxrX   r  rb   repeat_interleaverY   r   r>  )r3   
valid_lensrX   r   r   r   masked_softmaxq  s    
rx  c                       s(   e Zd ZdZ fddZdd Z  ZS )AdditiveAttentionzMAdditive attention.

    Defined in :numref:`sec_attention-scoring-functions`c                    sX   t t| jf | tj||dd| _tj||dd| _tj|ddd| _t|| _	d S )NFbiasr-   )
r`  ry  rD   r   rC  W_kW_qw_vDropoutr3  )rC   key_size
query_sizer  r3  r  rj  r   r   rD     s
    zAdditiveAttention.__init__c                 C   sd   |  || | }}|d|d }t|}| |d}t||| _t	| 
| j|S )Nr_   r-   rG   )r}  r|  rP  rb   tanhr~  rQ  rx  rS  bmmr3  )rC   querieskeysvaluesrw  featuresscoresr   r   r   ro    s    
zAdditiveAttention.forwardrp  r   r   rj  r   ry    s   ry  c                       s*   e Zd ZdZ fddZdddZ  ZS )DotProductAttentionzQScaled dot product attention.

    Defined in :numref:`subsec_additive-attention`c                    s"   t t| jf | t|| _d S r   )r`  r  rD   r   r  r3  )rC   r3  r  rj  r   r   rD     s    zDotProductAttention.__init__Nc                 C   sH   |j d }t||ddt| }t||| _t| | j|S )NrG   r-   r_   )	rX   rb   r  	transposer  r  rx  rS  r3  )rC   r  r  r  rw  dr  r   r   r   ro    s    
zDotProductAttention.forward)Nrp  r   r   rj  r   r    s   	r  c                       s,   e Zd ZdZ fddZedd Z  ZS )AttentionDecoderz[The base attention-based decoder interface.

    Defined in :numref:`sec_seq2seq_attention`c                    s   t t| jf | d S r   )r`  r  rD   r'  rj  r   r   rD     s    zAttentionDecoder.__init__c                 C   s   t d S r   r(  rB   r   r   r   rS    s    z"AttentionDecoder.attention_weights)rP   rQ   rR   rS   rD   r  rS  rq  r   r   rj  r   r    s   r  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )MultiHeadAttentionzGMulti-head attention.

    Defined in :numref:`sec_multihead-attention`Fc           	         sp   t t| jf | || _t|| _tj|||d| _	tj|||d| _
tj|||d| _tj|||d| _d S )Nrz  )r`  r  rD   	num_headsr   r  	attentionr   rC  r}  r|  W_vW_o)	rC   r  r  
value_sizer  r  r3  r{  r  rj  r   r   rD     s    zMultiHeadAttention.__init__c                 C   sv   t | || j}t | || j}t | || j}|d k	rPtj|| jdd}| ||||}t|| j}| 	|S )Nr   )repeatsr  )
transpose_qkvr}  r  r|  r  rb   rv  r  transpose_outputr  )rC   r  r  r  rw  r  output_concatr   r   r   ro    s      zMultiHeadAttention.forward)Frp  r   r   rj  r   r    s    
r  c                 C   sF   |  | jd | jd |d} | dddd} |  d| jd | jd S )zuTransposition for parallel computation of multiple attention heads.

    Defined in :numref:`sec_multihead-attention`r   r-   rG   r_   r   rY   rX   r8  r3   r  r   r   r   r    s    	r  c                 C   sF   |  d|| jd | jd } | dddd} |  | jd | jd dS )z[Reverse the operation of `transpose_qkv`.

    Defined in :numref:`sec_multihead-attention`rG   r-   r_   r   r   r  r  r   r   r   r    s    r  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )PositionalEncodingzYPositional encoding.

    Defined in :numref:`sec_self-attention-and-positional-encoding`  c              
      s   t t|   t|| _td||f| _tj	|t
jdddt
dt
j	d|dt
jd|  }t
|| jd d d d dd df< t
|| jd d d d dd df< d S )Nr-   r   rG   r  r   r_   )r`  r  rD   r   r  r3  r   r5  Pr:  rb   r  rY   r\  sincos)rC   r  r3  max_lenr3   rj  r   r   rD   
  s$     
   $zPositionalEncoding.__init__c                 C   s8   || j d d d |jd d d f |j }| |S r  )r  rX   r;  r0  r3  rC   r3   r   r   r   ro    s    .zPositionalEncoding.forward)r  rp  r   r   rj  r   r    s   r  c                       s(   e Zd ZdZ fddZdd Z  ZS )PositionWiseFFNzLPositionwise feed-forward network.

    Defined in :numref:`sec_transformer`c                    s<   t t| jf | t||| _t | _t||| _d S r   )	r`  r  rD   r   rC  dense1ReLUrm  dense2)rC   ffn_num_inputffn_num_hiddensffn_num_outputsr  rj  r   r   rD     s    
zPositionWiseFFN.__init__c                 C   s   |  | | |S r   )r  rm  r  r  r   r   r   ro  $  s    zPositionWiseFFN.forwardrp  r   r   rj  r   r    s   r  c                       s(   e Zd ZdZ fddZdd Z  ZS )AddNormz^Residual connection followed by layer normalization.

    Defined in :numref:`sec_transformer`c                    s.   t t| jf | t|| _t|| _d S r   )r`  r  rD   r   r  r3  	LayerNormln)rC   Znormalized_shaper3  r  rj  r   r   rD   +  s    zAddNorm.__init__c                 C   s   |  | || S r   )r  r3  rn  r   r   r   ro  0  s    zAddNorm.forwardrp  r   r   rj  r   r  '  s   r  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )EncoderBlockzDTransformer encoder block.

    Defined in :numref:`sec_transformer`Fc              	      sT   t t| jf | t||||||	|
| _t||	| _t|||| _	t||	| _
d S r   )r`  r  rD   r   r  r  r  addnorm1r  ffnaddnorm2)rC   r  r  r  r  
norm_shaper  r  r  r3  use_biasr  rj  r   r   rD   7  s"           zEncoderBlock.__init__c              	   C   s*   |  || ||||}| || |S r   )r  r  r  r  )rC   r3   rw  r:   r   r   r   ro  C  s    zEncoderBlock.forward)Frp  r   r   rj  r   r  3  s    r  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )TransformerEncoderz>Transformer encoder.

    Defined in :numref:`sec_transformer`Fc                    s|   t t| jf | || _t||| _t||| _	t
 | _t|
D ]0}| jdt| t||||||||	||
 qFd S )Nblock)r`  r  rD   r  r   r4  r5  r   r  pos_encoding
Sequentialblksr   
add_modulerV  r  )rC   r  r  r  r  r  r  r  r  r  r  r3  r  r  r|   rj  r   r   rD   K  s     

    zTransformerEncoder.__init__c                 G   s`   |  | |t| j }d gt| j | _t| jD ]"\}}|||}|j	j	j| j|< q8|S r   )
r  r5  r  r  r  r6   r  rS  r   r  )rC   r3   rw  r   r|   blkr   r   r   ro  Y  s    
zTransformerEncoder.forward)Frp  r   r   rj  r   r  G  s    r  c                 C   s"   t j j| ||tddd d S )Nz->)
arrowstyle)xyxytext
arrowprops)r   r   r5   annotatedict)r  r  r  r   r   r   r  e  s    r     c           	      C   s   d\}}}}||fg}t |D ]F}|r@| |||||\}}}}n| ||||\}}}}|||f qtd|d  dt|ddt|d |S )zlOptimize a 2D objective function with a customized trainer.

    Defined in :numref:`subsec_gd-learningrate`)r   r   zepoch r-   z, x1: r  z, x2: )r   rH   r  r   )	trainerstepsf_gradx1x2s1s2resultsr|   r   r   r   train_2di  s    
*r  c                 C   s|   t   t jjt| dddi t t dddt ddd\}}t jj||| ||dd	 t jd
 t j	d dS )zdShow the trace of 2D variables during optimization.

    Defined in :numref:`subsec_gd-learningrate`-ocolorz#ff7f0eg            ?g?g      z#1f77b4)colorsr  r  N)r  )
r   r   r   r9   r8   meshgridr:  contourr"   r#   )r  r  r  r  r   r   r   show_trace_2dy  s    r  zairfoil_self_noise.dat(76e5be1548fd8222e5074cf0faae75edff8cf93fairfoilr    c                 C   s|   t jtdt jdd}t||jdd |jdd }tj	|d|ddf |d|df f| dd	}||j
d
 d
 fS )$Defined in :numref:`sec_minibatches`r  r  )r   	delimiterr   r   NrG   Trn   r-   )rL   
genfromtxtr   r   r  rb   
from_numpyr   stdrp   rX   )rg   r   r   r   r   r   r   get_data_ch11  s     "& r  r_   c              
      s6  t jdd|dfddt jddd  fddtj }}tjd	d
d|gddgd}dt  }	}
t|D ]}|D ]\}}|||| }|	  |  g|| |	|j
d 7 }	|	d dkrr|
  ||	|j
d  t| t|||f |
  qrqjtd|jd d dd|
 dd |
 |jd fS )r  r   rT   r-   T)r   r  r   r  r  c                    s   t |  S r   )r   r^   r2   r[   rZ   r   r   r     r  ztrain_ch11.<locals>.<lambda>r   r   r   )\(?ffffff?r"   r#   r$   r%      loss: rG   rN  r  
 sec/epoch)rb   rV   r5  r   ra   r   r?   r   r   r   rX   rI   r   r6   r   rA   r  r:   rK   rN   )
trainer_fnstateshyperparamsr   feature_dimr   r   r   r   r   rX  r   r3   r=   r   r   r  r   
train_ch11  s0     (r  r   c              
   C   sB  t t dd}dd }|| | | f|}t jdd}tjddd	|gd
dgd}d	t  }	}
t	|D ]}|D ]\}}|
  ||}||j}|||}|   |  |	|jd	 7 }	|	d d	krx|
  ||	|jd	  t| t|||d f |
  qxqptd|jd	 d dd|
 dd dS )r  rL  r-   c                 S   s&   t | tjkr"tjjj| jdd d S )NrT   )r  )rB  r   rC  rb   rE  Znormal_rG  rH  r   r   r   rJ    s    z(train_concise_ch11.<locals>.init_weightsr@  )rB  r   r   r   r  r  r  r  r_   r  rG   rN  r  r  N)r   r  rC  rS  r@  ZMSELossr   r   r?   r   r   rY   rX   r   r   r   rI   r   r6   r   rA   r  r:   rK   )r  r  r   r   r   rJ  rW  r   r   r   rX  r   r3   r=   r   r   r   r   r   train_concise_ch11  s4    
 
r  c                   @   s*   e Zd ZdZd
ddZdd Zdd Zd	S )	BenchmarkzFor measuring running time.Donec                 C   s
   || _ dS )z"Defined in :numref:`sec_hybridize`N)description)rC   r  r   r   r   rD     s    zBenchmark.__init__c                 C   s   t  | _| S r   )r   r?   rX  rB   r   r   r   	__enter__  s    
zBenchmark.__enter__c                 G   s"   t | j d| j dd d S )Nz: z.4fz sec)r  r  rX  rI   r   r   r   r   __exit__  s    zBenchmark.__exit__N)r  )rP   rQ   rR   rS   rD   r  r  r   r   r   r   r    s   
r  c                 C   s4   | j d |j d ksttj| |tj||fS )zPSplit `X` and `y` into multiple devices.

    Defined in :numref:`sec_multi_gpu`r   )rX   r   r   parallelscatter)r3   r=   r3  r   r   r   split_batch  s    r  r-   c              
   C   s   ddd}t t j|dddddt dt  }|d|ddd	d
d |d|ddd	 |d|ddd	 |d|ddd	 |dt d |dt t  t d|  |S )zTA slightly modified ResNet-18 model.

    Defined in :numref:`sec_multi_gpu_concise`Fc              	   S   sT   g }t |D ]<}|dkr6|s6|tj| |ddd q|t|| qtj| S )Nr   Tr_   )rh  ri  )r   rH   r   r\  r   r  )in_channelsZout_channelsnum_residualsfirst_blockr  r|   r   r   r   resnet_block  s     
zresnet18.<locals>.resnet_block@   r   r-   )r]  r_  r^  Zresnet_block1r_   T)r  Zresnet_block2   Zresnet_block3   Zresnet_block4i   Zglobal_avg_poolr  fc)F)	r   r  rD  rd  r  r  ZAdaptiveAvgPool2dFlattenrC  )num_classesr  r  r   r   r   r   resnet18  s      

r  c           
         s   t |tr fdd|D }n| d }| d }|   |  | |}|||}|   |  | }t	||}	||	fS )zqTrain for a minibatch with mutiple GPUs (defined in Chapter 13).

    Defined in :numref:`sec_image_augmentation`c                    s   g | ]}|  d  qS r   r:  r<  r2  r   r   r     s     z$train_batch_ch13.<locals>.<listcomp>r   )
r0   r1   r;  r   r   rJ   r   r   r   r   )
r   r3   r=   r   r  r3  r   r   train_loss_sumtrain_acc_sumr   r2  r   train_batch_ch13  s    

r  c              	   C   s  t  t| }}t jdd|gddgdddgd}	tj| |d|d } t|D ]}
t d	}t	|D ]\}\}}|
  t| |||||\}}||||jd |  |  |d |d
  dks||d krh|	|
|d |  |d |d  |d |d  df qht | |}|	|
d dd|f qRtd|d |d  dd|d |d  dd|d t|d | |  ddt|  dS )ziTrain a model with mutiple GPUs (defined in Chapter 13).

    Defined in :numref:`sec_image_augmentation`r   r-   r   r   r   r   r   )Z
device_idsr   rL  r_   r   NrM  rN  rO  rP  rQ  rR  )r   r?   r6   r   r   ZDataParallelr;  r   r   r   rA   r  r   rX   r   rI   rA  r  rJ   rV  )r   r   r   r   r  r   r3  rX  rY  r   r   r   r|   r  r   r   accr   r   r   r   
train_ch13  s:    
      8r   z
hotdog.zip(fba480ffa8aa7e0febbb511d181409f899b9baa5hotdogc           	      C   s   | dddf | dddf | dddf | dddf f\}}}}|| d }|| d }|| }|| }t j||||fdd} | S )zeConvert from (upper-left, lower-right) to (center, width, height).

    Defined in :numref:`sec_bbox`Nr   r-   r_   r   rG   r   r   stack)	boxesr  y1r  y2cxcyrZ   r7  r   r   r   box_corner_to_center2  s    Dr
  c           	      C   s   | dddf | dddf | dddf | dddf f\}}}}|d|  }|d|  }|d|  }|d|  }t j||||fdd} | S )	zeConvert from (center, width, height) to (upper-left, lower-right).

    Defined in :numref:`sec_bbox`Nr   r-   r_   r   r   rG   r   r  )	r  r  r	  rZ   r7  r  r  r  r  r   r   r   box_center_to_corner>  s    Dr  c                 C   s<   t jj| d | d f| d | d  | d | d  d|ddS )zMConvert bounding box to matplotlib format.

    Defined in :numref:`sec_bbox`r   r-   r_   r   F)r  widthheightfill	edgecolor	linewidth)r   r   	Rectangle)bboxr  r   r   r   bbox_to_rectJ  s        r  c              	   C   s  | j dd \}}| jt|t|  }}}|| d }tj||d}	tj||d}
d\}}d| }d| }tj||d| | }tj||d| | }t||\}}|d|d }}t	|	t
|
d  |d t
|
dd  f| | }t	|	t
|
d  |d t
|
dd  f}t| | ||fj|| dd	 }tj||||gdd
j|dd
}|| }|dS )zhGenerate anchor boxes with different shapes centered on each pixel.

    Defined in :numref:`sec_anchor`r  Nr-   r=  )r   r   r  rG   r   r_   r  )rX   r0  r6   r   r  rb   r:  r  rY   catr  r  r  repeatrv  rP  )r   sizesratios	in_heightin_widthr0  	num_sizes
num_ratiosboxes_per_pixelsize_tensorratio_tensoroffset_hoffset_wsteps_hsteps_wcenter_hcenter_wshift_yshift_xrZ   r7  anchor_manipulationsout_gridr  r   r   r   multibox_priorU  sH      r)  c           
      C   s   ddd}||}||dddddg}t |D ]\}}||t|  }tt||}| | |r.t||kr.|d	kr~d
nd	}	| j|jd |jd || ddd|	t|ddd q.dS )z9Show bounding boxes.

    Defined in :numref:`sec_anchor`Nc                 S   s&   | d kr|} nt | ttfs"| g} | S r   )r0   r1   r  )objdefault_valuesr   r   r   	make_list  s
    zshow_bboxes.<locals>.make_listr[   gr  rI  crZ   r_  r   r-   center	   )	facecolorlw)vahafontsizer  r  )N)	r   r6   r   r  r   	add_patchr  r  r  )
r!   bboxesr   r  r,  r|   r  r  rect
text_colorr   r   r   show_bboxes|  s     

   
r:  c           
      C   s   dd }|| }||}t | dddddf |ddddf }t | dddddf |ddddf }|| jdd}|dddddf |dddddf  }|dddf | | }	||	 S )zgCompute pairwise IoU across two lists of anchor or bounding boxes.

    Defined in :numref:`sec_anchor`c                 S   s@   | d d df | d d df  | d d df | d d df   S )Nr_   r   r   r-   r   )r  r   r   r   r     s   zbox_iou.<locals>.<lambda>Nr_   r   )r[  r-   )rb   maxr[  clamp)
boxes1boxes2box_areaareas1areas2inter_upperleftsinter_lowerrightsintersinter_areasunion_areasr   r   r   box_iou  s    ..,rG  r   c                 C   s   |j d | j d  }}t|| }tj|fdtj|d}tj|dd\}}	t|dkd}
|	|dk }|||
< t|fd}t|fd}t|D ]N}t	|}||  }||  }|||< ||dd|f< |||ddf< q|S )z`Assign closest ground-truth bounding boxes to anchor boxes.

    Defined in :numref:`sec_anchor`r   rG   r9  r-   r  r   N)
rX   rG  rb   fullr  r;  nonzerorY   r   r   )ground_truthanchorsr0  iou_thresholdnum_anchorsnum_gt_boxesjaccardanchors_bbox_mapmax_iousr  anc_ibox_jcol_discardrow_discardr   max_idxbox_idxanc_idxr   r   r   assign_anchor_to_bbox  s&    

rY  ư>c              	   C   s   t | }t |}d|ddddf |ddddf   |ddddf  }dt ||ddddf |ddddf    }t j||gdd}|S )zXTransform for anchor box offsets.

    Defined in :numref:`subsec_labeling-anchor-boxes`r  Nr_   rL  r-   r   )r   r
  logrH  )rK  assigned_bbepsc_ancc_assigned_bb	offset_xy	offset_whr  r   r   r   offset_boxes  s    

@6rb  c                 C   s\  |j d | d }} g g g   }}}| j| j d  }}t|D ]}||ddddf }	t|	ddddf | |}
|
dk ddd}tj	|tj
|d}tj	|dftj|d}t|
dk}|
| }|	|df 
 d ||< |	|ddf ||< t| || }||d ||d || q@t|}t|}t|}|||fS )zlLabel anchor boxes using ground-truth bounding boxes.

    Defined in :numref:`subsec_labeling-anchor-boxes`r   Nr-   rG   r   r9  )rX   rQ  r0  r   rY  r   rP  r  rb   r5  r  r  rI  rb  rH   rY   r  )rK  r   rg   batch_offset
batch_maskbatch_class_labelsr0  rM  r|   rC  rP  	bbox_maskclass_labelsr\  indices_truebb_idxr  bbox_offsetr   r   r   multibox_target  s@       



rk  c                 C   s   t | }|ddddf |ddddf  d |ddddf  }t |ddddf d |ddddf  }t j||fdd}t |}|S )z{Predict bounding boxes based on anchor boxes with predicted offsets.

    Defined in :numref:`subsec_labeling-anchor-boxes`Nr_   r  rL  r-   r   )r   r
  r  rH  r  )rK  offset_predsancpred_bbox_xypred_bbox_wh	pred_bboxpredicted_bboxr   r   r   offset_inverse  s    
@2
rr  c                 C   s   t j|ddd}g }| dkr|d }|| | dkr@qt| |ddf dd| |dd ddf ddd}t ||kd}||d  }qtj|| j	dS )	zrSort confidence scores of predicted bounding boxes.

    Defined in :numref:`subsec_predicting-bounding-boxes-nms`rG   T)r  Z
descendingr   r-   Nr   r=  )
rb   argsortr   rH   rG  rY   rI  r   r  r0  )r  r  rL  Bkeepr|   iouindsr   r   r   nms  s    
 rx  Q%z?c                 C   sN  | j | jd  }}|d}| jd | jd  }}g }	t|D ]}
| |
 ||
 dd }}t|dd d\}}t||}t|||}tj	|tj
|d}t||f}|jdd	\}}||dk }t||f}d||< || }|| ||  }}||k }d||< d||  ||< tj|d|d|fdd
}|	| q>t|	S )ztPredict bounding boxes using non-maximum suppression.

    Defined in :numref:`subsec_predicting-bounding-boxes-nms`r   r-   r_   rG   r   Nr9  T)return_countsr  )r0  rX   rQ  r   rY   rb   r;  rr  rx  r:  r  r  uniquerP  rH   r   r  )	cls_probsrl  rK  nms_thresholdpos_thresholdr0  rg   r  rM  r   r|   cls_proboffset_predconfclass_idpredicted_bbru  all_idxcombineduniquescountsnon_keepall_id_sortedbelow_min_idx	pred_infor   r   r   multibox_detection  s8    

r  zbanana-detection.zip(5de26c8fce5ccdea9f91267273464dc968d20d72banana-detectionc                 C   s   t d}tj|| rdndd}t|}|d}g g  }}| D ]@\}}|	t
jtj|| rjdndd|  |	t| qH|t|dd fS )	zkRead the banana detection dataset images and labels.

    Defined in :numref:`sec_object-detection-dataset`r  bananas_trainbananas_valz	label.csvimg_nameimagesr-   r  )r   r&  r  r  r  pdread_csv	set_indexiterrowsrH   r   io
read_imager1   rb   r  rP  )rn   r#  	csv_fnamecsv_datar  targetsr  r  r   r   r   read_data_bananas4  s$    
 



  r  c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	BananasDatasetzqA customized dataset to load the banana detection dataset.

    Defined in :numref:`sec_object-detection-dataset`c                 C   s6   t |\| _| _tdtt| j |r*dnd  d S )Nread z training examplesz validation examples)r  r  r   r  rV  r6   )rC   rn   r   r   r   rD   L  s    zBananasDataset.__init__c                 C   s   | j |  | j| fS r   )r  r   r   r   r   r   r   r   Q  s    zBananasDataset.__getitem__c                 C   s
   t | jS r   r6   r  rB   r   r   r   r.   T  s    zBananasDataset.__len__NrP   rQ   rR   rS   rD   r   r.   r   r   r   r   r  H  s   r  c                 C   s8   t jjjtdd| dd}t jjtdd| }||fS )zYLoad the banana detection dataset.

    Defined in :numref:`sec_object-detection-dataset`Tr  rj   F)rb   utilsr   rl   r  )rg   r   val_iterr   r   r   load_data_bananasW  s     r  zVOCtrainval_11-May-2012.tar(4e443f8a2eca6b1dac8a6c57641b67dd40621a49voc2012c           
      C   s   t j| dd|rdnd}tjjjj}t|d}|	 
 }W 5 Q R X g g  }}t|D ]R\}}	|tjt j| d|	 d |tjt j| d|	 d	| qZ||fS )
zZRead all VOC feature and label images.

    Defined in :numref:`sec_semantic_segmentation`	ImageSetsSegmentationz	train.txtzval.txtr  
JPEGImagesz.jpgSegmentationClassz.png)r  r  r  r   r  imageZImageReadModeRGBr  r  r  r   rH   r  )
voc_dirrn   	txt_fnamemoder  r  r  r   r|   r  r   r   r   read_voc_imagesd  s*    

    r  r  r     
background	aeroplanebicyclebirdboatbottlebuscarr  chaircowdiningtabledoghorse	motorbikepersonzpotted plantsheepsofar   z
tv/monitorc                  C   sJ   t jdt jd} ttD ],\}}|| |d d |d  d |d  < q| S )zoBuild the mapping from RGB to class indices for VOC labels.

    Defined in :numref:`sec_semantic_segmentation`i   r  r   r  r-   r_   )rb   r5  r  r   VOC_COLORMAP)colormap2labelr|   colormapr   r   r   voc_colormap2label  s    r  c                 C   sj   |  ddd d} | dddddf d | dddddf  d | dddddf  }|| S )zlMap any RGB values in VOC labels to their class indices.

    Defined in :numref:`sec_semantic_segmentation`r-   r_   r   r  Nr  )r8  r   r   )r  r  r   r   r   r   voc_label_indices  s
    2r  c                 C   sD   t jj| ||f}t jjj| f| } t jjj|f| }| |fS )z`Randomly crop both feature and label images.

    Defined in :numref:`sec_semantic_segmentation`)r   r   Z
RandomCropr  r   crop)featurerC  r  r  r8  r   r   r   voc_rand_crop  s     r  c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )VOCSegDatasetzaA customized dataset to load the VOC dataset.

    Defined in :numref:`sec_semantic_segmentation`c                    s   t jjdddgdddgd _| _t||d\}} fd	d
 |D  _ | _t	  _
tdtt j d  d S )Ng
ףp=
?gv/?gCl?gZd;O?gy&1?g?)r   r  r  c                    s   g | ]}  |qS r   )normalize_image)r{   r  rB   r   r   r     s   z*VOCSegDataset.__init__.<locals>.<listcomp>r  	 examples)r   r   	Normalizer   	crop_sizer  filterr  r   r  r  r  rV  r6   )rC   rn   r  r  r  r   r   rB   r   rD     s     
zVOCSegDataset.__init__c                 C   s   |  | d S )N   )r   r   )rC   r   r   r   r   r    s    zVOCSegDataset.normalize_imagec                    s    fdd|D S )Nc                    s8   g | ]0}|j d   jd kr|j d  jd  kr|qS )r-   r   r_   )rX   r  )r{   r   rB   r   r   r     s    z(VOCSegDataset.filter.<locals>.<listcomp>r   )rC   r   r   rB   r   r    s    zVOCSegDataset.filterc                 C   s2   t | j| | j| f| j \}}|t|| jfS r   )r  r  r   r  r  r  )rC   r   r  rC  r   r   r   r     s    
zVOCSegDataset.__getitem__c                 C   s
   t | jS r   r  rB   r   r   r   r.     s    zVOCSegDataset.__len__N)	rP   rQ   rR   rS   rD   r  r  r   r.   r   r   r   r   r    s   r  c                 C   sd   t dtjdd}t  }tjjj	t
d||| dd|d}tjjj	t
d||| d|d}||fS )z_Load the VOC semantic segmentation dataset.

    Defined in :numref:`sec_semantic_segmentation`r  	VOCdevkitVOC2012T)rk   	drop_lastr   F)r  r   )r   r&  r  r  r  r   rb   r  r   rl   r  )rg   r  r  r   r   r   r   r   r   load_data_voc  s&     
   
  r  zkaggle_cifar10_tiny.zip(2068874e4b9a9f0fb07ebe0ad2b29754449ccacdcifar10_tinyc              	   C   sF   t | d}| dd }W 5 Q R X dd |D }tdd |D S )zcRead `fname` to return a filename to label dictionary.

    Defined in :numref:`sec_kaggle_cifar10`r  r-   Nc                 S   s   g | ]}|  d qS ),)rstripr  r  r   r   r   r     s     z#read_csv_labels.<locals>.<listcomp>c                 s   s   | ]\}}||fV  qd S r   r   )r{   r  rC  r   r   r   r    s     z"read_csv_labels.<locals>.<genexpr>)r  r{  r  )r  r  r|  r  r   r   r   read_csv_labels  s    r  c                 C   s   t j|dd t| | dS )zQCopy a file into a target directory.

    Defined in :numref:`sec_kaggle_cifar10`Tr   N)r  r  shutilcopy)filename
target_dirr   r   r   copyfile  s    r  c           	   	   C   s   t |  d d }tdt|| }i }ttj	
| dD ]}||dd  }tj	
| d|}t|tj	
| dd| ||ks|| |k rt|tj	
| dd| ||dd ||< qFt|tj	
| dd| qF|S )	zgSplit the validation set out of the original training set.

    Defined in :numref:`sec_kaggle_cifar10`rG   r-   r   r   r   train_valid_testtrain_validvalid)r  r  r  most_commonr;  r  floorr  listdirr  r  r  r  r  )	r#  r   valid_ratior   n_valid_per_labellabel_count
train_filerC  r  r   r   r   reorg_train_valid  s*       r  c              	   C   sB   t t j| dD ](}tt j| d|t j| ddd qdS )ziOrganize the testing set for data loading during prediction.

    Defined in :numref:`sec_kaggle_cifar10`testr  unknownN)r  r  r  r  r  )r#  	test_filer   r   r   
reorg_test  s    r  zkaggle_dog_tiny.zip(0cb91d09b814ecdc07b50f31f8dcad3e81d6a86ddog_tinyzptb.zip(319d85e578af0cdc590547f26231e4e31cdf1e42ptbc               	   C   sD   t d} ttj| d}| }W 5 Q R X dd |dD S )z[Load the PTB dataset into a list of text lines.

    Defined in :numref:`sec_word2vec_data`r  zptb.train.txtc                 S   s   g | ]}|  qS r   r  ry  r   r   r   r     s     zread_ptb.<locals>.<listcomp>r   )r   r&  r  r  r  r  r  r  )r#  r  raw_textr   r   r   read_ptb  s    
r  c                    sL   fdd| D } t |  t   fddfdd| D  fS )zKSubsample high-frequency words.

    Defined in :numref:`sec_word2vec_data`c                    s   g | ]} fd d|D qS )c                    s   g | ]} |  j kr|qS r   )r  r  r  r   r   r     s      (subsample.<locals>.<listcomp>.<listcomp>r   ry  r  r   r   r     s   zsubsample.<locals>.<listcomp>c                    s"   t ddtd |    k S )Nr   r-   g-C6?)r  uniformr  r  r  )r  r  r   r   ru    s    
zsubsample.<locals>.keepc                    s   g | ]} fd d|D qS )c                    s   g | ]} |r|qS r   r   r  ru  r   r   r   !  s      r  r   ry  r  r   r   r   !  s     )r   r  rJ   r  )	sentencesr  r   )r  ru  r  r  r   	subsample  s    

r  c              	      s   g g  }}| D ] t  dk r q| 7 }tt  D ]\}td|}tttd|| tt  |d | }|| | fdd|D  q4q||fS )z_Return center words and context words in skip-gram.

    Defined in :numref:`sec_word2vec_data`r_   r-   r   c                    s   g | ]} | qS r   r   )r{   r   rz  r   r   r   5  s     z,get_centers_and_contexts.<locals>.<listcomp>)	r6   r   r  r  r1   r;  r[  removerH   )r  max_window_sizecenterscontextsr|   window_sizer  r   r  r   get_centers_and_contexts$  s    

r   c                   @   s    e Zd ZdZdd Zdd ZdS )RandomGeneratorz@Randomly draw among {1, ..., n} according to n sampling weights.c                 C   s.   t tdt|d | _|| _g | _d| _dS )z&Defined in :numref:`sec_word2vec_data`r-   r   N)r1   r   r6   
populationsampling_weights
candidatesr|   )rC   r  r   r   r   rD   :  s    zRandomGenerator.__init__c                 C   sJ   | j t| jkr,tj| j| jdd| _d| _ |  j d7  _ | j| j d  S )Nr  )r_  r   r-   )r|   r6   r  r  choicesr  r  rB   r   r   r   drawB  s      zRandomGenerator.drawN)rP   rQ   rR   rS   rD   r  r   r   r   r   r  8  s   r  c           
         sx    fddt dtD }g t| }}| D ]B}g }t|t|| k rh| }	|	|kr8||	 q8|| q0|S )zTReturn noise words in negative sampling.

    Defined in :numref:`sec_word2vec_data`c                    s   g | ]}  | d  qS )g      ?)r  rz   r  r  r   r   r   Q  s   z!get_negatives.<locals>.<listcomp>r-   )r   r6   r  r  rH   )
all_contextsr  r  r6  r  all_negatives	generatorr  	negativesnegr   r  r   get_negativesK  s    r  c           
      C   s   t dd | D }g g g g f\}}}}| D ]\}}}t|t| }	||g7 }||| dg||	   g7 }|dg|	 dg||	   g7 }|dgt| dg|t|   g7 }q*tt|dt|t|t|fS )zpReturn a minibatch of examples for skip-gram with negative sampling.

    Defined in :numref:`sec_word2vec_data`c                 s   s$   | ]\}}}t |t | V  qd S r   r
  )r{   r   r.  r   r   r   r   r  b  s     zbatchify.<locals>.<genexpr>r   r-   rU   )r;  r6   r   rY   r  )
r   r  r  contexts_negativesmasksr   r/  contextnegativecur_lenr   r   r   batchify^  s    
( r  c                    s   t  }t }t j|dd t| \}} fdd|D }t||\}}	t|	 ||}
G dd dtjj	j
}|||	|
}tjj	j|| dt|d}| fS )	zrDownload the PTB dataset and then load it into memory.

    Defined in :numref:`subsec_word2vec-minibatch-loading`r  r  c                    s   g | ]} | qS r   r   ry  r  r   r   r   u  s     z!load_data_ptb.<locals>.<listcomp>c                   @   s$   e Zd Zdd Zdd Zdd ZdS )z!load_data_ptb.<locals>.PTBDatasetc                 S   s<   t |t |  kr t |ks&n t|| _|| _|| _d S r   )r6   r   r  r  r  )rC   r  r  r  r   r   r   rD   |  s    &z*load_data_ptb.<locals>.PTBDataset.__init__c                 S   s   | j | | j| | j| fS r   )r  r  r  )rC   r  r   r   r   r     s    z-load_data_ptb.<locals>.PTBDataset.__getitem__c                 S   s
   t | jS r   )r6   r  rB   r   r   r   r.     s    z)load_data_ptb.<locals>.PTBDataset.__len__N)rP   rQ   rR   rD   r   r.   r   r   r   r   
PTBDataset{  s   r  T)rk   Z
collate_fnr   )r   r   r  r  r  r   r  rb   r  r   Datasetrl   r  )rg   r  num_noise_wordsr   r  
subsampledr  r  all_centersr  r	  r  ro   r   r   r  r   load_data_ptbm  s,        r  zglove.6B.50d.zip(0b8703943ccdb6eb788e6f091b8946e82231bc4dzglove.6b.50dzglove.6B.100d.zip(cd43bfb07e44e6f27cbcc7bc9ae3d80284fdaf5azglove.6b.100dzglove.42B.300d.zip(b5116e234e9eb9076672cfeabf5469f3eec904fazglove.42b.300dzwiki.en.zip(c1816da3821ae9f43899be655002f6c723e91b88zwiki.enc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )TokenEmbeddingzToken Embedding.c                 C   s2   |  |\| _| _d| _dd t| jD | _dS )z!Defined in :numref:`sec_synonyms`r   c                 S   s   i | ]\}}||qS r   r   r  r   r   r   r    s      z+TokenEmbedding.__init__.<locals>.<dictcomp>N)_load_embeddingr  
idx_to_vecunknown_idxr   r  )rC   embedding_namer   r   r   rD     s    zTokenEmbedding.__init__c           	   	   C   s   dgg  }}t |}ttj|dd^}|D ]R}| d}|d dd |dd  D  }}t|dkr0|	| |	| q0W 5 Q R X dgt|d  g| }|t 
|fS )	Nr  zvec.txtr  rt  r   c                 S   s   g | ]}t |qS r   r   )r{   elemr   r   r   r     s     z2TokenEmbedding._load_embedding.<locals>.<listcomp>r-   )r   r&  r  r  r  r  r  r  r6   rH   r  )	rC   r#  r  r!  r#  r  rz  elemsr  r   r   r   r     s    
 
zTokenEmbedding._load_embeddingc                    s&    fdd|D } j t| }|S )Nc                    s   g | ]} j | jqS r   )r  r  r"  r  rB   r   r   r     s   z.TokenEmbedding.__getitem__.<locals>.<listcomp>)r!  r   r  )rC   r  r  vecsr   rB   r   r     s
    
zTokenEmbedding.__getitem__c                 C   s
   t | jS r   r  rB   r   r   r   r.     s    zTokenEmbedding.__len__N)rP   rQ   rR   rS   rD   r   r   r.   r   r   r   r   r    s
   r  c                 C   sV   dg|  dg }dgt | d  }|dk	rN||dg 7 }|dgt |d  7 }||fS )z_Get tokens of the BERT input sequence and their segment IDs.

    Defined in :numref:`sec_bert`<cls><sep>r   r_   Nr-   r
  )tokens_atokens_br  segmentsr   r   r   get_tokens_and_segments  s    r,  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	BERTEncoderz=BERT encoder.

    Defined in :numref:`subsec_bert_input_rep`r     c                    s   t t| jf | t||| _td|| _t | _t	|D ],}| j
| t|
||||||||d
 q@ttd|	|| _d S )Nr_   Tr-   )r`  r-  rD   r   r4  token_embeddingsegment_embeddingr  r  r   r  r   r  	Parameterrb   randnpos_embedding)rC   r  r  r  r  r  r  r  r3  r  r  r  r  r  r|   rj  r   r   rD     s(    
        zBERTEncoder.__init__c                 C   sV   |  || | }|| jjd d d |jd d d f  }| jD ]}|||}qB|S r  )r/  r0  r3  r   rX   r  )rC   r  r+  rw  r3   r  r   r   r   ro    s
    (
zBERTEncoder.forward)r  r.  r.  r.  rp  r   r   rj  r   r-    s          r-  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )MaskLMzWThe masked language model task of BERT.

    Defined in :numref:`subsec_bert_input_rep`r.  c              	      sB   t t| jf | tt||t t|t||| _d S r   )	r`  r4  rD   r   r  rC  r  r  mlp)rC   r  r  
num_inputsr  rj  r   r   rD     s    
zMaskLM.__init__c                 C   s`   |j d }|d}|j d }td|}t||}|||f }|||df}| |}|S )Nr-   rG   r   )rX   rY   rb   r:  rv  r5  )rC   r3   pred_positionsnum_pred_positionsrg   	batch_idxmasked_X	mlm_Y_hatr   r   r   ro    s    



zMaskLM.forward)r.  rp  r   r   rj  r   r4    s   r4  c                       s(   e Zd ZdZ fddZdd Z  ZS )NextSentencePredzOThe next sentence prediction task of BERT.

    Defined in :numref:`subsec_mlm`c                    s$   t t| jf | t|d| _d S )Nr_   )r`  r<  rD   r   rC  r  )rC   r6  r  rj  r   r   rD   	  s    zNextSentencePred.__init__c                 C   s
   |  |S r   )r  r  r   r   r   ro  		  s    zNextSentencePred.forwardrp  r   r   rj  r   r<  	  s   r<  c                       s,   e Zd ZdZd	 fdd	Zd
ddZ  ZS )	BERTModelz4The BERT model.

    Defined in :numref:`subsec_nsp`r  r.  c                    sf   t t|   t|||||||||	|
||d| _tt||t | _	t
|||| _t|| _d S )N)r  r  r  r  )r`  r=  rD   r-  r-  r   r  rC  Tanhhiddenr4  mlmr<  nsp)rC   r  r  r  r  r  r  r  r3  r  r  r  r  Zhid_in_featuresZmlm_in_featuresZnsp_in_featuresrj  r   r   rD   	  s"          zBERTModel.__init__Nc              	   C   sT   |  |||}|d k	r$| ||}nd }| | |d d dd d f }|||fS r   )r-  r@  rA  r?  )rC   r  r+  rw  r7  	encoded_Xr;  	nsp_Y_hatr   r   r   ro   	  s    "zBERTModel.forward)r  r.  r.  r.  r.  r.  r.  )NNrp  r   r   rj  r   r=  	  s              r=  )zHhttps://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip(3c914d17d80b1459be871a5039ac23e752a53cbe
wikitext-2c              	   C   sH   t j| d}t|d}| }W 5 Q R X dd |D }t| |S )%Defined in :numref:`sec_bert-dataset`zwiki.train.tokensr  c                 S   s0   g | ](}t |d dkr|  d qS )z . r_   )r6   r  rw  rx  ry  r   r   r   r   5	  s    z_read_wiki.<locals>.<listcomp>)r  r  r  r  r{  r  rk   )r#  	file_namer  r|  
paragraphsr   r   r   
_read_wiki/	  s    
rI  c                 C   s0   t   dk rd}nt t |}d}| ||fS )rF  r   TF)r  choice)sentencenext_sentencerH  is_nextr   r   r   _get_next_sentence:	  s
    rN  c                 C   sv   g }t t| d D ]\}t| | | |d  |\}}}t|t| d |krPqt||\}	}
||	|
|f q|S )rF  r-   r   )r   r6   rN  r   r,  rH   )	paragraphrH  r  r  nsp_data_from_paragraphr|   r)  r*  rM  r  r+  r   r   r   _get_nsp_data_from_paragraphD	  s     
 
rQ  c                 C   s   dd | D }g }t | |D ]f}t||kr4 qd}t   dk rJd}n"t   dk r`| | }nt |j}|||< ||| | f q ||fS )rF  c                 S   s   g | ]}|qS r   r   r  r   r   r   r   V	  s     z'_replace_mlm_tokens.<locals>.<listcomp>Ng?<mask>r   )r  rk   r6   rJ  r  rH   )r  candidate_pred_positionsnum_mlm_predsr  mlm_input_tokenspred_positions_and_labelsmlm_pred_positionmasked_tokenr   r   r   _replace_mlm_tokensQ	  s"    


rY  c           
      C   s   g }t | D ]\}}|dkrq|| qtdtt| d }t| |||\}}t|dd d}dd |D }d	d |D }	|| |||	 fS )
,Defined in :numref:`subsec_prepare_mlm_data`)r'  r(  r-   333333?c                 S   s   | d S r   r   r  r   r   r   r   }	  r  z+_get_mlm_data_from_tokens.<locals>.<lambda>)r  c                 S   s   g | ]}|d  qS r  r   r{   vr   r   r   r   ~	  s     z-_get_mlm_data_from_tokens.<locals>.<listcomp>c                 S   s   g | ]}|d  qS r  r   r\  r   r   r   r   	  s     )r   rH   r;  roundr6   rY  r  )
r  r  rS  r|   r  rT  rU  rV  r7  mlm_pred_labelsr   r   r   _get_mlm_data_from_tokensn	  s$       r`  c              	   C   s^  t |d }g g g   }}}g g g   }}}	g }
| D ]\}}}}}|tj||d g|t|   tjd |tj|dg|t|   tjd |tjt|tjd |tj|dg|t|   tjd |tjdgt| dg|t|   tjd |	tj|dg|t|   tjd |
tj|tjd q4||||||	|
fS )rZ  r[  r  r  r   r  r   )r^  rH   rb   r  r6   r  r  )examplesr  r  max_num_mlm_predsall_token_idsall_segmentsrw  all_pred_positionsall_mlm_weightsall_mlm_labels
nsp_labels	token_idsr7  mlm_pred_label_idsr+  rM  r   r   r   _pad_bert_inputs	  sR    




  rk  c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	_WikiTextDatasetrZ  c                    s   dd |D }dd |D }t j|dddddgd	 _g }|D ]}|t|| j| q> fd
d|D }t|| j\ _ _ _ _	 _
 _ _d S )Nc                 S   s   g | ]}t j|d dqS )r~  r  )r   r  )r{   rO  r   r   r   r   	  s
    z-_WikiTextDataset.__init__.<locals>.<listcomp>c                 S   s   g | ]}|D ]}|qqS r   r   )r{   rO  rK  r   r   r   r   	  s     rL  r  rR  r'  r(  r  c                    s&   g | ]\}}}t | j||f qS r   )r`  r  )r{   r  r+  rM  rB   r   r   r   	  s   
)r   r  r  extendrQ  rk  rc  rd  rw  re  rf  rg  rh  )rC   rH  r  r  ra  rO  r   rB   r   rD   	  s@       
   
    z_WikiTextDataset.__init__c                 C   s<   | j | | j| | j| | j| | j| | j| | j| fS r   )rc  rd  rw  re  rf  rg  rh  r   r   r   r   r   	  s      z_WikiTextDataset.__getitem__c                 C   s
   t | jS r   )r6   rc  rB   r   r   r   r.   	  s    z_WikiTextDataset.__len__Nr  r   r   r   r   rl  	  s   rl  c                 C   sF   t  }t dd}t|}t||}tjjj|| d|d}||j	fS )zNLoad the WikiText-2 dataset.

    Defined in :numref:`subsec_prepare_mlm_data`rE  Tr   )
r   r   r&  rI  rl  rb   r  r   rl   r  )rg   r  r   r#  rH  	train_setr   r   r   r   load_data_wiki	  s    
 ro  c
                 C   sn   | ||| d|\}
}}|| d|| d| dd }| | d  }|||	}|| }|||fS )z)Defined in :numref:`sec_bert-pretraining`rG   r-   g:0yE>)rY   rJ   )r   r   r  Ztokens_XZ
segments_XZvalid_lens_xZpred_positions_XZmlm_weights_XZmlm_YZnsp_yr   r;  rC  mlm_lnsp_lr   r   r   r   _get_batch_loss_bert	  s    


rr  )z>http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz(01ada507287d82875905620988597833ad4e0903aclImdbc           	      C   s   g g  }}dD ]}t j| |r"dnd|}t |D ]Z}tt j||d<}| ddd}|| ||dkr~d	nd
 W 5 Q R X q4q||fS )z_Read the IMDb review dataset text sequences and labels.

    Defined in :numref:`sec_sentiment`)r  r  r   r  r   zutf-8r   r  r  r-   r   )	r  r  r  r  r  r  decoder  rH   )	r#  rn   r   r   rC  folder_namefiler  reviewr   r   r   	read_imdb	  s    

$ry    c                    s   t dd}t|d}t|d}t j|d dd}t j|d dd}t j|ddt fd	d
|D }t fdd
|D }t |t|d f| }	t j|t|d f| dd}
|	|
fS )zlReturn data iterators and the vocabulary of the IMDb review dataset.

    Defined in :numref:`sec_sentiment`rt  TFr   r~  r  rL  r  c                    s"   g | ]}t |  d  qS r  r   r  ry  r  r   r   r   	  s     z"load_data_imdb.<locals>.<listcomp>c                    s"   g | ]}t |  d  qS r  r{  ry  r  r   r   r   	  s     r-   r  )r   r&  ry  r  r  rb   r  rp   )rg   r  r#  
train_data	test_datatrain_tokenstest_tokenstrain_featurestest_featuresr   r   r   r  r   load_data_imdb	  s(    

r  c                 C   sD   t j||  t d}t j| |dddd}|dkr@dS dS )zUPredict the sentiment of a text sequence.

    Defined in :numref:`sec_sentiment_rnn`r=  r-   rG   r  positiver  )rb   r  r  r   r1  r   rY   )r   r  sequencerC  r   r   r   predict_sentiment
  s    r  )z3https://nlp.stanford.edu/projects/snli/snli_1.0.zip(9fcde07509c7e87ec61c640c1b2753d9041758e4SNLIc              	      s   dd  ddddt j| |r$dnd}t|d	 }d
d | dd D }W 5 Q R X  fdd|D } fdd|D }fdd|D }|||fS )zRead the SNLI dataset into premises, hypotheses, and labels.

    Defined in :numref:`sec_natural-language-inference-and-dataset`c                 S   s2   t dd| } t dd| } t dd| } |  S )Nz\(r  z\)z\s{2,}rt  )ru  rv  rw  )r  r   r   r   extract_text
  s    zread_snli.<locals>.extract_textr   r-   r_   )
entailmentcontradictionneutralzsnli_1.0_train.txtzsnli_1.0_test.txtr  c                 S   s   g | ]}| d qS )r  r  r{   rowr   r   r   r   #
  s     zread_snli.<locals>.<listcomp>Nc                    s$   g | ]}|d  kr |d qS )r   r-   r   r  r  	label_setr   r   r   $
  s      c                    s$   g | ]}|d  kr |d qS )r   r_   r   r  r  r   r   r   %
  s      c                    s$   g | ]}|d   kr |d   qS r  r   r  )r  r   r   r   &
  s      )r  r  r  r  r{  )r#  rn   rG  r  rowspremises
hypothesesr   r   r  r   	read_snli
  s    $r  c                   @   s2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )SNLIDatasetzsA customized dataset to load the SNLI dataset.

    Defined in :numref:`sec_natural-language-inference-and-dataset`Nc                 C   s   || _ t|d }t|d }|d krDtj|| ddgd| _n|| _| || _| || _t	|d | _
tdtt| j d  d S )	Nr   r-   rL  r  r  r_   r  r  )r  r   r  r  r  _padr  r  rb   r  r   r  rV  r6   )rC   ro   r  r  all_premise_tokensall_hypothesis_tokensr   r   r   rD   -
  s    
 
zSNLIDataset.__init__c                    s   t  fdd|D S )Nc                    s(   g | ] }t  j|  j jd  qS r  )r   r  r  r  ry  rB   r   r   r   <
  s     z$SNLIDataset._pad.<locals>.<listcomp>)rb   r  )rC   r|  r   rB   r   r  ;
  s    zSNLIDataset._padc                 C   s   | j | | j| f| j| fS r   )r  r  r   r   r   r   r   r   @
  s    zSNLIDataset.__getitem__c                 C   s
   t | jS r   )r6   r  rB   r   r   r   r.   C
  s    zSNLIDataset.__len__)N)rP   rQ   rR   rS   rD   r  r   r.   r   r   r   r   r  )
  s
   
r  r  c           
      C   sv   t  }t d}t|d}t|d}t||}t|||j}tjjj	|| d|d}tjjj	|| d|d}	||	|jfS )zDownload the SNLI dataset and return data iterators and vocabulary.

    Defined in :numref:`sec_natural-language-inference-and-dataset`r  TFr   )
r   r   r&  r  r  r  rb   r  r   rl   )
rg   r  r   r#  r|  r}  rn  test_setr   r   r   r   r   load_data_snliF
  s    



r  c                 C   sr   |    tj|| t d}tj|| t d}tj| |d|dgdd}|dkrbdS |dkrndS dS )	zPredict the logical relationship between the premise and hypothesis.

    Defined in :numref:`sec_natural-language-inference-attention`r=  )r-   rG   r-   r  r   r  r  r  )r   rb   r  r   r1  r   rY   )r   r  premise
hypothesisrC  r   r   r   predict_snliX
  s    r  c                 C   s   | j d }tj|f| jd}tj|f| jd}|  || }	||}
||
 }||	||	j ||||j  d }|  |	  |S )z=Update discriminator.

    Defined in :numref:`sec_basic_gan`r   r=  r_   )
rX   rb   onesr0  r5  r   detachrY   r   r   )r3   Znet_Dnet_Gr   	trainer_Drg   r  r5  real_Yfake_Xfake_Yloss_Dr   r   r   update_Dd
  s    
r  c           
      C   sZ   | j d }tj|f| jd}|  || }||}||||j }	|	  |  |	S )z9Update generator.

    Defined in :numref:`sec_basic_gan`r   r=  )rX   rb   r  r0  r   rY   r   r   )
r  r  r  r   	trainer_Grg   r  r  r  loss_Gr   r   r   update_Gw
  s    
r  zpokemon.zip(c065c0e2593b8b161a2d7873e42418bf6a21106cpokemonc                 O   s   |   j||S r   )r  r   r<   r   r  r   r   r   r   
  r  r   c                 O   s   | j ||S r   )r   r  r   r   r   r   
  r  c                 O   s   | j ||S r   )rY   r  r   r   r   r   
  r  c                 O   s   | j ||S r   r:  r  r   r   r   r   
  r  c                 O   s   | j ||S r   )rJ   r  r   r   r   r   
  r  c                 O   s   | j ||S r   )r   r  r   r   r   r   
  r  c                 O   s   | j ||S r   )rB  r  r   r   r   r   
  r  c                 O   s   | j ||S r   )tr  r   r   r   r   
  r  )r   )NNNNNNr)   r)   r*   r   N)T)Nr   )N)r   )N)r   )N)r~  )rG   )Fr  )F)N)r  )r   )F)Nrh  ri  )r  N)r  r  )r_   )r   )r-   )NN)r   )rZ  )r   ry  )T)T)N)rz  )r  )r  r  DATA_URLr   rL   rb   r   PILr   r   Ztorch.nnr   rl  Ztorch.utilsr   r   r   	nn_Moduler  r	  r  r  r  ru  r  sysr  rE   r  r   pandasr  r  IPythonr	   
matplotlibr
   r   matplotlib_inliner   modulesrP   r   r   r   r(   r9   r?   r]   r^   ra   ri   rp   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r&  r'  r1  r4  r9  rA  r[  r\  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r%  r&  r*  r,  r2  r>  rU  r?  rO  rZ  rg  rt  rx  ry  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r
  r  r  r)  r:  rG  rY  rb  rk  rr  rx  r  r  r  r  r  r  r  r  VOC_CLASSESr  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r,  r-  r4  r<  r=  rI  rN  rQ  rY  r`  rk  rl  ro  rr  ry  r  r  r  r  r  r  r  r  r  r5  r  r:  r  r  sinhr  coshr  linspacer  r[  rV   randrW   r  r  r  rH  r  abseyer   rY   r;  r   r   r   r  r   r   r   r   <module>   sv  
          
!			)*
,
  
	&
+

)
$ 
)




'$ 
#


(
   
 
     
  
              
	
 

	


#



#
$
