
    ZTh"                         d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ  G d de      Zde
eef   de	eef   fdZ y)    N)Path)OptionalTupleUnion)urljoinurlparse)ProgressFileconvert_keys_to_snake_case)ParquetConfig)!ParquetComplexTypesImportModeType)DataImportService)TimeFormatType)SiftRestConfig_RestServicec                   *    e Zd ZU dZdZdZeed<   eed<   eed<   eed<   def fd	Z		 dd
e
eef   dededefdZdededefdZej$                  ej(                  dddfded
e
eef   dedededee   dee   dee   defdZd
e
eef   defdZ xZS )ParquetUploadServicez/api/v1/data-imports:uploadz/api/v1/data-imports:urlz"/api/v0/data-imports:detect-config
_rest_conf_upload_uri_url_uri_apikey	rest_confc                    t         |   |       t        | j                  | j                        | _        t        | j                  | j                        | _        t        | j                  | j                        | _	        y )N)r   )
super__init__r   	_base_uriUPLOAD_PATHr   URL_PATHr   DETECT_CONFIG_PATH_detect_config_uri)selfr   	__class__s     `/home/www/backend.miabetepe.com/venv/lib/python3.12/site-packages/sift_py/data_import/parquet.pyr   zParquetUploadService.__init__   s[    9-"4>>43C3CD>")$..$:Q:Q"R    pathparquet_configshow_progressreturnc           	      .   t        |       | j                  j                  | j                  ddit	        j
                  d|j                         i            }|j                  dk7  r%t        d|j                   d|j                         	 |j	                         }	 |d	   }|d
   }t        ||       5 }	ddi}
| j                  j                  ||
|	      }|j                  dk7  r%t        d|j                   d|j                         t        | j                  |      cddd       S # t        j                  j                  t        f$ r t        d|j                         w xY w# t        $ r}t        d|       d}~ww xY w# 1 sw Y   yxY w)a  
        Uploads the Parquet file pointed to by `path` using a custom Parquet config.

        Args:
            path: The path to the Parquet file.
            parquet_config: The Parquet config.
            show_progress: Whether to show the status bar or not.
        Content-Encodingzapplication/octet-streamr%   )urlheadersdata   z3Config file upload request failed with status code . Invalid response: 	uploadUrldataImportId Response missing required keys: N)disablez1Data file upload request failed with status code )_extract_parquet_footer_sessionpostr   jsondumpsto_dictstatus_code	ExceptiontextdecoderJSONDecodeErrorKeyErrorr	   r   r   )r    r$   r%   r&   responseupload_info
upload_urldata_import_idefr+   s              r"   uploadzParquetUploadService.upload"   s    	 %==%%  "$> -~/E/E/GHI & 
 3&EhFZFZE[[]^f^k^k]lm 	B"--/K	D)+6J"-n"=N $M(9: 	Fa"$>G }})) * H ##s*GH\H\G]]_`h`m`m_no  %T__nE!	F 	F ,,h7 	B0@AA	B  	D>qcBCC	D	F 	Fs1   D- 
E, 7A,F-<E),	F5FFFr*   c                 n   t        |      }|j                  dvrt        d|j                   d      | j                  j	                  | j
                  t        j                  ||j                         d            }|j                  dk7  r%t        d|j                   d|j                         	 |j                         }	 |d   }t        | j                  |      S # t        j                  j                  t        f$ r}t        d	|       d
}~ww xY w# t        $ r}t        d|       d
}~ww xY w)z]
        Uploads the Parquet file pointed to by `url` using a custom Parquet config.
        )s3httphttpszInvalid URL scheme: 'z*'. Only S3 and HTTP(S) URLs are supported.)r*   r%   )r*   r,   r-   z+URL upload request failed with status code r.   r/   Nr1   r2   )r   schemer;   r5   r6   r   r7   r8   r9   r:   r<   r=   r>   r?   r   r   )r    r*   r%   
parsed_urlr@   rA   rD   rC   s           r"   upload_from_urlz$ParquetUploadService.upload_from_url]   sI    c]
$;;'
(9(9'::de  ==%%&4&<&<&> & 
 3&=h>R>R=SSUV^VcVcUde 	6"--/K	D"-n"=N !.AA ,,h7 	60455	6
  	D>qcBCC	Ds0   3C D #DDD	D4!D//D4N
asset_name	time_pathtime_formatcomplex_types_import_moderun_namerun_idrelative_start_timec	                     | j                  |      }	||	d<   ||	d   d   d<   ||	d   d   d<   |||	d   d   d<   ||	d<   |||	d<   |||	d	<   t        |	      }
| j                  ||
      S )
a  
        Uploads the Parquet file pointed to by `path` to the specified asset. This function will
        automatically generate the Parquet Config using the footer. See the options
        below for what parameters can be overridden. Use `upload` if you need to specify a custom Parquet config.

        Set `time_path` to specify which column contains timestamp information and `time_format`
        to specify the time data format. Default is `TimeFormatType.ABSOLUTE_UNIX_NANOSECONDS`.

        Override `complex_types_import_mode` to specify how to import complex types (maps and list). Default is both strings and bytes.
        Override `run_name` to specify the name of the run to create for this data. Default is None.
        Override `run_id` to specify the id of the run to add this data to. Default is None.
        Override `relative_start_time` if a relative time format is used. Default is None.
        rN   flat_datasettime_columnr$   formatrT   rQ   rR   rS   )_detect_config_flat_datasetr   rF   )r    rN   r$   rO   rP   rQ   rR   rS   rT   config_infor%   s              r"   flat_dataset_uploadz(ParquetUploadService.flat_dataset_upload   s    0 66t<$.L!=FN#M26:?JN#M28<*PcK'67LM3L/0&.K
#$*K!&{3{{400r#   c                 |   t        |      \  }}t        j                  |      j                  d      }t	        j
                  |dd      }t        j                  |j                               }| j                  j                  | j                  |ddi      }|j                  dk7  r%t        d|j                   d	|j                         	 t        |j	                               }d|vrt        d|       ||d   d<   |d   S # t        j                   j"                  t$        f$ r}	t        d
|	       d}	~	ww xY w)zjReturns a dictionary representing the flat dataset Parquet config detected
        from the file.
        zutf-8!DATA_TYPE_KEY_PARQUET_FLATDATASET)r,   typer)   gzip)r*   r,   r+   r-   z.Detect config request failed with status code r.   r/   Nr%   z4Parquet config missing from detect config response: footer_offset)r4   base64	b64encodedecoder7   r8   r_   compressencoder5   r6   r   r:   r;   r<   r
   r=   r>   r?   )
r    r$   footer_bytesr`   encoded_datarequest_datacompressed_datar@   rZ   rD   s
             r"   rY   z0ParquetUploadService._detect_config_flat_dataset   sQ    '>d&C#m''5<<WEzz$;
 --(;(;(=>==%%''oHZ\bGc & 
 3&@AUAU@VVXYaYfYfXgh 	64X]]_EK ;.RS^R_`aa :G$%o6+,, ,,h7 	60455	6s   D #D;(D66D;)T)__name__
__module____qualname__r   r   r   r   __annotations__strr   r   r   r   boolr   rF   rM   r   ABSOLUTE_UNIX_NANOSECONDSr   BOTHr   r[   dictrY   __classcell__)r!   s   @r"   r   r      sI   /K)H=MLS. S #	9FCI9F &9F 	9F
 
9Fv'B'B &'B 
	'B\ '5&N&NGhGmGm"& $-1+1+1 CI+1 	+1
 $+1 $E+1 3-+1 +1 &c]+1 
+1Z!-c4i0@ !-T !-r#   r   filenamer'   c                    t        | d      5 }|j                  dd       |j                  d      }t        j                  d|dd       d   }|dd }|d	k7  rt        d
      |j                  |dz    d       |j                  |      }|t        j                  j                  |       t        |      z
  dz
  fcddd       S # 1 sw Y   yxY w)z1Return the Parquet footer bytes and footer offsetrbi      z<IN   r   s   PAR1z)Invalid Parquet file: missing magic bytes)
openseekreadstructunpack
ValueErrorosr$   getsizelen)rt   rE   footer_tail_bytes
footer_lenmagicrf   s         r"   r4   r4      s     
h	 	O	r1FF1I]]4):2A)>?B
!!"%GHII	a !$vvj)RWW__X6\9JJQNN	O 	O 	Os   B+CC)!ra   r_   r7   r   r}   pathlibr   typingr   r   r   urllib.parser   r   sift_py.data_import._utilsr	   r
   sift_py.data_import.configr   )sift_py.data_import.parquet_complex_typesr   sift_py.data_import.statusr   sift_py.data_import.time_formatr   sift_py.restr   r   r   rn   bytesintr4    r#   r"   <module>r      si       	   ) ) * O 4 W 8 : 5B-< B-JOeCI&6 O5;L Or#   