
    ZTh?              
          d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZ	 d dlZ	 d dlZd dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dl m!Z!  G d d      Z"dee#ef   de#dedefdZ$dee#ef   dedejJ                  fdZ&dejJ                  dejJ                  dejJ                  fdZ'dejP                  de#de)de	e   dejJ                  f
dZ*dedejV                  dejV                  fd Z,ded!ejJ                  defd"Z-dede	e   fd#Z.y# e$ rZ ed      edZ[ww xY w# e$ rZ ed      edZ[ww xY w)$    N)defaultdict)	ExitStack)Path)DictListTupleUnioncastzThe h5py package is required to use the HDF5 upload service. Please include this dependency in your project by specifying `sift-stack-py[hdf5]`.zThe polars package is required to use the HDF5 upload service. Please include this dependency in your project by specifying `sift-stack-py[hdf5]`.)Hdf5DataCfg)	CsvConfig
Hdf5Config)CsvUploadService)DataImportService)NamedTemporaryFile)SiftRestConfigc            	       d    e Zd ZU dZeed<   eed<   defdZ	 dde	ee
f   deded	efd
Zd	efdZy)Hdf5UploadServicez'
    Service to upload HDF5 files.
    _csv_upload_service_prev_run_id	rest_confc                 2    t        |      | _        d| _        y )N )r   r   r   )selfr   s     ]/home/www/backend.miabetepe.com/venv/lib/python3.12/site-packages/sift_py/data_import/hdf5.py__init__zHdf5UploadService.__init__)   s    #3I#>     pathhdf5_configshow_progressreturnc                    t        |t              rt        |      n|}|j                         st	        d| d      t        |      }t               5 }g }|D ]R  }|j                  t        dd            }	t        ||	j                  |      }
|j                  |	j                  |
f       T |st	        d      |j                  j                  dk7  rc| j                  j                  |j                  j                        }|D ]'  \  }}
d|
j                   _        ||
j                   _        ) || _        n<|j                  j"                  dk7  r|j                  j"                  | _        nd| _        d}|D ]9  \  }}
| j                  j'                  ||
|	      }||})|j)                  |       ; 	 ddd       |S t	        d
      # 1 sw Y   xY w)aU  
        Uploads the HDF5 file pointed to by `path` using a custom HDF5 config.

        Args:
            path: The path to the HDF5 file.
            hdf5_config: The HDF5 config.
            show_progress: Whether to show the status bar or not.

        Returns:
            DataImportService used to get the status of the import
        zProvided path, 'z$', does not point to a regular file.wz.csv)modesuffixz2No data found for upload during processing of filer   N)r   zNo data uploaded by service)
isinstancestrr   is_file	Exception_split_hdf5_configsr   enter_contextr   _convert_to_csv_filenameappend_hdf5_configrun_namer   _create_run_csv_configrun_idr   uploadextend)r   r   r   r   
posix_pathsplit_configsstack	csv_itemsconfig	temp_file
csv_configr2   _import_servicefilenamenew_import_services                   r   r3   zHdf5UploadService.upload-   s   $ $.dC#8T$Zd
!!#.tf4XYZZ ,K8 [ (	>E57I' ?!//0BTZ0[\	1NN

   )..*!=>?  TUU ''00B611==k>V>V>_>_`%. ;MAz68J**34:J**1; %+!))00B6$/$<$<$C$C!$&! "N(1 >$*%)%=%=%D%Dj &E &" ")%7N"))*<=>C(	>T %!!9::[(	> (	>s   EG		Gc                     | j                   S )z-Return the run_id used in the previous upload)r   )r   s    r   get_previous_upload_run_idz,Hdf5UploadService.get_previous_upload_run_id{   s       r   N)T)__name__
__module____qualname____doc__r   __annotations__r&   r   r   r	   r   r   boolr   r3   rA    r   r   r   r   !   sl     *).  #	L;CIL;  L; 	L;
 
L;\!C !r   r   src_pathdst_filer   r    c                 X    t        | |      }t        ||      }|j                  |       |S )a  Converts the HDF5 file to a temporary CSV on disk that we will upload.

    Args:
        src_path: The source path to the HDF5 file.
        dst_file: The output CSV file path.
        hdf5_config: The HDF5 config.

    Returns:
        The CSV config for the import.
    )_convert_hdf5_to_dataframes_create_csv_config	write_csv)rI   rJ   r   	merged_dfcsv_cfgs        r   r+   r+      s/      ,HkBI i8G!Nr   c                    t        t              }|j                  j                  D ].  }|j                  |j
                  f}||   j                  |       0 g }t        j                  | ddd      5 }|j                         D ]'  \  \  }}}	t        ||||	      }
|j                  |
       ) 	 ddd       t        |      dkD  r~g }t        dt        |      d      D ]R  }|dz   t        |      k  r+||   }||dz      }t        ||      }|j                  |       ?|j                  ||          T |}t        |      dkD  r~|d   j                  d	      }|S # 1 sw Y   xY w)
zConvert the HDF5 file to a polars DataFrame.

    Args:
        src_path: The source path to the HDF5 file.
        hdf5_config: The HDF5 config.

    Returns:
        A polars DataFrame containing the data.
    rlatestT)libverswmrN   r      	timestamp)r   listr.   datatime_datasettime_columnr-   h5pyFileitems_extract_hdf5_data_to_dataframelenrange_merge_timeseries_dataframessort)rI   r   data_cfg_ts_mapdata_cfg	map_tupledata_framesh5f	time_pathtime_col	data_cfgsdf
next_roundidf1df2mergedrO   s                    r   rL   rL      s{    ALD@QO,,11 4**H,@,@A		"))(34 K	8S	= #0?0E0E0G 	#,!Y90i9UBr"	## k
Q

q#k*A. 	2A1us;''!!n!!a%(5c3?!!&)!!+a.1	2 ! k
Q
 A##K0I+# #s   2;EE!rp   rq   c           	         | j                   D cg c]
  }|dk7  s	| }}|j                   D cg c]
  }|dk7  s	| }}t        |      t        |      z  }|rt        j                         }|j	                         }|D ]  }|j                  || d| i      } | j                  |ddd      }|D ]s  }| d| }	|j                  t        j                  t        j                  |      t        j                  |	      g      j                  |            j                  |	      }u |S | j                  |ddd      }|S c c}w c c}w )zDMerge two timeseries dataframes together. Handles duplicate channelsrX   r<   fullT)onhowcoalesce)columnssetuuiduuid4clonerenamejoinwith_columnsplrw   colaliasdrop)
rp   rq   r   df1_channelsdf2_channelsdup_channelsuiddf2_renamedrO   temp_col_names
             r   rc   rc      sN    $';;EC#2DCELE#&;;EC#2DCELE|$s<'88Ljjliik 	DC%,,ccU!C5>-BCK	D HH[[ftHT	   	"C"e1SENM!..RVVC["&&*?@AGGLd=! 	"  HHS[ftHL	1 FEs   
EE
EE	hdf5_filerj   rk   hdf5_data_configsc           
         || vrt        d|       t        t        j                  | |         }t	        j
                  |dd       }|dz
  }|j                  d   |k  rt        | d| d      ||j                  |      }|j                  t        j                  k(  r|j                  t        j                        }t	        j
                  d|i      }|D ]  }	|	j                  | vrt        d|	j                         ||	j                  k(  sJ d| d	|	j                          ||	j                  k(  sJ d
| d|	j                          t        t        j                  | |	j                           }
t	        j
                  |
dd       }|	j                  dz
  }|j                  d   |k  r1t        |	j                   d|	j                   d|	j                         ||j                  |      }t!        |      t!        |      k7  r0t        |	j                   dt!        |       dt!        |       d      |j                  t        j                  k(  r|j                  t        j                        }|	j"                  dk(  r9t%        |	j&                  D cg c]  }|j(                   c}      rt+        |	|      }|j-                  |j/                  |	j                              } |S c c}w )ah  Extract data from an hdf5_file to a polars DataFrame.

    Args:
        hdf5_file: HDF5 File
        time_path: HDF5 time array path
        time_col: HDF5 time array col (1-indexed)
        hdf5_data_config: List of HDF5 Data Configs being extracted

    Returns:
        A multi-column polars DataFrame containing the timestamps and associated channels
    z#HDF5 file does not contain dataset NrV   z: time_column=z out of rangerX   )rZ   zWorking time dataset z) does not match data cfg defined dataset zWorking time col z% does not match data cfg defined col z: value_column=z out of range for z1: time and value columns have different lengths (z vs )CHANNEL_DATA_TYPE_ENUM)r(   r
   r]   Datasetr   	DataFrameshaperx   dtypeBinaryStringvalue_datasetr[   r\   value_columnr,   ra   	data_typeany
enum_types	is_signed_convert_signed_enumsr   r   )r   rj   rk   r   r[   df_timetime_idxtime_series
data_framehdf5_data_configr   df_valueval_idxvalue_series	enum_types                  r   r`   r`      s    $ 	!=i[IJJi	&:;Lll<?+G!|H}}Q8#9+^H:]KLL'//(34K BII%!&&ryy1K#=>J- )X--:ABRB`B`Aabcc ,999 	
#I;.WXhXuXuWvw	
9 +777 	
z)NO_OkOkNlm	
7 T\\95E5S5S+TU <<a 01"//!3>>!'#(()9I9V9V8WWijz  kI  kI  jJ  K    0 0 9:{s<00#(())Z[^_j[kZllpqt  vB  rC  qD  DE  F 
 *',,RYY7L %%)AAc2B2M2MNYY  NG
 11A<PL,,\-?-?@P@U@U-VW
S)XV  Os   K+
rf   rZ   c                 *   t        | j                  D cg c]  }|j                   c}      }| j                  D ]  }|j                  r|j                  dk\  r|j                  dk  r%t	        | j
                   d|j                   d      |j                  dz   }||v r't	        | j
                   d|j                   d|       ||_         t        j                  |j                         j                  t        j                              S c c}w )a'  
    Convert signed enums to unsigned ints for ingestion
    Ignores keys >= 0, such as those which may have been converted previously by the user
    Will raise an exception if casting will cause a collision with an existing key
    Or otherwise cannot cast signed negative int to a uint32
    r   i   z: Cannot convert key z/ to uint32 due to being below valid int32 rangel        z: Converting key z, to unsigned int collides with existing key )ry   r   keyr   r(   r,   r   Seriesto_numpyastypenpuint32)rf   rZ   r   cur_enum_keysunsigned_keys        r   r   r   2  s     8K8KL9LMM(( %	""immq&8==>)==/!6y}}oEtu  !}}0=(==/!29==/Amnzm{|  %	% 99T]]_++BII677# Ms   DrO   c                    | j                   j                  | j                   j                  | j                   j                  d| j                   j                  j
                  d| j                   j                  j                  dd}| j                   j                  D ci c]  }|j                  | }}|j                  d   dk(  sJ d|j                  d           i }t        |j                  dd       D ]W  \  }}||   }|dz   }	|j                  |j                  |j                  |j                  |j                  |j                  d	||	<   Y ||d
<   t!        |      S c c}w )zConstruct a CsvConfig from a Hdf5Config

    Args:
        hdf5_config: The HDF5 config
        merged_df: The merged dataFrame of data

    Returns:
        The CSV config.
    rW   rV   )formatcolumn_numberrelative_start_time)
asset_namer/   r2   first_data_rowr\   r   rX   zOUnexpected merged DataFrame layout. Expected first column to be timestamp, not N)r,   r   unitsdescriptionr   bit_field_elementsdata_columns)r.   r   r/   r2   timer   r   rZ   r,   rx   	enumerater   r   r   r   r   r   )
r   rO   csv_config_dictd_cfg
config_mapr   idxchannel_namerf   col_nums
             r   rM   rM   M  sj    "..99,,55**11!..33::#.#;#;#@#@#T#T

O 2=1I1I1N1NO%**e#OJOQ;. 
YZcZkZklmZnYop. L&y'8'8'<= 

\l+'MM!++^^#//"--"*"="=!
W

 '3ON#_%%+ Ps   E c           	         | j                   j                  | j                   j                  | j                   j                  | j                   j                  | j                   j
                  D cg c]  }|j                  dk7  r| c}d}g }|d   r|j                  t        |             | j                   j
                  D ]  }|j                  dk7  rt        | j                   j                  | j                   j                  | j                   j                  | j                   j                  |gd      }|j                  |        |S c c}w )a+  
    Split up hdf5_config into separate configs used to generate each CSV file
    Needed as string channels cannot be merged without creating empty string data points in the app

    Args:
        hdf5_config: The HDF5 config.

    Returns:
        List of HDF5Configs for later CSV conversion
    CHANNEL_DATA_TYPE_STRING)r   r/   r2   r   rZ   rZ   )	r.   r   r/   r2   r   rZ   r   r-   r   )r   rf   non_string_config_dictfiltered_hdf5_configsstring_configs        r   r)   r)   }  s;    "..99,,55**11((-- (4499
!!%?? 

  f%$$Z0F%GH,,11 4!;;")66AA'44==%2299#0055!

 	$$]34 ! 7
s   -E)/rz   collectionsr   
contextlibr   pathlibr   typingr   r   r   r	   r
   numpyr   r]   ImportErroreRuntimeErrorpolarsr   sift_py.data_import._configr   sift_py.data_import.configr   r   sift_py.data_import.csvr   sift_py.data_import.statusr   sift_py.data_import.tempfiler   sift_py.restr   r   r&   r+   r   rL   rc   r^   intr`   r   r   rM   r)   rH   r   r   <module>r      s    #    1 1  4 < 4 8 ; '\! \!~CI  	0)CI)-7)\\)Xbll  ",, <MyyMM M K(	M
 \\M`8K 8ryy 8RYY 86-&J -&2<< -&I -&`-!Z -!D4D -!e  
	^   
	^ s.   D D. D+	D&&D+.E3	D<<E