ITPub博客

首页 > 数据库 > Oracle > Read an Excel xlsx with PL/SQL

Read an Excel xlsx with PL/SQL

原创 Oracle 作者:wildck 时间:2013-09-28 22:57:31 0 删除 编辑

At the OTN SQL and PLSQL forum I promised to publish some code I use for a project I’ still working on. This code allows you to select the content from an Excel document

select * from table( as_read_xlsx.read( as_read_xlsx.file2blob( 'DOC', 'Book1.xlsx' ) ) ); SHEET_NR SHEET_NAME ROW_NR COL_NR CELL  CEL STRING_VAL NUMBER_VAL DATE_VAL ---------- ---------- ------ ------ ----- --- ---------- ---------- -------------------------- 1 Mijn naam 1 1 A1    N 11 1 Mijn naam 1 2 B1    N 12 1 Mijn naam 1 3 C1    N 13 1 Mijn naam 2 1 A2    N 21 1 Mijn naam 2 2 B2    N 22 1 Mijn naam 2 3 C2    N 23 1 Mijn naam 3 1 A3    N 31 1 Mijn naam 3 2 B3    N 32 1 Mijn naam 3 3 C3    N 33 1 Mijn naam 4 4 D4    S   D4 1 Mijn naam 6 2 B6    D 1 Mijn naam 7 2 B7    D 1 Mijn naam 8 2 B8    D 1 Mijn naam 9 2 B9    D 1 Mijn naam 10 2 B10   D 2 Sheet3 2 2 B2    S Test 2 Sheet3 3 3 C3    D 19-JAN-2013 20:17:00 2 Sheet3 4 1 A4    S Anton 18 rows selected.
 
CREATE OR REPLACE package as_read_xlsx
is
/**********************************************
**
** Author: Anton Scheffer
** Date: 19-01-2013
** Website: http://technology.amis.nl/blog
**
** Changelog:
** 18-02-2013 - Ralph Bieber
                Handle cell type "str" to prevent ORA-06502
                if cell content is a string calculated by formula, 
                then cell type is "str" instead of "s" and value is inside  tag
** 19-02-2013 - Ralph Bieber
                Add column formula in tp_one_cell record, to show, if value is calculated by formula 
** 20-02-2013 - Anton Scheffer
                Handle cell types 'inlineStr' and 'e' to prevent ORA-06502
** 19-03-2013 - Anton Scheffer
                Support for formatted and empty strings
                Handle columns per row to prevent ORA-31186: Document contains too many nodes 
** 12-06-2013 - Anton Scheffer
                Handle sharedStrings.xml on older Oracle database versions

******************************************************************************
******************************************************************************
Copyright (C) 2013 by Anton Scheffer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

******************************************************************************
*************************************************************************** */
/*
**
** Some examples
**
--
-- every sheet and every cell
    select *
    from table( as_read_xlsx.read( as_read_xlsx.file2blob( 'DOC', 'Book1.xlsx' ) ) )
--
-- cell A3 from the first and the second sheet
    select *
    from table( as_read_xlsx.read( as_read_xlsx.file2blob( 'DOC', 'Book1.xlsx' ), '1:2', 'A3' ) )
--
-- every cell from the sheet with the name "Sheet3"
    select *
    from table( as_read_xlsx.read( as_read_xlsx.file2blob( 'DOC', 'Book1.xlsx' ), 'Sheet3' ) )
--
*/
  type tp_one_cell is record
    ( sheet_nr number(2)
    , sheet_name varchar(4000)
    , row_nr number(10)
    , col_nr number(10)
    , cell varchar2(100)
    , cell_type varchar2(1)
    , string_val varchar2(4000)
    , number_val number
    , date_val date
    , formula varchar2(4000)
  );
  type tp_all_cells is table of tp_one_cell;
--
  function read( p_xlsx blob, p_sheets varchar2 := null, p_cell varchar2 := null )
  return tp_all_cells pipelined;
--
  function file2blob
    ( p_dir varchar2
    , p_file_name varchar2
    )
  return blob;
--

END as_read_xlsx;
/
CREATE OR REPLACE package body as_read_xlsx
is
--
  function read( p_xlsx blob, p_sheets varchar2 := null, p_cell varchar2 := null )
  return tp_all_cells pipelined
  is
    t_date1904 boolean;
    type tp_date is table of boolean index by pls_integer;
    t_xf_date tp_date;
    t_numfmt_date tp_date;
    type tp_strings is table of varchar2(32767) index by pls_integer;
    t_strings tp_strings;
    t_sheet_ids tp_strings;
    t_sheet_names tp_strings;
    t_r varchar2(32767);
    t_s varchar2(32767);
    t_val varchar2(32767);
    t_t varchar2(400);
    t_nr number;
    t_x pls_integer;
    t_xx pls_integer;
    t_ns varchar2(200) := 'xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"';
    t_nd dbms_xmldom.domnode;
    t_nd2 dbms_xmldom.domnode;
    t_nl dbms_xmldom.domnodelist;
    t_nl2 dbms_xmldom.domnodelist;
    t_nl3 dbms_xmldom.domnodelist;
    t_one_cell tp_one_cell;
--
    function blob2node( p_blob blob )
    return dbms_xmldom.domnode
    is
    begin
      if p_blob is null or dbms_lob.getlength( p_blob ) = 0
      then
        return null;
      end if;
      return dbms_xmldom.makenode( dbms_xmldom.getdocumentelement( dbms_xmldom.newdomdocument( xmltype( p_blob, nls_charset_id( 'AL32UTF8' ) ) ) ) );
    end;
--  
    function blob2num( p_blob blob, p_len integer, p_pos integer )
    return number
    is
    begin
      return utl_raw.cast_to_binary_integer( dbms_lob.substr( p_blob, p_len, p_pos ), utl_raw.little_endian );
    end;
--  
    function little_endian( p_big number, p_bytes pls_integer := 4 )
    return raw
    is
    begin
      return utl_raw.substr( utl_raw.cast_from_binary_integer( p_big, utl_raw.little_endian ), 1, p_bytes );
    end;
--  
    function col_alfan( p_col varchar2 )
    return pls_integer
    is
    begin
      return ascii( substr( p_col, -1 ) ) - 64
           + nvl( ( ascii( substr( p_col, -2, 1 ) ) - 64 ) * 26, 0 )
           + nvl( ( ascii( substr( p_col, -3, 1 ) ) - 64 ) * 676, 0 );
    end;
--  
    function get_file
      ( p_zipped_blob blob
      , p_file_name varchar2
      )
    return blob
    is
      t_tmp blob;
      t_ind integer;
      t_hd_ind integer;
      t_fl_ind integer;
      t_encoding varchar2(10);
      t_len integer;
    begin
      t_ind := dbms_lob.getlength( p_zipped_blob ) - 21;
      loop
        exit when t_ind < 1 or dbms_lob.substr( p_zipped_blob, 4, t_ind ) = hextoraw( '504B0506' ); -- End of central directory signature
        t_ind := t_ind - 1;
      end loop;
--  
      if t_ind <= 0
      then
        return null;
      end if;
--  
      t_hd_ind := blob2num( p_zipped_blob, 4, t_ind + 16 ) + 1;
      for i in 1 .. blob2num( p_zipped_blob, 2, t_ind + 8 )
      loop
        if utl_raw.bit_and( dbms_lob.substr( p_zipped_blob, 1, t_hd_ind + 9 ), hextoraw( '08' ) ) = hextoraw( '08' )
        then
          t_encoding := 'AL32UTF8'; -- utf8
        else
          t_encoding := 'US8PC437'; -- IBM codepage 437
        end if;
        if p_file_name = utl_i18n.raw_to_char
                           ( dbms_lob.substr( p_zipped_blob
                                            , blob2num( p_zipped_blob, 2, t_hd_ind + 28 )
                                            , t_hd_ind + 46
                                            )
                           , t_encoding
                           )
        then
          t_len := blob2num( p_zipped_blob, 4, t_hd_ind + 24 ); -- uncompressed length
          if t_len = 0
          then
            if substr( p_file_name, -1 ) in ( '/', '\' )
            then  -- directory/folder
              return null;
            else -- empty file
              return empty_blob();
            end if;
          end if;
--  
          if dbms_lob.substr( p_zipped_blob, 2, t_hd_ind + 10 ) = hextoraw( '0800' ) -- deflate
          then
            t_fl_ind := blob2num( p_zipped_blob, 4, t_hd_ind + 42 );
            t_tmp := hextoraw( '1F8B0800000000000003' ); -- gzip header
            dbms_lob.copy( t_tmp
                         , p_zipped_blob
                         ,  blob2num( p_zipped_blob, 4, t_hd_ind + 20 )
                         , 11
                         , t_fl_ind +
 
 

来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/28251841/viewspace-773563/,如需转载,请注明出处,否则将追究法律责任。

上一篇: 没有了~
请登录后发表评论 登录
全部评论

注册时间:2013-09-28

  • 博文量
    6
  • 访问量
    27282