org.apache.solr.common.util.ContentStream.java及其实现类

org.apache.solr.common.util.ContentStream.java 主要用于获取文件、URL、字节数组、字符串等来源的数据流。主要方法有 InputStream getStream() 和 Reader getReader()。

其实现类 org.apache.solr.common.util.ContentStreamBase.java 为抽象类,其内部定义了 URLStream、FileStream、StringStream、ByteArrayStream 四个静态内部类作为具体实现,分别针对 URL、文件、字符串、字节数组。

接口:ContentStream.java

package org.apache.solr.common.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;

/**
 * A source of content that can be consumed either as raw bytes
 * ({@link #getStream()}) or as characters ({@link #getReader()}), together
 * with a few descriptive attributes (name, source info, content type, size).
 *
 * @since solr 1.2
 */
public interface ContentStream {
  /** @return the name associated with this stream */
  String getName();
  /** @return a description of where the content comes from */
  String getSourceInfo();
  /** @return the content type of the stream */
  String getContentType();

  /**
   * @return the stream size, or <code>null</code>
   */
  Long getSize(); // the size if it is known, otherwise null

  /**
   * Get an opened stream. The caller is responsible for closing it.
   * Consider using something like:
   * <pre>
   *   InputStream stream = stream.getStream();
   *   try {
   *     // use the stream...
   *   }
   *   finally {
   *     IOUtils.closeQuietly(stream);
   *   }
   *  </pre>
   *
   *  Only the first call to <code>getStream()</code> or <code>getReader()</code>
   *  is guaranteed to work. The runtime behavior of additional calls is undefined.
   *
   * Note: <code>getStream()</code> or <code>getReader()</code> must be called before
   *  the attributes (name, contentType, etc) are guaranteed to be set. Streams may be
   *  lazily loaded only when this method is called.
   */
  InputStream getStream() throws IOException;

  /**
   * Get an opened reader. The caller is responsible for closing it.
   * Consider using something like:
   * <pre>
   *   Reader reader = stream.getReader();
   *   try {
   *     // use the reader...
   *   }
   *   finally {
   *     IOUtils.closeQuietly(reader);
   *   }
   *  </pre>
   *
   *  Only the first call to <code>getStream()</code> or <code>getReader()</code>
   *  is guaranteed to work. The runtime behavior of additional calls is undefined.
   *
   * Note: <code>getStream()</code> or <code>getReader()</code> must be called before
   *  the attributes (name, contentType, etc) are guaranteed to be set. Streams may be
   *  lazily loaded only when this method is called.
   */
  Reader getReader() throws IOException;
}

抽象类:ContentStreamBase.java 及其静态内部实现类 URLStream、FileStream、StringStream、ByteArrayStream

package org.apache.solr.common.util;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Locale;

/**
 * Base class plus four concrete ContentStream implementations - one each for URL/File/String/byte[]
 *
 *
 * @since solr 1.2
 */
public abstract class ContentStreamBase implements ContentStream
{
  public static final String DEFAULT_CHARSET = "utf-8";

  protected String name;
  protected String sourceInfo;
  protected String contentType;
  protected Long size;

  //---------------------------------------------------------------------
  //---------------------------------------------------------------------
  /**
   * Extracts the value of the <code>charset</code> parameter from a content-type
   * string such as <code>text/xml; charset=UTF-8</code>.
   *
   * The parameter name is matched case-insensitively. Only the charset value
   * itself is returned: any parameters that follow it (separated by
   * <code>;</code>) are dropped, and a pair of surrounding double quotes is
   * removed.
   *
   * @param contentType the content-type string to inspect; may be <code>null</code>
   * @return the declared charset name, or <code>null</code> when
   *         <code>contentType</code> is <code>null</code>, declares no charset,
   *         or declares an empty one
   */
  public static String getCharsetFromContentType( String contentType )
  {
    if( contentType == null ) {
      return null;
    }
    final String key = "charset=";
    // indexOf returns -1 when absent; 0 is a legitimate match position
    int idx = contentType.toLowerCase(Locale.ROOT).indexOf( key );
    if( idx < 0 ) {
      return null;
    }
    String value = contentType.substring( idx + key.length() );

    // Parameters are ';'-separated: keep only the charset value itself
    int end = value.indexOf( ';' );
    if( end >= 0 ) {
      value = value.substring( 0, end );
    }
    value = value.trim();

    // The value may be written as a quoted string: charset="utf-8"
    if( value.length() >= 2
        && value.charAt( 0 ) == '"'
        && value.charAt( value.length() - 1 ) == '"' ) {
      value = value.substring( 1, value.length() - 1 ).trim();
    }

    // An empty declaration is treated the same as no declaration
    return value.isEmpty() ? null : value;
  }

  //------------------------------------------------------------------------
  //------------------------------------------------------------------------

  /**
   * A <code>ContentStream</code> backed by a <code>URL</code>.
   *
   * The content is obtained through a <code>URLConnection</code>. The
   * connection is only opened when {@link #getStream()} is called, and the
   * name, content type and size attributes are assigned at that point.
   * @see  URLConnection
   */
  public static class URLStream extends ContentStreamBase
  {
    private final URL url;

    /**
     * @param url the location to read the content from
     */
    public URLStream( URL url ) {
      this.url = url;
      sourceInfo = "url";
    }

    /**
     * Opens a connection to the URL, records the content type, name and size
     * reported by that connection, and returns its input stream.
     *
     * @return the input stream of the newly opened connection
     * @throws IOException if the connection or its stream cannot be opened
     */
    @Override
    public InputStream getStream() throws IOException {
      URLConnection conn = this.url.openConnection();

      contentType = conn.getContentType();
      name = url.toExternalForm();

      // getContentLength() reports -1 when the length is unknown (or does not
      // fit in an int); expose that as null rather than as a negative size.
      int length = conn.getContentLength();
      if( length < 0 ) {
        size = null;
      } else {
        size = Long.valueOf( length );
      }
      return conn.getInputStream();
    }
  }

  /**
   * 根据<code>File</code> 构造<code>ContentStream</code>
   */
  public static class FileStream extends ContentStreamBase
  {
    private final File file;

    public FileStream( File f ) {
      file = f; 

      contentType = null; // ??
      name = file.getName();
      size = file.length();
      sourceInfo = file.toURI().toString();
    }

    @Override
    public String getContentType() {
      if(contentType==null) {
        InputStream stream = null;
        try {
          stream = new FileInputStream(file);
          char first = (char)stream.read();
          if(first == ‘<‘) {
            return "application/xml";
          }
          if(first == ‘{‘) {
            return "application/json";
          }
        } catch(Exception ex) {
        } finally {
          if (stream != null) try {
            stream.close();
          } catch (IOException ioe) {}
        }
      }
      return contentType;
    }

    @Override
    public InputStream getStream() throws IOException {
      return new FileInputStream( file );
    }
  }

  /**
   * 根据<code>String</code> 构造<code>ContentStream</code>
   */
  public static class StringStream extends ContentStreamBase
  {
    private final String str;

    public StringStream( String str ) {
      this.str = str; 

      contentType = null;
      name = null;
      size = new Long( str.length() );
      sourceInfo = "string";
    }

    @Override
    public String getContentType() {
      if(contentType==null && str.length() > 0) {
        char first = str.charAt(0);
        if(first == ‘<‘) {
          return "application/xml";
        }
        if(first == ‘{‘) {
          return "application/json";
        }
        // find a comma? for CSV?
      }
      return contentType;
    }

    @Override
    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream( str.getBytes(DEFAULT_CHARSET) );
    }

    /**
     * 如果contentType中没有定义charset编码,则使用StringReader
     */
    @Override
    public Reader getReader() throws IOException {
      String charset = getCharsetFromContentType( contentType );
      return charset == null
        ? new StringReader( str )
        : new InputStreamReader( getStream(), charset );
    }
  }

  /**
   * Default reader implementation. The bytes from <code>getStream()</code>
   * are decoded with the charset declared by the content type, or with
   * utf-8 when the content type declares none.
   */
  @Override
  public Reader getReader() throws IOException {
    String declared = getCharsetFromContentType( getContentType() );
    String effective = ( declared != null ) ? declared : DEFAULT_CHARSET;
    return new InputStreamReader( getStream(), effective );
  }

  //------------------------------------------------------------------
  // Accessors for the attributes that subclasses may override
  //------------------------------------------------------------------

  /** @return the content type currently recorded for this stream */
  @Override
  public String getContentType() {
    return this.contentType;
  }

  /** @param contentType the content type to record for this stream */
  public void setContentType(String contentType) {
    this.contentType = contentType;
  }

  /** @return the name currently recorded for this stream */
  @Override
  public String getName() {
    return this.name;
  }

  /** @param name the name to record for this stream */
  public void setName(String name) {
    this.name = name;
  }

  /** @return the size currently recorded for this stream */
  @Override
  public Long getSize() {
    return this.size;
  }

  /** @param size the size to record for this stream */
  public void setSize(Long size) {
    this.size = size;
  }

  /** @return the source description currently recorded for this stream */
  @Override
  public String getSourceInfo() {
    return this.sourceInfo;
  }

  /** @param sourceInfo the source description to record for this stream */
  public void setSourceInfo(String sourceInfo) {
    this.sourceInfo = sourceInfo;
  }

  /**
   * A <code>ContentStream</code> backed by a byte array.
   */
  public static class ByteArrayStream extends ContentStreamBase
  {
    private final byte[] bytes;

    /**
     * @param bytes  the content of the stream; the array is used as given,
     *               without copying
     * @param source used as both the name and the source info of the stream
     */
    public ByteArrayStream( byte[] bytes, String source ) {
      this.bytes = bytes;

      this.contentType = null;
      name = source;
      size = Long.valueOf( bytes.length );
      sourceInfo = source;
    }

    /**
     * @return a new input stream over the byte array
     */
    @Override
    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream( bytes );
    }
  }
}

时间： 2024-07-29 03:17:44

org.apache.solr.common.util.ContentStream.java及其实现类

org.apache.solr.common.util.ContentStream.java及其实现类的相关文章

solrj:org.apache.solr.common.util.NamedList.java

maven install 报错org.apache.maven.surefire.util.SurefireReflectionException: java.lang.reflect.InvocationTargetException

solr启动时报错org.apache.solr.common.SolrException: undefined field text的解决办法

Apache Solr采用Java开发、基于Lucene的全文搜索服务器

玩转大数据系列之Apache Pig如何与Apache Solr集成（二）

solr环境搭建及java小demo

Apache Solr 之使用SolrJ操作索引库

Apache Solr 之环境搭建

02 Apache Solr: 概览 Solr在信息系统架构中的位置