
How can I get the same functionality as SQL's OFFSET in Hive? For example:

SELECT * from table LIMIT 20 OFFSET 30

Thanks!


This is the same functionality as 'limit 20, 30' in MySQL. – nervosol


http://stackoverflow.com/questions/11750312/hive-ql-limiting-number-of-rows-per-each-item –

Answers


I am not aware of a built-in function or UDF that mimics this behavior, but if you are using Hive 0.13 you can get the desired result in a roundabout way with the row_number() window function: number the rows and keep those between offset + 1 and offset + limit, so LIMIT 20 OFFSET 30 becomes row numbers 31 through 50.

select pk, col_1, col_2, ... , col_n 
from (
    select pk, col_1, col_2, ... , col_n, row_number() OVER (ORDER by pk) as rank 
    from some_database.some_table 
    ) x 
where rank between 31 and 50 
// The imports below are an assumption: they target the jOOQ 2.x API (org.jooq.impl.Factory)
// and Apache commons-lang, which is what this code appears to use.
import java.sql.Connection;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.jooq.Record;
import org.jooq.SQLDialect;
import org.jooq.SelectSelectStep;
import org.jooq.impl.Factory;

// Note: REGION_ID, RESOURCE_TYPE_ID, RESOURCE_ID, CONTENT_ID, HIT_COUNT, SUM_HIT_COUNT,
// ScoringMetadata, ResultRow and ResultRowBuilder are project-specific types and field
// descriptors (presumably static imports) that are not shown here.
public class CountRatingQueryBuilder { 

private static final String SCORING_TABLE_NAME = "web_resource_rating"; 

private final Connection connection; 
private final ScoringMetadata scoringMetadata; 

private final SelectSelectStep select; 
private final Factory create; 

public CountRatingQueryBuilder(Connection connection, ScoringMetadata scoringMetadata){ 
    this.connection = connection; 
    this.scoringMetadata = scoringMetadata; 

    create = new Factory(this.connection, SQLDialect.MYSQL); 
    select = create.select(); 

    withSelectFieldsClause(); 
} 

public CountRatingQueryBuilder withLimit(int limit){ 
    select.limit(limit); 
    return this; 
} 

public CountRatingQueryBuilder withRegionId(Integer regionId){ 
    select.where(REGION_ID.field().equal(regionId)); 
    return this; 
} 

public CountRatingQueryBuilder withResourceTypeId(int resourceTypeId){ 
    select.where(RESOURCE_TYPE_ID.field().equal(resourceTypeId)); 
    return this; 
} 

public CountRatingQueryBuilder withRequestTimeBetween(long beginTimestamp, long endTimestamp){ 
    select.where(REQUEST_TIME.field().between(beginTimestamp, endTimestamp)); 
    return this; 
} 

public CountRatingQueryBuilder withResourceId(int resourceId){ 
    select.where(RESOURCE_ID.field().equal(resourceId)); 
    return this; 
} 



protected void withGroupByClause(){ 
    select.groupBy(REGION_ID.field()); 
    select.groupBy(RESOURCE_TYPE_ID.field()); 
    select.groupBy(RESOURCE_ID.field()); 
    select.groupBy(CONTENT_ID.field()); 
} 

protected void withSelectFieldsClause(){ 
    select.select(REGION_ID.field()); 
    select.select(RESOURCE_TYPE_ID.field()); 
    select.select(CONTENT_ID.field()); 
    select.select(RESOURCE_ID.field()); 
    select.select(Factory.count(HIT_COUNT.field()).as(SUM_HIT_COUNT.fieldName())); 
} 

protected void withFromClause(){ 
    select.from(SCORING_TABLE_NAME); 
} 

protected void withOrderByClause(){ 
    select.orderBy(SUM_HIT_COUNT.field().desc()); 
} 

public String build(){ 
    withGroupByClause(); 
    withOrderByClause(); 
    withFromClause(); 
    // Dirty hack for the MySQL dialect: Hive does not support OFFSET, so strip the
    // "offset ?" placeholder from the generated SQL (the matching bind value is dropped
    // in buildAndFetch()). TODO: we can try to implement our own SQL dialect for Hive :)
    return select.getSQL().replace("offset ?", "");

} 

public List<ResultRow> buildAndFetch(){ 
    String sqlWithPlaceholders = build(); 

    List<ResultRow> scoringResults = new ArrayList<ResultRow>(100); 
    // Drop the last bind value, which belongs to the "offset ?" placeholder stripped in build().
    Object[] bindValues = select.getBindValues().toArray(new Object[select.getBindValues().size()]);
    List<Record> recordResults = create.fetch(sqlWithPlaceholders, ArrayUtils.subarray(bindValues, 0, bindValues.length - 1));
    for(Record record : recordResults){ 
     ResultRowBuilder resultRowBuilder = ResultRowBuilder.create(); 

     resultRowBuilder.withContentType(scoringMetadata.getResourceType(record.getValue(RESOURCE_TYPE_ID.fieldName(), Integer.class))); 
     resultRowBuilder.withHitCount(record.getValue(SUM_HIT_COUNT.fieldName(), Long.class)); 
     resultRowBuilder.withUrl(record.getValue(CONTENT_ID.fieldName(), String.class)); 
     scoringResults.add(resultRowBuilder.build()); 
    } 
    return scoringResults; 
} 

} 
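For reference, here is a hypothetical helper showing how the builder above might be used; the method name and filter values are made up, and it assumes the imports and project-specific types noted above:

public static List<ResultRow> fetchTop20(Connection connection, ScoringMetadata scoringMetadata) { 
    return new CountRatingQueryBuilder(connection, scoringMetadata) 
            .withRegionId(1)                                  // made-up region id 
            .withResourceTypeId(2)                            // made-up resource type id 
            .withRequestTimeBetween(1380672000L, 1380758400L) // made-up request-time window 
            .withLimit(20)                                    // Hive has no OFFSET, so only a LIMIT is applied 
            .buildAndFetch(); 
} 

Note that the builder only exposes a limit; a real offset would still require something like the row_number() approach from the first answer.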

Hopefully this is the right answer; it is copied from the following link. To understand it in detail, see jooq extend existing dialect. Adopt MySQL dialect to apache Hive dialect.