Tuesday, November 12, 2013

Configuring Hue 3.0


Configuring Hue with Hadoop and Hive in pseudo-distributed mode.

Prerequisites:

  • Hue 3.0
  • Apache Hadoop 2.2
  • Apache Hive 0.12.0
Note:
Hue 3.0 only works with Hive 0.12.0
Hadoop should have the webhdfs feature.
Hive should have the hiveserver2.

Configurations:


core-site.xml

<configuration>
  <!-- Default filesystem URI. fs.defaultFS is the canonical Hadoop 2.x
       property name (fs.default.name is the deprecated alias).
       Must match fs_defaultfs in hue.ini. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <!-- Base for Hadoop's temporary directories. Per the Permissions
       section below, this directory must be writable by everyone. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop-2.2.0/tmp</value>
  </property>
  <!-- Hue WebHDFS proxy user setting: allow the "hue" service user to
       impersonate any user (groups) from any host. -->
  <property>
    <name>hadoop.proxyuser.hue.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hue.groups</name>
    <value>*</value>
  </property>
</configuration>

hdfs-site.xml

<configuration>
  <!-- Single-node (pseudo-distributed) setup: one replica per block. -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <!-- dfs.permissions.enabled is the canonical Hadoop 2.x name
       (dfs.permissions is the deprecated alias). Disabled here so Hue
       users can write to HDFS without permission errors in this demo. -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <!-- Required by Hue: enable the WebHDFS REST API on the NameNode. -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>



mapred-site.xml

<configuration>
  <!-- Run MapReduce jobs on YARN (MR2) rather than classic MR1. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>



yarn-site.xml

<configuration>
<!-- Site specific YARN configuration properties -->
  <!-- Auxiliary shuffle service required by MapReduce on YARN. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- The class property key must embed the aux-service name declared
       above ("mapreduce_shuffle", with an underscore); the dotted form
       "mapreduce.shuffle" is not recognized by Hadoop 2.2. -->
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <!-- NodeManagers report to the ResourceManager on this address. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>localhost:8025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>localhost:8030</value>
  </property>
  <!-- ResourceManager IPC address for job submission. The port must
       match resourcemanager_port=8032 configured in hue.ini below. -->
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>localhost:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
    <description>In case you do not want to use the default scheduler</description>
  </property>
  <property>
    <name>yarn.nodemanager.address</name>
    <value>localhost:8050</value>
    <description>the nodemanagers bind to this port</description>
  </property>
</configuration>


hue.ini

#####################################
# DEVELOPMENT EDITION
#####################################

# Hue configuration file
# ===================================
#
# For complete documentation about the contents of this file, run
#       $ /build/env/bin/hue config_help
#
# All .ini files under the current directory are treated equally.  Their
# contents are merged to form the Hue configuration, which can
# be viewed on Hue at
#       http://<hue_host>:<hue_port>/dump_config


###########################################################################
# General configuration for core Desktop features (authentication, etc)
###########################################################################

[desktop]

  send_dbug_messages=1

  # To show database transactions, set database_logging to 1
  database_logging=0

  # Set this to a random string, the longer the better.
  # This is used for secure hashing in the session store.
  secret_key=abcdefghijklmnopqrstuvwxyz1234567890

  # Webserver listens on this address and port
  http_host=0.0.0.0
  http_port=8000

  # Time zone name
  time_zone=America/Los_Angeles

  # Enable or disable Django debug mode
  ## django_debug_mode=true

  # Enable or disable backtrace for server error
  ## http_500_debug_mode=true

  # Server email for internal error messages
  ## django_server_email='hue@localhost.localdomain'

  # Email backend
  ## django_email_backend=django.core.mail.backends.smtp.EmailBackend

  # Webserver runs as this user
  ## server_user=hue
  ## server_group=hue

  # If set to false, runcpserver will not actually start the web server.
  # Used if Apache is being used as a WSGI container.
  ## enable_server=yes

  # Number of threads used by the CherryPy web server
  ## cherrypy_server_threads=10

  # Filename of SSL Certificate
  ## ssl_certificate=

  # Filename of SSL RSA Private Key
  ## ssl_private_key=

  # Default encoding for site data
  ## default_site_encoding=utf-8

  # Help improve Hue with anonymous usage analytics.
  # Use Google Analytics to see how many times an application or specific section of an application is used, nothing more.
  ## collect_usage=true

  # Administrators
  # ----------------
  [[django_admins]]
    ## [[[admin1]]]
    ## name=john
    ## email=john@doe.com

  # UI customizations
  # -------------------
  [[custom]]

  # Top banner HTML code
  ## banner_top_html=

  # Configuration options for user authentication into the web application
  # ------------------------------------------------------------------------
  [[auth]]

    # Authentication backend. Common settings are:
    # - django.contrib.auth.backends.ModelBackend (entirely Django backend)
    # - desktop.auth.backend.AllowAllBackend (allows everyone)
    # - desktop.auth.backend.AllowFirstUserDjangoBackend
    #     (Default. Relies on Django and user manager, after the first login)
    # - desktop.auth.backend.LdapBackend
    # - desktop.auth.backend.PamBackend
    # - desktop.auth.backend.SpnegoDjangoBackend
    # - desktop.auth.backend.RemoteUserDjangoBackend
    # - desktop.auth.backend.OAuthBackend
    # - libsaml.backend.SAML2Backend
    ## backend=desktop.auth.backend.AllowFirstUserDjangoBackend

    # Backend to synchronize user-group membership with
    ## user_group_membership_synchronization_backend=desktop.auth.backend.LdapSynchronizationBackend

    ## pam_service=login

    # When using the desktop.auth.backend.RemoteUserDjangoBackend, this sets
    # the normalized name of the header that contains the remote user.
    # The HTTP header in the request is converted to a key by converting
    # all characters to uppercase, replacing any hyphens with underscores
    # and adding an HTTP_ prefix to the name. So, for example, if the header
    # is called Remote-User that would be configured as HTTP_REMOTE_USER
    #
    # Defaults to HTTP_REMOTE_USER
    ## remote_user_header=HTTP_REMOTE_USER

  # Configuration options for connecting to LDAP and Active Directory
  # -------------------------------------------------------------------
  [[ldap]]

    # The search base for finding users and groups
    ## base_dn="DC=mycompany,DC=com"

    # The NT domain to connect to (only for use with Active Directory)
    ## nt_domain=mycompany.com

    # URL of the LDAP server
    ## ldap_url=ldap://auth.mycompany.com

    # A PEM-format file containing certificates for the CA's that
    # Hue will trust for authentication over TLS.
    # The certificate for the CA that signed the
    # LDAP server certificate must be included among these certificates.
    # See more here http://www.openldap.org/doc/admin24/tls.html.
    ## ldap_cert=
    ## use_start_tls=true

    # Distinguished name of the user to bind as -- not necessary if the LDAP server
    # supports anonymous searches
    ## bind_dn="CN=ServiceAccount,DC=mycompany,DC=com"

    # Password of the bind user -- not necessary if the LDAP server supports
    # anonymous searches
    ## bind_password=

    # Pattern for searching for usernames -- Use <username> for the parameter
    # For use when using LdapBackend for Hue authentication
    ## ldap_username_pattern="uid=<username>,ou=People,dc=mycompany,dc=com"

    # Create users in Hue when they try to login with their LDAP credentials
    # For use when using LdapBackend for Hue authentication
    ## create_users_on_login = true

    # Use search bind authentication.
    ## search_bind_authentication=true

    [[[users]]]

      # Base filter for searching for users
      ## user_filter="objectclass=*"

      # The username attribute in the LDAP schema
      ## user_name_attr=sAMAccountName

      [[[groups]]]

      # Base filter for searching for groups
      ## group_filter="objectclass=*"

      # The username attribute in the LDAP schema
      ## group_name_attr=cn

  # Configuration options for specifying the Desktop Database.  For more info,
  # see http://docs.djangoproject.com/en/1.1/ref/settings/#database-engine
  # ------------------------------------------------------------------------
  [[database]]
    # Database engine is typically one of:
    # postgresql_psycopg2, mysql, or sqlite3
    #
    # Note that for sqlite3, 'name', below is a filename;
    # for other backends, it is the database name.
    ## engine=sqlite3
    ## host=
    ## port=
    ## user=
    ## password=
    ## name=desktop/desktop.db

  # Configuration options for specifying the Desktop session.
  # For more info, see https://docs.djangoproject.com/en/1.4/topics/http/sessions/
  # ------------------------------------------------------------------------
  [[session]]
    # The cookie containing the users' session ID will expire after this amount of time in seconds.
    ## ttl=60*60*24*14

    # The cookie containing the users' session ID will be secure.
    # Should only be enabled with HTTPS.
    ## secure=false

  # Configuration options for connecting to an external SMTP server
  # ------------------------------------------------------------------------
  [[smtp]]

    # The SMTP server information for email notification delivery
    host=localhost
    port=25
    user=
    password=

    # Whether to use a TLS (secure) connection when talking to the SMTP server
    tls=no

    # Default email address to use for various automated notification from Hue
    ## default_from_email=hue@localhost


  # Configuration options for Kerberos integration for secured Hadoop clusters
  # ------------------------------------------------------------------------
  [[kerberos]]

    # Path to Hue's Kerberos keytab file
    ## hue_keytab=
    # Kerberos principal name for Hue
    ## hue_principal=hue/hostname.foo.com
    # Path to kinit
    ## kinit_path=/path/to/kinit


  # Configuration options for using OAuthBackend login
  # ------------------------------------------------------------------------
  [[oauth]]
    # The Consumer key of the application
    ## consumer_key=XXXXXXXXXXXXXXXXXXXXX

    # The Consumer secret of the application
    ## consumer_secret=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

    # The Request token URL
    ## request_token_url=https://api.twitter.com/oauth/request_token

    # The Access token URL
    ## access_token_url=https://api.twitter.com/oauth/access_token

    # The Authorize URL
    ## authenticate_url=https://api.twitter.com/oauth/authorize


###########################################################################
# Settings to configure SAML
###########################################################################

[libsaml]
  # Xmlsec1 binary path. This program should be executable by the user running Hue.
  ## xmlsec_binary=/usr/local/bin/xmlsec1

  # Create users from SSO on login.
  ## create_users_on_login=true

  # Required attributes to ask for from IdP.
  # This requires a comma separated list.
  ## required_attributes=uid

  # Optional attributes to ask for from IdP.
  # This requires a comma separated list.
  ## optional_attributes=

  # IdP metadata in the form of a file. This is generally an XML file containing metadata that the Identity Provider generates.
  ## metadata_file=

  # Private key to encrypt metadata with.
  ## key_file=

  # Signed certificate to send along with encrypted metadata.
  ## cert_file=

  # A mapping from attributes in the response from the IdP to django user attributes.
  ## user_attribute_mapping={'uid':'username'}

  # Have Hue initiated authn requests be signed and provide a certificate.
  ## authn_requests_signed=false

  # Have Hue initiated logout requests be signed and provide a certificate.
  ## logout_requests_signed=false

###########################################################################
# Settings to configure your Hadoop cluster.
###########################################################################

[hadoop]

  # Configuration for HDFS NameNode
  # ------------------------------------------------------------------------
  [[hdfs_clusters]]
    # HA support by using HttpFs

    [[[default]]]
      # Enter the filesystem uri
      fs_defaultfs=hdfs://localhost:9000

      # NameNode logical name.
      ## logical_name=

      # Use WebHdfs/HttpFs as the communication mechanism.
      # This should be the web service root URL, such as
      # http://namenode:50070/webhdfs/v1
      webhdfs_url=http://localhost:50070/webhdfs/v1

      # Change this if your HDFS cluster is Kerberos-secured
      ## security_enabled=false

      # Settings about this HDFS cluster. If you install HDFS in a
      # different location, you need to set the following.

      # Defaults to $HADOOP_HDFS_HOME or /usr/lib/hadoop-hdfs
      hadoop_hdfs_home=/usr/local/hadoop-2.2.0

      # Defaults to $HADOOP_BIN or /usr/bin/hadoop
      hadoop_bin=/usr/local/hadoop-2.2.0/bin/hadoop

      # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
      hadoop_conf_dir=/usr/local/hadoop-2.2.0/etc/hadoop

  # Configuration for YARN (MR2)
  # ------------------------------------------------------------------------
  [[yarn_clusters]]

    [[[default]]]
      # Enter the host on which you are running the ResourceManager
      resourcemanager_host=localhost

      # The port where the ResourceManager IPC listens on
      resourcemanager_port=8032

      # Whether to submit jobs to this cluster
      submit_to=True

      # Change this if your YARN cluster is Kerberos-secured
      ## security_enabled=false

      # Settings about this MR2 cluster. If you install MR2 in a
      # different location, you need to set the following.

      # Defaults to $HADOOP_MR2_HOME or /usr/lib/hadoop-mapreduce
      hadoop_mapred_home=/usr/local/hadoop-2.2.0

      # Defaults to $HADOOP_BIN or /usr/bin/hadoop
      hadoop_bin=/usr/local/hadoop-2.2.0/bin/hadoop

      # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
      hadoop_conf_dir=/usr/local/hadoop-2.2.0/etc/hadoop

      # URL of the ResourceManager API
      resourcemanager_api_url=http://localhost:8088

      # URL of the ProxyServer API
      proxy_api_url=http://localhost:8088

      # URL of the HistoryServer API
      history_server_api_url=http://localhost:19888

  # Configuration for MapReduce (MR1)
  # ------------------------------------------------------------------------
  #[[mapred_clusters]]

  #  [[[default]]]
      # Enter the host on which you are running the Hadoop JobTracker
      #jobtracker_host=localhost

      # The port where the JobTracker IPC listens on
      #jobtracker_port=54311

      # JobTracker logical name.
      ## logical_name=

      # Thrift plug-in port for the JobTracker
      ## thrift_port=9290

      # Whether to submit jobs to this cluster
      #submit_to=True

      # Change this if your MapReduce cluster is Kerberos-secured
      ## security_enabled=false

      # Settings about this MR1 cluster. If you install MR1 in a
      # different location, you need to set the following.

      # Defaults to $HADOOP_MR1_HOME or /usr/lib/hadoop-0.20-mapreduce
      #hadoop_mapred_home=/usr/local/hadoop

      # Defaults to $HADOOP_BIN or /usr/bin/hadoop
      #hadoop_bin=/usr/local/hadoop/bin/hadoop

      # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
      #hadoop_conf_dir=/usr/local/hadoop/conf

    # HA support by specifying multiple clusters
    # e.g.

    # [[[ha]]]
      # Enter the host on which you are running the failover JobTracker
      # jobtracker_host=localhost-ha


###########################################################################
# Settings to configure liboozie
###########################################################################

#[liboozie]
  # The URL where the Oozie service runs on. This is required in order for
  # users to submit jobs.
  ## oozie_url=http://localhost:11000/oozie

  # Requires FQDN in oozie_url if enabled
  ## security_enabled=false

  # Location on HDFS where the workflows/coordinator are deployed when submitted.
  ## remote_deployement_dir=/user/hue/oozie/deployments


###########################################################################
# Settings to configure the Oozie app
###########################################################################

#[oozie]
  # Location on local FS where the examples are stored.
  ## local_data_dir=..../examples

  # Location on local FS where the data for the examples is stored.
  ## sample_data_dir=...thirdparty/sample_data

  # Location on HDFS where the oozie examples and workflows are stored.
  ## remote_data_dir=/user/hue/oozie/workspaces

  # Maximum number of Oozie workflows or coordinators to retrieve in one API call.
  ## oozie_jobs_count=100


###########################################################################
# Settings to configure Beeswax with Hive
###########################################################################

[beeswax]

  # Host where Hive server Thrift daemon is running.
  # If Kerberos security is enabled, use fully-qualified domain name (FQDN).
  hive_server_host=localhost

  # Port where HiveServer2 Thrift server runs on.
  hive_server_port=10000

  # Hive configuration directory, where hive-site.xml is located
  hive_conf_dir=/usr/local/hive-0.12.0/conf

  # Timeout in seconds for thrift calls to Hive service
  ## server_conn_timeout=120

  # Path to HiveServer2 start script
  hive_server_bin=/usr/local/hive-0.12.0/bin/hiveserver2

  # Set a LIMIT clause when browsing a partitioned table.
  # A positive value will be set as the LIMIT. If 0 or negative, do not set any limit.
  ## browse_partitioned_table_limit=250


###########################################################################
# Settings to configure Pig
###########################################################################

#[pig]
  # Location of piggybank.jar on local filesystem.
  ## local_sample_dir=/usr/share/hue/apps/pig/examples

  # Location piggybank.jar will be copied to in HDFS.
  ## remote_data_dir=/user/hue/pig/examples


###########################################################################
# Settings to configure Sqoop
###########################################################################

#[sqoop]
  # Sqoop server URL
  ## server_url=http://localhost:12000/sqoop


###########################################################################
# Settings to configure Proxy
###########################################################################

[proxy]
  # Comma-separated list of regular expressions,
  # which match 'host:port' of requested proxy target.
  ## whitelist=(localhost|127\.0\.0\.1):(50030|50070|50060|50075)

  # Comma-separated list of regular expressions,
  # which match any prefix of 'host:port/path' of requested proxy target.
  # This does not support matching GET parameters.
  ## blacklist=()


###########################################################################
# Settings to configure Impala
###########################################################################

#[impala]
  # Host of the Impala Server (one of the Impalad)
  ## server_host=localhost

  # Port of the Impala Server
  ## server_port=21050

  # Kerberos principal
  ## impala_principal=impala/hostname.foo.com

  # Turn on/off impersonation mechanism when talking to Impala
  ## impersonation_enabled=False


###########################################################################
# Settings to configure Hbase
###########################################################################

#[hbase]
  # Comma-separated list of HBase Thrift servers for
  # clusters in the format of '(name|host:port)'.
  ## hbase_clusters=(Cluster|localhost:9090)

  # Hard limit of rows or columns per row fetched before truncating.
  ## truncate_limit = 500


###########################################################################
# Settings to configure Solr Search
###########################################################################

[search]

  # URL of the Solr Server
  ## solr_url=http://localhost:8983/solr/

  # Requires FQDN in solr_url if enabled
  ## security_enabled=false

  ## Query sent when no term is entered
  ## empty_query=*:*


###########################################################################
# Settings to configure Job Designer
###########################################################################

[jobsub]

  # Location on local FS where examples and template are stored.
  ## local_data_dir=..../data

  # Location on local FS where sample data is stored
  ## sample_data_dir=...thirdparty/sample_data


###########################################################################
# Settings to configure Job Browser
###########################################################################

[jobbrowser]
  # Share submitted jobs information with all users. If set to false,
  # submitted jobs are visible only to the owner and administrators.
  ## share_jobs=true


###########################################################################
# Settings to configure the Zookeeper application.
###########################################################################

[zookeeper]

  [[clusters]]

    [[[default]]]
      # Zookeeper ensemble. Comma separated list of Host/Port.
      # e.g. localhost:2181,localhost:2182,localhost:2183
      ## host_ports=localhost:2181

      # The URL of the REST contrib service (required for znode browsing)
      ## rest_url=http://localhost:9998


###########################################################################
# Settings for the User Admin application
###########################################################################

[useradmin]
  # The name of the default user group that users will be a member of
  ## default_user_group=default



Permissions

  • The HDFS directory identified by the hadoop.tmp.dir will have to be writable by everyone.
  • The embedded metastore directory under hive should be writable by everyone.
Todo
     Coming soon, user authentication configuration in hue.

Thursday, September 12, 2013

Apache ODE on Tomcat 7 with Bitronix Transaction Manager



With the release of Apache ODE 1.3.6 and onwards, one needs to integrate a transaction manager into Tomcat in order to use an external database configuration for ODE.

Here I will list down the steps to integrate one of the open source transaction managers called Bitronix in tomcat and the associated changes in ODE web application.

Step 1: Download latest bitronix archive from here.

Step 2: Copy these jars available in the bitronix archive to Tomcat\lib 
  • btm-2.1.4.jar
  • btm-tomcat55-lifecycle-2.1.4.jar
  • geronimo-jta_1.1_spec-1.1.1.jar
  • slf4j-api-1.6.4.jar
  • slf4j-jdk14-1.6.4.jar 

Step 3: Follow Step 2 as instructed in the original guide of bitronix.

Step 4: Configure data sources
Create a file named resources.properties under the <tomcat>/conf/ directory with the below contents:
This config is for MYSQL database
 resource.ds2.className=bitronix.tm.resource.jdbc.lrc.LrcXADataSource  
 resource.ds2.uniqueName=jdbc/ode  
 resource.ds2.minPoolSize=10  
 resource.ds2.maxPoolSize=50  
 resource.ds2.driverProperties.driverClassName=com.mysql.jdbc.Driver  
 resource.ds2.driverProperties.url=jdbc:mysql://localhost:3306/ode  
 resource.ds2.driverProperties.user=root  
 resource.ds2.driverProperties.password=mysql  
 resource.ds2.allowLocalTransactions=true  
 resource.ds2.shareTransactionConnections=true  

Step 5: Associate the datasource created in the previous step for ODE
Create a file named ode.xml under <tomcat>/conf/Catalina/localhost/ directory with the below given contents:
 <Context reloadable="true" crossContext="true">    
      <Resource   
           name="jdbc/ode"   
           auth="Container" type="javax.sql.DataSource"   
           factory="bitronix.tm.resource.ResourceObjectFactory"   
           uniqueName="jdbc/ode" />  
 </Context>  

Step 6: Change the web.xml of ODE
Add these lines in the web.xml,
 <resource-ref>  
     <res-ref-name>jdbc/ode</res-ref-name>  
     <res-type>javax.sql.DataSource</res-type>  
     <res-auth>Container</res-auth>  
     <res-sharing-scope>Shareable</res-sharing-scope>  
 </resource-ref>  

Step 7: Remove duplicate jars
Remove geronimo-jta_1.1_spec-1.1.jar files from ode web application lib directory

Step 8: Edit ode-axis2.properties
 ode-axis2.tx.factory.class=org.apache.ode.axis2.util.TomcatFactory  
 ode-axis2.db.mode=EXTERNAL  
 ode-axis2.db.ext.dataSource=java:comp/env/jdbc/ode   

Wednesday, December 21, 2011

Understanding the Axis2 Non Blocking API


The Non blocking api of axis2 facilitates asynchronous communication at the API level and at the Transport level.
This document provides an understanding of how it works. The example in question uses dual transport channel.
For this feature to work it is necessary to enable WS-Addressing on both the client and the server hosting the external web services.

Scenario 1: The external web service being invoked provides response in the same execution thread. 

                      Fig: 1                                                  


1) The axis client program prepares the request and invokes the  sendReceiveNonBlocking(payload,callback) on the ServiceClient.

2) The call is propagated to the AxisEngine, which then starts an HTTP transport listener on a predefined port specified in axis2.xml and registers the callback object on the listener. The response for the request would be communicated to this callback listener at a later point.

3) A separate request execution thread is created and all the necessary objects are passed to it for further execution.

4) The call returns back to the client program. The client program can continue with processing without having to wait for the response. Thus we achieve API level asynchrony.

5) The request execution thread opens a transport out channel and sends the request data to the external web service.

6) The external web service upon completion of its operation, communicates the response back to the callback listener through another transport out channel. Thus we have dual channels for communication.

7) Finally HTTP 202 Ack is sent to the request execution thread through the initial channel in which the request was made. It is observed that the response and the 202 Ack comes back at almost the same time.

Since the request channel is expecting the acknowledgment all this time, there is a possibility that it can time out if the external web service takes more time to respond. This problem can be solved partially by setting the connection timeout to an appropriate value.

//connection timeout set to 60 seconds
options.setTimeOutInMilliSeconds(60 * 1000);


Scenario 2: The external web service being invoked provides response in a separate execution thread.

To enable this feature on a web service developed on axis2, an additional parameter has to be added in the services.xml for the web service.
<parameter name="messageReceiver.invokeOnSeparateThread"> true </parameter>
 

Fig:2
                                                                 

1) The axis client program prepares the request and invokes the sendReceiveNonBlocking(payload,callback) on the ServiceClient.

2) The call is propagated to the AxisEngine, which then starts an HTTP transport listener on a predefined port specified in axis2.xml and registers the callback object on the listener. The response for the request would be communicated to this callback listener at a later point.

3) A separate request execution thread is created and all the necessary objects are passed to it for further execution.

4) The call returns back to the client program. The client program can continue with processing without having to wait for the response. Thus we achieve API level asynchrony.

5) The request execution thread opens a transport out channel and sends the request data to the external web service.

6) HTTP 202 Ack is sent to the request execution thread immediately, and thus closing this channel.

7) The external web service upon completion of its operation, communicates the response back to the callback listener from a separate thread through another transport out channel. Thus we have dual channels for communication.

Since the request channel is acknowledged immediately and the response is sent from another execution thread over another transport channel, no connection timeout issues are encountered. Thus we achieve fully asynchronous behavior.


Sample client code: EchoNonBlockingDualClient.java
        ServiceClient sender = null;
        try {
            OMElement payload = ClientUtil.getEchoOMElement();

            Options options = new Options();
            options.setTo(targetEPR);
            options.setAction("urn:echo");
            options.setTransportInProtocol(Constants.TRANSPORT_HTTP);
            options.setUseSeparateListener(true);            
            // this is the action mapping we put within the service.xml
            options.setAction("urn:echo");  

            //timeout needs to be set incase of scenario 1 explained above
            options.setTimeOutInMilliSeconds(60 * 1000);

            //Callback to handle the response
            MyAxisCallback callback = new MyAxisCallback();
           
            //Non-Blocking Invocation
            sender = new ServiceClient();
            sender.engageModule(Constants.MODULE_ADDRESSING);
            sender.setOptions(options);
            
            sender.sendReceiveNonBlocking(payload, callback);
            
            int i=0;
            //Wait till the callback receives the response.           
            while (!callback.isComplete()) {
                Thread.sleep(1000);
                System.out.println("Waiting since:"+ ++i + " seconds");                
            }
            
        } catch (AxisFault axisFault) {
            axisFault.printStackTrace();
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            try {
                System.out.println("cleaning called");
                sender.cleanup();
                System.out.println("cleaning completed");
            } catch (AxisFault axisFault) {
                //have to ignore this
                System.out.println("cleaning error");                 
            }
        }


The use of separate request and response channels are indicated to the axis engine by setting the options 

options.setUseSeparateListener(true);

To check whether the response has been communicated to the callback from the external web service, we can poll the callback like given below.
 

while (!callback.isComplete()) {
    Thread.sleep(1000);
    System.out.println("Waiting since:"+ ++i + " seconds");               
}

Finally, we have to close all the resources which includes the client side callback listener. This is achieved by

finally{
    try{
          sender.cleanup();
    }catch(AxisFault af){
          //ignore this
    }
}
 

Here are the steps to compile and run the sample:

1) Download the appropriate Axis2 binary from the Apache Axis2 website. The binary used to run this sample was axis2-1.6.0. Extract the archive to the local drive.
 
2) Make sure that the ws-addressing module is enabled in the axis2.xml under axis2-1.6.0/conf directory.
Uncomment the line <module ref="addressing" />

3) cd axis2-1.6.0/samples/userguide

4) axis2-1.6.0/samples/userguide> ant
The deployment archives are copied to the axis2-1.6.0/repository/services

5) Run the axis2 server by executing the command at the terminal. 
axis2-1.6.0/bin> ./axis2server.sh

6) Run the client program by executing the command  
axis2-1.6.0/samples/userguide> ant run.client.nonblockingdual



Sample Request-Response:
POST /axis2/services/MyService HTTP/1.1
Content-Type: text/xml; charset=UTF-8
SOAPAction: "urn:echo"
User-Agent: Axis2
Host: 127.0.0.1:8080
Transfer-Encoding: chunked

288
<?xml version='1.0' encoding='UTF-8'?>
    <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
        <soapenv:Header xmlns:wsa="http://www.w3.org/2005/08/addressing">
            <wsa:To>http://127.0.0.1:8080/axis2/services/MyService</wsa:To>
            <wsa:ReplyTo>
                <wsa:Address>http://10.10.2.134:7070/axis2/services/anonService2/</wsa:Address>
            </wsa:ReplyTo>
            <wsa:MessageID>urn:uuid:b683fc84-a02e-49a4-a6a6-a4f5ca6b962d</wsa:MessageID>
            <wsa:Action>urn:echo</wsa:Action>
        </soapenv:Header>
        <soapenv:Body>
            <example1:echo xmlns:example1="http://example1.org/example1">
                <example1:Text>Axis2 Echo String </example1:Text>
            </example1:echo>
        </soapenv:Body>
    </soapenv:Envelope>
0
Response:
<?xml version='1.0' encoding='utf-8'?>
    <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
         <soapenv:Header xmlns:wsa="http://www.w3.org/2005/08/addressing">
             <wsa:To>http://10.10.2.134:7070/axis2/services/anonService2/</wsa:To>
             <wsa:ReplyTo><wsa:Address>http://www.w3.org/2005/08/addressing/none</wsa:Address></wsa:ReplyTo>
             <wsa:MessageID>urn:uuid:bec091be-7b92-40e4-8416-2b49863ceb53</wsa:MessageID>
             <wsa:Action>urn:echoResponse</wsa:Action>
             <wsa:RelatesTo>urn:uuid:b683fc84-a02e-49a4-a6a6-a4f5ca6b962d</wsa:RelatesTo>
         </soapenv:Header>
         <soapenv:Body>
             <example1:echo xmlns:example1="http://example1.org/example1">
                 <example1:Text>Axis2 Echo String </example1:Text>
             </example1:echo>
         </soapenv:Body>
     </soapenv:Envelope>